1557 * a list of rows (as tuples with one element per column) |
1557 * a list of rows (as tuples with one element per column) |
1558 |
1558 |
1559 Tables are saved in chunks in different files in order to prevent |
1559 Tables are saved in chunks in different files in order to prevent |
1560 a too high memory consumption. |
1560 a too high memory consumption. |
1561 """ |
1561 """ |
|
1562 blocksize = 100 |
|
1563 |
1562 def __init__(self, source): |
1564 def __init__(self, source): |
1563 """ |
1565 """ |
1564 :param: source an instance of the system source |
1566 :param: source an instance of the system source |
1565 """ |
1567 """ |
1566 self._source = source |
1568 self._source = source |
1640 self.cursor.execute(nb_lines_sql) |
1642 self.cursor.execute(nb_lines_sql) |
1641 rowcount = self.cursor.fetchone()[0] |
1643 rowcount = self.cursor.fetchone()[0] |
1642 sql = 'SELECT * FROM %s' % table |
1644 sql = 'SELECT * FROM %s' % table |
1643 columns, rows_iterator = self._get_cols_and_rows(sql) |
1645 columns, rows_iterator = self._get_cols_and_rows(sql) |
1644 self.logger.info('number of rows: %d', rowcount) |
1646 self.logger.info('number of rows: %d', rowcount) |
1645 if table.startswith('cw_'): # entities |
1647 blocksize = self.blocksize |
1646 blocksize = 2000 |
|
1647 else: # relations and metadata |
|
1648 blocksize = 10000 |
|
1649 if rowcount > 0: |
1648 if rowcount > 0: |
1650 for i, start in enumerate(xrange(0, rowcount, blocksize)): |
1649 for i, start in enumerate(xrange(0, rowcount, blocksize)): |
1651 rows = list(itertools.islice(rows_iterator, blocksize)) |
1650 rows = list(itertools.islice(rows_iterator, blocksize)) |
1652 serialized = self._serialize(table, columns, rows) |
1651 serialized = self._serialize(table, columns, rows) |
1653 archive.writestr('tables/%s.%04d' % (table, i), serialized) |
1652 archive.writestr('tables/%s.%04d' % (table, i), serialized) |