1704 * a list of rows (as tuples with one element per column) |
1704 * a list of rows (as tuples with one element per column) |
1705 |
1705 |
1706 Tables are saved in chunks in different files in order to prevent |
1706 Tables are saved in chunks in different files in order to prevent |
1707 excessive memory consumption. |
1707 excessive memory consumption. |
1708 """ |
1708 """ |
|
1709 blocksize = 100 |
|
1710 |
1709 def __init__(self, source): |
1711 def __init__(self, source): |
1710 """ |
1712 """ |
1711 :param source: an instance of the system source
1713 :param source: an instance of the system source
1712 """ |
1714 """ |
1713 self._source = source |
1715 self._source = source |
1788 self.cursor.execute(nb_lines_sql) |
1790 self.cursor.execute(nb_lines_sql) |
1789 rowcount = self.cursor.fetchone()[0] |
1791 rowcount = self.cursor.fetchone()[0] |
1790 sql = 'SELECT * FROM %s' % table |
1792 sql = 'SELECT * FROM %s' % table |
1791 columns, rows_iterator = self._get_cols_and_rows(sql) |
1793 columns, rows_iterator = self._get_cols_and_rows(sql) |
1792 self.logger.info('number of rows: %d', rowcount) |
1794 self.logger.info('number of rows: %d', rowcount) |
1793 if table.startswith('cw_'): # entities |
1795 blocksize = self.blocksize |
1794 blocksize = 2000 |
|
1795 else: # relations and metadata |
|
1796 blocksize = 10000 |
|
1797 if rowcount > 0: |
1796 if rowcount > 0: |
1798 for i, start in enumerate(xrange(0, rowcount, blocksize)): |
1797 for i, start in enumerate(xrange(0, rowcount, blocksize)): |
1799 rows = list(itertools.islice(rows_iterator, blocksize)) |
1798 rows = list(itertools.islice(rows_iterator, blocksize)) |
1800 serialized = self._serialize(table, columns, rows) |
1799 serialized = self._serialize(table, columns, rows) |
1801 archive.writestr('tables/%s.%04d' % (table, i), serialized) |
1800 archive.writestr('tables/%s.%04d' % (table, i), serialized) |