# HG changeset patch
# User Julien Cristau
# Date 1447321948 -3600
# Node ID 5669e136860b6a7edb0e6e42f7a33b08e2d9f65a
# Parent b6cfbcdccc7f790b5956a0b0875b0b773064e65a
# Parent 166c6f7b1be45228413708881ec953e4e695136a
merge head published by accident

diff -r b6cfbcdccc7f -r 5669e136860b entities/__init__.py
--- a/entities/__init__.py	Mon Nov 09 11:11:31 2015 +0100
+++ b/entities/__init__.py	Thu Nov 12 10:52:28 2015 +0100
@@ -27,6 +27,12 @@
 from cubicweb.entity import Entity
 
 
+def chunks(seq, step):
+    """See http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python)"""
+    return (seq[i:i+step]
+            for i in xrange(0, len(seq), step))
+
+
 class AnyEntity(Entity):
     """an entity instance has e_schema automagically set on the class and
     instances have access to their issuing cursor
@@ -45,21 +51,25 @@
 
     @classmethod
     def cw_fti_index_rql_queries(cls, req):
-        """return the list of rql queries to fetch entities to FT-index
+        """return an iterator on rql queries to fetch entities to FT-index
 
-        The default is to fetch all entities at once and to prefetch
-        indexable attributes but one could imagine iterating over
+        The default is to fetch entities 1000 per 1000 and to prefetch
+        indexable attributes, but one could imagine iterating over
         "smaller" resultsets if the table is very big or returning a
         subset of entities that match some business-logic condition.
         """
-        restrictions = ['X is %s' % cls.__regid__]
+        restrictions = []
         selected = ['X']
         for attrschema in sorted(cls.e_schema.indexable_attributes()):
             varname = attrschema.type.upper()
             restrictions.append('X %s %s' % (attrschema, varname))
             selected.append(varname)
-        return ['Any %s WHERE %s' % (', '.join(selected),
-                                     ', '.join(restrictions))]
+        rset = req.execute('Any EID WHERE X eid EID, X is %s' % cls.__regid__)
+        for rows in chunks(rset.rows, 1000):
+            q_restrictions = restrictions + [
+                'X eid IN (%s)' % ', '.join(str(r[0]) for r in rows)]
+            yield 'Any %s WHERE %s' % (', '.join(selected),
+                                       ', '.join(q_restrictions))
 
     # meta data api ###########################################################
 
diff -r b6cfbcdccc7f -r 5669e136860b server/checkintegrity.py
--- a/server/checkintegrity.py	Mon Nov 09 11:11:31 2015 +0100
+++ b/server/checkintegrity.py	Thu Nov 12 10:52:28 2015 +0100
@@ -124,7 +124,10 @@
     source = repo.system_source
     for eschema in etypes:
         etype_class = cnx.vreg['etypes'].etype_class(str(eschema))
-        for fti_rql in etype_class.cw_fti_index_rql_queries(cnx):
+        queries = list(etype_class.cw_fti_index_rql_queries(cnx))
+        for i, fti_rql in enumerate(queries):
+            if withpb:
+                pb.text = "%s: %s%%" % (str(eschema), i * 100 / len(queries))
             rset = cnx.execute(fti_rql)
             source.fti_index_entities(cnx, rset.entities())
         # clear entity cache to avoid high memory consumption on big tables
diff -r b6cfbcdccc7f -r 5669e136860b statsd_logger.py
--- a/statsd_logger.py	Mon Nov 09 11:11:31 2015 +0100
+++ b/statsd_logger.py	Thu Nov 12 10:52:28 2015 +0100
@@ -78,7 +78,7 @@
     packed = None
    for family in (socket.AF_INET6, socket.AF_INET):
         try:
-            packed = socket.inet_pton(family, address)
+            packed = socket.inet_pton(family, address[0])
             break
         except socket.error:
             continue