merge head published by accident
author: Julien Cristau <julien.cristau@logilab.fr>
Thu, 12 Nov 2015 10:52:28 +0100
changeset 10840 5669e136860b
parent 10838 b6cfbcdccc7f (current diff)
parent 10839 166c6f7b1be4 (diff)
child 10841 84a0505cb70b
merge head published by accident
entities/__init__.py
server/checkintegrity.py
--- a/entities/__init__.py	Mon Nov 09 11:11:31 2015 +0100
+++ b/entities/__init__.py	Thu Nov 12 10:52:28 2015 +0100
@@ -27,6 +27,12 @@
 from cubicweb.entity import Entity
 
 
+def chunks(seq, step):
+    """See http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python)"""
+    return (seq[i:i+step]
+            for i in xrange(0, len(seq), step))
+
+
 class AnyEntity(Entity):
     """an entity instance has e_schema automagically set on the class and
     instances have access to their issuing cursor
@@ -45,21 +51,25 @@
 
     @classmethod
     def cw_fti_index_rql_queries(cls, req):
-        """return the list of rql queries to fetch entities to FT-index
+        """return an iterator on rql queries to fetch entities to FT-index
 
-        The default is to fetch all entities at once and to prefetch
-        indexable attributes but one could imagine iterating over
+        The default is to fetch entities 1000 per 1000 and to prefetch
+        indexable attributes, but one could imagine iterating over
         "smaller" resultsets if the table is very big or returning
         a subset of entities that match some business-logic condition.
         """
-        restrictions = ['X is %s' % cls.__regid__]
+        restrictions = []
         selected = ['X']
         for attrschema in sorted(cls.e_schema.indexable_attributes()):
             varname = attrschema.type.upper()
             restrictions.append('X %s %s' % (attrschema, varname))
             selected.append(varname)
-        return ['Any %s WHERE %s' % (', '.join(selected),
-                                     ', '.join(restrictions))]
+        rset = req.execute('Any EID WHERE X eid EID, X is %s' % cls.__regid__)
+        for rows in chunks(rset.rows, 1000):
+            q_restrictions = restrictions + [
+                'X eid IN (%s)' % ', '.join(str(r[0]) for r in rows)]
+            yield 'Any %s WHERE %s' % (', '.join(selected),
+                                       ', '.join(q_restrictions))
 
     # meta data api ###########################################################
 
--- a/server/checkintegrity.py	Mon Nov 09 11:11:31 2015 +0100
+++ b/server/checkintegrity.py	Thu Nov 12 10:52:28 2015 +0100
@@ -124,7 +124,10 @@
     source = repo.system_source
     for eschema in etypes:
         etype_class = cnx.vreg['etypes'].etype_class(str(eschema))
-        for fti_rql in etype_class.cw_fti_index_rql_queries(cnx):
+        queries = list(etype_class.cw_fti_index_rql_queries(cnx))
+        for i, fti_rql in enumerate(queries):
+            if withpb:
+                pb.text = "%s: %s%%" % (str(eschema), i * 100 / len(queries))
             rset = cnx.execute(fti_rql)
             source.fti_index_entities(cnx, rset.entities())
             # clear entity cache to avoid high memory consumption on big tables
--- a/statsd_logger.py	Mon Nov 09 11:11:31 2015 +0100
+++ b/statsd_logger.py	Thu Nov 12 10:52:28 2015 +0100
@@ -78,7 +78,7 @@
     packed = None
     for family in (socket.AF_INET6, socket.AF_INET):
         try:
-            packed = socket.inet_pton(family, address)
+            packed = socket.inet_pton(family, address[0])
             break
         except socket.error:
             continue