fix extid handling: ensure encoded string is given, and store them as base64 (see note in native.py). stable
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 27 May 2009 11:24:35 +0200
branch stable
changeset 1952 8e19c813750d
parent 1951 f28e7f300d3f
child 1953 8834399f076e
fix extid handling: ensure encoded string is given, and store them as base64 (see note in native.py). Bump version to force migration on dev instances.
__pkginfo__.py
misc/migration/bootstrapmigration_repository.py
server/sources/ldapuser.py
server/sources/native.py
server/sources/pyrorql.py
server/test/unittest_multisources.py
--- a/__pkginfo__.py	Wed May 27 11:19:37 2009 +0200
+++ b/__pkginfo__.py	Wed May 27 11:24:35 2009 +0200
@@ -6,7 +6,7 @@
 distname = "cubicweb"
 modname = "cubicweb"
 
-numversion = (3, 2, 1)
+numversion = (3, 2, 2)
 version = '.'.join(str(num) for num in numversion)
 
 license = 'LGPL v2'
--- a/misc/migration/bootstrapmigration_repository.py	Wed May 27 11:19:37 2009 +0200
+++ b/misc/migration/bootstrapmigration_repository.py	Wed May 27 11:24:35 2009 +0200
@@ -7,6 +7,15 @@
 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
 """
 
+if applcubicwebversion < (3, 2, 2) and cubicwebversion >= (3, 2, 1):
+   from base64 import b64encode
+   for table in ('entities', 'deleted_entities'):
+      for eid, extid in sql('SELECT eid, extid FROM %s WHERE extid is NOT NULL'
+                            % table, ask_confirm=False):
+         sql('UPDATE %s SET extid=%%(extid)s WHERE eid=%%(eid)s' % table,
+             {'extid': b64encode(extid), 'eid': eid}, ask_confirm=False)
+   checkpoint()
+
 if applcubicwebversion < (3, 2, 0) and cubicwebversion >= (3, 2, 0):
    add_cube('card', update_database=False)
 
--- a/server/sources/ldapuser.py	Wed May 27 11:19:37 2009 +0200
+++ b/server/sources/ldapuser.py	Wed May 27 11:24:35 2009 +0200
@@ -20,6 +20,8 @@
 FOR A PARTICULAR PURPOSE.
 """
 
+from base64 import b64decode
+
 from logilab.common.textutils import get_csv
 from rql.nodes import Relation, VariableRef, Constant, Function
 
@@ -166,7 +168,8 @@
         try:
             cursor = session.system_sql("SELECT eid, extid FROM entities WHERE "
                                         "source='%s'" % self.uri)
-            for eid, extid in cursor.fetchall():
+            for eid, b64extid in cursor.fetchall():
+                extid = b64decode(b64extid)
                 # if no result found, _search automatically delete entity information
                 res = self._search(session, extid, BASE)
                 if res:
--- a/server/sources/native.py	Wed May 27 11:19:37 2009 +0200
+++ b/server/sources/native.py	Wed May 27 11:24:35 2009 +0200
@@ -1,5 +1,11 @@
 """Adapters for native cubicweb sources.
 
+Notes:
+* extid (aka external id, the primary key of an entity in the external source
+  from which it comes) is stored in a varchar column encoded as a base64
+  string. This is because it should actually be Bytes but we want an index on
+  it for fast querying.
+  
 :organization: Logilab
 :copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
@@ -8,6 +14,7 @@
 
 from threading import Lock
 from datetime import datetime
+from base64 import b64decode, b64encode
 
 from logilab.common.cache import Cache
 from logilab.common.configuration import REQUIRED
@@ -456,14 +463,18 @@
             raise UnknownEid(eid)
         if res is None:
             raise UnknownEid(eid)
+        if res[-1] is not None:
+            if not isinstance(res, list):
+                res = list(res)
+            res[-1] = b64decode(res[-1])
         return res
 
-    def extid2eid(self, session, source, lid):
-        """get eid from a local id. An eid is attributed if no record is found"""
+    def extid2eid(self, session, source, extid):
+        """get eid from an external id. Return None if no record found."""
+        assert isinstance(extid, str)
         cursor = session.system_sql('SELECT eid FROM entities WHERE '
                                     'extid=%(x)s AND source=%(s)s',
-                                    # str() necessary with pg 8.3
-                                    {'x': str(lid), 's': source.uri})
+                                    {'x': b64encode(extid), 's': source.uri})
         # XXX testing rowcount cause strange bug with sqlite, results are there
         #     but rowcount is 0
         #if cursor.rowcount > 0:
@@ -499,8 +510,11 @@
     def add_info(self, session, entity, source, extid=None):
         """add type and source info for an eid into the system table"""
         # begin by inserting eid/type/source/extid into the entities table
-        attrs = {'type': str(entity.e_schema), 'eid': entity.eid,
-                 'extid': extid, 'source': source.uri, 'mtime': datetime.now()}
+        if extid is not None:
+            assert isinstance(extid, str)
+            extid = b64encode(extid)
+        attrs = {'type': entity.id, 'eid': entity.eid, 'extid': extid,
+                 'source': source.uri, 'mtime': datetime.now()}
         session.system_sql(self.sqlgen.insert('entities', attrs), attrs)
 
     def delete_info(self, session, eid, etype, uri, extid):
@@ -510,6 +524,9 @@
         attrs = {'eid': eid}
         session.system_sql(self.sqlgen.delete('entities', attrs), attrs)
         if self.has_deleted_entitites_table:
+            if extid is not None:
+                assert isinstance(extid, str), type(extid)
+                extid = b64encode(extid)
             attrs = {'type': etype, 'eid': eid, 'extid': extid,
                      'source': uri, 'dtime': datetime.now()}
             session.system_sql(self.sqlgen.insert('deleted_entities', attrs), attrs)
--- a/server/sources/pyrorql.py	Wed May 27 11:19:37 2009 +0200
+++ b/server/sources/pyrorql.py	Wed May 27 11:24:35 2009 +0200
@@ -11,6 +11,7 @@
 
 from time import mktime
 from datetime import datetime
+from base64 import b64decode
 
 from Pyro.errors import PyroError, ConnectionClosedError
 
@@ -177,7 +178,7 @@
                 try:
                     exturi = cnx.describe(extid)[1]
                     if exturi == 'system' or not exturi in repo.sources_by_uri:
-                        eid = self.extid2eid(extid, etype, session)
+                        eid = self.extid2eid(str(extid), etype, session)
                         rset = session.eid_rset(eid, etype)
                         entity = rset.get_entity(0, 0)
                         entity.complete(entity.e_schema.indexable_attributes())
@@ -188,7 +189,8 @@
                     continue
             for etype, extid in deleted:
                 try:
-                    eid = self.extid2eid(extid, etype, session, insert=False)
+                    eid = self.extid2eid(str(extid), etype, session,
+                                         insert=False)
                     # entity has been deleted from external repository but is not known here
                     if eid is not None:
                         repo.delete_info(session, eid)
@@ -307,7 +309,8 @@
                             etype = descr[rowindex][colindex]
                             exttype, exturi, extid = cnx.describe(row[colindex])
                             if exturi == 'system' or not exturi in self.repo.sources_by_uri:
-                                eid = self.extid2eid(row[colindex], etype, session)
+                                eid = self.extid2eid(str(row[colindex]), etype,
+                                                     session)
                                 row[colindex] = eid
                             else:
                                 # skip this row
@@ -494,7 +497,7 @@
             # XXX what about optional relation or outer NOT EXISTS()
             raise
         except ReplaceByInOperator, ex:
-            rhs = 'IN (%s)' % ','.join(str(eid) for eid in ex.eids)
+            rhs = 'IN (%s)' % ','.join(eid for eid in ex.eids)
         self.need_translation = False
         self.current_operator = None
         if node.optional in ('right', 'both'):
@@ -586,6 +589,6 @@
                 # results
                 rows = cu.fetchall()
                 if rows:
-                    raise ReplaceByInOperator((r[0] for r in rows))
+                    raise ReplaceByInOperator((b64decode(r[0]) for r in rows))
             raise
 
--- a/server/test/unittest_multisources.py	Wed May 27 11:19:37 2009 +0200
+++ b/server/test/unittest_multisources.py	Wed May 27 11:24:35 2009 +0200
@@ -149,7 +149,7 @@
         self.execute('Any X ORDERBY DUMB_SORT(RF) WHERE X title RF')
 
     def test_in_eid(self):
-        iec1 = self.repo.extid2eid(self.repo.sources_by_uri['extern'], ec1,
+        iec1 = self.repo.extid2eid(self.repo.sources_by_uri['extern'], str(ec1),
                                    'Card', self.session)
         rset = self.execute('Any X WHERE X eid IN (%s, %s)' % (iec1, self.ic1))
         self.assertEquals(sorted(r[0] for r in rset.rows), sorted([iec1, self.ic1]))