dataimport/stores.py
changeset 11057 0b59724cb3f2
parent 11052 058bb3dc685f
child 11058 23eb30449fe5
equal deleted inserted replaced
11052:058bb3dc685f 11057:0b59724cb3f2
     1 # copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     3 #
       
     4 # This file is part of CubicWeb.
       
     5 #
       
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
       
     7 # terms of the GNU Lesser General Public License as published by the Free
       
     8 # Software Foundation, either version 2.1 of the License, or (at your option)
       
     9 # any later version.
       
    10 #
       
    11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT
       
    12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
       
    13 # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
       
    14 # details.
       
    15 #
       
    16 # You should have received a copy of the GNU Lesser General Public License along
       
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
       
    18 """
       
    19 Stores are responsible to insert properly formatted entities and relations into the database. They
       
    20 have the following API::
       
    21 
       
    22     >>> user_eid = store.prepare_insert_entity('CWUser', login=u'johndoe')
       
     23     >>> group_eid = store.prepare_insert_entity('CWGroup', name=u'unknown')
       
    24     >>> store.prepare_insert_relation(user_eid, 'in_group', group_eid)
       
    25     >>> store.flush()
       
    26     >>> store.commit()
       
    27     >>> store.finish()
       
    28 
       
     29 Some stores **require a flush** to copy data in the database, so if you want to have store
       
     30 independent code you should explicitly call it. (There may be multiple flushes during the
       
    31 process, or only one at the end if there is no memory issue). This is different from the
       
     32 commit which validates the database transaction. Finally, the `finish()` method should be called in
       
    33 case the store requires additional work once everything is done.
       
    34 
       
    35 * ``prepare_insert_entity(<entity type>, **kwargs) -> eid``: given an entity
       
    36   type, attributes and inlined relations, return the eid of the entity to be
       
    37   inserted, *with no guarantee that anything has been inserted in database*,
       
    38 
       
    39 * ``prepare_update_entity(<entity type>, eid, **kwargs) -> None``: given an
       
    40   entity type and eid, promise for update given attributes and inlined
       
     41   relations *with no guarantee that anything has been updated in database*,
       
    42 
       
    43 * ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a
       
    44   relation ``rtype`` should be added between entities with eids ``eid_from``
       
    45   and ``eid_to``. Similar to ``prepare_insert_entity()``, *there is no
       
    46   guarantee that the relation will be inserted in database*,
       
    47 
       
    48 * ``flush() -> None``: flush any temporary data to database. May be called
       
    49   several times during an import,
       
    50 
       
    51 * ``commit() -> None``: commit the database transaction,
       
    52 
       
    53 * ``finish() -> None``: additional stuff to do after import is terminated.
       
    54 
       
    55 .. autoclass:: cubicweb.dataimport.stores.RQLObjectStore
       
    56 .. autoclass:: cubicweb.dataimport.stores.NoHookRQLObjectStore
       
    57 .. autoclass:: cubicweb.dataimport.stores.MetaGenerator
       
    58 """
       
    59 import inspect
       
    60 import warnings
       
    61 from datetime import datetime
       
    62 from copy import copy
       
    63 
       
    64 from six import text_type
       
    65 
       
    66 from logilab.common.deprecation import deprecated
       
    67 from logilab.common.decorators import cached
       
    68 
       
    69 from cubicweb.schema import META_RTYPES, VIRTUAL_RTYPES
       
    70 from cubicweb.server.edition import EditedEntity
       
    71 
       
    72 
       
    73 class RQLObjectStore(object):
       
    74     """Store that works by making RQL queries, hence with all the cubicweb's machinery activated.
       
    75     """
       
    76 
       
    77     def __init__(self, cnx, commit=None):
       
    78         if commit is not None:
       
    79             warnings.warn('[3.19] commit argument should not be specified '
       
    80                           'as the cnx object already provides it.',
       
    81                           DeprecationWarning, stacklevel=2)
       
    82         self._cnx = cnx
       
    83         self._commit = commit or cnx.commit
       
    84         # XXX 3.21 deprecated attributes
       
    85         self.eids = {}
       
    86         self.types = {}
       
    87 
       
    88     def rql(self, *args):
       
    89         """Execute a RQL query. This is NOT part of the store API."""
       
    90         return self._cnx.execute(*args)
       
    91 
       
    92     def prepare_insert_entity(self, *args, **kwargs):
       
    93         """Given an entity type, attributes and inlined relations, returns the inserted entity's
       
    94         eid.
       
    95         """
       
    96         entity = self._cnx.create_entity(*args, **kwargs)
       
    97         self.eids[entity.eid] = entity
       
    98         self.types.setdefault(args[0], []).append(entity.eid)
       
    99         return entity.eid
       
   100 
       
   101     def prepare_update_entity(self, etype, eid, **kwargs):
       
   102         """Given an entity type and eid, updates the corresponding entity with specified attributes
       
   103         and inlined relations.
       
   104         """
       
   105         entity = self._cnx.entity_from_eid(eid)
       
   106         assert entity.cw_etype == etype, 'Trying to update with wrong type %s' % etype
       
   107         # XXX some inlined relations may already exists
       
   108         entity.cw_set(**kwargs)
       
   109 
       
   110     def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
       
   111         """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
       
   112         and ``eid_to``.
       
   113         """
       
   114         self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
       
   115                  {'x': int(eid_from), 'y': int(eid_to)})
       
   116 
       
   117     def flush(self):
       
   118         """Nothing to flush for this store."""
       
   119         pass
       
   120 
       
   121     def commit(self):
       
   122         """Commit the database transaction."""
       
   123         return self._commit()
       
   124 
       
   125     def finish(self):
       
   126         """Nothing to do once import is terminated for this store."""
       
   127         pass
       
   128 
       
   129     @property
       
   130     def session(self):
       
   131         warnings.warn('[3.19] deprecated property.', DeprecationWarning, stacklevel=2)
       
   132         return self._cnx.repo._get_session(self._cnx.sessionid)
       
   133 
       
   134     @deprecated("[3.19] use cnx.find(*args, **kwargs).entities() instead")
       
   135     def find_entities(self, *args, **kwargs):
       
   136         return self._cnx.find(*args, **kwargs).entities()
       
   137 
       
   138     @deprecated("[3.19] use cnx.find(*args, **kwargs).one() instead")
       
   139     def find_one_entity(self, *args, **kwargs):
       
   140         return self._cnx.find(*args, **kwargs).one()
       
   141 
       
   142     @deprecated('[3.21] use prepare_insert_entity instead')
       
   143     def create_entity(self, *args, **kwargs):
       
   144         eid = self.prepare_insert_entity(*args, **kwargs)
       
   145         return self._cnx.entity_from_eid(eid)
       
   146 
       
   147     @deprecated('[3.21] use prepare_insert_relation instead')
       
   148     def relate(self, eid_from, rtype, eid_to, **kwargs):
       
   149         self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
       
   150 
       
   151 
       
   152 class NoHookRQLObjectStore(RQLObjectStore):
       
   153     """Store that works by accessing low-level CubicWeb's source API, with all hooks deactivated. It
       
   154     must be given a metadata generator object to handle metadata which are usually handled by hooks
       
   155     (see :class:`MetaGenerator`).
       
   156     """
       
   157 
       
   158     def __init__(self, cnx, metagen=None):
       
   159         super(NoHookRQLObjectStore, self).__init__(cnx)
       
   160         self.source = cnx.repo.system_source
       
   161         self.rschema = cnx.repo.schema.rschema
       
   162         self.add_relation = self.source.add_relation
       
   163         if metagen is None:
       
   164             metagen = MetaGenerator(cnx)
       
   165         self.metagen = metagen
       
   166         self._nb_inserted_entities = 0
       
   167         self._nb_inserted_types = 0
       
   168         self._nb_inserted_relations = 0
       
   169         # deactivate security
       
   170         cnx.read_security = False
       
   171         cnx.write_security = False
       
   172 
       
   173     def prepare_insert_entity(self, etype, **kwargs):
       
   174         """Given an entity type, attributes and inlined relations, returns the inserted entity's
       
   175         eid.
       
   176         """
       
   177         for k, v in kwargs.items():
       
   178             kwargs[k] = getattr(v, 'eid', v)
       
   179         entity, rels = self.metagen.base_etype_dicts(etype)
       
   180         # make a copy to keep cached entity pristine
       
   181         entity = copy(entity)
       
   182         entity.cw_edited = copy(entity.cw_edited)
       
   183         entity.cw_clear_relation_cache()
       
   184         entity.cw_edited.update(kwargs, skipsec=False)
       
   185         entity_source, extid = self.metagen.init_entity(entity)
       
   186         cnx = self._cnx
       
   187         self.source.add_info(cnx, entity, entity_source, extid)
       
   188         self.source.add_entity(cnx, entity)
       
   189         kwargs = dict()
       
   190         if inspect.getargspec(self.add_relation).keywords:
       
   191             kwargs['subjtype'] = entity.cw_etype
       
   192         for rtype, targeteids in rels.items():
       
   193             # targeteids may be a single eid or a list of eids
       
   194             inlined = self.rschema(rtype).inlined
       
   195             try:
       
   196                 for targeteid in targeteids:
       
   197                     self.add_relation(cnx, entity.eid, rtype, targeteid,
       
   198                                       inlined, **kwargs)
       
   199             except TypeError:
       
   200                 self.add_relation(cnx, entity.eid, rtype, targeteids,
       
   201                                   inlined, **kwargs)
       
   202         self._nb_inserted_entities += 1
       
   203         return entity.eid
       
   204 
       
   205     # XXX: prepare_update_entity is inherited from RQLObjectStore, it should be reimplemented to
       
   206     # actually skip hooks as prepare_insert_entity
       
   207 
       
   208     def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
       
   209         """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
       
   210         and ``eid_to``.
       
   211         """
       
   212         assert not rtype.startswith('reverse_')
       
   213         self.add_relation(self._cnx, eid_from, rtype, eid_to,
       
   214                           self.rschema(rtype).inlined)
       
   215         if self.rschema(rtype).symmetric:
       
   216             self.add_relation(self._cnx, eid_to, rtype, eid_from,
       
   217                               self.rschema(rtype).inlined)
       
   218         self._nb_inserted_relations += 1
       
   219 
       
   220     @property
       
   221     @deprecated('[3.21] deprecated')
       
   222     def nb_inserted_entities(self):
       
   223         return self._nb_inserted_entities
       
   224 
       
   225     @property
       
   226     @deprecated('[3.21] deprecated')
       
   227     def nb_inserted_types(self):
       
   228         return self._nb_inserted_types
       
   229 
       
   230     @property
       
   231     @deprecated('[3.21] deprecated')
       
   232     def nb_inserted_relations(self):
       
   233         return self._nb_inserted_relations
       
   234 
       
   235 
       
   236 class MetaGenerator(object):
       
   237     """Class responsible for generating standard metadata for imported entities. You may want to
       
   238     derive it to add application specific's metadata.
       
   239 
       
   240     Parameters:
       
   241     * `cnx`: connection to the repository
       
   242     * `baseurl`: optional base URL to be used for `cwuri` generation - default to config['base-url']
       
   243     * `source`: optional source to be used as `cw_source` for imported entities
       
   244     """
       
   245     META_RELATIONS = (META_RTYPES
       
   246                       - VIRTUAL_RTYPES
       
   247                       - set(('eid', 'cwuri',
       
   248                              'is', 'is_instance_of', 'cw_source')))
       
   249 
       
   250     def __init__(self, cnx, baseurl=None, source=None):
       
   251         self._cnx = cnx
       
   252         if baseurl is None:
       
   253             config = cnx.vreg.config
       
   254             baseurl = config['base-url'] or config.default_base_url()
       
   255         if not baseurl[-1] == '/':
       
   256             baseurl += '/'
       
   257         self.baseurl = baseurl
       
   258         if source is None:
       
   259             source = cnx.repo.system_source
       
   260         self.source = source
       
   261         self.create_eid = cnx.repo.system_source.create_eid
       
   262         self.time = datetime.utcnow()
       
   263         # attributes/relations shared by all entities of the same type
       
   264         self.etype_attrs = []
       
   265         self.etype_rels = []
       
   266         # attributes/relations specific to each entity
       
   267         self.entity_attrs = ['cwuri']
       
   268         #self.entity_rels = [] XXX not handled (YAGNI?)
       
   269         schema = cnx.vreg.schema
       
   270         rschema = schema.rschema
       
   271         for rtype in self.META_RELATIONS:
       
   272             # skip owned_by / created_by if user is the internal manager
       
   273             if cnx.user.eid == -1 and rtype in ('owned_by', 'created_by'):
       
   274                 continue
       
   275             if rschema(rtype).final:
       
   276                 self.etype_attrs.append(rtype)
       
   277             else:
       
   278                 self.etype_rels.append(rtype)
       
   279 
       
   280     @cached
       
   281     def base_etype_dicts(self, etype):
       
   282         entity = self._cnx.vreg['etypes'].etype_class(etype)(self._cnx)
       
   283         # entity are "surface" copied, avoid shared dict between copies
       
   284         del entity.cw_extra_kwargs
       
   285         entity.cw_edited = EditedEntity(entity)
       
   286         for attr in self.etype_attrs:
       
   287             genfunc = self.generate(attr)
       
   288             if genfunc:
       
   289                 entity.cw_edited.edited_attribute(attr, genfunc(entity))
       
   290         rels = {}
       
   291         for rel in self.etype_rels:
       
   292             genfunc = self.generate(rel)
       
   293             if genfunc:
       
   294                 rels[rel] = genfunc(entity)
       
   295         return entity, rels
       
   296 
       
   297     def init_entity(self, entity):
       
   298         entity.eid = self.create_eid(self._cnx)
       
   299         extid = entity.cw_edited.get('cwuri')
       
   300         for attr in self.entity_attrs:
       
   301             if attr in entity.cw_edited:
       
   302                 # already set, skip this attribute
       
   303                 continue
       
   304             genfunc = self.generate(attr)
       
   305             if genfunc:
       
   306                 entity.cw_edited.edited_attribute(attr, genfunc(entity))
       
   307         if isinstance(extid, text_type):
       
   308             extid = extid.encode('utf-8')
       
   309         return self.source, extid
       
   310 
       
   311     def generate(self, rtype):
       
   312         return getattr(self, 'gen_%s' % rtype, None)
       
   313 
       
   314     def gen_cwuri(self, entity):
       
   315         assert self.baseurl, 'baseurl is None while generating cwuri'
       
   316         return u'%s%s' % (self.baseurl, entity.eid)
       
   317 
       
   318     def gen_creation_date(self, entity):
       
   319         return self.time
       
   320 
       
   321     def gen_modification_date(self, entity):
       
   322         return self.time
       
   323 
       
   324     def gen_created_by(self, entity):
       
   325         return self._cnx.user.eid
       
   326 
       
   327     def gen_owned_by(self, entity):
       
   328         return self._cnx.user.eid