dataimport/stores.py
changeset 10513 7bec01a59f92
child 10662 10942ed172de
equal deleted inserted replaced
10512:99bdd4bddd77 10513:7bec01a59f92
       
     1 # copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     3 #
       
     4 # This file is part of CubicWeb.
       
     5 #
       
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
       
     7 # terms of the GNU Lesser General Public License as published by the Free
       
     8 # Software Foundation, either version 2.1 of the License, or (at your option)
       
     9 # any later version.
       
    10 #
       
    11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT
       
    12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
       
    13 # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
       
    14 # details.
       
    15 #
       
    16 # You should have received a copy of the GNU Lesser General Public License along
       
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
       
    18 """
       
    19 Stores are responsible to insert properly formatted entities and relations into the database. They
       
    20 have the following API::
       
    21 
       
    22     >>> user_eid = store.prepare_insert_entity('CWUser', login=u'johndoe')
       
     23     >>> group_eid = store.prepare_insert_entity('CWGroup', name=u'unknown')
       
     24     >>> store.prepare_insert_relation(user_eid, 'in_group', group_eid)
       
    25     >>> store.flush()
       
    26     >>> store.commit()
       
    27     >>> store.finish()
       
    28 
       
     29 Some stores **require a flush** to copy data into the database, so if you want to have store
       
     30 independent code you should explicitly call it. (There may be multiple flushes during the
       
    31 process, or only one at the end if there is no memory issue). This is different from the
       
     32 commit which validates the database transaction. Finally, the `finish()` method should be called in
       
    33 case the store requires additional work once everything is done.
       
    34 
       
    35 * ``prepare_insert_entity(<entity type>, **kwargs) -> eid``: given an entity
       
    36   type, attributes and inlined relations, return the eid of the entity to be
       
    37   inserted, *with no guarantee that anything has been inserted in database*,
       
    38 
       
    39 * ``prepare_update_entity(<entity type>, eid, **kwargs) -> None``: given an
       
    40   entity type and eid, promise for update given attributes and inlined
       
     41   relations *with no guarantee that anything has been updated in database*,
       
    42 
       
    43 * ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a
       
    44   relation ``rtype`` should be added between entities with eids ``eid_from``
       
    45   and ``eid_to``. Similar to ``prepare_insert_entity()``, *there is no
       
    46   guarantee that the relation will be inserted in database*,
       
    47 
       
    48 * ``flush() -> None``: flush any temporary data to database. May be called
       
    49   several times during an import,
       
    50 
       
    51 * ``commit() -> None``: commit the database transaction,
       
    52 
       
    53 * ``finish() -> None``: additional stuff to do after import is terminated.
       
    54 
       
    55 .. autoclass:: cubicweb.dataimport.stores.RQLObjectStore
       
    56 .. autoclass:: cubicweb.dataimport.stores.NoHookRQLObjectStore
       
    57 .. autoclass:: cubicweb.dataimport.stores.MetaGenerator
       
    58 """
       
    59 import inspect
       
    60 import warnings
       
    61 from datetime import datetime
       
    62 from copy import copy
       
    63 
       
    64 from logilab.common.deprecation import deprecated
       
    65 from logilab.common.decorators import cached
       
    66 
       
    67 from cubicweb.schema import META_RTYPES, VIRTUAL_RTYPES
       
    68 from cubicweb.server.edition import EditedEntity
       
    69 
       
    70 
       
    71 class RQLObjectStore(object):
       
    72     """Store that works by making RQL queries, hence with all the cubicweb's machinery activated.
       
    73     """
       
    74 
       
    75     def __init__(self, cnx, commit=None):
       
    76         if commit is not None:
       
    77             warnings.warn('[3.19] commit argument should not be specified '
       
    78                           'as the cnx object already provides it.',
       
    79                           DeprecationWarning, stacklevel=2)
       
    80         self._cnx = cnx
       
    81         self._commit = commit or cnx.commit
       
    82         # XXX 3.21 deprecated attributes
       
    83         self.eids = {}
       
    84         self.types = {}
       
    85 
       
    86     def rql(self, *args):
       
    87         """Execute a RQL query. This is NOT part of the store API."""
       
    88         return self._cnx.execute(*args)
       
    89 
       
    90     def prepare_insert_entity(self, *args, **kwargs):
       
    91         """Given an entity type, attributes and inlined relations, returns the inserted entity's
       
    92         eid.
       
    93         """
       
    94         entity = self._cnx.create_entity(*args, **kwargs)
       
    95         self.eids[entity.eid] = entity
       
    96         self.types.setdefault(args[0], []).append(entity.eid)
       
    97         return entity.eid
       
    98 
       
    99     def prepare_update_entity(self, etype, eid, **kwargs):
       
   100         """Given an entity type and eid, updates the corresponding entity with specified attributes
       
   101         and inlined relations.
       
   102         """
       
   103         entity = self._cnx.entity_from_eid(eid)
       
   104         assert entity.cw_etype == etype, 'Trying to update with wrong type {}'.format(etype)
       
   105         # XXX some inlined relations may already exists
       
   106         entity.cw_set(**kwargs)
       
   107 
       
   108     def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
       
   109         """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
       
   110         and ``eid_to``.
       
   111         """
       
   112         self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
       
   113                  {'x': int(eid_from), 'y': int(eid_to)})
       
   114 
       
   115     def flush(self):
       
   116         """Nothing to flush for this store."""
       
   117         pass
       
   118 
       
   119     def commit(self):
       
   120         """Commit the database transaction."""
       
   121         return self._commit()
       
   122 
       
   123     @property
       
   124     def session(self):
       
   125         warnings.warn('[3.19] deprecated property.', DeprecationWarning, stacklevel=2)
       
   126         return self._cnx.repo._get_session(self._cnx.sessionid)
       
   127 
       
   128     @deprecated("[3.19] use cnx.find(*args, **kwargs).entities() instead")
       
   129     def find_entities(self, *args, **kwargs):
       
   130         return self._cnx.find(*args, **kwargs).entities()
       
   131 
       
   132     @deprecated("[3.19] use cnx.find(*args, **kwargs).one() instead")
       
   133     def find_one_entity(self, *args, **kwargs):
       
   134         return self._cnx.find(*args, **kwargs).one()
       
   135 
       
   136     @deprecated('[3.21] use prepare_insert_entity instead')
       
   137     def create_entity(self, *args, **kwargs):
       
   138         eid = self.prepare_insert_entity(*args, **kwargs)
       
   139         return self._cnx.entity_from_eid(eid)
       
   140 
       
   141     @deprecated('[3.21] use prepare_insert_relation instead')
       
   142     def relate(self, eid_from, rtype, eid_to, **kwargs):
       
   143         self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
       
   144 
       
   145 
       
   146 class NoHookRQLObjectStore(RQLObjectStore):
       
   147     """Store that works by accessing low-level CubicWeb's source API, with all hooks deactivated. It
       
   148     must be given a metadata generator object to handle metadata which are usually handled by hooks
       
   149     (see :class:`MetaGenerator`).
       
   150     """
       
   151 
       
   152     def __init__(self, cnx, metagen=None):
       
   153         super(NoHookRQLObjectStore, self).__init__(cnx)
       
   154         self.source = cnx.repo.system_source
       
   155         self.rschema = cnx.repo.schema.rschema
       
   156         self.add_relation = self.source.add_relation
       
   157         if metagen is None:
       
   158             metagen = MetaGenerator(cnx)
       
   159         self.metagen = metagen
       
   160         self._nb_inserted_entities = 0
       
   161         self._nb_inserted_types = 0
       
   162         self._nb_inserted_relations = 0
       
   163         # deactivate security
       
   164         cnx.read_security = False
       
   165         cnx.write_security = False
       
   166 
       
   167     def prepare_insert_entity(self, etype, **kwargs):
       
   168         """Given an entity type, attributes and inlined relations, returns the inserted entity's
       
   169         eid.
       
   170         """
       
   171         for k, v in kwargs.iteritems():
       
   172             kwargs[k] = getattr(v, 'eid', v)
       
   173         entity, rels = self.metagen.base_etype_dicts(etype)
       
   174         # make a copy to keep cached entity pristine
       
   175         entity = copy(entity)
       
   176         entity.cw_edited = copy(entity.cw_edited)
       
   177         entity.cw_clear_relation_cache()
       
   178         entity.cw_edited.update(kwargs, skipsec=False)
       
   179         entity_source, extid = self.metagen.init_entity(entity)
       
   180         cnx = self._cnx
       
   181         self.source.add_info(cnx, entity, entity_source, extid)
       
   182         self.source.add_entity(cnx, entity)
       
   183         kwargs = dict()
       
   184         if inspect.getargspec(self.add_relation).keywords:
       
   185             kwargs['subjtype'] = entity.cw_etype
       
   186         for rtype, targeteids in rels.iteritems():
       
   187             # targeteids may be a single eid or a list of eids
       
   188             inlined = self.rschema(rtype).inlined
       
   189             try:
       
   190                 for targeteid in targeteids:
       
   191                     self.add_relation(cnx, entity.eid, rtype, targeteid,
       
   192                                       inlined, **kwargs)
       
   193             except TypeError:
       
   194                 self.add_relation(cnx, entity.eid, rtype, targeteids,
       
   195                                   inlined, **kwargs)
       
   196         self._nb_inserted_entities += 1
       
   197         return entity.eid
       
   198 
       
   199     # XXX: prepare_update_entity is inherited from RQLObjectStore, it should be reimplemented to
       
   200     # actually skip hooks as prepare_insert_entity
       
   201 
       
   202     def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
       
   203         """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
       
   204         and ``eid_to``.
       
   205         """
       
   206         assert not rtype.startswith('reverse_')
       
   207         self.add_relation(self._cnx, eid_from, rtype, eid_to,
       
   208                           self.rschema(rtype).inlined)
       
   209         if self.rschema(rtype).symmetric:
       
   210             self.add_relation(self._cnx, eid_to, rtype, eid_from,
       
   211                               self.rschema(rtype).inlined)
       
   212         self._nb_inserted_relations += 1
       
   213 
       
   214     @property
       
   215     @deprecated('[3.21] deprecated')
       
   216     def nb_inserted_entities(self):
       
   217         return self._nb_inserted_entities
       
   218 
       
   219     @property
       
   220     @deprecated('[3.21] deprecated')
       
   221     def nb_inserted_types(self):
       
   222         return self._nb_inserted_types
       
   223 
       
   224     @property
       
   225     @deprecated('[3.21] deprecated')
       
   226     def nb_inserted_relations(self):
       
   227         return self._nb_inserted_relations
       
   228 
       
   229 
       
   230 class MetaGenerator(object):
       
   231     """Class responsible for generating standard metadata for imported entities. You may want to
       
   232     derive it to add application specific's metadata.
       
   233 
       
   234     Parameters:
       
   235     * `cnx`: connection to the repository
       
   236     * `baseurl`: optional base URL to be used for `cwuri` generation - default to config['base-url']
       
   237     * `source`: optional source to be used as `cw_source` for imported entities
       
   238     """
       
   239     META_RELATIONS = (META_RTYPES
       
   240                       - VIRTUAL_RTYPES
       
   241                       - set(('eid', 'cwuri',
       
   242                              'is', 'is_instance_of', 'cw_source')))
       
   243 
       
   244     def __init__(self, cnx, baseurl=None, source=None):
       
   245         self._cnx = cnx
       
   246         if baseurl is None:
       
   247             config = cnx.vreg.config
       
   248             baseurl = config['base-url'] or config.default_base_url()
       
   249         if not baseurl[-1] == '/':
       
   250             baseurl += '/'
       
   251         self.baseurl = baseurl
       
   252         if source is None:
       
   253             source = cnx.repo.system_source
       
   254         self.source = source
       
   255         self.create_eid = cnx.repo.system_source.create_eid
       
   256         self.time = datetime.now()
       
   257         # attributes/relations shared by all entities of the same type
       
   258         self.etype_attrs = []
       
   259         self.etype_rels = []
       
   260         # attributes/relations specific to each entity
       
   261         self.entity_attrs = ['cwuri']
       
   262         #self.entity_rels = [] XXX not handled (YAGNI?)
       
   263         schema = cnx.vreg.schema
       
   264         rschema = schema.rschema
       
   265         for rtype in self.META_RELATIONS:
       
   266             # skip owned_by / created_by if user is the internal manager
       
   267             if cnx.user.eid == -1 and rtype in ('owned_by', 'created_by'):
       
   268                 continue
       
   269             if rschema(rtype).final:
       
   270                 self.etype_attrs.append(rtype)
       
   271             else:
       
   272                 self.etype_rels.append(rtype)
       
   273 
       
   274     @cached
       
   275     def base_etype_dicts(self, etype):
       
   276         entity = self._cnx.vreg['etypes'].etype_class(etype)(self._cnx)
       
   277         # entity are "surface" copied, avoid shared dict between copies
       
   278         del entity.cw_extra_kwargs
       
   279         entity.cw_edited = EditedEntity(entity)
       
   280         for attr in self.etype_attrs:
       
   281             genfunc = self.generate(attr)
       
   282             if genfunc:
       
   283                 entity.cw_edited.edited_attribute(attr, genfunc(entity))
       
   284         rels = {}
       
   285         for rel in self.etype_rels:
       
   286             genfunc = self.generate(rel)
       
   287             if genfunc:
       
   288                 rels[rel] = genfunc(entity)
       
   289         return entity, rels
       
   290 
       
   291     def init_entity(self, entity):
       
   292         entity.eid = self.create_eid(self._cnx)
       
   293         extid = entity.cw_edited.get('cwuri')
       
   294         for attr in self.entity_attrs:
       
   295             if attr in entity.cw_edited:
       
   296                 # already set, skip this attribute
       
   297                 continue
       
   298             genfunc = self.generate(attr)
       
   299             if genfunc:
       
   300                 entity.cw_edited.edited_attribute(attr, genfunc(entity))
       
   301         if isinstance(extid, unicode):
       
   302             extid = extid.encode('utf-8')
       
   303         return self.source, extid
       
   304 
       
   305     def generate(self, rtype):
       
   306         return getattr(self, 'gen_%s' % rtype, None)
       
   307 
       
   308     def gen_cwuri(self, entity):
       
   309         assert self.baseurl, 'baseurl is None while generating cwuri'
       
   310         return u'%s%s' % (self.baseurl, entity.eid)
       
   311 
       
   312     def gen_creation_date(self, entity):
       
   313         return self.time
       
   314 
       
   315     def gen_modification_date(self, entity):
       
   316         return self.time
       
   317 
       
   318     def gen_created_by(self, entity):
       
   319         return self._cnx.user.eid
       
   320 
       
   321     def gen_owned_by(self, entity):
       
   322         return self._cnx.user.eid
       
   323