1 # copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
|
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
3 # |
|
4 # This file is part of CubicWeb. |
|
5 # |
|
6 # CubicWeb is free software: you can redistribute it and/or modify it under the |
|
7 # terms of the GNU Lesser General Public License as published by the Free |
|
8 # Software Foundation, either version 2.1 of the License, or (at your option) |
|
9 # any later version. |
|
10 # |
|
11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
|
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
|
14 # details. |
|
15 # |
|
16 # You should have received a copy of the GNU Lesser General Public License along |
|
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
|
18 """ |
|
19 Stores are responsible for inserting properly formatted entities and relations into the database. |
|
20 They have the following API:: |
|
21 |
|
22 >>> user_eid = store.prepare_insert_entity('CWUser', login=u'johndoe') |
|
23 >>> group_eid = store.prepare_insert_entity('CWGroup', name=u'unknown') |
|
24 >>> store.prepare_insert_relation(user_eid, 'in_group', group_eid) |
|
25 >>> store.flush() |
|
26 >>> store.commit() |
|
27 >>> store.finish() |
|
28 |
|
29 Some stores **require a flush** to copy data into the database, so if you want to write store |
|
30 independent code you should call it explicitly. (There may be multiple flushes during the |
|
31 process, or only one at the end if there is no memory issue.) This is different from the |
|
32 commit, which validates the database transaction. Finally, the `finish()` method should be called in |
|
33 case the store requires additional work once everything is done. |
|
34 |
|
35 * ``prepare_insert_entity(<entity type>, **kwargs) -> eid``: given an entity |
|
36 type, attributes and inlined relations, return the eid of the entity to be |
|
37 inserted, *with no guarantee that anything has been inserted in database*, |
|
38 |
|
39 * ``prepare_update_entity(<entity type>, eid, **kwargs) -> None``: given an |
|
40 entity type and eid, promise to update the given attributes and inlined |
|
41 relations *with no guarantee that anything has been updated in database*, |
|
42 |
|
43 * ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a |
|
44 relation ``rtype`` should be added between entities with eids ``eid_from`` |
|
45 and ``eid_to``. Similar to ``prepare_insert_entity()``, *there is no |
|
46 guarantee that the relation will be inserted in database*, |
|
47 |
|
48 * ``flush() -> None``: flush any temporary data to database. May be called |
|
49 several times during an import, |
|
50 |
|
51 * ``commit() -> None``: commit the database transaction, |
|
52 |
|
53 * ``finish() -> None``: additional stuff to do after import is terminated. |
|
54 |
|
55 .. autoclass:: cubicweb.dataimport.stores.RQLObjectStore |
|
56 .. autoclass:: cubicweb.dataimport.stores.NoHookRQLObjectStore |
|
57 .. autoclass:: cubicweb.dataimport.stores.MetaGenerator |
|
58 """ |
|
59 import inspect |
|
60 import warnings |
|
61 from datetime import datetime |
|
62 from copy import copy |
|
63 |
|
64 from six import text_type |
|
65 |
|
66 from logilab.common.deprecation import deprecated |
|
67 from logilab.common.decorators import cached |
|
68 |
|
69 from cubicweb.schema import META_RTYPES, VIRTUAL_RTYPES |
|
70 from cubicweb.server.edition import EditedEntity |
|
71 |
|
72 |
|
class RQLObjectStore(object):
    """Store that works by making RQL queries, hence with all the cubicweb's machinery activated.

    Every operation is delegated to the connection object given to the constructor.
    """

    def __init__(self, cnx, commit=None):
        """Bind the store to the `cnx` connection.

        `commit` is a deprecated optional callable used instead of `cnx.commit`; a
        `DeprecationWarning` is emitted whenever it is specified.
        """
        self._cnx = cnx
        if commit is not None:
            # stacklevel=2 so the warning points at the code instantiating the store
            warnings.warn('[3.19] commit argument should not be specified '
                          'as the cnx object already provides it.',
                          DeprecationWarning, stacklevel=2)
        if commit:
            self._commit = commit
        else:
            self._commit = cnx.commit
        # XXX 3.21 deprecated attributes
        # eid -> entity, for every entity created through this store
        self.eids = {}
        # entity type -> list of eids created through this store
        self.types = {}

    def rql(self, *args):
        """Execute a RQL query. This is NOT part of the store API."""
        return self._cnx.execute(*args)

    def prepare_insert_entity(self, *args, **kwargs):
        """Create an entity from its type (first positional argument), attributes and inlined
        relations, record it in the deprecated `eids` / `types` mappings and return its eid.
        """
        created = self._cnx.create_entity(*args, **kwargs)
        new_eid = created.eid
        self.eids[new_eid] = created
        self.types.setdefault(args[0], []).append(new_eid)
        return new_eid

    def prepare_update_entity(self, etype, eid, **kwargs):
        """Update the entity with the given `eid` using specified attributes and inlined
        relations, after asserting that it is actually of type `etype`.
        """
        target = self._cnx.entity_from_eid(eid)
        assert target.cw_etype == etype, 'Trying to update with wrong type %s' % etype
        # XXX some inlined relations may already exists
        target.cw_set(**kwargs)

    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
        """Add a relation ``rtype`` between entities with eids ``eid_from`` and ``eid_to`` by
        executing a ``SET`` RQL query. Extra keyword arguments are ignored.
        """
        # the relation type is interpolated in the query, eids are given as query arguments
        query = 'SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype
        query_args = {'x': int(eid_from), 'y': int(eid_to)}
        self.rql(query, query_args)

    def flush(self):
        """Nothing to flush for this store."""

    def commit(self):
        """Commit the database transaction."""
        return self._commit()

    def finish(self):
        """Nothing to do once import is terminated for this store."""

    @property
    def session(self):
        """Deprecated: session retrieved from the repository using the connection's session id."""
        warnings.warn('[3.19] deprecated property.', DeprecationWarning, stacklevel=2)
        repo = self._cnx.repo
        return repo._get_session(self._cnx.sessionid)

    @deprecated("[3.19] use cnx.find(*args, **kwargs).entities() instead")
    def find_entities(self, *args, **kwargs):
        """Deprecated shortcut for ``cnx.find(*args, **kwargs).entities()``."""
        found = self._cnx.find(*args, **kwargs)
        return found.entities()

    @deprecated("[3.19] use cnx.find(*args, **kwargs).one() instead")
    def find_one_entity(self, *args, **kwargs):
        """Deprecated shortcut for ``cnx.find(*args, **kwargs).one()``."""
        found = self._cnx.find(*args, **kwargs)
        return found.one()

    @deprecated('[3.21] use prepare_insert_entity instead')
    def create_entity(self, *args, **kwargs):
        """Deprecated: insert an entity and return the entity object rather than its eid."""
        return self._cnx.entity_from_eid(self.prepare_insert_entity(*args, **kwargs))

    @deprecated('[3.21] use prepare_insert_relation instead')
    def relate(self, eid_from, rtype, eid_to, **kwargs):
        """Deprecated alias of :meth:`prepare_insert_relation`."""
        self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
|
150 |
|
151 |
|
class NoHookRQLObjectStore(RQLObjectStore):
    """Store that works by accessing low-level CubicWeb's source API, with all hooks deactivated. It
    must be given a metadata generator object to handle metadata which are usually handled by hooks
    (see :class:`MetaGenerator`).
    """

    def __init__(self, cnx, metagen=None):
        """Bind the store to `cnx` and to its repository's system source.

        `metagen` is the metadata generator to use; when omitted, a default
        :class:`MetaGenerator` is built from `cnx`. Read and write security are switched
        off on `cnx` as a side effect.
        """
        super(NoHookRQLObjectStore, self).__init__(cnx)
        self.source = cnx.repo.system_source
        self.rschema = cnx.repo.schema.rschema
        self.add_relation = self.source.add_relation
        if metagen is None:
            metagen = MetaGenerator(cnx)
        self.metagen = metagen
        self._nb_inserted_entities = 0
        # only exposed through the deprecated `nb_inserted_types` property, never
        # incremented by this class
        self._nb_inserted_types = 0
        self._nb_inserted_relations = 0
        # deactivate security
        cnx.read_security = False
        cnx.write_security = False

    def _add_relation_accepts_kwargs(self):
        """Return True if ``self.add_relation`` accepts arbitrary keyword arguments."""
        # `inspect.getargspec` was removed in Python 3.11 and rejects callables using
        # keyword-only arguments or annotations: prefer `getfullargspec` when available
        getfullargspec = getattr(inspect, 'getfullargspec', None)
        if getfullargspec is not None:
            return bool(getfullargspec(self.add_relation).varkw)
        # Python 2 fallback
        return bool(inspect.getargspec(self.add_relation).keywords)

    def prepare_insert_entity(self, etype, **kwargs):
        """Given an entity type, attributes and inlined relations, returns the inserted entity's
        eid.

        Values having an `eid` attribute (e.g. entities) are replaced by that eid. Metadata
        attributes and relations are provided by the metadata generator, and the entity is
        written through the system source's `add_info` / `add_entity` / `add_relation`.
        """
        # build a new mapping rather than rewriting `kwargs` while iterating on it
        attrs = {key: getattr(value, 'eid', value) for key, value in kwargs.items()}
        entity, rels = self.metagen.base_etype_dicts(etype)
        # make a copy to keep cached entity pristine
        entity = copy(entity)
        entity.cw_edited = copy(entity.cw_edited)
        entity.cw_clear_relation_cache()
        entity.cw_edited.update(attrs, skipsec=False)
        entity_source, extid = self.metagen.init_entity(entity)
        cnx = self._cnx
        self.source.add_info(cnx, entity, entity_source, extid)
        self.source.add_entity(cnx, entity)
        extra = {}
        if self._add_relation_accepts_kwargs():
            extra['subjtype'] = entity.cw_etype
        for rtype, targeteids in rels.items():
            inlined = self.rschema(rtype).inlined
            # targeteids may be a single eid or a list of eids; only the iterability probe is
            # guarded, so that a TypeError raised by `add_relation` itself is not swallowed
            # and retried with the whole collection as a single target
            try:
                targets = list(targeteids)
            except TypeError:
                targets = [targeteids]
            for targeteid in targets:
                self.add_relation(cnx, entity.eid, rtype, targeteid,
                                  inlined, **extra)
        self._nb_inserted_entities += 1
        return entity.eid

    # XXX: prepare_update_entity is inherited from RQLObjectStore, it should be reimplemented to
    # actually skip hooks as prepare_insert_entity

    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
        """Insert into the database a relation ``rtype`` between entities with eids ``eid_from``
        and ``eid_to``.

        ``rtype`` must not be given in its ``reverse_`` form. For a symmetric relation type,
        the relation from ``eid_to`` to ``eid_from`` is added as well. Extra keyword
        arguments are ignored.
        """
        assert not rtype.startswith('reverse_')
        rschema = self.rschema(rtype)
        self.add_relation(self._cnx, eid_from, rtype, eid_to, rschema.inlined)
        if rschema.symmetric:
            self.add_relation(self._cnx, eid_to, rtype, eid_from, rschema.inlined)
        self._nb_inserted_relations += 1

    @property
    @deprecated('[3.21] deprecated')
    def nb_inserted_entities(self):
        """Deprecated: number of entities inserted through `prepare_insert_entity`."""
        return self._nb_inserted_entities

    @property
    @deprecated('[3.21] deprecated')
    def nb_inserted_types(self):
        """Deprecated: value of the `_nb_inserted_types` counter."""
        return self._nb_inserted_types

    @property
    @deprecated('[3.21] deprecated')
    def nb_inserted_relations(self):
        """Deprecated: number of `prepare_insert_relation` calls completed so far."""
        return self._nb_inserted_relations
|
234 |
|
235 |
|
class MetaGenerator(object):
    """Class responsible for generating standard metadata for imported entities. You may want to
    derive it to add application specific's metadata.

    Parameters:
    * `cnx`: connection to the repository
    * `baseurl`: optional base URL to be used for `cwuri` generation - default to config['base-url']
    * `source`: optional source to be used as `cw_source` for imported entities
    """
    # meta relation types handled through the `gen_<rtype>` methods; virtual relation types
    # and the types listed below are excluded
    META_RELATIONS = (META_RTYPES - VIRTUAL_RTYPES
                      - set(('eid', 'cwuri', 'is', 'is_instance_of', 'cw_source')))

    def __init__(self, cnx, baseurl=None, source=None):
        """Compute the base URL, source and shared timestamp, then sort the relation types of
        `META_RELATIONS` into attributes (final relations) and relations.
        """
        self._cnx = cnx
        if baseurl is None:
            cfg = cnx.vreg.config
            baseurl = cfg['base-url'] or cfg.default_base_url()
        # the entity's eid is directly appended to the base url, hence the trailing slash
        if baseurl[-1] != '/':
            baseurl += '/'
        self.baseurl = baseurl
        system_source = cnx.repo.system_source
        self.source = system_source if source is None else source
        self.create_eid = system_source.create_eid
        # single timestamp used for creation and modification dates
        self.time = datetime.utcnow()
        # attributes/relations shared by all entities of the same type
        self.etype_attrs = []
        self.etype_rels = []
        # attributes/relations specific to each entity
        # XXX relations specific to each entity are not handled (YAGNI?)
        self.entity_attrs = ['cwuri']
        rschema = cnx.vreg.schema.rschema
        for rtype in self.META_RELATIONS:
            # skip owned_by / created_by if user is the internal manager
            if rtype in ('owned_by', 'created_by') and cnx.user.eid == -1:
                continue
            if rschema(rtype).final:
                bucket = self.etype_attrs
            else:
                bucket = self.etype_rels
            bucket.append(rtype)

    @cached
    def base_etype_dicts(self, etype):
        """Return a template entity of type `etype`, with the attributes shared by all entities
        of that type already set, and a `{rtype: generated value}` dictionary for the
        relations shared by all entities of that type.
        """
        etype_class = self._cnx.vreg['etypes'].etype_class(etype)
        entity = etype_class(self._cnx)
        # entity are "surface" copied, avoid shared dict between copies
        del entity.cw_extra_kwargs
        entity.cw_edited = EditedEntity(entity)
        for attr in self.etype_attrs:
            generator = self.generate(attr)
            if not generator:
                continue
            entity.cw_edited.edited_attribute(attr, generator(entity))
        rels = {}
        for rel in self.etype_rels:
            generator = self.generate(rel)
            if not generator:
                continue
            rels[rel] = generator(entity)
        return entity, rels

    def init_entity(self, entity):
        """Give a new eid to `entity`, generate the per-entity attributes which are not set
        yet, and return a `(source, extid)` tuple.
        """
        entity.eid = self.create_eid(self._cnx)
        # read before attributes generation: None unless a cwuri was explicitly given
        extid = entity.cw_edited.get('cwuri')
        for attr in self.entity_attrs:
            # attributes which are already set are left untouched
            if attr not in entity.cw_edited:
                generator = self.generate(attr)
                if generator:
                    entity.cw_edited.edited_attribute(attr, generator(entity))
        if isinstance(extid, text_type):
            extid = extid.encode('utf-8')
        return self.source, extid

    def generate(self, rtype):
        """Return the `gen_<rtype>` method of this generator, or None if there is none."""
        method_name = 'gen_%s' % rtype
        return getattr(self, method_name, None)

    def gen_cwuri(self, entity):
        """Return the entity's `cwuri`: the base URL followed by the entity's eid."""
        assert self.baseurl, 'baseurl is None while generating cwuri'
        return u'%s%s' % (self.baseurl, entity.eid)

    def gen_creation_date(self, entity):
        """Return the timestamp computed when this generator was instantiated."""
        return self.time

    def gen_modification_date(self, entity):
        """Return the timestamp computed when this generator was instantiated."""
        return self.time

    def gen_created_by(self, entity):
        """Return the eid of the connection's user."""
        return self._cnx.user.eid

    def gen_owned_by(self, entity):
        """Return the eid of the connection's user."""
        return self._cnx.user.eid
|