|
1 # -*- coding: utf-8 -*- |
|
2 """This module provides tools to import tabular data. |
|
3 |
|
4 :organization: Logilab |
|
5 :copyright: 2001-2010 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2. |
|
6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
7 :license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses |
|
8 |
|
9 |
|
10 Example of use (run this with `cubicweb-ctl shell instance import-script.py`): |
|
11 |
|
12 .. sourcecode:: python |
|
13 |
|
14 from cubicweb.devtools.dataimport import * |
|
15 # define data generators |
|
16 GENERATORS = [] |
|
17 |
|
18 USERS = [('Prenom', 'firstname', ()), |
|
19 ('Nom', 'surname', ()), |
|
20 ('Identifiant', 'login', ()), |
|
21 ] |
|
22 |
|
23 def gen_users(ctl): |
|
24 for row in ctl.get_data('utilisateurs'): |
|
25 entity = mk_entity(row, USERS) |
|
26 entity['upassword'] = u'motdepasse' |
|
27 ctl.check('login', entity['login'], None) |
|
28 ctl.store.add('CWUser', entity) |
|
29 email = {'address': row['email']} |
|
30 ctl.store.add('EmailAddress', email) |
|
31 ctl.store.relate(entity['eid'], 'use_email', email['eid']) |
|
32 ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']}) |
|
33 |
|
34 CHK = [('login', check_doubles, 'Utilisateurs Login', |
|
35 'Deux utilisateurs ne devraient pas avoir le même login.'), |
|
36 ] |
|
37 |
|
38 GENERATORS.append( (gen_users, CHK) ) |
|
39 |
|
40 # create controller |
|
41 ctl = CWImportController(RQLObjectStore()) |
|
42 ctl.askerror = 1 |
|
43 ctl.generators = GENERATORS |
|
44 ctl.store._checkpoint = checkpoint |
|
45 ctl.store._rql = rql |
|
46 ctl.data['utilisateurs'] = lazytable(utf8csvreader(open('users.csv'))) |
|
47 # run |
|
48 ctl.run() |
|
49 sys.exit(0) |
|
50 |
|
51 |
|
.. BUG a file with a single column causes a parsing problem
|
53 .. TODO rollback() |
|
54 """ |
|
55 __docformat__ = "restructuredtext en" |
|
56 |
|
57 import sys |
|
58 import csv |
|
59 import traceback |
|
60 import os.path as osp |
|
61 from StringIO import StringIO |
|
62 from copy import copy |
|
63 |
|
64 from logilab.common import shellutils |
|
65 from logilab.common.date import strptime |
|
66 from logilab.common.decorators import cached |
|
67 from logilab.common.deprecation import deprecated |
|
68 |
|
69 |
|
def ucsvreader_pb(filepath, encoding='utf-8', separator=',', quote='"',
                  skipfirst=False, withpb=True):
    """Same as `ucsvreader`, but display a progress bar while iterating rows.

    :param filepath: path of the csv file to read
    :param encoding: encoding used to decode every cell
    :param separator: csv field delimiter
    :param quote: csv quote character
    :param skipfirst: when true, the first row is skipped
    :param withpb: when false, no progress bar is created nor updated
    :raise Exception: if `filepath` does not exist

    The number of rows is obtained by counting the lines of the file in
    Python instead of running ``wc -l`` through a shell: the file path is no
    longer interpolated into a command line and no external tool is needed.
    Both file handles are explicitly closed.
    """
    if not osp.exists(filepath):
        raise Exception("file doesn't exists: %s" % filepath)
    # first pass: count physical lines to size the progress bar
    stream = open(filepath)
    try:
        rowcount = sum(1 for _ in stream)
    finally:
        stream.close()
    if skipfirst:
        rowcount -= 1
    if withpb:
        pb = shellutils.ProgressBar(rowcount, 50)
    # second pass: actually parse and yield the rows
    stream = open(filepath)
    try:
        for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
            yield urow
            if withpb:
                pb.update()
    finally:
        # also runs when the generator is closed before exhaustion
        stream.close()
    print(' %s rows imported' % rowcount)
|
85 |
|
def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
               skipfirst=False):
    """Iterate over the rows of a csv `stream`, yielding lists of decoded cells.

    Every cell of every row is decoded using `encoding`. `separator` and
    `quote` are given to `csv.reader` as delimiter and quote character. When
    `skipfirst` is true the first row is consumed and not yielded.
    """
    rows = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
    if skipfirst:
        # drop the header row
        next(rows)
    for cells in rows:
        decoded = []
        for cell in cells:
            decoded.append(cell.decode(encoding))
        yield decoded
|
96 |
|
def commit_every(nbit, store, it):
    """Yield items of `it`, calling `store.checkpoint()` every `nbit` items.

    :param nbit: number of items between two checkpoints (expected to be a
      positive integer), or None to never checkpoint
    :param store: object providing a `checkpoint()` method
    :param it: iterable of items to yield unchanged

    When `nbit` is not None, a last checkpoint is done once `it` is
    exhausted so that trailing items are committed too.
    """
    for i, x in enumerate(it):
        yield x
        # the previous test (`i % nbit`) was inverted: it triggered a
        # checkpoint on every item *except* each nbit-th one
        if nbit is not None and (i + 1) % nbit == 0:
            store.checkpoint()
    if nbit is not None:
        store.checkpoint()
|
104 |
|
def lazytable(reader):
    """Turn an iterator of rows into an iterator of dicts.

    The first row read from `reader` is used as the table header; each
    following row is yielded as a dict mapping header cells to row cells.

    >>> data = lazytable(ucsvreader(open(filename)))
    """
    columns = next(reader)
    for values in reader:
        yield dict(zip(columns, values))
|
114 |
|
def mk_entity(row, map):
    """Return a dict built from `row` according to the `map` description.

    `map` is a list of (source key, destination key, functions) triples. For
    each triple the value found in `row` under the source key is stored
    under the destination key, then passed through each function in turn.
    The chain stops as soon as a function returns None (the destination key
    is then kept with a None value).

    A ValueError raised by one of the functions is re-raised as a new
    ValueError whose message mentions the source key.

    >>> row = {'myname': u'dupont'}
    >>> map = [('myname', u'name', (capitalize_if_unicase,))]
    >>> mk_entity(row, map)
    {'name': u'Dupont'}
    >>> row = {'myname': u'dupont', 'optname': u''}
    >>> map = [('myname', u'name', (capitalize_if_unicase,)),
    ...        ('optname', u'MARKER', (optional,))]
    >>> mk_entity(row, map)
    {'MARKER': None, 'name': u'Dupont'}
    """
    entity = {}
    assert isinstance(row, dict)
    assert isinstance(map, list)
    for src, dest, funcs in map:
        assert required not in funcs or optional not in funcs, \
               "optional and required checks are exclusive"
        current = row[src]
        entity[dest] = current
        try:
            for func in funcs:
                current = entity[dest] = func(current)
                if current is None:
                    # nothing left to sanitize for this field
                    break
        except ValueError as err:
            raise ValueError('error with %r field: %s' % (src, err))
    return entity
|
145 |
|
146 |
|
147 # user interactions ############################################################ |
|
148 |
|
def tell(msg):
    """Default output function: print `msg` on the standard output."""
    print(msg)
|
151 |
|
def confirm(question):
    """Ask `question` with Y/n/abort choices (Y being the default).

    Return True when the answer is 'Y', False when it is 'n'. The process
    exits with status 1 when the answer is 'abort'.
    """
    choice = shellutils.ASK.ask(question, ('Y', 'n', 'abort'), 'Y')
    if choice != 'abort':
        return choice == 'Y'
    sys.exit(1)
|
158 |
|
159 |
|
class catch_error(object):
    """Context manager recording exceptions raised in its body.

    When an exception other than KeyboardInterrupt / SystemExit escapes the
    `with` block and the controller's `catcherrors` attribute is true, the
    error is given to `ctl.record_error()` under `key` and silenced.
    Otherwise the exception propagates.

    Note `msg` is stored on the instance but None is what is given as
    message to `record_error()`.
    """

    def __init__(self, ctl, key='unexpected error', msg=None):
        self.ctl, self.key, self.msg = ctl, key, msg

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        if exc_type is None:
            # body ended normally
            return None
        if issubclass(exc_type, (KeyboardInterrupt, SystemExit)):
            # never swallow interruption / exit requests
            return None
        if not self.ctl.catcherrors:
            return None
        self.ctl.record_error(self.key, None, exc_type, exc_value, exc_tb)
        return True  # tell python the exception has been handled
|
178 |
|
179 |
|
180 # base sanitizing/coercing functions ########################################### |
|
181 |
|
def optional(value):
    """Checker marking a field as optional.

    Return `value` unchanged when it is true, else None.
    """
    if not value:
        return None
    return value
|
187 |
|
def required(value):
    """Checker marking a field as mandatory.

    Return `value` unchanged when it is true, raise ValueError otherwise.

    This check should often be found in last position in the chain.
    """
    if not value:
        raise ValueError("required")
    return value
|
196 |
|
def todatetime(format='%d/%m/%Y'):
    """Build a transformation function parsing strings with `format`.

    The returned function takes a single value and returns the result of
    `strptime(value, format)`.

    Follow it by `todate` or `totime` functions from `logilab.common.date` if
    you want a `date`/`time` instance instead of `datetime`.
    """
    def _parse(value):
        return strptime(value, format)
    return _parse
|
207 |
|
def call_transform_method(methodname, *args, **kwargs):
    """Build a transformation function calling a method of its input.

    The returned function looks up `methodname` on the value it receives,
    calls it with the given `args` / `kwargs` and returns the result.
    """
    def _transform(value):
        method = getattr(value, methodname)
        return method(*args, **kwargs)
    return _transform
|
213 |
|
def call_check_method(methodname, *args, **kwargs):
    """Build a check function based on a method of its input.

    The returned function calls `methodname` on the value it receives, with
    the given `args` / `kwargs`. The value is returned unchanged when the
    call result is true, else ValueError is raised.
    """
    def _verify(value):
        verified = getattr(value, methodname)(*args, **kwargs)
        if not verified:
            raise ValueError('%s not verified on %r' % (methodname, value))
        return value
    return _verify
|
223 |
|
224 # base integrity checking functions ############################################ |
|
225 |
|
def check_doubles(buckets):
    """Return (key, number of items) pairs for buckets holding several items.

    `buckets` is a dict whose values are sized collections.
    """
    doubles = []
    for key, values in buckets.items():
        count = len(values)
        if count > 1:
            doubles.append((key, count))
    return doubles
|
229 |
|
def check_doubles_not_none(buckets):
    """Return (key, number of items) pairs for buckets holding several items,
    ignoring the bucket whose key is None.
    """
    doubles = []
    for key, values in buckets.items():
        if key is None:
            continue
        count = len(values)
        if count > 1:
            doubles.append((key, count))
    return doubles
|
234 |
|
235 |
|
236 # object stores ################################################################# |
|
237 |
|
class ObjectStore(object):
    """Store objects in memory for *faster* validation (development mode)

    But it will not enforce the constraints of the schema and hence will miss
    some problems.

    >>> store = ObjectStore()
    >>> user = {'login': 'johndoe'}
    >>> store.add('CWUser', user)
    >>> group = {'name': 'unknown'}
    >>> store.add('CWGroup', group)
    >>> store.relate(user['eid'], 'in_group', group['eid'])
    (0, 'in_group', 1)
    """
    def __init__(self):
        # items added through add(), in insertion order; for this in-memory
        # store the position in the list is used as eid
        self.items = []
        # eid -> item
        self.eids = {}
        # entity type -> list of eids
        self.types = {}
        # set of (eid_from, rtype, eid_to) tuples
        self.relations = set()
        # index name -> {key: [values]}
        self.indexes = {}
        # optional callable used by rql() to execute queries
        self._rql = None
        # not used by this class' checkpoint(); see subclasses
        self._checkpoint = None

    def _put(self, type, item):
        """Store `item` and return the eid attributed to it."""
        self.items.append(item)
        return len(self.items) - 1

    def add(self, type, item):
        """Add the `item` dict as an entity of the given `type`.

        The attributed eid is set under the 'eid' key of `item`.
        """
        # the `type` argument shadows the builtin, hence `__class__` is used
        # to report the offending class (calling `type(item)` here would fail
        # since `type` is the entity type name)
        assert isinstance(item, dict), \
               'item is not a dict but a %s' % item.__class__
        eid = item['eid'] = self._put(type, item)
        self.eids[eid] = item
        self.types.setdefault(type, []).append(eid)

    def relate(self, eid_from, rtype, eid_to, inlined=False):
        """Add new relation (reverse type support is available)

        Return the recorded (eid_from, rtype, eid_to) tuple.

        >>> store.relate(1, 'in_group', 2)
        (1, 'in_group', 2)
        >>> store.relate(1, 'reverse_in_group', 2)
        (2, 'in_group', 1)
        """
        if rtype.startswith('reverse_'):
            eid_from, eid_to = eid_to, eid_from
            rtype = rtype[8:]  # strip the 'reverse_' prefix
        relation = eid_from, rtype, eid_to
        self.relations.add(relation)
        return relation

    def build_index(self, name, type, func=None):
        """Build the index `name` over entities of the given `type`.

        `func` is called on each item to compute its index key; when it is
        missing or not callable, the item's eid is used as key.
        """
        index = {}
        if func is None or not callable(func):
            func = lambda x: x['eid']
        for eid in self.types[type]:
            index.setdefault(func(self.eids[eid]), []).append(eid)
        assert index, "new index '%s' cannot be empty" % name
        self.indexes[name] = index

    def build_rqlindex(self, name, type, key, rql, rql_params=False, func=None):
        """build an index by rql query

        rql should return eid in first column
        ctl.store.build_rqlindex('index_name', 'users', 'login', 'Any U WHERE U is CWUser')

        `func`, when given and callable, computes the index key of each
        stored entity dict; else the value found under `key` is used.
        """
        rset = self.rql(rql, rql_params or {})
        # ensure the type is registered even for an empty result set, so the
        # assertion below is what fails (not a KeyError)
        eids = self.types.setdefault(type, [])
        for entity in rset.entities():
            getattr(entity, key) # autopopulate entity with key attribute
            self.eids[entity.eid] = dict(entity)
            if entity.eid not in eids:
                eids.append(entity.eid)
        assert eids, "new index type '%s' cannot be empty (0 record found)" % type

        # Build index with specified key (`func` used to be silently ignored)
        if func is None or not callable(func):
            func = lambda x: x[key]
        self.build_index(name, type, func)

    def fetch(self, name, key, unique=False, decorator=None):
        """Return the values recorded under `key` in index `name`.

        decorator is a callable method or an iterator of callable methods (usually a lambda function)
        decorator=lambda x: x[:1] (first value is returned)

        Each decorator is applied in turn on the list of values. When
        `unique` is true, exactly one value is expected and is returned
        instead of a list.
        """
        eids = self.indexes[name].get(key, [])
        if decorator is not None:
            if not hasattr(decorator, '__iter__'):
                decorator = (decorator,)
            for f in decorator:
                eids = f(eids)
        if unique:
            assert len(eids) == 1, u'expected a single one value for key "%s" in index "%s". Got %i' % (key, name, len(eids))
            eids = eids[0] # FIXME maybe it's better to keep an iterator here ?
        return eids

    def find(self, type, key, value):
        """Iterate over items of `type` whose `key` attribute equals `value`."""
        for eid in self.types[type]:
            # go through the eid mapping rather than `self.items`: eids are
            # not positions in `items` for entities registered by
            # build_rqlindex() or by subclasses
            item = self.eids[eid]
            if item[key] == value:
                yield item

    def rql(self, *args):
        """Execute a query using the `_rql` callable, if any (else return None)."""
        if self._rql is not None:
            return self._rql(*args)

    def checkpoint(self):
        """Nothing to do for the in-memory store."""
        pass

    @property
    def nb_inserted_entities(self):
        return len(self.eids)
    @property
    def nb_inserted_types(self):
        return len(self.types)
    @property
    def nb_inserted_relations(self):
        return len(self.relations)

    @deprecated('[3.6] get_many() deprecated. Use fetch() instead')
    def get_many(self, name, key):
        return self.fetch(name, key, unique=False)

    @deprecated('[3.6] get_one() deprecated. Use fetch(..., unique=True) instead')
    def get_one(self, name, key):
        return self.fetch(name, key, unique=True)
|
360 |
|
361 |
|
class RQLObjectStore(ObjectStore):
    """ObjectStore that works with an actual RQL repository (production mode)"""
    _rql = None # bw compat
    # default value so that a store built without session (bw compat usage
    # where `_rql` / `_checkpoint` are set afterwards) can still checkpoint
    session = None

    def __init__(self, session=None, checkpoint=None):
        """Initialize the store.

        :param session: either an object with a `set_pool` method, used as
          is, or a connection-like object from which a request is obtained
          using `request()`
        :param checkpoint: callable used to commit; defaults to the `commit`
          method of the connection / session
        """
        ObjectStore.__init__(self)
        if session is not None:
            if not hasattr(session, 'set_pool'):
                # connection
                cnx = session
                session = session.request()
                session.set_pool = lambda : None
                checkpoint = checkpoint or cnx.commit
            else:
                session.set_pool()
            self.session = session
            self._checkpoint = checkpoint or session.commit
        elif checkpoint is not None:
            self._checkpoint = checkpoint

    def checkpoint(self):
        """Call the checkpoint callable, then `set_pool()` on the session if any."""
        self._checkpoint()
        if self.session is not None:
            self.session.set_pool()

    def rql(self, *args):
        """Execute a query using `_rql` when set, else the session."""
        if self._rql is not None:
            return self._rql(*args)
        return self.session.execute(*args)

    def create_entity(self, *args, **kwargs):
        """Create an entity through the session and register it in the store.

        The first positional argument is used as the entity type.
        """
        entity = self.session.create_entity(*args, **kwargs)
        self.eids[entity.eid] = entity
        self.types.setdefault(args[0], []).append(entity.eid)
        return entity

    def _put(self, type, item):
        """Insert `item` with an INSERT query and return the first cell of the
        result (expected to be the new eid).
        """
        query = ('INSERT %s X: ' % type) + ', '.join('X %s %%(%s)s' % (k, k)
                                                     for k in item)
        return self.rql(query, item)[0][0]

    def relate(self, eid_from, rtype, eid_to, inlined=False):
        """Record the relation and insert it with a SET query.

        Return the (eid_from, rtype, eid_to) tuple, as the base class does.
        """
        # if reverse relation is found, eids are exchanged
        relation = super(RQLObjectStore, self).relate(eid_from, rtype, eid_to)
        eid_from, rtype, eid_to = relation
        self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
                  {'x': int(eid_from), 'y': int(eid_to)}, ('x', 'y'))
        return relation
|
409 |
|
410 |
|
411 # the import controller ######################################################## |
|
412 |
|
class CWImportController(object):
    """Controller of the data import process.

    >>> ctl = CWImportController(store)
    >>> ctl.generators = list_of_data_generators
    >>> ctl.data = dict_of_data_tables
    >>> ctl.run()

    `generators` is a list of (function, checks) pairs, where each check is
    a (key, check function, title, help) tuple. `errors` maps either a key
    given to `record_error()` to the list of recorded errors, or a check
    title to a (help, check result) tuple.
    """

    def __init__(self, store, askerror=0, catcherrors=None, tell=tell,
                 commitevery=50):
        """
        :param store: object store receiving imported data
        :param askerror: 0 to never display errors once the import is done,
          2 to always display them, any other true value to ask first
        :param catcherrors: when true, errors are recorded instead of being
          propagated; defaults to `askerror`
        :param tell: callable used to output messages
        :param commitevery: number of rows between two checkpoints in
          `iter_and_commit()`
        """
        self.store = store
        self.generators = None
        self.data = {}
        self.errors = None
        # reset for each generator by run(); initialized here so check() may
        # be used before
        self._checks = {}
        self.askerror = askerror
        if catcherrors is None:
            catcherrors = askerror
        self.catcherrors = catcherrors
        self.commitevery = commitevery # set to None to do a single commit
        self._tell = tell

    def check(self, type, key, value):
        """Append `value` to the bucket `key` of the check named `type`."""
        self._checks.setdefault(type, {}).setdefault(key, []).append(value)

    def check_map(self, entity, key, map, default):
        """Replace `entity[key]` by its image in `map`.

        When the value is not in `map`, it is recorded using `check()` and
        replaced by `default`.
        """
        try:
            entity[key] = map[entity[key]]
        except KeyError:
            self.check(key, entity[key], None)
            entity[key] = default

    def record_error(self, key, msg=None, type=None, value=None, tb=None):
        """Print and record an error under `key`.

        When `type` is None, the exception currently being handled is
        formatted, else the given (type, value, tb) triple is.
        """
        tmp = StringIO()
        if type is None:
            traceback.print_exc(file=tmp)
        else:
            traceback.print_exception(type, value, tb, file=tmp)
        print(tmp.getvalue())
        if self.errors is None:
            # called before run()
            self.errors = {}
        # use a list to avoid counting a <nb lines> errors instead of one
        errorlog = self.errors.setdefault(key, [])
        if msg is None:
            errorlog.append(tmp.getvalue().splitlines())
        else:
            errorlog.append( (msg, tmp.getvalue().splitlines()) )

    @staticmethod
    def _error_details(error):
        """Return a (title, details) pair for a value of `self.errors`."""
        if isinstance(error, tuple):
            # integrity check failure: (help, check function result)
            title, details = error
            return title, sorted(details)
        # list filled by record_error(): one item per recorded error
        return 'Unexpected error(s)', list(error)

    def run(self):
        """Call each generator, run its integrity checks, checkpoint the store
        then output a summary (and the errors, according to `askerror`).
        """
        self.errors = {}
        for func, checks in self.generators:
            self._checks = {}
            func_name = func.__name__
            if func_name.startswith('gen_'):
                func_name = func_name[4:]
            self.tell("Import '%s'..." % func_name)
            try:
                func(self)
            except Exception:
                # KeyboardInterrupt / SystemExit are not caught, consistently
                # with catch_error
                if self.catcherrors:
                    self.record_error(func_name, 'While calling %s' % func.__name__)
                else:
                    raise
            for key, checkfunc, title, help in checks:
                buckets = self._checks.get(key)
                if buckets:
                    err = checkfunc(buckets)
                    if err:
                        self.errors[title] = (help, err)
        self.store.checkpoint()
        # values of `self.errors` are either lists or (help, result) tuples,
        # `_error_details` handles both (indexing them blindly with [1]
        # failed on single recorded errors)
        nberrors = sum(len(self._error_details(error)[1])
                       for error in self.errors.values())
        self.tell('\nImport completed: %i entities, %i types, %i relations and %i errors'
                  % (self.store.nb_inserted_entities,
                     self.store.nb_inserted_types,
                     self.store.nb_inserted_relations,
                     nberrors))
        if self.errors:
            if self.askerror == 2 or (self.askerror and confirm('Display errors ?')):
                from pprint import pformat
                for errkey, error in self.errors.items():
                    title, details = self._error_details(error)
                    self.tell("\n%s (%s): %d\n" % (title, errkey, len(details)))
                    self.tell(pformat(details))

    def get_data(self, key):
        """Return the data table registered under `key`, or None."""
        return self.data.get(key)

    def index(self, name, key, value, unique=False):
        """create a new index

        If unique is set to True, only first occurence will be kept not the following ones
        """
        if unique:
            try:
                if value in self.store.indexes[name][key]:
                    return
            except KeyError:
                # we're sure that one is the first occurence; so continue...
                pass
        self.store.indexes.setdefault(name, {}).setdefault(key, []).append(value)

    def tell(self, msg):
        """Output `msg` using the callable given at initialization."""
        self._tell(msg)

    def iter_and_commit(self, datakey):
        """iter rows, triggering commit every self.commitevery iterations"""
        return commit_every(self.commitevery, self.store, self.get_data(datakey))
|
515 |
|
516 |
|
517 |
|
518 from datetime import datetime |
|
519 from cubicweb.schema import META_RTYPES, VIRTUAL_RTYPES |
|
520 |
|
521 |
|
class NoHookRQLObjectStore(RQLObjectStore):
    """RQL object store inserting entities and relations by calling the
    system source directly (`add_entity`, `add_info`, `add_relation`), with
    metadata computed by a `MetaGenerator`.

    Presumably meant to skip hooks, as the class name suggests.
    """
    _rql = None # bw compat

    def __init__(self, session, metagen=None, baseurl=None):
        """
        :param session: session giving access to the repository
        :param metagen: metadata generator; a `MetaGenerator` is built from
          `session` and `baseurl` when None
        :param baseurl: base url given to the default metadata generator
        """
        super(NoHookRQLObjectStore, self).__init__(session)
        self.source = session.repo.system_source
        self.rschema = session.repo.schema.rschema
        self.add_relation = self.source.add_relation
        if metagen is None:
            metagen = MetaGenerator(session, baseurl)
        self.metagen = metagen
        self._nb_inserted_entities = 0
        self._nb_inserted_types = 0
        self._nb_inserted_relations = 0
        # entity types seen by create_entity(), to maintain the counter above
        self._inserted_etypes = set()
        self.rql = session.unsafe_execute

    def create_entity(self, etype, **kwargs):
        """Insert a new entity of type `etype` and return it.

        Keyword arguments are the entity's attributes; values having an
        `eid` attribute are replaced by that eid.
        """
        for k, v in list(kwargs.items()):
            kwargs[k] = getattr(v, 'eid', v)
        entity, rels = self.metagen.base_etype_dicts(etype)
        entity = copy(entity)
        entity._related_cache = {}
        self.metagen.init_entity(entity)
        entity.update(kwargs)
        session = self.session
        self.source.add_entity(session, entity)
        self.source.add_info(session, entity, self.source, complete=False)
        for rtype, targeteids in rels.items():
            inlined = self.rschema(rtype).inlined
            # targeteids may be a single eid or a list of eids; only the
            # iterability test is guarded so a TypeError raised by
            # add_relation() itself is not masked
            try:
                targeteids = iter(targeteids)
            except TypeError:
                targeteids = (targeteids,)
            for targeteid in targeteids:
                self.add_relation(session, entity.eid, rtype, targeteid,
                                  inlined)
        self._nb_inserted_entities += 1
        if etype not in self._inserted_etypes:
            self._inserted_etypes.add(etype)
            self._nb_inserted_types += 1
        return entity

    def relate(self, eid_from, rtype, eid_to, inlined=False):
        """Insert the relation through the system source.

        `inlined` is accepted for compatibility with the parent classes'
        signature but ignored: the value is read from the schema. Reverse
        relation types are not supported here.
        """
        assert not rtype.startswith('reverse_')
        self.add_relation(self.session, eid_from, rtype, eid_to,
                          self.rschema(rtype).inlined)
        self._nb_inserted_relations += 1

    @property
    def nb_inserted_entities(self):
        return self._nb_inserted_entities
    @property
    def nb_inserted_types(self):
        return self._nb_inserted_types
    @property
    def nb_inserted_relations(self):
        return self._nb_inserted_relations

    def _put(self, type, item):
        raise RuntimeError('use create entity')
|
581 |
|
582 |
|
class MetaGenerator(object):
    """Generate values of metadata attributes and relations for new entities.

    The value for a relation type `rtype` is computed by the `gen_<rtype>`
    method, which takes the entity as argument.
    """
    def __init__(self, session, baseurl=None):
        """
        :param session: session used to access the schema, the system
          source and the user
        :param baseurl: url prefix used to build `cwuri`; taken from the
          instance configuration when None. A trailing '/' is added when
          missing.
        """
        self.session = session
        self.source = session.repo.system_source
        # single timestamp shared by creation and modification dates
        self.time = datetime.now()
        if baseurl is None:
            config = session.vreg.config
            baseurl = config['base-url'] or config.default_base_url()
        # endswith() also copes with an empty string
        if not baseurl.endswith('/'):
            baseurl += '/'
        self.baseurl = baseurl
        # attributes/relations shared by all entities of the same type
        self.etype_attrs = []
        self.etype_rels = []
        # attributes/relations specific to each entity
        self.entity_attrs = ['eid', 'cwuri']
        #self.entity_rels = [] XXX not handled (YAGNI?)
        schema = session.vreg.schema
        rschema = schema.rschema
        for rtype in META_RTYPES:
            if rtype in ('eid', 'cwuri') or rtype in VIRTUAL_RTYPES:
                continue
            if rschema(rtype).final:
                self.etype_attrs.append(rtype)
            else:
                self.etype_rels.append(rtype)
        if not schema._eid_index:
            # test schema loaded from the fs
            self.gen_is = self.test_gen_is
            self.gen_is_instance_of = self.test_gen_is_instanceof

    @cached
    def base_etype_dicts(self, etype):
        """Return an (entity, relations dict) pair holding metadata shared by
        all entities of type `etype`.
        """
        entity = self.session.vreg['etypes'].etype_class(etype)(self.session)
        # entity are "surface" copied, avoid shared dict between copies
        del entity.cw_extra_kwargs
        for attr in self.etype_attrs:
            entity[attr] = self.generate(entity, attr)
        rels = {}
        for rel in self.etype_rels:
            rels[rel] = self.generate(entity, rel)
        return entity, rels

    def init_entity(self, entity):
        """Set metadata specific to `entity` (see `entity_attrs`)."""
        for attr in self.entity_attrs:
            entity[attr] = self.generate(entity, attr)
        entity.eid = entity['eid']

    def generate(self, entity, rtype):
        """Return the value of `rtype` for `entity`, using `gen_<rtype>`."""
        return getattr(self, 'gen_%s' % rtype)(entity)

    def gen_eid(self, entity):
        return self.source.create_eid(self.session)

    def gen_cwuri(self, entity):
        return u'%seid/%s' % (self.baseurl, entity['eid'])

    def gen_creation_date(self, entity):
        return self.time
    def gen_modification_date(self, entity):
        return self.time

    def gen_is(self, entity):
        return entity.e_schema.eid
    def gen_is_instance_of(self, entity):
        # one eid per ancestor type plus the entity's own type (the entity's
        # own type eid used to be repeated for each ancestor)
        return [eschema.eid
                for eschema in entity.e_schema.ancestors() + [entity.e_schema]]

    def gen_created_by(self, entity):
        return self.session.user.eid
    def gen_owned_by(self, entity):
        return self.session.user.eid

    # implementations of gen_is / gen_is_instance_of to use during test where
    # schema has been loaded from the fs (hence entity type schema eids are not
    # known)
    def test_gen_is(self, entity):
        from cubicweb.hooks.metadata import eschema_eid
        return eschema_eid(self.session, entity.e_schema)
    def test_gen_is_instanceof(self, entity):
        from cubicweb.hooks.metadata import eschema_eid
        eids = []
        for eschema in entity.e_schema.ancestors() + [entity.e_schema]:
            eids.append(eschema_eid(self.session, eschema))
        return eids
|
670 |
|
671 |
|
672 ################################################################################ |
|
673 |
|
# backward compatible name for `ucsvreader`, wrapped by `deprecated` with the
# message below
utf8csvreader = deprecated('[3.6] use ucsvreader instead')(ucsvreader)
|
675 |
|
@deprecated('[3.6] use required')
def nonempty(value):
    """Deprecated name of the `required` checker, to which `value` is given."""
    checked = required(value)
    return checked
|
679 |
|
@deprecated("[3.6] use call_check_method('isdigit')")
def alldigits(txt):
    """Return `txt` when `txt.isdigit()` is true, else an empty unicode string."""
    return txt if txt.isdigit() else u''
|
686 |
|
@deprecated("[3.7] too specific, will move away, copy me")
def capitalize_if_unicase(txt):
    """Return `txt` capitalized when it is all upper case or all lower case,
    else unchanged.
    """
    mixed_case = not (txt.isupper() or txt.islower())
    if mixed_case:
        return txt
    return txt.capitalize()
|
692 |
|
@deprecated("[3.7] too specific, will move away, copy me")
def yesno(value):
    """Simple heuristic turning a string into a boolean.

    Return True when the first character of `value`, once lower-cased, is
    one of 'y', 'o' or '1'; False otherwise, including for an empty value.

    >>> yesno("Yes")
    True
    >>> yesno("oui")
    True
    >>> yesno("1")
    True
    >>> yesno("11")
    True
    >>> yesno("")
    False
    >>> yesno("Non")
    False
    >>> yesno("blablabla")
    False
    """
    if not value:
        return False
    first = value.lower()[0]
    return first in 'yo1'
|
715 |
|
@deprecated("[3.7] use call_check_method('isalpha')")
def isalpha(value):
    """Return `value` when `value.isalpha()` is true, else raise ValueError."""
    if not value.isalpha():
        raise ValueError("not all characters in the string alphabetic")
    return value
|
721 |
|
@deprecated("[3.7] use call_transform_method('upper')")
def uppercase(txt):
    """Return the result of `txt.upper()`."""
    upper = txt.upper()
    return upper
|
725 |
|
@deprecated("[3.7] use call_transform_method('lower')")
def lowercase(txt):
    """Return the result of `txt.lower()`."""
    lower = txt.lower()
    return lower
|
729 |
|
@deprecated("[3.7] use call_transform_method('replace', ' ', '')")
def no_space(txt):
    """Return `txt` with every space character removed."""
    stripped = txt.replace(' ', '')
    return stripped
|
733 |
|
@deprecated("[3.7] use call_transform_method('replace', u'\xa0', '')")
def no_uspace(txt):
    """Return `txt` with every u'\\xa0' character removed."""
    stripped = txt.replace(u'\xa0', '')
    return stripped
|
737 |
|
@deprecated("[3.7] use call_transform_method('replace', '-', '')")
def no_dash(txt):
    """Return `txt` with every '-' character removed."""
    stripped = txt.replace('-', '')
    return stripped
|
741 |
|
@deprecated("[3.7] use call_transform_method('strip')")
def strip(txt):
    """Return the result of `txt.strip()`."""
    stripped = txt.strip()
    return stripped
|
745 |
|
@deprecated("[3.7] use call_transform_method('replace', ',', '.'), float")
def decimal(value):
    """Return `value` as a float, accepting ',' as decimal separator.

    This inlines the replacement given by the deprecation message instead of
    calling a `comma_float` helper which is not defined in the visible part
    of this module.

    :raise ValueError: if the string can't be turned into a float
    """
    return float(value.replace(',', '.'))
|
749 |
|
@deprecated('[3.7] use int builtin')
def integer(value):
    """Return `value` converted using the `int` builtin."""
    number = int(value)
    return number