dataimport.py
changeset 4847 9466604ef448
parent 4818 9f9bfbcdecfd
child 4912 9767cc516b4f
equal deleted inserted replaced
4845:dc351b96f596 4847:9466604ef448
       
     1 # -*- coding: utf-8 -*-
       
     2 """This module provides tools to import tabular data.
       
     3 
       
     4 :organization: Logilab
       
     5 :copyright: 2001-2010 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.
       
     6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     7 :license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses
       
     8 
       
     9 
       
    10 Example of use (run this with `cubicweb-ctl shell instance import-script.py`):
       
    11 
       
    12 .. sourcecode:: python
       
    13 
       
    14   from cubicweb.devtools.dataimport import *
       
    15   # define data generators
       
    16   GENERATORS = []
       
    17 
       
    18   USERS = [('Prenom', 'firstname', ()),
       
    19            ('Nom', 'surname', ()),
       
    20            ('Identifiant', 'login', ()),
       
    21            ]
       
    22 
       
    23   def gen_users(ctl):
       
    24       for row in ctl.get_data('utilisateurs'):
       
    25           entity = mk_entity(row, USERS)
       
    26           entity['upassword'] = u'motdepasse'
       
    27           ctl.check('login', entity['login'], None)
       
    28           ctl.store.add('CWUser', entity)
       
    29           email = {'address': row['email']}
       
    30           ctl.store.add('EmailAddress', email)
       
    31           ctl.store.relate(entity['eid'], 'use_email', email['eid'])
       
    32           ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']})
       
    33 
       
    34   CHK = [('login', check_doubles, 'Utilisateurs Login',
       
    35           'Deux utilisateurs ne devraient pas avoir le même login.'),
       
    36          ]
       
    37 
       
    38   GENERATORS.append( (gen_users, CHK) )
       
    39 
       
    40   # create controller
       
    41   ctl = CWImportController(RQLObjectStore())
       
    42   ctl.askerror = 1
       
    43   ctl.generators = GENERATORS
       
    44   ctl.store._checkpoint = checkpoint
       
    45   ctl.store._rql = rql
       
     46   ctl.data['utilisateurs'] = lazytable(ucsvreader(open('users.csv')))
       
    47   # run
       
    48   ctl.run()
       
    49   sys.exit(0)
       
    50 
       
    51 
       
     52 .. BUG a single-column file causes a parsing problem
       
    53 .. TODO rollback()
       
    54 """
       
    55 __docformat__ = "restructuredtext en"
       
    56 
       
    57 import sys
       
    58 import csv
       
    59 import traceback
       
    60 import os.path as osp
       
    61 from StringIO import StringIO
       
    62 from copy import copy
       
    63 
       
    64 from logilab.common import shellutils
       
    65 from logilab.common.date import strptime
       
    66 from logilab.common.decorators import cached
       
    67 from logilab.common.deprecation import deprecated
       
    68 
       
    69 
       
    70 def ucsvreader_pb(filepath, encoding='utf-8', separator=',', quote='"',
       
    71                   skipfirst=False, withpb=True):
       
    72     """same as ucsvreader but a progress bar is displayed as we iter on rows"""
       
    73     if not osp.exists(filepath):
       
    74         raise Exception("file doesn't exists: %s" % filepath)
       
    75     rowcount = int(shellutils.Execute('wc -l "%s"' % filepath).out.strip().split()[0])
       
    76     if skipfirst:
       
    77         rowcount -= 1
       
    78     if withpb:
       
    79         pb = shellutils.ProgressBar(rowcount, 50)
       
    80     for urow in ucsvreader(file(filepath), encoding, separator, quote, skipfirst):
       
    81         yield urow
       
    82         if withpb:
       
    83             pb.update()
       
    84     print ' %s rows imported' % rowcount
       
    85 
       
    86 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
       
    87                skipfirst=False):
       
    88     """A csv reader that accepts files with any encoding and outputs unicode
       
    89     strings
       
    90     """
       
    91     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
       
    92     if skipfirst:
       
    93         it.next()
       
    94     for row in it:
       
    95         yield [item.decode(encoding) for item in row]
       
    96 
       
    97 def commit_every(nbit, store, it):
       
    98     for i, x in enumerate(it):
       
    99         yield x
       
   100         if nbit is not None and i % nbit:
       
   101             store.checkpoint()
       
   102     if nbit is not None:
       
   103         store.checkpoint()
       
   104 
       
   105 def lazytable(reader):
       
   106     """The first row is taken to be the header of the table and
       
   107     used to output a dict for each row of data.
       
   108 
       
   109     >>> data = lazytable(utf8csvreader(open(filename)))
       
   110     """
       
   111     header = reader.next()
       
   112     for row in reader:
       
   113         yield dict(zip(header, row))
       
   114 
       
   115 def mk_entity(row, map):
       
   116     """Return a dict made from sanitized mapped values.
       
   117 
       
   118     ValidationError can be raised on unexpected values found in checkers
       
   119 
       
   120     >>> row = {'myname': u'dupont'}
       
   121     >>> map = [('myname', u'name', (capitalize_if_unicase,))]
       
   122     >>> mk_entity(row, map)
       
   123     {'name': u'Dupont'}
       
   124     >>> row = {'myname': u'dupont', 'optname': u''}
       
   125     >>> map = [('myname', u'name', (capitalize_if_unicase,)),
       
   126     ...        ('optname', u'MARKER', (optional,))]
       
   127     >>> mk_entity(row, map)
       
   128     {'name': u'Dupont'}
       
   129     """
       
   130     res = {}
       
   131     assert isinstance(row, dict)
       
   132     assert isinstance(map, list)
       
   133     for src, dest, funcs in map:
       
   134         assert not (required in funcs and optional in funcs), \
       
   135                "optional and required checks are exclusive"
       
   136         res[dest] = row[src]
       
   137         try:
       
   138             for func in funcs:
       
   139                 res[dest] = func(res[dest])
       
   140                 if res[dest] is None:
       
   141                     break
       
   142         except ValueError, err:
       
   143             raise ValueError('error with %r field: %s' % (src, err))
       
   144     return res
       
   145 
       
   146 
       
   147 # user interactions ############################################################
       
   148 
       
   149 def tell(msg):
       
   150     print msg
       
   151 
       
   152 def confirm(question):
       
   153     """A confirm function that asks for yes/no/abort and exits on abort."""
       
   154     answer = shellutils.ASK.ask(question, ('Y', 'n', 'abort'), 'Y')
       
   155     if answer == 'abort':
       
   156         sys.exit(1)
       
   157     return answer == 'Y'
       
   158 
       
   159 
       
   160 class catch_error(object):
       
   161     """Helper for @contextmanager decorator."""
       
   162 
       
   163     def __init__(self, ctl, key='unexpected error', msg=None):
       
   164         self.ctl = ctl
       
   165         self.key = key
       
   166         self.msg = msg
       
   167 
       
   168     def __enter__(self):
       
   169         return self
       
   170 
       
   171     def __exit__(self, type, value, traceback):
       
   172         if type is not None:
       
   173             if issubclass(type, (KeyboardInterrupt, SystemExit)):
       
   174                 return # re-raise
       
   175             if self.ctl.catcherrors:
       
   176                 self.ctl.record_error(self.key, None, type, value, traceback)
       
   177                 return True # silent
       
   178 
       
   179 
       
   180 # base sanitizing/coercing functions ###########################################
       
   181 
       
   182 def optional(value):
       
   183     """validation error will not been raised if you add this checker in chain"""
       
   184     if value:
       
   185         return value
       
   186     return None
       
   187 
       
   188 def required(value):
       
   189     """raise ValueError is value is empty
       
   190 
       
   191     This check should be often found in last position in the chain.
       
   192     """
       
   193     if value:
       
   194         return value
       
   195     raise ValueError("required")
       
   196 
       
   197 def todatetime(format='%d/%m/%Y'):
       
   198     """return a transformation function to turn string input value into a
       
   199     `datetime.datetime` instance, using given format.
       
   200 
       
   201     Follow it by `todate` or `totime` functions from `logilab.common.date` if
       
   202     you want a `date`/`time` instance instead of `datetime`.
       
   203     """
       
   204     def coerce(value):
       
   205         return strptime(value, format)
       
   206     return coerce
       
   207 
       
   208 def call_transform_method(methodname, *args, **kwargs):
       
   209     """return value returned by calling the given method on input"""
       
   210     def coerce(value):
       
   211         return getattr(value, methodname)(*args, **kwargs)
       
   212     return coerce
       
   213 
       
   214 def call_check_method(methodname, *args, **kwargs):
       
   215     """check value returned by calling the given method on input is true,
       
   216     else raise ValueError
       
   217     """
       
   218     def check(value):
       
   219         if getattr(value, methodname)(*args, **kwargs):
       
   220             return value
       
   221         raise ValueError('%s not verified on %r' % (methodname, value))
       
   222     return check
       
   223 
       
   224 # base integrity checking functions ############################################
       
   225 
       
   226 def check_doubles(buckets):
       
   227     """Extract the keys that have more than one item in their bucket."""
       
   228     return [(k, len(v)) for k, v in buckets.items() if len(v) > 1]
       
   229 
       
   230 def check_doubles_not_none(buckets):
       
   231     """Extract the keys that have more than one item in their bucket."""
       
   232     return [(k, len(v)) for k, v in buckets.items()
       
   233             if k is not None and len(v) > 1]
       
   234 
       
   235 
       
   236 # object stores #################################################################
       
   237 
       
   238 class ObjectStore(object):
       
   239     """Store objects in memory for *faster* validation (development mode)
       
   240 
       
   241     But it will not enforce the constraints of the schema and hence will miss some problems
       
   242 
       
   243     >>> store = ObjectStore()
       
   244     >>> user = {'login': 'johndoe'}
       
   245     >>> store.add('CWUser', user)
       
   246     >>> group = {'name': 'unknown'}
       
   247     >>> store.add('CWUser', group)
       
   248     >>> store.relate(user['eid'], 'in_group', group['eid'])
       
   249     """
       
   250     def __init__(self):
       
   251         self.items = []
       
   252         self.eids = {}
       
   253         self.types = {}
       
   254         self.relations = set()
       
   255         self.indexes = {}
       
   256         self._rql = None
       
   257         self._checkpoint = None
       
   258 
       
   259     def _put(self, type, item):
       
   260         self.items.append(item)
       
   261         return len(self.items) - 1
       
   262 
       
   263     def add(self, type, item):
       
   264         assert isinstance(item, dict), 'item is not a dict but a %s' % type(item)
       
   265         eid = item['eid'] = self._put(type, item)
       
   266         self.eids[eid] = item
       
   267         self.types.setdefault(type, []).append(eid)
       
   268 
       
   269     def relate(self, eid_from, rtype, eid_to, inlined=False):
       
   270         """Add new relation (reverse type support is available)
       
   271 
       
   272         >>> 1,2 = eid_from, eid_to
       
   273         >>> self.relate(eid_from, 'in_group', eid_to)
       
   274         1, 'in_group', 2
       
   275         >>> self.relate(eid_from, 'reverse_in_group', eid_to)
       
   276         2, 'in_group', 1
       
   277         """
       
   278         if rtype.startswith('reverse_'):
       
   279             eid_from, eid_to = eid_to, eid_from
       
   280             rtype = rtype[8:]
       
   281         relation = eid_from, rtype, eid_to
       
   282         self.relations.add(relation)
       
   283         return relation
       
   284 
       
   285     def build_index(self, name, type, func=None):
       
   286         index = {}
       
   287         if func is None or not callable(func):
       
   288             func = lambda x: x['eid']
       
   289         for eid in self.types[type]:
       
   290             index.setdefault(func(self.eids[eid]), []).append(eid)
       
   291         assert index, "new index '%s' cannot be empty" % name
       
   292         self.indexes[name] = index
       
   293 
       
   294     def build_rqlindex(self, name, type, key, rql, rql_params=False, func=None):
       
   295         """build an index by rql query
       
   296 
       
   297         rql should return eid in first column
       
   298         ctl.store.build_index('index_name', 'users', 'login', 'Any U WHERE U is CWUser')
       
   299         """
       
   300         rset = self.rql(rql, rql_params or {})
       
   301         for entity in rset.entities():
       
   302             getattr(entity, key) # autopopulate entity with key attribute
       
   303             self.eids[entity.eid] = dict(entity)
       
   304             if entity.eid not in self.types.setdefault(type, []):
       
   305                 self.types[type].append(entity.eid)
       
   306         assert self.types[type], "new index type '%s' cannot be empty (0 record found)" % type
       
   307 
       
   308         # Build index with specified key
       
   309         func = lambda x: x[key]
       
   310         self.build_index(name, type, func)
       
   311 
       
   312     def fetch(self, name, key, unique=False, decorator=None):
       
   313         """
       
   314             decorator is a callable method or an iterator of callable methods (usually a lambda function)
       
   315             decorator=lambda x: x[:1] (first value is returned)
       
   316 
       
   317             We can use validation check function available in _entity
       
   318         """
       
   319         eids = self.indexes[name].get(key, [])
       
   320         if decorator is not None:
       
   321             if not hasattr(decorator, '__iter__'):
       
   322                 decorator = (decorator,)
       
   323             for f in decorator:
       
   324                 eids = f(eids)
       
   325         if unique:
       
   326             assert len(eids) == 1, u'expected a single one value for key "%s" in index "%s". Got %i' % (key, name, len(eids))
       
   327             eids = eids[0] # FIXME maybe it's better to keep an iterator here ?
       
   328         return eids
       
   329 
       
   330     def find(self, type, key, value):
       
   331         for idx in self.types[type]:
       
   332             item = self.items[idx]
       
   333             if item[key] == value:
       
   334                 yield item
       
   335 
       
   336     def rql(self, *args):
       
   337         if self._rql is not None:
       
   338             return self._rql(*args)
       
   339 
       
   340     def checkpoint(self):
       
   341         pass
       
   342 
       
   343     @property
       
   344     def nb_inserted_entities(self):
       
   345         return len(self.eids)
       
   346     @property
       
   347     def nb_inserted_types(self):
       
   348         return len(self.types)
       
   349     @property
       
   350     def nb_inserted_relations(self):
       
   351         return len(self.relations)
       
   352 
       
   353     @deprecated('[3.6] get_many() deprecated. Use fetch() instead')
       
   354     def get_many(self, name, key):
       
   355         return self.fetch(name, key, unique=False)
       
   356 
       
   357     @deprecated('[3.6] get_one() deprecated. Use fetch(..., unique=True) instead')
       
   358     def get_one(self, name, key):
       
   359         return self.fetch(name, key, unique=True)
       
   360 
       
   361 
       
   362 class RQLObjectStore(ObjectStore):
       
   363     """ObjectStore that works with an actual RQL repository (production mode)"""
       
   364     _rql = None # bw compat
       
   365 
       
   366     def __init__(self, session=None, checkpoint=None):
       
   367         ObjectStore.__init__(self)
       
   368         if session is not None:
       
   369             if not hasattr(session, 'set_pool'):
       
   370                 # connection
       
   371                 cnx = session
       
   372                 session = session.request()
       
   373                 session.set_pool = lambda : None
       
   374                 checkpoint = checkpoint or cnx.commit
       
   375             else:
       
   376                 session.set_pool()
       
   377             self.session = session
       
   378             self._checkpoint = checkpoint or session.commit
       
   379         elif checkpoint is not None:
       
   380             self._checkpoint = checkpoint
       
   381             # XXX .session
       
   382 
       
   383     def checkpoint(self):
       
   384         self._checkpoint()
       
   385         self.session.set_pool()
       
   386 
       
   387     def rql(self, *args):
       
   388         if self._rql is not None:
       
   389             return self._rql(*args)
       
   390         return self.session.execute(*args)
       
   391 
       
   392     def create_entity(self, *args, **kwargs):
       
   393         entity = self.session.create_entity(*args, **kwargs)
       
   394         self.eids[entity.eid] = entity
       
   395         self.types.setdefault(args[0], []).append(entity.eid)
       
   396         return entity
       
   397 
       
   398     def _put(self, type, item):
       
   399         query = ('INSERT %s X: ' % type) + ', '.join('X %s %%(%s)s' % (k, k)
       
   400                                                      for k in item)
       
   401         return self.rql(query, item)[0][0]
       
   402 
       
   403     def relate(self, eid_from, rtype, eid_to, inlined=False):
       
   404         # if reverse relation is found, eids are exchanged
       
   405         eid_from, rtype, eid_to = super(RQLObjectStore, self).relate(
       
   406             eid_from, rtype, eid_to)
       
   407         self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
       
   408                   {'x': int(eid_from), 'y': int(eid_to)}, ('x', 'y'))
       
   409 
       
   410 
       
   411 # the import controller ########################################################
       
   412 
       
   413 class CWImportController(object):
       
   414     """Controller of the data import process.
       
   415 
       
   416     >>> ctl = CWImportController(store)
       
   417     >>> ctl.generators = list_of_data_generators
       
   418     >>> ctl.data = dict_of_data_tables
       
   419     >>> ctl.run()
       
   420     """
       
   421 
       
   422     def __init__(self, store, askerror=0, catcherrors=None, tell=tell,
       
   423                  commitevery=50):
       
   424         self.store = store
       
   425         self.generators = None
       
   426         self.data = {}
       
   427         self.errors = None
       
   428         self.askerror = askerror
       
   429         if  catcherrors is None:
       
   430             catcherrors = askerror
       
   431         self.catcherrors = catcherrors
       
   432         self.commitevery = commitevery # set to None to do a single commit
       
   433         self._tell = tell
       
   434 
       
   435     def check(self, type, key, value):
       
   436         self._checks.setdefault(type, {}).setdefault(key, []).append(value)
       
   437 
       
   438     def check_map(self, entity, key, map, default):
       
   439         try:
       
   440             entity[key] = map[entity[key]]
       
   441         except KeyError:
       
   442             self.check(key, entity[key], None)
       
   443             entity[key] = default
       
   444 
       
   445     def record_error(self, key, msg=None, type=None, value=None, tb=None):
       
   446         tmp = StringIO()
       
   447         if type is None:
       
   448             traceback.print_exc(file=tmp)
       
   449         else:
       
   450             traceback.print_exception(type, value, tb, file=tmp)
       
   451         print tmp.getvalue()
       
   452         # use a list to avoid counting a <nb lines> errors instead of one
       
   453         errorlog = self.errors.setdefault(key, [])
       
   454         if msg is None:
       
   455             errorlog.append(tmp.getvalue().splitlines())
       
   456         else:
       
   457             errorlog.append( (msg, tmp.getvalue().splitlines()) )
       
   458 
       
   459     def run(self):
       
   460         self.errors = {}
       
   461         for func, checks in self.generators:
       
   462             self._checks = {}
       
   463             func_name = func.__name__[4:]  # XXX
       
   464             self.tell("Import '%s'..." % func_name)
       
   465             try:
       
   466                 func(self)
       
   467             except:
       
   468                 if self.catcherrors:
       
   469                     self.record_error(func_name, 'While calling %s' % func.__name__)
       
   470                 else:
       
   471                     raise
       
   472             for key, func, title, help in checks:
       
   473                 buckets = self._checks.get(key)
       
   474                 if buckets:
       
   475                     err = func(buckets)
       
   476                     if err:
       
   477                         self.errors[title] = (help, err)
       
   478         self.store.checkpoint()
       
   479         nberrors = sum(len(err[1]) for err in self.errors.values())
       
   480         self.tell('\nImport completed: %i entities, %i types, %i relations and %i errors'
       
   481                   % (self.store.nb_inserted_entities,
       
   482                      self.store.nb_inserted_types,
       
   483                      self.store.nb_inserted_relations,
       
   484                      nberrors))
       
   485         if self.errors:
       
   486             if self.askerror == 2 or (self.askerror and confirm('Display errors ?')):
       
   487                 from pprint import pformat
       
   488                 for errkey, error in self.errors.items():
       
   489                     self.tell("\n%s (%s): %d\n" % (error[0], errkey, len(error[1])))
       
   490                     self.tell(pformat(sorted(error[1])))
       
   491 
       
   492     def get_data(self, key):
       
   493         return self.data.get(key)
       
   494 
       
   495     def index(self, name, key, value, unique=False):
       
   496         """create a new index
       
   497 
       
   498         If unique is set to True, only first occurence will be kept not the following ones
       
   499         """
       
   500         if unique:
       
   501             try:
       
   502                 if value in self.store.indexes[name][key]:
       
   503                     return
       
   504             except KeyError:
       
   505                 # we're sure that one is the first occurence; so continue...
       
   506                 pass
       
   507         self.store.indexes.setdefault(name, {}).setdefault(key, []).append(value)
       
   508 
       
   509     def tell(self, msg):
       
   510         self._tell(msg)
       
   511 
       
   512     def iter_and_commit(self, datakey):
       
   513         """iter rows, triggering commit every self.commitevery iterations"""
       
   514         return commit_every(self.commitevery, self.store, self.get_data(datakey))
       
   515 
       
   516 
       
   517 
       
   518 from datetime import datetime
       
   519 from cubicweb.schema import META_RTYPES, VIRTUAL_RTYPES
       
   520 
       
   521 
       
   522 class NoHookRQLObjectStore(RQLObjectStore):
       
   523     """ObjectStore that works with an actual RQL repository (production mode)"""
       
   524     _rql = None # bw compat
       
   525 
       
   526     def __init__(self, session, metagen=None, baseurl=None):
       
   527         super(NoHookRQLObjectStore, self).__init__(session)
       
   528         self.source = session.repo.system_source
       
   529         self.rschema = session.repo.schema.rschema
       
   530         self.add_relation = self.source.add_relation
       
   531         if metagen is None:
       
   532             metagen = MetaGenerator(session, baseurl)
       
   533         self.metagen = metagen
       
   534         self._nb_inserted_entities = 0
       
   535         self._nb_inserted_types = 0
       
   536         self._nb_inserted_relations = 0
       
   537         self.rql = session.unsafe_execute
       
   538 
       
   539     def create_entity(self, etype, **kwargs):
       
   540         for k, v in kwargs.iteritems():
       
   541             kwargs[k] = getattr(v, 'eid', v)
       
   542         entity, rels = self.metagen.base_etype_dicts(etype)
       
   543         entity = copy(entity)
       
   544         entity._related_cache = {}
       
   545         self.metagen.init_entity(entity)
       
   546         entity.update(kwargs)
       
   547         session = self.session
       
   548         self.source.add_entity(session, entity)
       
   549         self.source.add_info(session, entity, self.source, complete=False)
       
   550         for rtype, targeteids in rels.iteritems():
       
   551             # targeteids may be a single eid or a list of eids
       
   552             inlined = self.rschema(rtype).inlined
       
   553             try:
       
   554                 for targeteid in targeteids:
       
   555                     self.add_relation(session, entity.eid, rtype, targeteid,
       
   556                                       inlined)
       
   557             except TypeError:
       
   558                 self.add_relation(session, entity.eid, rtype, targeteids,
       
   559                                   inlined)
       
   560         self._nb_inserted_entities += 1
       
   561         return entity
       
   562 
       
   563     def relate(self, eid_from, rtype, eid_to):
       
   564         assert not rtype.startswith('reverse_')
       
   565         self.add_relation(self.session, eid_from, rtype, eid_to,
       
   566                           self.rschema(rtype).inlined)
       
   567         self._nb_inserted_relations += 1
       
   568 
       
   569     @property
       
   570     def nb_inserted_entities(self):
       
   571         return self._nb_inserted_entities
       
   572     @property
       
   573     def nb_inserted_types(self):
       
   574         return self._nb_inserted_types
       
   575     @property
       
   576     def nb_inserted_relations(self):
       
   577         return self._nb_inserted_relations
       
   578 
       
   579     def _put(self, type, item):
       
   580         raise RuntimeError('use create entity')
       
   581 
       
   582 
       
   583 class MetaGenerator(object):
       
   584     def __init__(self, session, baseurl=None):
       
   585         self.session = session
       
   586         self.source = session.repo.system_source
       
   587         self.time = datetime.now()
       
   588         if baseurl is None:
       
   589             config = session.vreg.config
       
   590             baseurl = config['base-url'] or config.default_base_url()
       
   591         if not baseurl[-1] == '/':
       
   592             baseurl += '/'
       
   593         self.baseurl =  baseurl
       
   594         # attributes/relations shared by all entities of the same type
       
   595         self.etype_attrs = []
       
   596         self.etype_rels = []
       
   597         # attributes/relations specific to each entity
       
   598         self.entity_attrs = ['eid', 'cwuri']
       
   599         #self.entity_rels = [] XXX not handled (YAGNI?)
       
   600         schema = session.vreg.schema
       
   601         rschema = schema.rschema
       
   602         for rtype in META_RTYPES:
       
   603             if rtype in ('eid', 'cwuri') or rtype in VIRTUAL_RTYPES:
       
   604                 continue
       
   605             if rschema(rtype).final:
       
   606                 self.etype_attrs.append(rtype)
       
   607             else:
       
   608                 self.etype_rels.append(rtype)
       
   609         if not schema._eid_index:
       
   610             # test schema loaded from the fs
       
   611             self.gen_is = self.test_gen_is
       
   612             self.gen_is_instance_of = self.test_gen_is_instanceof
       
   613 
       
   614     @cached
       
   615     def base_etype_dicts(self, etype):
       
   616         entity = self.session.vreg['etypes'].etype_class(etype)(self.session)
       
   617         # entity are "surface" copied, avoid shared dict between copies
       
   618         del entity.cw_extra_kwargs
       
   619         for attr in self.etype_attrs:
       
   620             entity[attr] = self.generate(entity, attr)
       
   621         rels = {}
       
   622         for rel in self.etype_rels:
       
   623             rels[rel] = self.generate(entity, rel)
       
   624         return entity, rels
       
   625 
       
   626     def init_entity(self, entity):
       
   627         for attr in self.entity_attrs:
       
   628             entity[attr] = self.generate(entity, attr)
       
   629         entity.eid = entity['eid']
       
   630 
       
   631     def generate(self, entity, rtype):
       
   632         return getattr(self, 'gen_%s' % rtype)(entity)
       
   633 
       
   634     def gen_eid(self, entity):
       
   635         return self.source.create_eid(self.session)
       
   636 
       
   637     def gen_cwuri(self, entity):
       
   638         return u'%seid/%s' % (self.baseurl, entity['eid'])
       
   639 
       
   640     def gen_creation_date(self, entity):
       
   641         return self.time
       
   642     def gen_modification_date(self, entity):
       
   643         return self.time
       
   644 
       
   645     def gen_is(self, entity):
       
   646         return entity.e_schema.eid
       
   647     def gen_is_instance_of(self, entity):
       
   648         eids = []
       
   649         for etype in entity.e_schema.ancestors() + [entity.e_schema]:
       
   650             eids.append(entity.e_schema.eid)
       
   651         return eids
       
   652 
       
   653     def gen_created_by(self, entity):
       
   654         return self.session.user.eid
       
   655     def gen_owned_by(self, entity):
       
   656         return self.session.user.eid
       
   657 
       
   658     # implementations of gen_is / gen_is_instance_of to use during test where
       
   659     # schema has been loaded from the fs (hence entity type schema eids are not
       
   660     # known)
       
   661     def test_gen_is(self, entity):
       
   662         from cubicweb.hooks.metadata import eschema_eid
       
   663         return eschema_eid(self.session, entity.e_schema)
       
   664     def test_gen_is_instanceof(self, entity):
       
   665         from cubicweb.hooks.metadata import eschema_eid
       
   666         eids = []
       
   667         for eschema in entity.e_schema.ancestors() + [entity.e_schema]:
       
   668             eids.append(eschema_eid(self.session, eschema))
       
   669         return eids
       
   670 
       
   671 
       
   672 ################################################################################
       
   673 
       
   674 utf8csvreader = deprecated('[3.6] use ucsvreader instead')(ucsvreader)
       
   675 
       
   676 @deprecated('[3.6] use required')
       
   677 def nonempty(value):
       
   678     return required(value)
       
   679 
       
   680 @deprecated("[3.6] use call_check_method('isdigit')")
       
   681 def alldigits(txt):
       
   682     if txt.isdigit():
       
   683         return txt
       
   684     else:
       
   685         return u''
       
   686 
       
   687 @deprecated("[3.7] too specific, will move away, copy me")
       
   688 def capitalize_if_unicase(txt):
       
   689     if txt.isupper() or txt.islower():
       
   690         return txt.capitalize()
       
   691     return txt
       
   692 
       
   693 @deprecated("[3.7] too specific, will move away, copy me")
       
   694 def yesno(value):
       
   695     """simple heuristic that returns boolean value
       
   696 
       
   697     >>> yesno("Yes")
       
   698     True
       
   699     >>> yesno("oui")
       
   700     True
       
   701     >>> yesno("1")
       
   702     True
       
   703     >>> yesno("11")
       
   704     True
       
   705     >>> yesno("")
       
   706     False
       
   707     >>> yesno("Non")
       
   708     False
       
   709     >>> yesno("blablabla")
       
   710     False
       
   711     """
       
   712     if value:
       
   713         return value.lower()[0] in 'yo1'
       
   714     return False
       
   715 
       
   716 @deprecated("[3.7] use call_check_method('isalpha')")
       
   717 def isalpha(value):
       
   718     if value.isalpha():
       
   719         return value
       
   720     raise ValueError("not all characters in the string alphabetic")
       
   721 
       
   722 @deprecated("[3.7] use call_transform_method('upper')")
       
   723 def uppercase(txt):
       
   724     return txt.upper()
       
   725 
       
   726 @deprecated("[3.7] use call_transform_method('lower')")
       
   727 def lowercase(txt):
       
   728     return txt.lower()
       
   729 
       
   730 @deprecated("[3.7] use call_transform_method('replace', ' ', '')")
       
   731 def no_space(txt):
       
   732     return txt.replace(' ','')
       
   733 
       
   734 @deprecated("[3.7] use call_transform_method('replace', u'\xa0', '')")
       
   735 def no_uspace(txt):
       
   736     return txt.replace(u'\xa0','')
       
   737 
       
   738 @deprecated("[3.7] use call_transform_method('replace', '-', '')")
       
   739 def no_dash(txt):
       
   740     return txt.replace('-','')
       
   741 
       
   742 @deprecated("[3.7] use call_transform_method('strip')")
       
   743 def strip(txt):
       
   744     return txt.strip()
       
   745 
       
   746 @deprecated("[3.7] use call_transform_method('replace', ',', '.'), float")
       
   747 def decimal(value):
       
   748     return comma_float(value)
       
   749 
       
   750 @deprecated('[3.7] use int builtin')
       
   751 def integer(value):
       
   752     return int(value)