dataimport.py
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Mon, 29 Mar 2010 10:20:01 +0200
branchstable
changeset 5054 cb066d29166a
parent 4913 083b4d454192
child 5063 2a94b61837e1
permissions -rw-r--r--
fix dataimport for 3.7.2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     2
"""This module provides tools to import tabular data.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     3
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     4
:organization: Logilab
4212
ab6573088b4a update copyright: welcome 2010
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 3486
diff changeset
     5
:copyright: 2001-2010 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     6
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     7
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     8
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     9
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    10
Example of use (run this with `cubicweb-ctl shell instance import-script.py`):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    11
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    12
.. sourcecode:: python
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    13
3318
5b47b9f09bca documentation : fixed docstring
Alexandre Fayolle <alexandre.fayolle@logilab.fr>
parents: 3029
diff changeset
    14
  from cubicweb.devtools.dataimport import *
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    15
  # define data generators
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    16
  GENERATORS = []
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    17
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    18
  USERS = [('Prenom', 'firstname', ()),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    19
           ('Nom', 'surname', ()),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    20
           ('Identifiant', 'login', ()),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    21
           ]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    22
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    23
  def gen_users(ctl):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    24
      for row in ctl.get_data('utilisateurs'):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    25
          entity = mk_entity(row, USERS)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    26
          entity['upassword'] = u'motdepasse'
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    27
          ctl.check('login', entity['login'], None)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    28
          ctl.store.add('CWUser', entity)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    29
          email = {'address': row['email']}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    30
          ctl.store.add('EmailAddress', email)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
    31
          ctl.store.relate(entity['eid'], 'use_email', email['eid'])
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
    32
          ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']})
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    33
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    34
  CHK = [('login', check_doubles, 'Utilisateurs Login',
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    35
          'Deux utilisateurs ne devraient pas avoir le même login.'),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    36
         ]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    37
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    38
  GENERATORS.append( (gen_users, CHK) )
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    39
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    40
  # create controller
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
    41
  ctl = CWImportController(RQLObjectStore(cnx))
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
    42
  ctl.askerror = 1
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    43
  ctl.generators = GENERATORS
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    44
  ctl.data['utilisateurs'] = lazytable(utf8csvreader(open('users.csv')))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    45
  # run
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    46
  ctl.run()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    47
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
    48
.. BUG file with one column are not parsable
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
    49
.. TODO rollback() invocation is not possible yet
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    50
"""
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    51
__docformat__ = "restructuredtext en"
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    52
4186
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    53
import sys
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    54
import csv
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    55
import traceback
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    56
import os.path as osp
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    57
from StringIO import StringIO
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
    58
from copy import copy
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    59
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    60
from logilab.common import shellutils
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
    61
from logilab.common.date import strptime
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
    62
from logilab.common.decorators import cached
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    63
from logilab.common.deprecation import deprecated
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    64
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
    65
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    66
def ucsvreader_pb(filepath, encoding='utf-8', separator=',', quote='"',
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    67
                  skipfirst=False, withpb=True):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    68
    """same as ucsvreader but a progress bar is displayed as we iter on rows"""
4186
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    69
    if not osp.exists(filepath):
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
    70
        raise Exception("file doesn't exists: %s" % filepath)
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
    71
    rowcount = int(shellutils.Execute('wc -l "%s"' % filepath).out.strip().split()[0])
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    72
    if skipfirst:
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    73
        rowcount -= 1
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    74
    if withpb:
4140
46ddd27a4ca4 tweaks output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4136
diff changeset
    75
        pb = shellutils.ProgressBar(rowcount, 50)
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    76
    for urow in ucsvreader(file(filepath), encoding, separator, quote, skipfirst):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    77
        yield urow
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    78
        if withpb:
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    79
            pb.update()
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    80
    print ' %s rows imported' % rowcount
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    81
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    82
def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    83
               skipfirst=False):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    84
    """A csv reader that accepts files with any encoding and outputs unicode
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    85
    strings
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    86
    """
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    87
    it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    88
    if skipfirst:
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    89
        it.next()
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    90
    for row in it:
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    91
        yield [item.decode(encoding) for item in row]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    92
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    93
def commit_every(nbit, store, it):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
    94
    for i, x in enumerate(it):
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
    95
        yield x
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
    96
        if nbit is not None and i % nbit:
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
    97
            store.commit()
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
    98
    if nbit is not None:
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
    99
        store.commit()
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   100
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   101
def lazytable(reader):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   102
    """The first row is taken to be the header of the table and
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   103
    used to output a dict for each row of data.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   104
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   105
    >>> data = lazytable(utf8csvreader(open(filename)))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   106
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   107
    header = reader.next()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   108
    for row in reader:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   109
        yield dict(zip(header, row))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   110
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   111
def mk_entity(row, map):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   112
    """Return a dict made from sanitized mapped values.
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   113
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   114
    ValueError can be raised on unexpected values found in checkers
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   115
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   116
    >>> row = {'myname': u'dupont'}
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   117
    >>> map = [('myname', u'name', (call_transform_method('title'),))]
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   118
    >>> mk_entity(row, map)
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   119
    {'name': u'Dupont'}
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   120
    >>> row = {'myname': u'dupont', 'optname': u''}
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   121
    >>> map = [('myname', u'name', (call_transform_method('title'),)),
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   122
    ...        ('optname', u'MARKER', (optional,))]
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   123
    >>> mk_entity(row, map)
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   124
    {'name': u'Dupont', 'optname': None}
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   125
    """
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   126
    res = {}
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   127
    assert isinstance(row, dict)
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   128
    assert isinstance(map, list)
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   129
    for src, dest, funcs in map:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   130
        res[dest] = row[src]
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   131
        try:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   132
            for func in funcs:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   133
                res[dest] = func(res[dest])
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   134
                if res[dest] is None:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   135
                    break
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   136
        except ValueError, err:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   137
            raise ValueError('error with %r field: %s' % (src, err))
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   138
    return res
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   139
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   140
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   141
# user interactions ############################################################
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   142
3029
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
   143
def tell(msg):
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
   144
    print msg
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
   145
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   146
def confirm(question):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   147
    """A confirm function that asks for yes/no/abort and exits on abort."""
4721
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   148
    answer = shellutils.ASK.ask(question, ('Y', 'n', 'abort'), 'Y')
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   149
    if answer == 'abort':
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   150
        sys.exit(1)
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   151
    return answer == 'Y'
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   153
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   154
class catch_error(object):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   155
    """Helper for @contextmanager decorator."""
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   156
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   157
    def __init__(self, ctl, key='unexpected error', msg=None):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   158
        self.ctl = ctl
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   159
        self.key = key
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   160
        self.msg = msg
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   161
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   162
    def __enter__(self):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   163
        return self
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   164
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   165
    def __exit__(self, type, value, traceback):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   166
        if type is not None:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   167
            if issubclass(type, (KeyboardInterrupt, SystemExit)):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   168
                return # re-raise
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   169
            if self.ctl.catcherrors:
4173
cfd5d3270f99 msg isn't defined there, but we've to give traceback information to record error
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4152
diff changeset
   170
                self.ctl.record_error(self.key, None, type, value, traceback)
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   171
                return True # silent
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   172
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   173
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   174
# base sanitizing/coercing functions ###########################################
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   175
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   176
def optional(value):
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   177
    """checker to filter optional field
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   178
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   179
    If value is undefined (ex: empty string), return None that will
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   180
    break the checkers validation chain
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   181
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   182
    General use is to add 'optional' check in first condition to avoid
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   183
    ValueError by further checkers
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   184
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   185
    >>> MAPPER = [(u'value', 'value', (optional, int))]
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   186
    >>> row = {'value': u'XXX'}
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   187
    >>> mk_entity(row, MAPPER)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   188
    {'value': None}
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   189
    >>> row = {'value': u'100'}
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   190
    >>> mk_entity(row, MAPPER)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   191
    {'value': 100}
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   192
    """
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   193
    if value:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   194
        return value
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   195
    return None
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   196
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   197
def required(value):
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   198
    """raise ValueError is value is empty
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   199
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   200
    This check should be often found in last position in the chain.
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   201
    """
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   202
    if value:
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   203
        return value
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   204
    raise ValueError("required")
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   205
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   206
def todatetime(format='%d/%m/%Y'):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   207
    """return a transformation function to turn string input value into a
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   208
    `datetime.datetime` instance, using given format.
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   209
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   210
    Follow it by `todate` or `totime` functions from `logilab.common.date` if
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   211
    you want a `date`/`time` instance instead of `datetime`.
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   212
    """
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   213
    def coerce(value):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   214
        return strptime(value, format)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   215
    return coerce
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   216
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   217
def call_transform_method(methodname, *args, **kwargs):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   218
    """return value returned by calling the given method on input"""
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   219
    def coerce(value):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   220
        return getattr(value, methodname)(*args, **kwargs)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   221
    return coerce
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   222
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   223
def call_check_method(methodname, *args, **kwargs):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   224
    """check value returned by calling the given method on input is true,
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   225
    else raise ValueError
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   226
    """
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   227
    def check(value):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   228
        if getattr(value, methodname)(*args, **kwargs):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   229
            return value
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   230
        raise ValueError('%s not verified on %r' % (methodname, value))
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   231
    return check
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   232
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   233
# base integrity checking functions ############################################
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   234
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   235
def check_doubles(buckets):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   236
    """Extract the keys that have more than one item in their bucket."""
4721
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   237
    return [(k, len(v)) for k, v in buckets.items() if len(v) > 1]
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   238
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   239
def check_doubles_not_none(buckets):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   240
    """Extract the keys that have more than one item in their bucket."""
4721
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   241
    return [(k, len(v)) for k, v in buckets.items()
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   242
            if k is not None and len(v) > 1]
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   243
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   244
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   245
# object stores #################################################################
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   246
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   247
class ObjectStore(object):
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   248
    """Store objects in memory for *faster* validation (development mode)
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   249
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   250
    But it will not enforce the constraints of the schema and hence will miss some problems
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   251
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   252
    >>> store = ObjectStore()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   253
    >>> user = {'login': 'johndoe'}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   254
    >>> store.add('CWUser', user)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   255
    >>> group = {'name': 'unknown'}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   256
    >>> store.add('CWUser', group)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   257
    >>> store.relate(user['eid'], 'in_group', group['eid'])
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   258
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   259
    def __init__(self):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   260
        self.items = []
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   261
        self.eids = {}
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   262
        self.types = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   263
        self.relations = set()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   264
        self.indexes = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   265
        self._rql = None
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   266
        self._commit = None
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   267
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   268
    def _put(self, type, item):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   269
        self.items.append(item)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   270
        return len(self.items) - 1
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   271
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   272
    def add(self, type, item):
3486
ea6bf6f9ba0c [cwctl] improve dialog messages
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3318
diff changeset
   273
        assert isinstance(item, dict), 'item is not a dict but a %s' % type(item)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   274
        eid = item['eid'] = self._put(type, item)
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   275
        self.eids[eid] = item
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   276
        self.types.setdefault(type, []).append(eid)
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   277
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   278
    def relate(self, eid_from, rtype, eid_to, inlined=False):
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   279
        """Add new relation"""
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   280
        relation = eid_from, rtype, eid_to
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   281
        self.relations.add(relation)
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   282
        return relation
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   283
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   284
    def commit(self):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   285
        """this commit method do nothing by default
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   286
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   287
        This is voluntary to use the frequent autocommit feature in CubicWeb
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   288
        when you are using hooks or another
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   289
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   290
        If you want override commit method, please set it by the
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   291
        constructor
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   292
        """
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   293
        pass
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   294
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   295
    def rql(self, *args):
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   296
        if self._rql is not None:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   297
            return self._rql(*args)
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   298
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   299
    @property
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   300
    def nb_inserted_entities(self):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   301
        return len(self.eids)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   302
    @property
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   303
    def nb_inserted_types(self):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   304
        return len(self.types)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   305
    @property
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   306
    def nb_inserted_relations(self):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   307
        return len(self.relations)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   308
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   309
    @deprecated("[3.7] index support will disappear")
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   310
    def build_index(self, name, type, func=None, can_be_empty=False):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   311
        """build internal index for further search"""
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   312
        index = {}
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   313
        if func is None or not callable(func):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   314
            func = lambda x: x['eid']
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   315
        for eid in self.types[type]:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   316
            index.setdefault(func(self.eids[eid]), []).append(eid)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   317
        if not can_be_empty:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   318
            assert index, "new index '%s' cannot be empty" % name
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   319
        self.indexes[name] = index
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   320
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   321
    @deprecated("[3.7] index support will disappear")
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   322
    def build_rqlindex(self, name, type, key, rql, rql_params=False,
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   323
                       func=None, can_be_empty=False):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   324
        """build an index by rql query
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   325
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   326
        rql should return eid in first column
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   327
        ctl.store.build_index('index_name', 'users', 'login', 'Any U WHERE U is CWUser')
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   328
        """
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   329
        self.types[type] = []
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   330
        rset = self.rql(rql, rql_params or {})
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   331
        if not can_be_empty:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   332
            assert rset, "new index type '%s' cannot be empty (0 record found)" % type
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   333
        for entity in rset.entities():
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   334
            getattr(entity, key) # autopopulate entity with key attribute
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   335
            self.eids[entity.eid] = dict(entity)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   336
            if entity.eid not in self.types[type]:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   337
                self.types[type].append(entity.eid)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   338
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   339
        # Build index with specified key
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   340
        func = lambda x: x[key]
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   341
        self.build_index(name, type, func, can_be_empty=can_be_empty)
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   342
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   343
    @deprecated("[3.7] index support will disappear")
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   344
    def fetch(self, name, key, unique=False, decorator=None):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   345
        """index fetcher method
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   346
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   347
        decorator is a callable method or an iterator of callable methods (usually a lambda function)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   348
        decorator=lambda x: x[:1] (first value is returned)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   349
        decorator=lambda x: x.lower (lowercased value is returned)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   350
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   351
        decorator is handy when you want to improve index keys but without
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   352
        changing the original field
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   353
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   354
        Same check functions can be reused here.
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   355
        """
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   356
        eids = self.indexes[name].get(key, [])
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   357
        if decorator is not None:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   358
            if not hasattr(decorator, '__iter__'):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   359
                decorator = (decorator,)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   360
            for f in decorator:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   361
                eids = f(eids)
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   362
        if unique:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   363
            assert len(eids) == 1, u'expected a single one value for key "%s" in index "%s". Got %i' % (key, name, len(eids))
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   364
            eids = eids[0]
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   365
        return eids
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   366
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   367
    @deprecated("[3.7] index support will disappear")
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   368
    def find(self, type, key, value):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   369
        for idx in self.types[type]:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   370
            item = self.items[idx]
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   371
            if item[key] == value:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   372
                yield item
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   373
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   374
    @deprecated("[3.7] checkpoint() deprecated. use commit() instead")
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   375
    def checkpoint(self):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   376
        self.commit()
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   377
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   378
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   379
class RQLObjectStore(ObjectStore):
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   380
    """ObjectStore that works with an actual RQL repository (production mode)"""
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   381
    _rql = None # bw compat
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   382
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   383
    def __init__(self, session=None, commit=None):
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   384
        ObjectStore.__init__(self)
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   385
        if session is not None:
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   386
            if not hasattr(session, 'set_pool'):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   387
                # connection
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   388
                cnx = session
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   389
                session = session.request()
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   390
                session.set_pool = lambda : None
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   391
                commit = commit or cnx.commit
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   392
            else:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   393
                session.set_pool()
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   394
            self.session = session
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   395
            self._commit = commit or session.commit
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   396
        elif commit is not None:
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   397
            self._commit = commit
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   398
            # XXX .session
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   399
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   400
    @deprecated("[3.7] checkpoint() deprecated. use commit() instead")
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   401
    def checkpoint(self):
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   402
        self.commit()
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   403
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   404
    def commit(self):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   405
        self._commit()
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   406
        self.session.set_pool()
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   407
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   408
    def rql(self, *args):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   409
        if self._rql is not None:
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   410
            return self._rql(*args)
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   411
        return self.session.execute(*args)
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   412
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   413
    def create_entity(self, *args, **kwargs):
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   414
        entity = self.session.create_entity(*args, **kwargs)
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   415
        self.eids[entity.eid] = entity
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   416
        self.types.setdefault(args[0], []).append(entity.eid)
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   417
        return entity
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   418
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   419
    def _put(self, type, item):
4721
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   420
        query = ('INSERT %s X: ' % type) + ', '.join('X %s %%(%s)s' % (k, k)
4734
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4721
diff changeset
   421
                                                     for k in item)
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   422
        return self.rql(query, item)[0][0]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   423
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   424
    def relate(self, eid_from, rtype, eid_to, inlined=False):
4721
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   425
        eid_from, rtype, eid_to = super(RQLObjectStore, self).relate(
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   426
            eid_from, rtype, eid_to)
4136
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   427
        self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
47060a66c97f dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 3486
diff changeset
   428
                  {'x': int(eid_from), 'y': int(eid_to)}, ('x', 'y'))
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   429
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   430
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   431
# the import controller ########################################################
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   432
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   433
class CWImportController(object):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   434
    """Controller of the data import process.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   435
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   436
    >>> ctl = CWImportController(store)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   437
    >>> ctl.generators = list_of_data_generators
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   438
    >>> ctl.data = dict_of_data_tables
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   439
    >>> ctl.run()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   440
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   441
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   442
    def __init__(self, store, askerror=0, catcherrors=None, tell=tell,
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   443
                 commitevery=50):
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   444
        self.store = store
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   445
        self.generators = None
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   446
        self.data = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   447
        self.errors = None
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   448
        self.askerror = askerror
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   449
        if  catcherrors is None:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   450
            catcherrors = askerror
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   451
        self.catcherrors = catcherrors
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   452
        self.commitevery = commitevery # set to None to do a single commit
3029
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
   453
        self._tell = tell
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   454
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   455
    def check(self, type, key, value):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   456
        self._checks.setdefault(type, {}).setdefault(key, []).append(value)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   457
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   458
    def check_map(self, entity, key, map, default):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   459
        try:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   460
            entity[key] = map[entity[key]]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   461
        except KeyError:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   462
            self.check(key, entity[key], None)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   463
            entity[key] = default
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   464
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   465
    def record_error(self, key, msg=None, type=None, value=None, tb=None):
4186
ca7e526b07b6 import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4173
diff changeset
   466
        tmp = StringIO()
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   467
        if type is None:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   468
            traceback.print_exc(file=tmp)
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   469
        else:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   470
            traceback.print_exception(type, value, tb, file=tmp)
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   471
        print tmp.getvalue()
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   472
        # use a list to avoid counting a <nb lines> errors instead of one
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   473
        errorlog = self.errors.setdefault(key, [])
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   474
        if msg is None:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   475
            errorlog.append(tmp.getvalue().splitlines())
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   476
        else:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   477
            errorlog.append( (msg, tmp.getvalue().splitlines()) )
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   478
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   479
    def run(self):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   480
        self.errors = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   481
        for func, checks in self.generators:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   482
            self._checks = {}
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   483
            func_name = func.__name__
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   484
            self.tell("Run import function '%s'..." % func_name)
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   485
            try:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   486
                func(self)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   487
            except:
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   488
                if self.catcherrors:
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   489
                    self.record_error(func_name, 'While calling %s' % func.__name__)
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   490
                else:
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   491
                    self._print_stats()
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   492
                    raise
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   493
            for key, func, title, help in checks:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   494
                buckets = self._checks.get(key)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   495
                if buckets:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   496
                    err = func(buckets)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   497
                    if err:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   498
                        self.errors[title] = (help, err)
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   499
        self.store.commit()
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   500
        self._print_stats()
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   501
        if self.errors:
4721
8f63691ccb7f pylint style fixes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4613
diff changeset
   502
            if self.askerror == 2 or (self.askerror and confirm('Display errors ?')):
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   503
                from pprint import pformat
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   504
                for errkey, error in self.errors.items():
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   505
                    self.tell("\n%s (%s): %d\n" % (error[0], errkey, len(error[1])))
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   506
                    self.tell(pformat(sorted(error[1])))
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   507
4912
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   508
    def _print_stats(self):
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   509
        nberrors = sum(len(err[1]) for err in self.errors.values())
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   510
        self.tell('\nImport statistics: %i entities, %i types, %i relations and %i errors'
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   511
                  % (self.store.nb_inserted_entities,
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   512
                     self.store.nb_inserted_types,
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   513
                     self.store.nb_inserted_relations,
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   514
                     nberrors))
9767cc516b4f [R] dataimport: changes
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4847
diff changeset
   515
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   516
    def get_data(self, key):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   517
        return self.data.get(key)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   518
4527
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   519
    def index(self, name, key, value, unique=False):
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   520
        """create a new index
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   521
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   522
        If unique is set to True, only first occurence will be kept not the following ones
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   523
        """
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   524
        if unique:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   525
            try:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   526
                if value in self.store.indexes[name][key]:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   527
                    return
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   528
            except KeyError:
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   529
                # we're sure that one is the first occurence; so continue...
67ab70e98488 [R] devtools: improve default data import mechanism
Julien Jehannet <julien.jehannet@logilab.fr>
parents: 4252
diff changeset
   530
                pass
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   531
        self.store.indexes.setdefault(name, {}).setdefault(key, []).append(value)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   532
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   533
    def tell(self, msg):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   534
        self._tell(msg)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   535
4152
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   536
    def iter_and_commit(self, datakey):
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   537
        """iter rows, triggering commit every self.commitevery iterations"""
30fd1229137d new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4140
diff changeset
   538
        return commit_every(self.commitevery, self.store, self.get_data(datakey))
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   539
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   540
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   541
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   542
from datetime import datetime
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   543
from cubicweb.schema import META_RTYPES, VIRTUAL_RTYPES
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   544
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   545
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   546
class NoHookRQLObjectStore(RQLObjectStore):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   547
    """ObjectStore that works with an actual RQL repository (production mode)"""
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   548
    _rql = None # bw compat
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   549
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   550
    def __init__(self, session, metagen=None, baseurl=None):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   551
        super(NoHookRQLObjectStore, self).__init__(session)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   552
        self.source = session.repo.system_source
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   553
        self.rschema = session.repo.schema.rschema
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   554
        self.add_relation = self.source.add_relation
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   555
        if metagen is None:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   556
            metagen = MetaGenerator(session, baseurl)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   557
        self.metagen = metagen
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   558
        self._nb_inserted_entities = 0
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   559
        self._nb_inserted_types = 0
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   560
        self._nb_inserted_relations = 0
5054
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   561
        self.rql = session.execute
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   562
        # deactivate security
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   563
        session.set_read_security(False)
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   564
        session.set_write_security(False)
4913
083b4d454192 server/web api for accessing to deleted_entites
Katia Saurfelt <katia.saurfelt@logilab.fr>
parents: 4912
diff changeset
   565
        # disable undoing
083b4d454192 server/web api for accessing to deleted_entites
Katia Saurfelt <katia.saurfelt@logilab.fr>
parents: 4912
diff changeset
   566
        session.undo_actions = frozenset()
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   567
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   568
    def create_entity(self, etype, **kwargs):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   569
        for k, v in kwargs.iteritems():
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   570
            kwargs[k] = getattr(v, 'eid', v)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   571
        entity, rels = self.metagen.base_etype_dicts(etype)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   572
        entity = copy(entity)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   573
        entity._related_cache = {}
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   574
        self.metagen.init_entity(entity)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   575
        entity.update(kwargs)
5054
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   576
        entity.edited_attributes = set(entity)
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   577
        session = self.session
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   578
        self.source.add_entity(session, entity)
5054
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   579
        self.source.add_info(session, entity, self.source, None, complete=False)
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   580
        for rtype, targeteids in rels.iteritems():
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   581
            # targeteids may be a single eid or a list of eids
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   582
            inlined = self.rschema(rtype).inlined
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   583
            try:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   584
                for targeteid in targeteids:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   585
                    self.add_relation(session, entity.eid, rtype, targeteid,
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   586
                                      inlined)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   587
            except TypeError:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   588
                self.add_relation(session, entity.eid, rtype, targeteids,
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   589
                                  inlined)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   590
        self._nb_inserted_entities += 1
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   591
        return entity
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   592
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   593
    def relate(self, eid_from, rtype, eid_to):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   594
        assert not rtype.startswith('reverse_')
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   595
        self.add_relation(self.session, eid_from, rtype, eid_to,
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   596
                          self.rschema(rtype).inlined)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   597
        self._nb_inserted_relations += 1
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   598
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   599
    @property
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   600
    def nb_inserted_entities(self):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   601
        return self._nb_inserted_entities
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   602
    @property
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   603
    def nb_inserted_types(self):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   604
        return self._nb_inserted_types
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   605
    @property
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   606
    def nb_inserted_relations(self):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   607
        return self._nb_inserted_relations
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   608
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   609
    def _put(self, type, item):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   610
        raise RuntimeError('use create entity')
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   611
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   612
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   613
class MetaGenerator(object):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   614
    def __init__(self, session, baseurl=None):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   615
        self.session = session
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   616
        self.source = session.repo.system_source
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   617
        self.time = datetime.now()
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   618
        if baseurl is None:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   619
            config = session.vreg.config
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   620
            baseurl = config['base-url'] or config.default_base_url()
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   621
        if not baseurl[-1] == '/':
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   622
            baseurl += '/'
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   623
        self.baseurl =  baseurl
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   624
        # attributes/relations shared by all entities of the same type
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   625
        self.etype_attrs = []
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   626
        self.etype_rels = []
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   627
        # attributes/relations specific to each entity
5054
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   628
        self.entity_attrs = ['cwuri']
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   629
        #self.entity_rels = [] XXX not handled (YAGNI?)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   630
        schema = session.vreg.schema
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   631
        rschema = schema.rschema
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   632
        for rtype in META_RTYPES:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   633
            if rtype in ('eid', 'cwuri') or rtype in VIRTUAL_RTYPES:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   634
                continue
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   635
            if rschema(rtype).final:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   636
                self.etype_attrs.append(rtype)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   637
            else:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   638
                self.etype_rels.append(rtype)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   639
        if not schema._eid_index:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   640
            # test schema loaded from the fs
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   641
            self.gen_is = self.test_gen_is
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   642
            self.gen_is_instance_of = self.test_gen_is_instanceof
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   643
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   644
    @cached
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   645
    def base_etype_dicts(self, etype):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   646
        entity = self.session.vreg['etypes'].etype_class(etype)(self.session)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   647
        # entity are "surface" copied, avoid shared dict between copies
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   648
        del entity.cw_extra_kwargs
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   649
        for attr in self.etype_attrs:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   650
            entity[attr] = self.generate(entity, attr)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   651
        rels = {}
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   652
        for rel in self.etype_rels:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   653
            rels[rel] = self.generate(entity, rel)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   654
        return entity, rels
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   655
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   656
    def init_entity(self, entity):
5054
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   657
        entity.eid = self.source.create_eid(self.session)
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   658
        for attr in self.entity_attrs:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   659
            entity[attr] = self.generate(entity, attr)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   660
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   661
    def generate(self, entity, rtype):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   662
        return getattr(self, 'gen_%s' % rtype)(entity)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   663
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   664
    def gen_cwuri(self, entity):
5054
cb066d29166a fix dataimport for 3.7.2
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4913
diff changeset
   665
        return u'%seid/%s' % (self.baseurl, entity.eid)
4818
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   666
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   667
    def gen_creation_date(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   668
        return self.time
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   669
    def gen_modification_date(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   670
        return self.time
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   671
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   672
    def gen_is(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   673
        return entity.e_schema.eid
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   674
    def gen_is_instance_of(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   675
        eids = []
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   676
        for etype in entity.e_schema.ancestors() + [entity.e_schema]:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   677
            eids.append(entity.e_schema.eid)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   678
        return eids
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   679
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   680
    def gen_created_by(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   681
        return self.session.user.eid
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   682
    def gen_owned_by(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   683
        return self.session.user.eid
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   684
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   685
    # implementations of gen_is / gen_is_instance_of to use during test where
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   686
    # schema has been loaded from the fs (hence entity type schema eids are not
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   687
    # known)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   688
    def test_gen_is(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   689
        from cubicweb.hooks.metadata import eschema_eid
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   690
        return eschema_eid(self.session, entity.e_schema)
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   691
    def test_gen_is_instanceof(self, entity):
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   692
        from cubicweb.hooks.metadata import eschema_eid
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   693
        eids = []
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   694
        for eschema in entity.e_schema.ancestors() + [entity.e_schema]:
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   695
            eids.append(eschema_eid(self.session, eschema))
9f9bfbcdecfd le patch massiveimport a été importé
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 4734
diff changeset
   696
        return eids