devtools/dataimport.py
author Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
Wed, 07 Oct 2009 19:59:46 +0200
branchstable
changeset 3608 5a46e68c3d3c
parent 3486 ea6bf6f9ba0c
child 4136 47060a66c97f
child 4212 ab6573088b4a
permissions -rw-r--r--
[editcontroller] backout (sort of) removal of entity.complete() in validate_form entity is passed to js callbacks when there's one. In that case, we want as much information as we can. The removal was there to avoid complete to fail when all constraints (required relations / attributes) aren't satisfied. This fix only does the complete() after the commit is done, any ValidationError should have been raised at this point.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     2
"""This module provides tools to import tabular data.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     3
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     4
:organization: Logilab
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     5
:copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     6
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     7
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     8
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
     9
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    10
Example of use (run this with `cubicweb-ctl shell instance import-script.py`):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    11
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    12
.. sourcecode:: python
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    13
3318
5b47b9f09bca documentation : fixed docstring
Alexandre Fayolle <alexandre.fayolle@logilab.fr>
parents: 3029
diff changeset
    14
  from cubicweb.devtools.dataimport import *
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    15
  # define data generators
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    16
  GENERATORS = []
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    17
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    18
  USERS = [('Prenom', 'firstname', ()),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    19
           ('Nom', 'surname', ()),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    20
           ('Identifiant', 'login', ()),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    21
           ]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    22
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    23
  def gen_users(ctl):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    24
      for row in ctl.get_data('utilisateurs'):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    25
          entity = mk_entity(row, USERS)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    26
          entity['upassword'] = u'motdepasse'
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    27
          ctl.check('login', entity['login'], None)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    28
          ctl.store.add('CWUser', entity)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    29
          email = {'address': row['email']}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    30
          ctl.store.add('EmailAddress', email)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
    31
          ctl.store.relate(entity['eid'], 'use_email', email['eid'])
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
    32
          ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']})
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    33
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    34
  CHK = [('login', check_doubles, 'Utilisateurs Login',
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    35
          'Deux utilisateurs ne devraient pas avoir le même login.'),
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    36
         ]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    37
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    38
  GENERATORS.append( (gen_users, CHK) )
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    39
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    40
  # create controller
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    41
  ctl = CWImportController(RQLObjectStore())
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    42
  ctl.askerror = True
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    43
  ctl.generators = GENERATORS
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    44
  ctl.store._checkpoint = checkpoint
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    45
  ctl.store._rql = rql
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    46
  ctl.data['utilisateurs'] = lazytable(utf8csvreader(open('users.csv')))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    47
  # run
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    48
  ctl.run()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    49
  sys.exit(0)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    50
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    51
"""
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    52
__docformat__ = "restructuredtext en"
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    53
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    54
import sys, csv, traceback
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    55
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    56
from logilab.common import shellutils
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    57
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    58
def utf8csvreader(file, encoding='utf-8', separator=',', quote='"'):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    59
    """A csv reader that accepts files with any encoding and outputs
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    60
    unicode strings."""
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    61
    for row in csv.reader(file, delimiter=separator, quotechar=quote):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    62
        yield [item.decode(encoding) for item in row]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    63
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    64
def lazytable(reader):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    65
    """The first row is taken to be the header of the table and
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    66
    used to output a dict for each row of data.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    67
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    68
    >>> data = lazytable(utf8csvreader(open(filename)))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    69
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    70
    header = reader.next()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    71
    for row in reader:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    72
        yield dict(zip(header, row))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    73
3029
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
    74
def tell(msg):
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
    75
    print msg
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
    76
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    77
# base sanitizing functions #####
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    78
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    79
def capitalize_if_unicase(txt):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    80
    if txt.isupper() or txt.islower():
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    81
        return txt.capitalize()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    82
    return txt
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    83
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    84
def no_space(txt):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    85
    return txt.replace(' ','')
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    86
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    87
def no_uspace(txt):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    88
    return txt.replace(u'\xa0','')
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    89
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    90
def no_dash(txt):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    91
    return txt.replace('-','')
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    92
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    93
def alldigits(txt):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    94
    if txt.isdigit():
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    95
        return txt
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    96
    else:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    97
        return u''
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    98
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
    99
def strip(txt):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   100
    return txt.strip()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   101
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   102
# base checks #####
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   103
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   104
def check_doubles(buckets):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   105
    """Extract the keys that have more than one item in their bucket."""
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   106
    return [(key, len(value)) for key,value in buckets.items() if len(value) > 1]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   107
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   108
# make entity helper #####
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   109
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   110
def mk_entity(row, map):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   111
    """Return a dict made from sanitized mapped values.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   112
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   113
    >>> row = {'myname': u'dupont'}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   114
    >>> map = [('myname', u'name', (capitalize_if_unicase,))]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   115
    >>> mk_entity(row, map)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   116
    {'name': u'Dupont'}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   117
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   118
    res = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   119
    for src, dest, funcs in map:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   120
        res[dest] = row[src]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   121
        for func in funcs:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   122
            res[dest] = func(res[dest])
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   123
    return res
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   124
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   125
# object stores
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   126
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   127
class ObjectStore(object):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   128
    """Store objects in memory for faster testing. Will not
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   129
    enforce the constraints of the schema and hence will miss
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   130
    some problems.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   131
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   132
    >>> store = ObjectStore()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   133
    >>> user = {'login': 'johndoe'}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   134
    >>> store.add('CWUser', user)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   135
    >>> group = {'name': 'unknown'}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   136
    >>> store.add('CWUser', group)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   137
    >>> store.relate(user['eid'], 'in_group', group['eid'])
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   138
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   139
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   140
    def __init__(self):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   141
        self.items = []
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   142
        self.eids = {}
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   143
        self.types = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   144
        self.relations = set()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   145
        self.indexes = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   146
        self._rql = None
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   147
        self._checkpoint = None
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   148
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   149
    def _put(self, type, item):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   150
        self.items.append(item)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   151
        return len(self.items) - 1
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   152
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   153
    def add(self, type, item):
3486
ea6bf6f9ba0c [cwctl] improve dialog messages
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3318
diff changeset
   154
        assert isinstance(item, dict), 'item is not a dict but a %s' % type(item)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   155
        eid = item['eid'] = self._put(type, item)
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   156
        self.eids[eid] = item
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   157
        self.types.setdefault(type, []).append(eid)
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   158
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   159
    def relate(self, eid_from, rtype, eid_to):
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   160
        eids_valid = (eid_from < len(self.items) and eid_to <= len(self.items))
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   161
        assert eids_valid, 'eid error %s %s' % (eid_from, eid_to)
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   162
        self.relations.add( (eid_from, rtype, eid_to) )
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   163
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   164
    def build_index(self, name, type, func):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   165
        index = {}
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   166
        for eid in self.types[type]:
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   167
            index.setdefault(func(self.eids[eid]), []).append(eid)
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   168
        self.indexes[name] = index
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   169
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   170
    def get_many(self, name, key):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   171
        return self.indexes[name].get(key, [])
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   172
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   173
    def get_one(self, name, key):
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   174
        eids = self.indexes[name].get(key, [])
3486
ea6bf6f9ba0c [cwctl] improve dialog messages
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3318
diff changeset
   175
        assert len(eids) == 1, 'expected a single one got %i' % len(eids)
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   176
        return eids[0]
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   177
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   178
    def find(self, type, key, value):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   179
        for idx in self.types[type]:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   180
            item = self.items[idx]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   181
            if item[key] == value:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   182
                yield item
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   183
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   184
    def rql(self, query, args):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   185
        if self._rql:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   186
            return self._rql(query, args)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   187
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   188
    def checkpoint(self):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   189
        if self._checkpoint:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   190
            self._checkpoint()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   191
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   192
class RQLObjectStore(ObjectStore):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   193
    """ObjectStore that works with an actual RQL repository."""
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   194
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   195
    def _put(self, type, item):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   196
        query = ('INSERT %s X: ' % type) + ', '.join(['X %s %%(%s)s' % (key,key) for key in item])
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   197
        return self.rql(query, item)[0][0]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   198
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   199
    def relate(self, eid_from, rtype, eid_to):
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   200
        query = 'SET X %s Y WHERE X eid %%(from)s, Y eid %%(to)s' % rtype
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   201
        self.rql(query, {'from': int(eid_from), 'to': int(eid_to)})
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   202
        self.relations.add( (eid_from, rtype, eid_to) )
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   203
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   204
# import controller #####
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   205
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   206
class CWImportController(object):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   207
    """Controller of the data import process.
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   208
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   209
    >>> ctl = CWImportController(store)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   210
    >>> ctl.generators = list_of_data_generators
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   211
    >>> ctl.data = dict_of_data_tables
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   212
    >>> ctl.run()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   213
    """
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   214
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   215
    def __init__(self, store):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   216
        self.store = store
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   217
        self.generators = None
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   218
        self.data = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   219
        self.errors = None
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   220
        self.askerror = False
3029
bc573d5fb5b7 F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 3003
diff changeset
   221
        self._tell = tell
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   222
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   223
    def check(self, type, key, value):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   224
        self._checks.setdefault(type, {}).setdefault(key, []).append(value)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   225
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   226
    def check_map(self, entity, key, map, default):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   227
        try:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   228
            entity[key] = map[entity[key]]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   229
        except KeyError:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   230
            self.check(key, entity[key], None)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   231
            entity[key] = default
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   232
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   233
    def run(self):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   234
        self.errors = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   235
        for func, checks in self.generators:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   236
            self._checks = {}
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   237
            func_name = func.__name__[4:]
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   238
            question = 'Importation de %s' % func_name
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   239
            self.tell(question)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   240
            try:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   241
                func(self)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   242
            except:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   243
                import StringIO
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   244
                tmp = StringIO.StringIO()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   245
                traceback.print_exc(file=tmp)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   246
                print tmp.getvalue()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   247
                self.errors[func_name] = ('Erreur lors de la transformation',
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   248
                                          tmp.getvalue().splitlines())
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   249
            for key, func, title, help in checks:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   250
                buckets = self._checks.get(key)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   251
                if buckets:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   252
                    err = func(buckets)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   253
                    if err:
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   254
                        self.errors[title] = (help, err)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   255
            self.store.checkpoint()
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   256
        errors = sum(len(err[1]) for err in self.errors.values())
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   257
        self.tell('Importation terminée. (%i objets, %i types, %i relations et %i erreurs).'
3003
2944ee420dca R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 2974
diff changeset
   258
                  % (len(self.store.eids), len(self.store.types),
2974
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   259
                     len(self.store.relations), errors))
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   260
        if self.errors and self.askerror and confirm('Afficher les erreurs ?'):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   261
            import pprint
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   262
            pprint.pprint(self.errors)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   263
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   264
    def get_data(self, key):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   265
        return self.data.get(key)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   266
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   267
    def index(self, name, key, value):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   268
        self.store.indexes.setdefault(name, {}).setdefault(key, []).append(value)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   269
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   270
    def tell(self, msg):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   271
        self._tell(msg)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   272
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   273
def confirm(question):
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   274
    """A confirm function that asks for yes/no/abort and exits on abort."""
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   275
    answer = shellutils.ASK.ask(question, ('Y','n','abort'), 'Y')
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   276
    if answer == 'abort':
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   277
        sys.exit(1)
3dfe497e5afa F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff changeset
   278
    return answer == 'Y'