author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Wed, 20 Jan 2010 15:03:30 +0100 | |
changeset 4297 | 5f2081181055 |
parent 4252 | 6c4f109c2b03 |
child 4527 | 67ab70e98488 |
permissions | -rw-r--r-- |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
1 |
# -*- coding: utf-8 -*- |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
2 |
"""This module provides tools to import tabular data. |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
3 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
4 |
:organization: Logilab |
4212
ab6573088b4a
update copyright: welcome 2010
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
3486
diff
changeset
|
5 |
:copyright: 2001-2010 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2. |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
6 |
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
7 |
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
8 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
9 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
10 |
Example of use (run this with `cubicweb-ctl shell instance import-script.py`): |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
11 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
12 |
.. sourcecode:: python |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
13 |
|
3318
5b47b9f09bca
documentation : fixed docstring
Alexandre Fayolle <alexandre.fayolle@logilab.fr>
parents:
3029
diff
changeset
|
14 |
from cubicweb.devtools.dataimport import * |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
15 |
# define data generators |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
16 |
GENERATORS = [] |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
17 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
18 |
USERS = [('Prenom', 'firstname', ()), |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
19 |
('Nom', 'surname', ()), |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
20 |
('Identifiant', 'login', ()), |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
21 |
] |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
22 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
23 |
def gen_users(ctl): |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
24 |
for row in ctl.get_data('utilisateurs'): |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
25 |
entity = mk_entity(row, USERS) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
26 |
entity['upassword'] = u'motdepasse' |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
27 |
ctl.check('login', entity['login'], None) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
28 |
ctl.store.add('CWUser', entity) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
29 |
email = {'address': row['email']} |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
30 |
ctl.store.add('EmailAddress', email) |
3003
2944ee420dca
R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
2974
diff
changeset
|
31 |
ctl.store.relate(entity['eid'], 'use_email', email['eid']) |
2944ee420dca
R [dataimport] rename uid to eid
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
2974
diff
changeset
|
32 |
ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']}) |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
33 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
34 |
CHK = [('login', check_doubles, 'Utilisateurs Login', |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
35 |
'Deux utilisateurs ne devraient pas avoir le même login.'), |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
36 |
] |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
37 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
38 |
GENERATORS.append( (gen_users, CHK) ) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
39 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
40 |
# create controller |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
41 |
ctl = CWImportController(RQLObjectStore()) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
42 |
ctl.askerror = True |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
43 |
ctl.generators = GENERATORS |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
44 |
ctl.store._checkpoint = checkpoint |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
45 |
ctl.store._rql = rql |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
46 |
ctl.data['utilisateurs'] = lazytable(utf8csvreader(open('users.csv'))) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
47 |
# run |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
48 |
ctl.run() |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
49 |
sys.exit(0) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
50 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
51 |
""" |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
52 |
__docformat__ = "restructuredtext en" |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
53 |
|
4186
ca7e526b07b6
import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4173
diff
changeset
|
54 |
import sys |
ca7e526b07b6
import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4173
diff
changeset
|
55 |
import csv |
ca7e526b07b6
import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4173
diff
changeset
|
56 |
import traceback |
ca7e526b07b6
import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4173
diff
changeset
|
57 |
import os.path as osp |
ca7e526b07b6
import cleanup, check data file exists
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4173
diff
changeset
|
58 |
from StringIO import StringIO |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
59 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
60 |
from logilab.common import shellutils |
4136
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
61 |
from logilab.common.deprecation import deprecated |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
62 |
|
4136
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
63 |
def ucsvreader_pb(filepath, encoding='utf-8', separator=',', quote='"',
                  skipfirst=False, withpb=True):
    """Same as `ucsvreader`, but reads from a file path and can display a
    progress bar while rows are consumed.

    :param filepath: path of the CSV file to read
    :param encoding: encoding forwarded to `ucsvreader`
    :param separator: field delimiter forwarded to `ucsvreader`
    :param quote: quote character forwarded to `ucsvreader`
    :param skipfirst: when true, the first row is skipped and is not counted
    :param withpb: when true, a `shellutils.ProgressBar` is updated each time
      the caller asks for the next row
    :raise Exception: if `filepath` does not exist

    Yields the rows produced by `ucsvreader` for that file.

    NOTE: the row count (progress bar size and final message) is the number
    of physical lines in the file, so a quoted field spanning several lines
    is counted once per line.
    """
    if not osp.exists(filepath):
        raise Exception("file doesn't exists: %s" % filepath)
    # Count lines in Python instead of spawning `wc -l "<path>"`: no shell
    # quoting problem with unusual file names and no dependency on an
    # external command. Binary mode so that counting never has to decode.
    counted = open(filepath, 'rb')
    try:
        rowcount = sum(1 for _line in counted)
    finally:
        counted.close()
    if skipfirst:
        # never report a negative count for an empty file
        rowcount = max(rowcount - 1, 0)
    if withpb:
        # second argument kept from the original call (presumably the bar
        # width -- confirm against logilab.common.shellutils)
        pb = shellutils.ProgressBar(rowcount, 50)
    stream = open(filepath)
    try:
        for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
            yield urow
            if withpb:
                pb.update()
    finally:
        # also runs when the consumer abandons the generator early
        stream.close()
    print(' %s rows imported' % rowcount)
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
78 |
|
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
79 |
def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
               skipfirst=False):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings.

    :param stream: iterable of lines handed to `csv.reader`
    :param encoding: encoding used to decode each cell
    :param separator: field delimiter
    :param quote: quote character
    :param skipfirst: when true, the first row (usually a header) is dropped

    Yields one list of cells per CSV row.
    """
    rows = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
    if skipfirst:
        # Consume at most one row. Unlike a bare `rows.next()`, this does not
        # raise StopIteration inside the generator when the stream is empty.
        for _skipped in rows:
            break
    for row in rows:
        # Cells that cannot be decoded (already text) are passed through
        # untouched; byte-string cells are decoded as before.
        yield [item.decode(encoding) if hasattr(item, 'decode') else item
               for item in row]
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
89 |
|
4136
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
90 |
# Backward-compatibility alias: the old name `utf8csvreader` is `ucsvreader`
# wrapped by logilab's `deprecated` decorator with the message below
# (presumably emitted as a deprecation warning on call -- confirm).
utf8csvreader = deprecated('use ucsvreader instead')(ucsvreader)
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
91 |
|
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
92 |
def commit_every(nbit, store, it):
    """Iterate over `it`, calling `store.checkpoint()` every `nbit` items.

    :param nbit: number of items between two checkpoints (positive integer),
      or None to never checkpoint
    :param store: object providing a `checkpoint()` method
    :param it: iterable whose items are yielded unchanged

    The checkpoint for an item is issued when the consumer asks for the next
    one, i.e. after the consumer has processed it. When `nbit` is not None, a
    final checkpoint is always issued once `it` is exhausted.
    """
    for i, x in enumerate(it):
        yield x
        # `i` is zero-based: checkpoint after the nbit-th, 2*nbit-th, ... item.
        # (The previous test `i % nbit` was true for every index that is NOT
        # a multiple of nbit, hence checkpointing on nearly every item.)
        if nbit is not None and (i + 1) % nbit == 0:
            store.checkpoint()
    if nbit is not None:
        store.checkpoint()
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
99 |
|
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
100 |
def lazytable(reader):
    """The first row is taken to be the header of the table and
    used to output a dict for each row of data.

    :param reader: iterable (or iterator) of rows, each row being a sequence
      of cells

    Yields one dict per data row, mapping header cells to the row's cells
    (pairs are built with `zip`, so extra cells on either side are dropped).
    Nothing is yielded when `reader` is empty.

    >>> data = lazytable(ucsvreader(open(filename)))
    """
    rows = iter(reader)
    # Fetch the header without calling `.next()`: this works for any iterable
    # and does not raise StopIteration inside the generator on empty input.
    for header in rows:
        break
    else:
        return
    for row in rows:
        yield dict(zip(header, row))
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
109 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
110 |
def mk_entity(row, map):
    """Build a dict of sanitized values out of `row`, as described by `map`.

    `map` is a sequence of `(source key, destination key, functions)`
    triples: the value found in `row` under the source key is passed through
    each function in turn and the result is stored under the destination key.

    >>> row = {'myname': u'dupont'}
    >>> map = [('myname', u'name', (capitalize_if_unicase,))]
    >>> mk_entity(row, map)
    {'name': u'Dupont'}
    """
    entity = {}
    for source_key, target_key, sanitizers in map:
        value = row[source_key]
        for sanitize in sanitizers:
            value = sanitize(value)
        entity[target_key] = value
    return entity
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
124 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
125 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
126 |
# user interactions ############################################################ |
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
127 |
|
3029
bc573d5fb5b7
F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
3003
diff
changeset
|
128 |
def tell(msg):
    """Print `msg` on the standard output."""
    print(msg)
bc573d5fb5b7
F [devtools] by default dataimport prints message on stdout
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
3003
diff
changeset
|
130 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
131 |
def confirm(question):
    """Ask `question` with the choices Y / n / abort.

    Exits the process with status 1 when the answer is 'abort'; otherwise
    returns True if the answer is 'Y' and False if not.
    """
    choices = ('Y','n','abort')
    reply = shellutils.ASK.ask(question, choices, 'Y')
    aborted = reply == 'abort'
    if aborted:
        sys.exit(1)
    return reply == 'Y'
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
137 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
138 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
139 |
class catch_error(object):
    """Context manager recording errors on a controller instead of raising.

    On exit with an exception other than KeyboardInterrupt / SystemExit, and
    if `ctl.catcherrors` is true, the error is passed to `ctl.record_error`
    under `key` and then silenced. In every other case the exception, if any,
    propagates normally.
    """

    def __init__(self, ctl, key='unexpected error', msg=None):
        # `msg` is only stored here; `__exit__` does not use it.
        self.ctl, self.key, self.msg = ctl, key, msg

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        if type is None:
            # the block ended without error
            return None
        if issubclass(type, (KeyboardInterrupt, SystemExit)):
            # never swallow an interruption or an exit request
            return None
        if not self.ctl.catcherrors:
            # errors are not to be caught: let the exception propagate
            return None
        self.ctl.record_error(self.key, None, type, value, traceback)
        # a true value tells the `with` statement to silence the exception
        return True
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
157 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
158 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
159 |
# base sanitizing functions #################################################### |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
160 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
161 |
def capitalize_if_unicase(txt):
    """Return `txt` capitalized when it is entirely upper case or entirely
    lower case; return it unchanged otherwise.
    """
    single_case = txt.isupper() or txt.islower()
    return txt.capitalize() if single_case else txt
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
165 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
166 |
def no_space(txt):
    """Return `txt` with every space character removed."""
    space, nothing = ' ', ''
    return txt.replace(space, nothing)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
168 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
169 |
def no_uspace(txt):
    """Return `txt` with every no-break space (U+00A0) removed."""
    nbsp, nothing = u'\xa0', ''
    return txt.replace(nbsp, nothing)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
171 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
172 |
def no_dash(txt):
    """Return `txt` with every '-' character removed."""
    dash, nothing = '-', ''
    return txt.replace(dash, nothing)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
174 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
175 |
def alldigits(txt):
    """Return `txt` if it is made of digits only, an empty unicode string
    otherwise (including when `txt` is empty).
    """
    return txt if txt.isdigit() else u''
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
180 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
181 |
def strip(txt):
    """Return `txt` without its leading and trailing whitespace."""
    return txt.strip()
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
183 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
184 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
185 |
# base integrity checking functions ############################################ |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
186 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
187 |
def check_doubles(buckets):
    """Report the keys whose bucket holds more than one item.

    `buckets` maps a key to a sized collection; the result is a list of
    `(key, number_of_items)` pairs, one per key with at least two items.
    """
    doubles = []
    for key, value in buckets.items():
        count = len(value)
        if count > 1:
            doubles.append((key, count))
    return doubles
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
190 |
|
4136
47060a66c97f
dataimport refactoring / improvments, keeping bw compat (for now)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3486
diff
changeset
|
191 |
def check_doubles_not_none(buckets):
    """Report the non-None keys whose bucket holds more than one item.

    Same as a plain duplicate report, i.e. a list of
    `(key, number_of_items)` pairs, except that the `None` key is always
    skipped whatever the size of its bucket.
    """
    doubles = []
    for key, value in buckets.items():
        if key is None:
            continue
        count = len(value)
        if count > 1:
            doubles.append((key, count))
    return doubles
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
194 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
195 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
196 |
# object stores ################################################################# |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
197 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
198 |
class ObjectStore(object):
    """Store objects in memory for faster testing.

    Will not enforce the constraints of the schema and hence will miss
    some problems.

    >>> store = ObjectStore()
    >>> user = {'login': 'johndoe'}
    >>> store.add('CWUser', user)
    >>> group = {'name': 'unknown'}
    >>> store.add('CWGroup', group)
    >>> store.relate(user['eid'], 'in_group', group['eid'])
    """

    def __init__(self):
        # items in insertion order; with the in-memory `_put` below, an
        # item's position in this list is its eid
        self.items = []
        # eid -> item
        self.eids = {}
        # entity type name -> list of eids of that type
        self.types = {}
        # set of (eid_from, rtype, eid_to) triples
        self.relations = set()
        # index name -> {key: [values]}
        self.indexes = {}
        # not used by this class itself
        self._rql = None
        self._checkpoint = None

    def _put(self, type, item):
        """Append `item` to the store and return its eid (its position)."""
        self.items.append(item)
        return len(self.items) - 1

    def add(self, type, item):
        """Register the dict `item` as an entity of type `type`.

        The eid returned by `_put` is written into `item['eid']`.
        """
        # the `type` parameter shadows the builtin, hence `__class__`
        assert isinstance(item, dict), \
            'item is not a dict but a %s' % (item.__class__,)
        eid = item['eid'] = self._put(type, item)
        self.eids[eid] = item
        self.types.setdefault(type, []).append(eid)

    def relate(self, eid_from, rtype, eid_to):
        """Record the relation `rtype` from `eid_from` to `eid_to`.

        Both eids must designate items already held by the store,
        otherwise an AssertionError is raised.
        """
        nb_items = len(self.items)
        # valid eids are list positions: 0 .. nb_items - 1
        eids_valid = (0 <= eid_from < nb_items and 0 <= eid_to < nb_items)
        assert eids_valid, 'eid error %s %s' % (eid_from, eid_to)
        self.relations.add((eid_from, rtype, eid_to))

    def build_index(self, name, type, func):
        """(Re)build index `name` over all entities of type `type`.

        `func` is called with each entity and returns its index key; the
        index maps each key to the list of matching eids.
        """
        index = {}
        for eid in self.types[type]:
            index.setdefault(func(self.eids[eid]), []).append(eid)
        self.indexes[name] = index

    def get_many(self, name, key):
        """Return the list stored under `key` in index `name` ([] if none)."""
        return self.indexes[name].get(key, [])

    def get_one(self, name, key):
        """Return the single value stored under `key` in index `name`.

        Raises AssertionError unless exactly one value is found.
        """
        eids = self.indexes[name].get(key, [])
        assert len(eids) == 1, 'expected a single one got %i' % len(eids)
        return eids[0]

    def find(self, type, key, value):
        """Yield the entities of type `type` for which `item[key] == value`."""
        for eid in self.types[type]:
            # look up through `eids` (as build_index does) rather than
            # `items`, which is only filled by the in-memory `_put`
            item = self.eids[eid]
            if item[key] == value:
                yield item

    def checkpoint(self):
        """Do nothing: the in-memory store has nothing to commit."""
        pass
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
257 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
258 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
259 |
class RQLObjectStore(ObjectStore):
    """ObjectStore that works with an actual RQL repository."""
    _rql = None # bw compat

    def __init__(self, session=None, checkpoint=None):
        """Bind the store to `session` and pick the `checkpoint` callable.

        `session` may lack a `set_pool` attribute, in which case it is
        handled as a connection: a request is obtained from it and used
        in its place. An explicit `checkpoint` wins over the `commit` of
        the connection or session.
        """
        ObjectStore.__init__(self)
        if session is None:
            if checkpoint is not None:
                self.checkpoint = checkpoint
            return
        if not hasattr(session, 'set_pool'):
            # connection
            connection = session
            session = connection.request()
            # give the request the no-op `set_pool` that rql() and
            # create_entity() call before using the session
            session.set_pool = lambda : None
            checkpoint = checkpoint or connection.commit
        self.session = session
        self.checkpoint = checkpoint or session.commit

    def rql(self, *args):
        """Execute a query with `args` and return its result.

        The `_rql` callable is used when set (backward compatibility),
        the session otherwise.
        """
        legacy_rql = self._rql
        if legacy_rql is not None:
            return legacy_rql(*args)
        session = self.session
        session.set_pool()
        return session.execute(*args)

    def create_entity(self, *args, **kwargs):
        """Create an entity through the session and register it.

        The first positional argument is used as the entity type key in
        `self.types`.
        """
        session = self.session
        session.set_pool()
        created = session.create_entity(*args, **kwargs)
        eid = created.eid
        self.eids[eid] = created
        self.types.setdefault(args[0], []).append(eid)
        return created

    def _put(self, type, item):
        """Insert `item` with an INSERT query; return cell [0][0] of the result."""
        assignments = ['X %s %%(%s)s' % (key, key) for key in item]
        query = ('INSERT %s X: ' % type) + ', '.join(assignments)
        return self.rql(query, item)[0][0]

    def relate(self, eid_from, rtype, eid_to):
        """Set relation `rtype` between both eids with a SET query, then record it."""
        query = 'SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype
        query_args = {'x': int(eid_from), 'y': int(eid_to)}
        self.rql(query, query_args, ('x', 'y'))
        self.relations.add((eid_from, rtype, eid_to))
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
298 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
299 |
|
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
300 |
# the import controller ######################################################## |
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
301 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
302 |
class CWImportController(object): |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
303 |
"""Controller of the data import process. |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
304 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
305 |
>>> ctl = CWImportController(store) |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
306 |
>>> ctl.generators = list_of_data_generators |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
307 |
>>> ctl.data = dict_of_data_tables |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
308 |
>>> ctl.run() |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
309 |
""" |
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
310 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
311 |
def __init__(self, store, askerror=False, catcherrors=None, tell=tell,
             commitevery=50):
    """Set up the controller around `store`.

    :param store: object store the import writes to
    :param askerror: when true, `run` offers to display collected errors
    :param catcherrors: when true, `run` records generator failures
      instead of re-raising them; defaults to the value of `askerror`
    :param tell: callable receiving progress messages
    :param commitevery: stored as `self.commitevery`
    """
    self.store = store
    self.generators = None
    self.data = {}
    # stays None until run() replaces it with a dict
    self.errors = None
    # run() resets this for each generator; initialised here as well so
    # that check() can be called before run() without AttributeError
    self._checks = {}
    self.askerror = askerror
    self.catcherrors = askerror if catcherrors is None else catcherrors
    self.commitevery = commitevery # set to None to do a single commit
    self._tell = tell
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
323 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
324 |
def check(self, type, key, value):
    """Append `value` to the bucket `self._checks[type][key]`.

    Missing levels of the nested mapping are created on the fly.
    """
    buckets = self._checks.setdefault(type, {})
    buckets.setdefault(key, []).append(value)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
326 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
327 |
def check_map(self, entity, key, map, default):
    """Translate `entity[key]` through `map`, in place.

    When the lookup raises KeyError, the original value is reported
    with `self.check(key, <value>, None)` and `entity[key]` is set to
    `default` instead.
    """
    try:
        translated = map[entity[key]]
    except KeyError:
        self.check(key, entity[key], None)
        translated = default
    entity[key] = translated
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
333 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
334 |
def record_error(self, key, msg=None, type=None, value=None, tb=None):
    """Print a traceback and store it in `self.errors[key]`.

    :param key: entry of `self.errors` the error is appended to
    :param msg: optional message; when given, the stored entry is the
      pair `(msg, traceback_lines)`, otherwise just `traceback_lines`
    :param type, value, tb: exception triple to format; when `type` is
      None the exception currently being handled is used instead
    """
    tmp = StringIO()
    if type is None:
        traceback.print_exc(file=tmp)
    else:
        traceback.print_exception(type, value, tb, file=tmp)
    report = tmp.getvalue()
    # single-argument call form: same output as the print statement
    print(report)
    # `errors` is None until run() is called; create it on demand so an
    # error can be recorded at any time
    if self.errors is None:
        self.errors = {}
    # use a list to avoid counting a <nb lines> errors instead of one
    errorlog = self.errors.setdefault(key, [])
    lines = report.splitlines()
    if msg is None:
        errorlog.append(lines)
    else:
        errorlog.append((msg, lines))
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
347 |
|
2974
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
348 |
def run(self):
    """Run every data generator, then its integrity checks, and report.

    `self.generators` is iterated as `(func, checks)` pairs; `func` is
    called with the controller and `checks` holds
    `(key, check_func, title, help)` tuples, applied to the buckets that
    `func` collected under `key`. A failing generator is recorded in
    `self.errors` when `self.catcherrors` is true and re-raised
    otherwise.
    """
    self.errors = {}
    for func, checks in self.generators:
        self._checks = {}
        # drops the first 4 characters of the function name
        # (presumably a `gen_` prefix -- TODO confirm)
        func_name = func.__name__[4:] # XXX
        self.tell('Importing %s' % func_name)
        try:
            func(self)
        except Exception:
            # not a bare except: KeyboardInterrupt / SystemExit must
            # still be able to stop the import
            if self.catcherrors:
                self.record_error(func_name, 'While calling %s' % func.__name__)
            else:
                raise
        for key, checker, title, help in checks:
            buckets = self._checks.get(key)
            if buckets:
                err = checker(buckets)
                if err:
                    self.errors[title] = (help, err)
    self.store.checkpoint()
    self.tell('\nImport completed: %i entities (%i types), %i relations'
              % (len(self.store.eids), len(self.store.types),
                 len(self.store.relations)))
    nberrors = 0
    for err in self.errors.values():
        if isinstance(err, tuple):
            # integrity check result: (help, problems)
            nberrors += len(err[1])
        else:
            # list filled by record_error: one entry per error
            nberrors += len(err)
    if nberrors:
        print('%s errors' % nberrors)
    if self.errors and self.askerror and confirm('Display errors?'):
        import pprint
        pprint.pprint(self.errors)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
377 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
378 |
def get_data(self, key):
    """Look up `key` in ``self.data`` and return the associated entry.

    The lookup goes through ``.get``, so a missing key does not raise
    (presumably yielding None, assuming ``self.data`` is a plain dict).
    """
    datasets = self.data
    return datasets.get(key)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
380 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
381 |
def index(self, name, key, value):
    """Record `value` under `key` in the index called `name`.

    Indexes live in ``self.store.indexes``; both the per-name mapping and
    the per-key list are created on first use, and `value` is appended to
    the list (earlier values for the same key are kept).
    """
    named_index = self.store.indexes.setdefault(name, {})
    values = named_index.setdefault(key, [])
    values.append(value)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
383 |
|
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
384 |
def tell(self, msg):
    """Forward `msg` to the ``self._tell`` callable; nothing is returned."""
    emit = self._tell
    emit(msg)
3dfe497e5afa
F tools to import data
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
diff
changeset
|
386 |
|
4152
30fd1229137d
new catch_error context manager, nicer controller __init__ and new iter_and_commit(datakey) method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4140
diff
changeset
|
387 |
def iter_and_commit(self, datakey):
    """Iterate over the rows registered under `datakey`, committing as we go.

    Returns whatever ``commit_every`` produces when given
    ``self.commitevery``, ``self.store`` and the rows obtained from
    ``self.get_data(datakey)``; the commit is expected to be triggered
    every ``self.commitevery`` iterations.
    """
    interval, store = self.commitevery, self.store
    return commit_every(interval, store, self.get_data(datakey))