author | Julien Cristau <julien.cristau@logilab.fr> |
Wed, 17 Feb 2016 10:19:14 +0100 | |
changeset 11133 | 9e955d8be8a9 |
parent 10939 | b30c2f49da57 |
permissions | -rw-r--r-- |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# This file is part of CubicWeb. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# any later version. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# details. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
"""Old and deprecated dataimport API that provides tools to import tabular data. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
21 |
Example of use (run this with `cubicweb-ctl shell instance import-script.py`): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
23 |
.. sourcecode:: python |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
24 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
from cubicweb.dataimport import * |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
26 |
# define data generators |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
27 |
GENERATORS = [] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
28 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
29 |
USERS = [('Prenom', 'firstname', ()), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
('Nom', 'surname', ()), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
31 |
('Identifiant', 'login', ()), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
32 |
] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
def gen_users(ctl): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
for row in ctl.iter_and_commit('utilisateurs'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
entity = mk_entity(row, USERS) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
entity['upassword'] = 'motdepasse' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
ctl.check('login', entity['login'], None) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
entity = ctl.store.prepare_insert_entity('CWUser', **entity) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
email = ctl.store.prepare_insert_entity('EmailAddress', address=row['email']) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
ctl.store.prepare_insert_relation(entity, 'use_email', email) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x': entity}) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
CHK = [('login', check_doubles, 'Utilisateurs Login', |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
'Deux utilisateurs ne devraient pas avoir le meme login.'), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
46 |
] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
GENERATORS.append( (gen_users, CHK) ) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
# create controller |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
ctl = CWImportController(RQLObjectStore(cnx)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
52 |
ctl.askerror = 1 |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
ctl.generators = GENERATORS |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
54 |
ctl.data['utilisateurs'] = lazytable(ucsvreader(open('users.csv'))) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
# run |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
ctl.run() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
58 |
.. BUG files with only one column are not parsable |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
59 |
.. TODO rollback() invocation is not possible yet |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
60 |
""" |
10589
7c23b7de2b8d
[py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10513
diff
changeset
|
61 |
from __future__ import print_function |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
62 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
63 |
import sys |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
64 |
import traceback |
10808
b6b2e1bcd1b2
[dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents:
10688
diff
changeset
|
65 |
from io import StringIO |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
66 |
|
10688
fa29f3628a1b
[py3k] use six.add_metaclass
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10669
diff
changeset
|
67 |
from six import add_metaclass |
fa29f3628a1b
[py3k] use six.add_metaclass
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10669
diff
changeset
|
68 |
|
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
69 |
from logilab.common import attrdict, shellutils |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
70 |
from logilab.common.date import strptime |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
71 |
from logilab.common.deprecation import deprecated, class_deprecated |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
72 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
73 |
from cubicweb import QueryError |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
74 |
from cubicweb.dataimport import callfunc_every |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
75 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
76 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
77 |
@deprecated('[3.21] deprecated')
def lazytable(reader):
    """Yield one dict per data row, keyed by the table's header row.

    The first item produced by `reader` is taken to be the header of the
    table; every following row is paired with it through ``zip``, so a row
    shorter or longer than the header is truncated to the shorter of the two.

    `reader` must be an iterator (it is passed to ``next``). If it is
    already exhausted, i.e. there is not even a header row, nothing is
    yielded.

    >>> data = lazytable(ucsvreader(open(filename)))
    """
    try:
        header = next(reader)
    except StopIteration:
        # No header row at all: end the generator cleanly.  Letting
        # StopIteration escape a generator body is turned into a
        # RuntimeError by PEP 479 (default behaviour since Python 3.7).
        return
    for row in reader:
        yield dict(zip(header, row))
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
87 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
88 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
89 |
@deprecated('[3.21] deprecated')
def lazydbtable(cu, table, headers, orderby=None):
    """Iterate over the rows of a SQL table, yielding each one as a dict.

    A ``SELECT <headers> FROM <table>`` query is built (with an
    ``ORDER BY <orderby>`` clause appended when `orderby` is truthy) and
    executed on `cu`. Rows are then pulled one at a time with
    ``cu.fetchone()`` until it returns None, and each row is yielded as
    ``dict(zip(headers, row))``.

    `headers` and `orderby` are joined with commas, so they are expected to
    be sequences of column names. `table`, `headers` and `orderby` are
    interpolated into the query text as-is, without any quoting.

    >>> data = lazydbtable(cu, 'experimentation', ('id', 'nickname', 'gps'))
    """
    query = 'SELECT %s FROM %s' % (','.join(headers), table)
    if orderby:
        query = query + ' ORDER BY %s' % ','.join(orderby)
    cu.execute(query)
    record = cu.fetchone()
    while record is not None:
        yield dict(zip(headers, record))
        record = cu.fetchone()
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
105 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
106 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
107 |
@deprecated('[3.21] deprecated')
def tell(msg):
    """Print `msg` using the ``print`` function."""
    print(msg)
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
110 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
111 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
112 |
@deprecated('[3.21] deprecated')
def confirm(question):
    """Ask `question`, offering the choices 'Y', 'n' and 'abort'.

    The prompt is delegated to ``shellutils.ASK.ask``, which is also given
    'Y' as its third argument (presumably the default answer -- confirm
    against logilab.common.shellutils).

    Return True when the answer is 'Y' and False for any other answer except
    'abort', which terminates the program through ``sys.exit(1)``.
    """
    choice = shellutils.ASK.ask(question, ('Y', 'n', 'abort'), 'Y')
    if choice != 'abort':
        return choice == 'Y'
    sys.exit(1)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
119 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
120 |
|
10688
fa29f3628a1b
[py3k] use six.add_metaclass
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10669
diff
changeset
|
121 |
@add_metaclass(class_deprecated)
class catch_error(object):
    """Context manager that reports errors raised in its body to a controller.

    On exit with an exception:

    * KeyboardInterrupt and SystemExit are never intercepted;
    * if ``ctl.catcherrors`` is true, the error is handed to
      ``ctl.record_error`` under `key` and then silenced;
    * otherwise the exception propagates unchanged.

    `msg` is stored on the instance but not used by this class.
    """
    __deprecation_warning__ = '[3.21] deprecated'

    def __init__(self, ctl, key='unexpected error', msg=None):
        self.ctl = ctl
        self.key = key
        self.msg = msg

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        if type is None:
            # body completed without raising: nothing to do
            return None
        must_propagate = issubclass(type, (KeyboardInterrupt, SystemExit))
        if must_propagate or not self.ctl.catcherrors:
            # a falsy return value lets the exception be re-raised
            return None
        self.ctl.record_error(self.key, None, type, value, traceback)
        # a true return value tells the `with` statement to swallow the error
        return True
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
141 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
142 |
@deprecated('[3.21] deprecated')
def mk_entity(row, map):
    """Build a dict of sanitized values from `row` according to `map`.

    `row` must be a dict and `map` a list of ``(src, dest, funcs)`` triples.
    For each triple, ``row[src]`` is passed through every callable of
    `funcs` in turn and the final value is stored under `dest`. A source key
    missing from `row` is skipped; a callable returning None stops the chain
    for that field, and None is then the stored value.

    A ValueError raised by one of the callables is re-raised as a new
    ValueError naming the offending source field.

    >>> row = {'myname': u'dupont'}
    >>> map = [('myname', u'name', (call_transform_method('title'),))]
    >>> mk_entity(row, map)
    {'name': u'Dupont'}
    >>> row = {'myname': u'dupont', 'optname': u''}
    >>> map = [('myname', u'name', (call_transform_method('title'),)),
    ...        ('optname', u'MARKER', (optional,))]
    >>> mk_entity(row, map)
    {'name': u'Dupont', 'MARKER': None}
    """
    assert isinstance(row, dict)
    assert isinstance(map, list)
    entity = {}
    for src, dest, funcs in map:
        try:
            value = row[src]
        except KeyError:
            # source column absent from this row: leave `dest` out entirely
            continue
        try:
            for transform in funcs:
                value = transform(value)
                if value is None:
                    # a None result short-circuits the remaining callables
                    break
        except ValueError as err:
            exc = ValueError('error with %r field: %s' % (src, err))
            # attach the traceback of the original failure to the new error
            exc.__traceback__ = sys.exc_info()[-1]
            raise exc
        entity[dest] = value
    return entity
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
176 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
177 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
178 |
# base sanitizing/coercing functions ########################################### |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
179 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
180 |
@deprecated('[3.21] deprecated')
def optional(value):
    """Checker flagging a field as optional.

    A false value (e.g. an empty string) is turned into None, which breaks
    the checkers validation chain; any true value is returned unchanged.

    Typically placed first in a checkers chain, so that later checkers are
    not handed an empty value they would reject with ValueError.

    >>> MAPPER = [(u'value', 'value', (optional, int))]
    >>> row = {'value': u''}
    >>> mk_entity(row, MAPPER)
    {'value': None}
    >>> row = {'value': u'100'}
    >>> mk_entity(row, MAPPER)
    {'value': 100}
    """
    return value if value else None
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
201 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
202 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
203 |
@deprecated('[3.21] deprecated')
def required(value):
    """Checker rejecting empty values.

    Return `value` untouched when it is true, raise ValueError otherwise.
    This checker is usually the last one of a chain.
    """
    if not value:
        raise ValueError("required")
    return value
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
212 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
213 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
214 |
@deprecated('[3.21] deprecated')
def todatetime(format='%d/%m/%Y'):
    """Build a coercing function parsing its string input with `format`.

    The returned callable passes its argument and `format` to `strptime`,
    turning the string into a `datetime.datetime` instance.

    Chain it with `todate` or `totime` from `logilab.common.date` to get a
    `date` / `time` instance rather than a `datetime`.
    """
    def coerce(value):
        parsed = strptime(value, format)
        return parsed
    return coerce
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
225 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
226 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
227 |
@deprecated('[3.21] deprecated')
def call_transform_method(methodname, *args, **kwargs):
    """Build a coercing function calling method `methodname` on its input.

    The returned callable looks the method up on the value it receives,
    calls it with the `args` / `kwargs` given here and returns the result.
    """
    def coerce(value):
        method = getattr(value, methodname)
        return method(*args, **kwargs)
    return coerce
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
233 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
234 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
235 |
@deprecated('[3.21] deprecated')
def call_check_method(methodname, *args, **kwargs):
    """Build a checker validating its input through method `methodname`.

    The returned callable calls the named method on the value it receives,
    with the `args` / `kwargs` given here. The value is returned unchanged
    when the call result is true; ValueError is raised otherwise.
    """
    def check(value):
        verified = getattr(value, methodname)(*args, **kwargs)
        if not verified:
            raise ValueError('%s not verified on %r' % (methodname, value))
        return value
    return check
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
245 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
246 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
247 |
# base integrity checking functions ############################################ |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
248 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
249 |
@deprecated('[3.21] deprecated')
def check_doubles(buckets):
    """Return a list of ``(key, size)`` pairs for every bucket of the
    `buckets` mapping holding more than one item.
    """
    doubles = []
    for key, bucket in buckets.items():
        size = len(bucket)
        if size > 1:
            doubles.append((key, size))
    return doubles
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
253 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
254 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
255 |
@deprecated('[3.21] deprecated')
def check_doubles_not_none(buckets):
    """Return a list of ``(key, size)`` pairs for every bucket of the
    `buckets` mapping holding more than one item, ignoring the bucket
    stored under the None key.
    """
    doubles = []
    for key, bucket in buckets.items():
        if key is None:
            continue
        size = len(bucket)
        if size > 1:
            doubles.append((key, size))
    return doubles
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
260 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
261 |
|
10688
fa29f3628a1b
[py3k] use six.add_metaclass
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10669
diff
changeset
|
262 |
@add_metaclass(class_deprecated)
class ObjectStore(object):
    """Store objects in memory for *faster* validation (development mode).

    But it will not enforce the constraints of the schema and hence will miss
    some problems.

    >>> store = ObjectStore()
    >>> user = store.prepare_insert_entity('CWUser', login=u'johndoe')
    >>> group = store.prepare_insert_entity('CWGroup', name=u'unknown')
    >>> store.prepare_insert_relation(user, 'in_group', group)
    (0, 'in_group', 1)
    """
    __deprecation_warning__ = '[3.21] use the new importer API'

    def __init__(self):
        # fake entities in insertion order; an entity's eid is its index here
        self.items = []
        # eid -> fake entity
        self.eids = {}
        # entity type -> list of the eids inserted with that type
        self.types = {}
        # set of (eid_from, rtype, eid_to) tuples
        self.relations = set()
        self.indexes = {}

    def prepare_insert_entity(self, etype, **data):
        """Given an entity type, attributes and inlined relations, return an
        eid for the entity that would be inserted with a real store.

        The fake entity is kept as an `attrdict` holding `data` plus an 'eid'
        key; eids are allocated sequentially, starting from 0.
        """
        data = attrdict(data)
        data['eid'] = eid = len(self.items)
        self.items.append(data)
        self.eids[eid] = data
        self.types.setdefault(etype, []).append(eid)
        return eid

    def prepare_update_entity(self, etype, eid, **kwargs):
        """Given an entity type and eid, update the corresponding fake entity
        with specified attributes and inlined relations.
        """
        # `.get` so that an entity type never inserted fails with the message
        # below rather than with a bare KeyError
        assert eid in self.types.get(etype, ()), \
            'Trying to update with wrong type %s' % etype
        data = self.eids[eid]
        data.update(kwargs)

    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
        """Store into the `relations` attribute that a relation ``rtype``
        exists between entities with eids ``eid_from`` and ``eid_to``.

        Extra keyword arguments are accepted but ignored. Return the
        ``(eid_from, rtype, eid_to)`` tuple that was recorded.
        """
        relation = eid_from, rtype, eid_to
        self.relations.add(relation)
        return relation

    def flush(self):
        """Nothing to flush for this store."""
        pass

    def commit(self):
        """Nothing to commit for this store."""
        return

    def finish(self):
        """Nothing to do once import is terminated for this store."""
        pass

    @property
    def nb_inserted_entities(self):
        """Number of fake entities recorded so far."""
        return len(self.eids)

    @property
    def nb_inserted_types(self):
        """Number of distinct entity types recorded so far."""
        return len(self.types)

    @property
    def nb_inserted_relations(self):
        """Number of distinct relations recorded so far."""
        return len(self.relations)

    @deprecated('[3.21] use prepare_insert_entity instead')
    def create_entity(self, etype, **data):
        """Insert a fake entity and return it.

        The entity actually stored is returned, so it carries its 'eid' key
        (a fresh copy of `data` would lack it and be disconnected from the
        store).
        """
        eid = self.prepare_insert_entity(etype, **data)
        return self.eids[eid]

    @deprecated('[3.21] use prepare_insert_relation instead')
    def relate(self, eid_from, rtype, eid_to, **kwargs):
        """Record a relation and return the ``(eid_from, rtype, eid_to)``
        tuple, as `prepare_insert_relation` does.
        """
        return self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
341 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
342 |
|
10688
fa29f3628a1b
[py3k] use six.add_metaclass
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10669
diff
changeset
|
343 |
@add_metaclass(class_deprecated) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
344 |
class CWImportController(object): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
345 |
"""Controller of the data import process. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
346 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
347 |
>>> ctl = CWImportController(store) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
348 |
>>> ctl.generators = list_of_data_generators |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
349 |
>>> ctl.data = dict_of_data_tables |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
350 |
>>> ctl.run() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
351 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
352 |
__deprecation_warning__ = '[3.21] use the new importer API' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
353 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
354 |
def __init__(self, store, askerror=0, catcherrors=None, tell=tell, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
355 |
commitevery=50): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
356 |
self.store = store |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
357 |
self.generators = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
358 |
self.data = {} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
359 |
self.errors = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
360 |
self.askerror = askerror |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
361 |
if catcherrors is None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
362 |
catcherrors = askerror |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
363 |
self.catcherrors = catcherrors |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
364 |
self.commitevery = commitevery # set to None to do a single commit |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
365 |
self._tell = tell |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
366 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
367 |
def check(self, type, key, value): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
368 |
self._checks.setdefault(type, {}).setdefault(key, []).append(value) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
369 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
370 |
def check_map(self, entity, key, map, default):
    """Replace ``entity[key]`` by its translation in `map`.

    When the current value has no entry in `map`, the unmapped value is
    reported through ``self.check(key, <value>, None)`` and ``entity[key]``
    is set to `default` instead.

    Raises KeyError if `key` is not present in `entity`.
    """
    # Read the value once, outside the try block: only a miss in `map` is an
    # expected condition, a missing entity key must surface as a plain
    # KeyError rather than being raised again from inside the handler.
    raw = entity[key]
    try:
        mapped = map[raw]
    except KeyError:
        self.check(key, raw, None)
        mapped = default
    entity[key] = mapped
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
376 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
377 |
def record_error(self, key, msg=None, type=None, value=None, tb=None):
    """Store a formatted traceback in ``self.errors[key]``.

    When `type` is None the exception currently being handled is formatted,
    otherwise the given ``(type, value, tb)`` triple is. The traceback is
    kept as a list of lines, paired with `msg` in a tuple when a message is
    given.
    """
    if type is None:
        report = traceback.format_exc()
    else:
        report = ''.join(traceback.format_exception(type, value, tb))
    # one list per traceback, so that an error is counted once and not once
    # per traceback line
    entry = report.splitlines()
    if msg is not None:
        entry = (msg, entry)
    self.errors.setdefault(key, []).append(entry)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
389 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
390 |
def run(self):
    """Run every registered import function, then commit and report.

    For each ``(func, checks)`` pair of ``self.generators``:

    * ``self._checks`` is reset and ``func(self)`` is called; an exception is
      stored through ``record_error`` when ``self.catcherrors`` is true,
      otherwise statistics are printed and the exception propagates;
    * every ``(key, check_func, title, help)`` entry of ``checks`` whose key
      collected values in ``self._checks`` is evaluated, and a truthy result
      is stored as ``self.errors[title] = (help, result)``.

    The store is then committed, statistics are printed and the collected
    errors are displayed when ``self.askerror`` is 2, or when it is true and
    the user confirms.
    """
    # NOTE(review): block nesting was reconstructed from statement order (the
    # commit is assumed to happen once, after all import functions ran) --
    # confirm against the pristine file.
    self.errors = {}
    if self.commitevery is None:
        self.tell('Will commit all or nothing.')
    else:
        self.tell('Will commit every %s iterations' % self.commitevery)
    for func, checks in self.generators:
        self._checks = {}
        func_name = func.__name__
        self.tell("Run import function '%s'..." % func_name)
        try:
            func(self)
        except Exception:
            if self.catcherrors:
                self.record_error(func_name, 'While calling %s' % func_name)
            else:
                self._print_stats()
                raise
        for key, check_func, title, help_text in checks:
            buckets = self._checks.get(key)
            if buckets:
                err = check_func(buckets)
                if err:
                    self.errors[title] = (help_text, err)
    try:
        txuuid = self.store.commit()
        if txuuid is not None:
            self.tell('Transaction commited (txuuid: %s)' % txuuid)
    except QueryError as ex:
        self.tell('Transaction aborted: %s' % ex)
    self._print_stats()
    if self.errors:
        if self.askerror == 2 or (self.askerror and confirm('Display errors ?')):
            from pprint import pformat
            for errkey, error in self.errors.items():
                if isinstance(error, tuple):
                    # failed check, stored above as (help, err)
                    help_text, details = error
                    self.tell("\n%s (%s): %d\n" % (help_text, errkey, len(details)))
                    self.tell(pformat(sorted(details)))
                else:
                    # list of entries appended by record_error; indexing it
                    # like a (help, err) pair raised IndexError when a single
                    # exception had been recorded. Entries are kept in
                    # recording order (they mix tuples and lists, which
                    # cannot be sorted together).
                    self.tell("\n%s (%s): %d\n"
                              % ('Recorded exceptions', errkey, len(error)))
                    self.tell(pformat(error))
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
427 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
428 |
def _print_stats(self):
    """Send a one-line summary of the import through ``self.tell``.

    The summary gives the store's inserted entities, types and relations
    counters, followed by the summed length of every ``self.errors`` value.
    """
    error_count = sum(map(len, self.errors.values()))
    store = self.store
    counters = (store.nb_inserted_entities,
                store.nb_inserted_types,
                store.nb_inserted_relations,
                error_count)
    summary = ('\nImport statistics: %i entities, %i types, %i relations and %i errors'
               % counters)
    self.tell(summary)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
435 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
436 |
def get_data(self, key):
    """Return the entry of ``self.data`` stored under `key`, or None."""
    registry = self.data
    return registry.get(key)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
438 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
439 |
def index(self, name, key, value, unique=False):
    """Append `value` to the store index `name` under `key`.

    The index and its per-key list are created when missing. If `unique` is
    true, only the first occurrence of a value is kept: a value already
    listed under `key` is not added again.
    """
    bucket = self.store.indexes.setdefault(name, {}).setdefault(key, [])
    if unique and value in bucket:
        return
    bucket.append(value)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
452 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
453 |
def tell(self, msg):
    """Pass `msg` to the output callable stored in ``self._tell``."""
    emit = self._tell
    emit(msg)
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
455 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
456 |
def iter_and_commit(self, datakey):
    """Return the rows stored under `datakey`, with periodic commits.

    When ``self.commitevery`` is None the data is returned as is; otherwise
    it is wrapped by ``callfunc_every`` together with ``self.store.commit``
    and ``self.commitevery``.
    """
    if self.commitevery is None:
        return self.get_data(datakey)
    return callfunc_every(self.store.commit, self.commitevery, self.get_data(datakey))