author | Denis Laxalde <denis.laxalde@logilab.fr> |
Tue, 21 Jun 2016 15:32:51 +0200 | |
changeset 11298 | b1e7de000536 |
parent 11275 | 814f54d6183b |
permissions | -rw-r--r-- |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
1 |
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
2 |
# contact http://www.logilab.fr -- mailto:contact@logilab.fr |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
3 |
# |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
4 |
# This program is free software: you can redistribute it and/or modify it under |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
5 |
# the terms of the GNU Lesser General Public License as published by the Free |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
6 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
7 |
# any later version. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
8 |
# |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
9 |
# This program is distributed in the hope that it will be useful, but WITHOUT |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
10 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
11 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
12 |
# details. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
13 |
# |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
14 |
# You should have received a copy of the GNU Lesser General Public License along |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
15 |
# with this program. If not, see <http://www.gnu.org/licenses/>. |
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
16 |
"""Data import of external entities. |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
17 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
18 |
Main entry points: |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
19 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
20 |
.. autoclass:: ExtEntitiesImporter |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
21 |
.. autoclass:: ExtEntity |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
22 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
23 |
Utilities: |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
24 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
25 |
.. autofunction:: cwuri2eid |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
26 |
.. autoclass:: RelationMapping |
10514
b29d9904482e
add use_extid_as_cwuri ext entity transform, that will be often necessary and not so easy to write at once
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10461
diff
changeset
|
27 |
.. autofunction:: cubicweb.dataimport.importer.use_extid_as_cwuri |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
28 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
29 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
30 |
from collections import defaultdict |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
31 |
import logging |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
32 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
33 |
from logilab.mtconverter import xml_escape |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
34 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
35 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
36 |
def cwuri2eid(cnx, etypes, source_eid=None):
    """Return a dictionary mapping cwuri to eid for entities of the given entity types and / or
    source.

    :param cnx: object whose ``execute(rql, args)`` method runs the RQL query and returns
      an iterable of ``(cwuri, eid)`` rows
    :param etypes: iterable of entity type names used to restrict the query; may be empty
      or ``None`` when `source_eid` is given
    :param source_eid: optional eid of the source the entities must belong to

    At least one of `etypes` and `source_eid` must be specified.
    """
    assert source_eid or etypes, 'no entity types nor source specified'
    # Materialize once: a source-only lookup may pass None, and callers may pass a
    # non-indexable iterable (e.g. a set); neither supports both len() and [0].
    etypes = list(etypes) if etypes else []
    rql = 'Any U, X WHERE X cwuri U'
    args = {}
    if len(etypes) == 1:
        rql += ', X is %s' % etypes[0]
    elif etypes:
        rql += ', X is IN (%s)' % ','.join(etypes)
    if source_eid is not None:
        # the source eid is passed as a query argument, not interpolated in the RQL string
        rql += ', X cw_source S, S eid %(s)s'
        args['s'] = source_eid
    return dict(cnx.execute(rql, args))
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
51 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
52 |
|
10514
b29d9904482e
add use_extid_as_cwuri ext entity transform, that will be often necessary and not so easy to write at once
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10461
diff
changeset
|
53 |
def use_extid_as_cwuri(extid2eid):
    """Return a filter function over :class:`ExtEntity` objects that sets `cwuri`
    from the entity's extid when the entity does not exist yet and has no `cwuri`
    defined.

    The returned function takes an iterable of ext entities and yields each of them,
    in order, after having filled the missing `cwuri` value.

    `extid2eid` is an extid to eid dictionary coming from an
    :class:`ExtEntitiesImporter` instance.

    Example usage:

    .. code-block:: python

        importer = SKOSExtEntitiesImporter(cnx, store, import_log)
        set_cwuri = use_extid_as_cwuri(importer.extid2eid)
        importer.import_entities(set_cwuri(extentities))
    """
    def use_extid_as_cwuri_filter(extentities):
        for ext in extentities:
            already_known = ext.extid in extid2eid
            if not already_known:
                # extid is decoded as UTF-8 to build the cwuri value; an
                # explicitly provided 'cwuri' is left untouched by setdefault
                default_cwuri = set([ext.extid.decode('utf-8')])
                ext.values.setdefault('cwuri', default_cwuri)
            yield ext
    return use_extid_as_cwuri_filter
b29d9904482e
add use_extid_as_cwuri ext entity transform, that will be often necessary and not so easy to write at once
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10461
diff
changeset
|
75 |
|
b29d9904482e
add use_extid_as_cwuri ext entity transform, that will be often necessary and not so easy to write at once
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10461
diff
changeset
|
76 |
|
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
77 |
class RelationMapping(object):
    """Read-only mapping from relation type to set of related (subject, object) eids.

    If `source` is specified, only relations whose subject and object both belong
    to this source are returned.
    """

    def __init__(self, cnx, source=None):
        """Store the connection and build the RQL template used by lookups.

        `source`, when given, must expose an `eid` attribute.
        """
        template = 'Any S,O WHERE S %s O'
        query_args = {}
        if source is not None:
            # '%%' survives the later '% rtype' substitution as a literal '%',
            # leaving the '%(s)s' placeholder for the query arguments
            template += ', S cw_source SO, O cw_source SO, SO eid %%(s)s'
            query_args['s'] = source.eid
        self.cnx = cnx
        self._rql_template = template
        self._kwargs = query_args

    def __getitem__(self, rtype):
        """Return a set of (subject, object) eids already related by `rtype`"""
        rows = self.cnx.execute(self._rql_template % rtype, self._kwargs)
        return set(map(tuple, rows))
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
96 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
97 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
98 |
class ExtEntity(object): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
99 |
"""Transitional representation of an entity for use in data importer. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
100 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
101 |
An external entity has the following properties: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
102 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
103 |
* ``extid`` (external id), an identifier for the ext entity, |
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
104 |
|
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
105 |
* ``etype`` (entity type), a string which must be the name of one entity type in the schema |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
106 |
(eg. ``'Person'``, ``'Animal'``, ...), |
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
107 |
|
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
108 |
* ``values``, a dictionary whose keys are attribute or relation names from the schema (eg. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
109 |
``'first_name'``, ``'friend'``), and whose values are *sets* |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
110 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
111 |
For instance: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
112 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
113 |
.. code-block:: python |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
114 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
115 |
ext_entity.extid = 'http://example.org/person/debby' |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
116 |
ext_entity.etype = 'Person' |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
117 |
ext_entity.values = {'first_name': set([u"Deborah", u"Debby"]), |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
118 |
'friend': set(['http://example.org/person/john'])} |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
119 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
120 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
121 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
122 |
def __init__(self, etype, extid, values=None): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
123 |
self.etype = etype |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
124 |
self.extid = extid |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
125 |
if values is None: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
126 |
values = {} |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
127 |
self.values = values |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
128 |
self._schema = None |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
129 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
130 |
def __repr__(self): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
131 |
return '<%s %s %s>' % (self.etype, self.extid, self.values) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
132 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
133 |
def iter_rdefs(self): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
134 |
"""Yield (key, rtype, role) defined in `.values` dict, with: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
135 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
136 |
* `key` is the original key in `.values` (i.e. the relation type or a 2-uple (relation type, |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
137 |
role)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
138 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
139 |
* `rtype` is a yams relation type, expected to be found in the schema (attribute or |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
140 |
relation) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
141 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
142 |
* `role` is the role of the entity in the relation, 'subject' or 'object' |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
143 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
144 |
Iteration is done on a copy of the keys so values may be inserted/deleted during it. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
145 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
146 |
for key in list(self.values): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
147 |
if isinstance(key, tuple): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
148 |
rtype, role = key |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
149 |
assert role in ('subject', 'object'), key |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
150 |
yield key, rtype, role |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
151 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
152 |
yield key, key, 'subject' |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
153 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
154 |
def prepare(self, schema): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
155 |
"""Prepare an external entity for later insertion: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
156 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
157 |
* ensure attributes and inlined relations have a single value |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
158 |
* turn set([value]) into value and remove key associated to empty set |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
159 |
* remove non inlined relations and return them as a [(e1key, relation, e2key)] list |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
160 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
161 |
Return a list of non inlined relations that may be inserted later, each relations defined by |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
162 |
a 3-tuple (subject extid, relation type, object extid). |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
163 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
164 |
Take care the importer may call this method several times. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
165 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
166 |
assert self._schema is None, 'prepare() has already been called for %s' % self |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
167 |
self._schema = schema |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
168 |
eschema = schema.eschema(self.etype) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
169 |
deferred = [] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
170 |
entity_dict = self.values |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
171 |
for key, rtype, role in self.iter_rdefs(): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
172 |
rschema = schema.rschema(rtype) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
173 |
if rschema.final or (rschema.inlined and role == 'subject'): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
174 |
assert len(entity_dict[key]) <= 1, \ |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
175 |
"more than one value for %s: %s (%s)" % (rtype, entity_dict[key], self.extid) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
176 |
if entity_dict[key]: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
177 |
entity_dict[rtype] = entity_dict[key].pop() |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
178 |
if key != rtype: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
179 |
del entity_dict[key] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
180 |
if (rschema.final and eschema.has_metadata(rtype, 'format') |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
181 |
and not rtype + '_format' in entity_dict): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
182 |
entity_dict[rtype + '_format'] = u'text/plain' |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
183 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
184 |
del entity_dict[key] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
185 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
186 |
for target_extid in entity_dict.pop(key): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
187 |
if role == 'subject': |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
188 |
deferred.append((self.extid, rtype, target_extid)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
189 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
190 |
deferred.append((target_extid, rtype, self.extid)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
191 |
return deferred |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
192 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
193 |
def is_ready(self, extid2eid): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
194 |
"""Return True if the ext entity is ready, i.e. has all the URIs used in inlined relations |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
195 |
currently existing. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
196 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
197 |
assert self._schema, 'prepare() method should be called first on %s' % self |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
198 |
# as .prepare has been called, we know that .values only contains subject relation *type* as |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
199 |
# key (no more (rtype, role) tuple) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
200 |
schema = self._schema |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
201 |
entity_dict = self.values |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
202 |
for rtype in entity_dict: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
203 |
rschema = schema.rschema(rtype) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
204 |
if not rschema.final: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
205 |
# .prepare() should drop other cases from the entity dict |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
206 |
assert rschema.inlined |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
207 |
if not entity_dict[rtype] in extid2eid: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
208 |
return False |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
209 |
# entity is ready, replace all relation's extid by eids |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
210 |
for rtype in entity_dict: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
211 |
rschema = schema.rschema(rtype) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
212 |
if rschema.inlined: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
213 |
entity_dict[rtype] = extid2eid[entity_dict[rtype]] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
214 |
return True |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
215 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
216 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
217 |
class ExtEntitiesImporter(object): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
218 |
"""This class is responsible for importing externals entities, that is instances of |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
219 |
:class:`ExtEntity`, into CubicWeb entities. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
220 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
221 |
:param schema: the CubicWeb's instance schema |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
222 |
:param store: a CubicWeb `Store` |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
223 |
:param extid2eid: optional {extid: eid} dictionary giving information on existing entities. It |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
224 |
will be completed during import. You may want to use :func:`cwuri2eid` to build it. |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
225 |
:param existing_relations: optional {rtype: set((subj eid, obj eid))} mapping giving information on |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
226 |
existing relations of a given type. You may want to use :class:`RelationMapping` to build it. |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
227 |
:param etypes_order_hint: optional ordered iterable on entity types, giving a hint on the order in |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
228 |
which they should be attempted to be imported |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
229 |
:param import_log: optional object implementing the :class:`SimpleImportLog` interface to record |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
230 |
events occurring during the import |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
231 |
:param raise_on_error: optional boolean flag - defaults to False, indicating whether errors should |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
232 |
be raised or logged. You usually want them to be raised during test but to be logged in |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
233 |
production. |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
234 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
235 |
Instances of this class are meant to import external entities through :meth:`import_entities` |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
236 |
which handles a stream of :class:`ExtEntity`. One may then plug arbitrary filters into the |
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
237 |
external entities stream. |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
238 |
|
10461
37644c518705
[doc] Add a tutorial and extend documentation for ExtEntityImporter
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
10460
diff
changeset
|
239 |
.. automethod:: import_entities |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
240 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
241 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
242 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
243 |
def __init__(self, schema, store, extid2eid=None, existing_relations=None, |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
244 |
etypes_order_hint=(), import_log=None, raise_on_error=False): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
245 |
self.schema = schema |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
246 |
self.store = store |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
247 |
self.extid2eid = extid2eid if extid2eid is not None else {} |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
248 |
self.existing_relations = (existing_relations if existing_relations is not None |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
249 |
else defaultdict(set)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
250 |
self.etypes_order_hint = etypes_order_hint |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
251 |
if import_log is None: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
252 |
import_log = SimpleImportLog('<unspecified>') |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
253 |
self.import_log = import_log |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
254 |
self.raise_on_error = raise_on_error |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
255 |
# set of created/updated eids |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
256 |
self.created = set() |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
257 |
self.updated = set() |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
258 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
259 |
def import_entities(self, ext_entities): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
260 |
"""Import given external entities (:class:`ExtEntity`) stream (usually a generator).""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
261 |
# {etype: [etype dict]} of entities that are in the import queue |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
262 |
queue = {} |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
263 |
# order entity dictionaries then create/update them |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
264 |
deferred = self._import_entities(ext_entities, queue) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
265 |
# create deferred relations that don't exist already |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
266 |
missing_relations = self.prepare_insert_deferred_relations(deferred) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
267 |
self._warn_about_missing_work(queue, missing_relations) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
268 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
269 |
def _import_entities(self, ext_entities, queue): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
270 |
extid2eid = self.extid2eid |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
271 |
deferred = {} # non inlined relations that may be deferred |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
272 |
self.import_log.record_debug('importing entities') |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
273 |
for ext_entity in self.iter_ext_entities(ext_entities, deferred, queue): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
274 |
try: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
275 |
eid = extid2eid[ext_entity.extid] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
276 |
except KeyError: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
277 |
self.prepare_insert_entity(ext_entity) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
278 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
279 |
if ext_entity.values: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
280 |
self.prepare_update_entity(ext_entity, eid) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
281 |
return deferred |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
282 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
283 |
def iter_ext_entities(self, ext_entities, deferred, queue): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
284 |
"""Yield external entities in an order which attempts to satisfy |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
285 |
schema constraints (inlined / cardinality) and to optimize the import. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
286 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
287 |
schema = self.schema |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
288 |
extid2eid = self.extid2eid |
11275
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
289 |
order_hint = list(self.etypes_order_hint) |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
290 |
for ext_entity in ext_entities: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
291 |
# check data in the transitional representation and prepare it for |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
292 |
# later insertion in the database |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
293 |
for subject_uri, rtype, object_uri in ext_entity.prepare(schema): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
294 |
deferred.setdefault(rtype, set()).add((subject_uri, object_uri)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
295 |
if not ext_entity.is_ready(extid2eid): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
296 |
queue.setdefault(ext_entity.etype, []).append(ext_entity) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
297 |
continue |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
298 |
yield ext_entity |
11275
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
299 |
if not queue: |
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
300 |
continue |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
301 |
# check for some entities in the queue that may now be ready. We'll have to restart |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
302 |
# search for ready entities until none is generated |
11275
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
303 |
for etype in queue: |
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
304 |
if etype not in order_hint: |
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
305 |
order_hint.append(etype) |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
306 |
new = True |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
307 |
while new: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
308 |
new = False |
11275
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
309 |
for etype in order_hint: |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
310 |
if etype in queue: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
311 |
new_queue = [] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
312 |
for ext_entity in queue[etype]: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
313 |
if ext_entity.is_ready(extid2eid): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
314 |
yield ext_entity |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
315 |
# may unlock entity previously handled within this loop |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
316 |
new = True |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
317 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
318 |
new_queue.append(ext_entity) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
319 |
if new_queue: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
320 |
queue[etype][:] = new_queue |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
321 |
else: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
322 |
del queue[etype] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
323 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
324 |
def prepare_insert_entity(self, ext_entity): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
325 |
"""Call the store to prepare insertion of the given external entity""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
326 |
eid = self.store.prepare_insert_entity(ext_entity.etype, **ext_entity.values) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
327 |
self.extid2eid[ext_entity.extid] = eid |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
328 |
self.created.add(eid) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
329 |
return eid |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
330 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
331 |
def prepare_update_entity(self, ext_entity, eid): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
332 |
"""Call the store to prepare update of the given external entity""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
333 |
self.store.prepare_update_entity(ext_entity.etype, eid, **ext_entity.values) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
334 |
self.updated.add(eid) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
335 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
336 |
def prepare_insert_deferred_relations(self, deferred): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
337 |
"""Call the store to insert deferred relations (not handled during insertion/update for |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
338 |
entities). Return a list of relations `[(subj ext id, rtype, obj ext id, exception)]` that may not be inserted |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
339 |
because the target entities don't exist yet. |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
340 |
""" |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
341 |
prepare_insert_relation = self.store.prepare_insert_relation |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
342 |
rschema = self.schema.rschema |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
343 |
extid2eid = self.extid2eid |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
344 |
missing_relations = [] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
345 |
for rtype, relations in deferred.items(): |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
346 |
self.import_log.record_debug('importing %s %s relations' % (len(relations), rtype)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
347 |
symmetric = rschema(rtype).symmetric |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
348 |
existing = self.existing_relations[rtype] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
349 |
for subject_uri, object_uri in relations: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
350 |
try: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
351 |
subject_eid = extid2eid[subject_uri] |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
352 |
object_eid = extid2eid[object_uri] |
11275
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
353 |
except KeyError as exc: |
814f54d6183b
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
10939
diff
changeset
|
354 |
missing_relations.append((subject_uri, rtype, object_uri, exc)) |
10460
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
355 |
continue |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
356 |
if (subject_eid, object_eid) not in existing: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
357 |
prepare_insert_relation(subject_eid, rtype, object_eid) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
358 |
existing.add((subject_eid, object_eid)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
359 |
if symmetric: |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
360 |
existing.add((object_eid, subject_eid)) |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
361 |
return missing_relations |
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
362 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
363 |
def _warn_about_missing_work(self, queue, missing_relations):
    """Record an error for each entity or relation that could not be created.

    :param queue: mapping whose values are iterables of the external
        entities that are still waiting to be created; when it is not
        empty, a header message and the ``str()`` of every pending entity
        are recorded.
    :param missing_relations: iterable of ``(subject_uri, rtype,
        object_uri, exc)`` tuples, one per relation that could not be
        inserted; when it is not empty, a header message and one message
        per tuple are recorded.
    :raises Exception: if ``self.raise_on_error`` is true and either
        argument is non-empty; the exception message is the recorded
        messages joined with newlines.
    """
    error = self.import_log.record_error
    if queue:
        msgs = ["can't create some entities, is there some cycle or "
                "missing data?"]
        for ext_entities in queue.values():
            for ext_entity in ext_entities:
                msgs.append(str(ext_entity))
        # Explicit loop rather than ``map(error, msgs)``: on Python 3
        # ``map`` returns a lazy iterator, so the messages would never be
        # recorded.
        for msg in msgs:
            error(msg)
        if self.raise_on_error:
            raise Exception('\n'.join(msgs))
    if missing_relations:
        msgs = ["can't create some relations, is there missing data?"]
        for subject_uri, rtype, object_uri, exc in missing_relations:
            msgs.append("Could not find %s when trying to insert (%s, %s, %s)"
                        % (exc, subject_uri, rtype, object_uri))
        # Same reason as above: iterate eagerly so every message is logged.
        for msg in msgs:
            error(msg)
        if self.raise_on_error:
            raise Exception('\n'.join(msgs))
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
382 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
383 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
384 |
class SimpleImportLog(object):
    """Stand-in for a CWDataImport log that keeps plain-text lines in memory.

    Handy when the log should be shown in the UI rather than stored in the
    database.  Every recorded message is appended to ``self.logs`` as one
    tab-separated line: severity, file name, line number, message.
    """

    def __init__(self, filename):
        # ``filename`` is written into every log line; ``logs`` collects
        # the formatted lines in recording order.
        self.filename = filename
        self.logs = []

    def record_debug(self, msg, path=None, line=None):
        """Record `msg` with severity ``logging.DEBUG``."""
        self._log(logging.DEBUG, msg, path, line)

    def record_info(self, msg, path=None, line=None):
        """Record `msg` with severity ``logging.INFO``."""
        self._log(logging.INFO, msg, path, line)

    def record_warning(self, msg, path=None, line=None):
        """Record `msg` with severity ``logging.WARNING``."""
        self._log(logging.WARNING, msg, path, line)

    def record_error(self, msg, path=None, line=None):
        """Record `msg` with severity ``logging.ERROR``."""
        self._log(logging.ERROR, msg, path, line)

    def record_fatal(self, msg, path=None, line=None):
        """Record `msg` with severity ``logging.FATAL``."""
        self._log(logging.FATAL, msg, path, line)

    def _log(self, severity, msg, path, line):
        """Append one tab-separated line describing `msg` to ``self.logs``.

        `path` is accepted but not used: the file name given at
        construction time is written instead.  A false `line` (``None``,
        ``0``) is rendered as an empty field.
        """
        fields = (severity, self.filename, line or u'', msg)
        # Wrap each field in a 1-tuple so a tuple-valued field is formatted
        # as a whole rather than unpacked by ``%``.
        self.logs.append(u'\t'.join(u'%s' % (field,) for field in fields))
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
414 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
415 |
|
d260722f2453
[dataimport] introduce the importer and extentity classes
Yann Voté <yann.vote@logilab.fr>
parents:
diff
changeset
|
416 |
class HTMLImportLog(SimpleImportLog):
    """Stand-in for a CWDataImport log that keeps simple HTML lines in memory.

    Same tab-separated layout as the parent class, except that the file
    name and each message are passed through ``xml_escape`` and every line
    ends with ``<br/>``.
    """

    def __init__(self, filename):
        # Escape the file name once, up front, so it can be written into
        # every log line as is.
        escaped_filename = xml_escape(filename)
        super(HTMLImportLog, self).__init__(escaped_filename)

    def _log(self, severity, msg, path, line):
        """Append one escaped, ``<br/>``-terminated line to ``self.logs``.

        `path` is accepted but not used; a false `line` is rendered as an
        empty field.
        """
        fields = (severity, self.filename, line or u'', xml_escape(msg))
        # 1-tuple wrapping keeps a tuple-valued field from being unpacked
        # by ``%``.
        row = u'\t'.join(u'%s' % (field,) for field in fields)
        self.logs.append(row + u'<br/>')