author | Rémi Cardona <remi.cardona@logilab.fr> |
Mon, 22 Jun 2015 14:27:37 +0200 | |
changeset 10411 | 4ee15441f2eb |
parent 8927 | 885dea8f16a0 |
child 10662 | 10942ed172de |
permissions | -rw-r--r-- |
8836
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
1 |
# -*- coding: utf-8 -*- |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
2 |
# copyright 2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
3 |
# contact http://www.logilab.fr -- mailto:contact@logilab.fr |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
4 |
# |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
5 |
# This program is free software: you can redistribute it and/or modify it under |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
6 |
# the terms of the GNU Lesser General Public License as published by the Free |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
7 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
8 |
# any later version. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
9 |
# |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
10 |
# This program is distributed in the hope that it will be useful, but WITHOUT |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
11 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
12 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
13 |
# details. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
14 |
# |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
15 |
# You should have received a copy of the GNU Lesser General Public License along |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
16 |
# with this program. If not, see <http://www.gnu.org/licenses/>. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
17 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
18 |
"""This module imports the Diseasome data into a CubicWeb instance. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
19 |
""" |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
20 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
21 |
# Python imports |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
22 |
import sys |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
23 |
import argparse |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
24 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
25 |
# Logilab import, for timing |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
26 |
from logilab.common.decorators import timed |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
27 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
28 |
# CubicWeb imports |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
29 |
import cubicweb.dataimport as cwdi |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
30 |
from cubes.dataio import dataimport as mcwdi |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
31 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
32 |
# Diseasome parser import |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
33 |
import diseasome_parser as parser |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
34 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
35 |
def _is_of_class(instance, class_name): |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
36 |
"""Helper function to determine whether an instance is |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
37 |
of a specified class or not. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
38 |
Returns a True if this is the case and False otherwise. |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
39 |
""" |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
40 |
if instance.__class__.__name__ == class_name: |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
41 |
return True |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
42 |
else: |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
43 |
return False |
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
44 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
45 |
@timed
def diseasome_import(session, file_name, store):
    """Import the Diseasome data of an RDF file into a CubicWeb instance.

    The Diseasome parser yields ``(entity, relations)`` pairs read from
    ``file_name``. Entities are created first and flushed; relations are
    created in a second pass, together with the ``ExternalUri`` entities
    they point to, and flushed in turn.

    :param session: object whose ``execute`` method is used to run the RQL
        query listing the already existing ``ExternalUri`` entities.
    :param file_name: name of the RDF data file handed to the parser.
    :param store: data import store used to create entities and relations.
        A store whose class is named ``MassiveObjectStore`` is driven through
        ``init_rtype_table``, ``relate_by_iid``, ``flush_meta_data``,
        ``convert_relations`` and ``cleanup``; any other store is driven
        through ``create_entity``, ``relate`` and ``flush`` only.

    Entities without any type are silently skipped; entities whose first
    type is not a known Diseasome type are reported on ``sys.stderr`` and
    skipped.

    >>> diseasome_import(session, 'file_name', Store)

    """
    # Relations whose objects are "ExternalUri" entities.
    exturi_rtypes = ('classes', 'possible_drugs', 'omim', 'omim_page',
                     'chromosomal_location', 'same_as', 'gene_id',
                     'hgnc_id', 'hgnc_page')
    # Relations whose objects are other imported entities, each paired with
    # the entity type of its object.
    entity_rtypes = (('subtype_of', 'Disease'), ('associated_genes', 'Gene'))
    # The store kind does not change during the import: test it only once.
    is_massive = _is_of_class(store, 'MassiveObjectStore')

    # Already existing "ExternalUri" entities, indexed by URI.
    exturis = dict(session.execute('Any U, X WHERE X is ExternalUri, X uri U'))
    uri_to_eid = {}
    uri_to_etype = {}
    all_relations = {}
    etypes = {('http://www4.wiwiss.fu-berlin.de/'
               'diseasome/resource/diseasome/genes'): 'Gene',
              ('http://www4.wiwiss.fu-berlin.de/'
               'diseasome/resource/diseasome/diseases'): 'Disease'}
    # Read the parsed data
    for entity, relations in parser.entities_from_rdf(file_name,
                                                      ('gene', 'disease')):
        uri = entity.get('cwuri', None)
        types = list(relations.get('types', []))
        if not types:
            continue
        etype = etypes.get(types[0])
        if not etype:
            # ``write`` takes a single string, hence the explicit formatting.
            sys.stderr.write('Entity type %s not recognized.' % types[0])
            sys.stderr.flush()
            # An entity cannot be created without a known type: skip it.
            continue
        if is_massive:
            present = set(relations)
            for relation in present.intersection(exturi_rtypes):
                store.init_rtype_table(etype, relation, 'ExternalUri')
            for relation, target_etype in entity_rtypes:
                if relation in present:
                    store.init_rtype_table(etype, relation, target_etype)
        # Create the entities
        ent = store.create_entity(etype, **entity)
        if is_massive:
            # For the "MassiveObjectStore", the URIs are used as identifiers.
            uri_to_eid[uri] = uri
            uri_to_etype[uri] = etype
        else:
            uri_to_eid[uri] = ent.eid
            uri_to_etype[uri] = ent.cw_etype
        # Store relations for after
        all_relations[uri] = relations
    # Perform a first commit, of the entities
    store.flush()
    for uri, relations in all_relations.items():
        from_eid = uri_to_eid.get(uri)
        if not from_eid:
            continue
        # ``subjtype`` should be initialized if ``SQLGenObjectStore`` is used
        # and there are inlined relations in the schema: the store otherwise
        # tries to infer the type of the subject, which does not always work,
        # e.g. when there are several object types for the relation.
        # ``subjtype`` is ignored for other stores, or if there are no
        # inlined relations in the schema.
        subjtype = uri_to_etype.get(uri)
        for rtype, rels in relations.items():
            if rtype in exturi_rtypes:
                for rel in rels:
                    if rel in exturis:
                        rel_eid = exturis[rel]
                    else:
                        # Create the "ExternalUri" entities, which are the
                        # objects of the relations
                        extu = store.create_entity('ExternalUri', uri=rel)
                        # For the "MassiveObjectStore", the EIDs are
                        # in fact the URIs.
                        rel_eid = rel if is_massive else extu.eid
                        exturis[rel] = rel_eid
                    # Create the relations that have "ExternalUri"s as objects
                    if is_massive:
                        store.relate_by_iid(from_eid, rtype, rel_eid)
                    else:
                        store.relate(from_eid, rtype, rel_eid,
                                     subjtype=subjtype)
            elif rtype in ('subtype_of', 'associated_genes'):
                for rel in rels:
                    to_eid = uri_to_eid.get(rel)
                    if not to_eid:
                        sys.stderr.write('Missing entity with URI %s '
                                         'for relation %s' % (rel, rtype))
                        sys.stderr.flush()
                        continue
                    # Create relations that have objects of other type
                    # than "ExternalUri"
                    if is_massive:
                        store.relate_by_iid(from_eid, rtype, to_eid)
                    else:
                        store.relate(from_eid, rtype, to_eid,
                                     subjtype=subjtype)
    # Perform a second commit, of the "ExternalUri" entities.
    # when the stores in the CubicWeb ``dataimport`` module are used,
    # relations are also committed.
    store.flush()
    # If the ``MassiveObjectStore`` is used, then entity and relation metadata
    # are pushed as well. By metadata we mean information on the creation
    # time and author.
    if is_massive:
        store.flush_meta_data()
        for relation in ('classes', 'possible_drugs', 'omim', 'omim_page',
                         'chromosomal_location', 'same_as'):
            # Afterwards, relations are actually created in the database.
            store.convert_relations('Disease', relation, 'ExternalUri',
                                    'cwuri', 'uri')
        store.convert_relations('Disease', 'subtype_of', 'Disease',
                                'cwuri', 'cwuri')
        store.convert_relations('Disease', 'associated_genes', 'Gene',
                                'cwuri', 'cwuri')
        for relation in ('gene_id', 'hgnc_id', 'hgnc_page', 'same_as'):
            store.convert_relations('Gene', relation, 'ExternalUri',
                                    'cwuri', 'uri')
        # Clean up temporary tables in the database
        # NOTE(review): assumed to belong to the "MassiveObjectStore" branch
        # -- confirm against the original indentation.
        store.cleanup()
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
171 |
|
8a57802d40d3
[cubicweb/doc] Add tutorial on data import in CubicWeb.
Vladimir Popescu <vladimir.popescu@logilab.fr>
parents:
diff
changeset
|
172 |
if __name__ == '__main__':
    # Script entry point: parse the command-line options, instantiate the
    # requested data import store and run ``diseasome_import`` with it.
    #
    # Change sys.argv so that ``cubicweb-ctl shell`` can work out the options
    # we give to our ``diseasome_import.py`` script: keep the item located
    # just before the "--" separator (argparse takes sys.argv[0] as the
    # program name) plus everything that follows the separator.
    try:
        SEPARATOR_INDEX = sys.argv.index("--")
    except ValueError:
        # Without the separator the options cannot be told apart from the
        # arguments of the enclosing command; fail with an explicit message
        # rather than with a bare ValueError traceback.
        sys.exit('Script options must be given after a "--" separator')
    sys.argv = [arg for arg in sys.argv[SEPARATOR_INDEX - 1:] if arg != "--"]

    PARSER = argparse.ArgumentParser(description="Import Diseasome data")
    PARSER.add_argument("-df", "--datafile", type=str,
                        help="RDF data file name")
    PARSER.add_argument("-st", "--store", type=str,
                        default="RQLObjectStore",
                        help="data import store")
    ARGS = PARSER.parse_args()

    # Only the presence of the option is checked here, not the existence of
    # the file on disk.
    if not ARGS.datafile:
        sys.exit("Data file not found or not specified")
    FILENAME = ARGS.datafile

    # Stores looked up by name on the ``cwdi`` module; the massive store is
    # provided by the separate ``mcwdi`` module.
    if ARGS.store in ("RQLObjectStore", "NoHookRQLObjectStore",
                      "SQLGenObjectStore"):
        IMPORT_STORE = getattr(cwdi, ARGS.store)(session)
    elif ARGS.store == "MassiveObjectStore":
        IMPORT_STORE = mcwdi.MassiveObjectStore(session)
    else:
        sys.exit("Import store unknown")

    diseasome_import(session, FILENAME, IMPORT_STORE)