author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Wed, 09 Feb 2011 18:08:40 +0100 | |
changeset 6964 | 4813efcee2c6 |
parent 6958 | 861251f125cf |
child 6992 | 38f2e306ad8b |
permissions | -rw-r--r-- |
6958
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
1 |
from datetime import datetime |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
2 |
|
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
3 |
for rtype in ('cw_support', 'cw_dont_cross', 'cw_may_cross'): |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
4 |
drop_relation_type(rtype) |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6944
diff
changeset
|
5 |
|
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
6 |
add_entity_type('CWSourceSchemaConfig') |
6724
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
|
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6944
diff
changeset
|
8 |
if not 'url' in schema['CWSource'].subjrels: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6944
diff
changeset
|
9 |
add_attribute('CWSource', 'url') |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6944
diff
changeset
|
10 |
add_attribute('CWSource', 'parser') |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6944
diff
changeset
|
11 |
add_attribute('CWSource', 'latest_retrieval') |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6944
diff
changeset
|
12 |
|
6724
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
try: |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
from cubicweb.server.sources.pyrorql import PyroRQLSource |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
except ImportError: |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
pass |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
else: |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
|
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
from os.path import join |
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
20 |
# function to read old python mapping file |
6724
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
21 |
def load_mapping_file(source): |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
mappingfile = source.config['mapping-file'] |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
23 |
mappingfile = join(source.repo.config.apphome, mappingfile) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
24 |
mapping = {} |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
execfile(mappingfile, mapping) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
26 |
for junk in ('__builtins__', '__doc__'): |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
27 |
mapping.pop(junk, None) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
28 |
mapping.setdefault('support_relations', {}) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
29 |
mapping.setdefault('dont_cross_relations', set()) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
mapping.setdefault('cross_relations', set()) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
31 |
# do some basic checks of the mapping content |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
32 |
assert 'support_entities' in mapping, \ |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
'mapping file should at least define support_entities' |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
assert isinstance(mapping['support_entities'], dict) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
assert isinstance(mapping['support_relations'], dict) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
assert isinstance(mapping['dont_cross_relations'], set) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
assert isinstance(mapping['cross_relations'], set) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
unknown = set(mapping) - set( ('support_entities', 'support_relations', |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
'dont_cross_relations', 'cross_relations') ) |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
assert not unknown, 'unknown mapping attribute(s): %s' % unknown |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
# relations that are necessarily not crossed |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
for rtype in ('is', 'is_instance_of', 'cw_source'): |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
assert rtype not in mapping['dont_cross_relations'], \ |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
'%s relation should not be in dont_cross_relations' % rtype |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
assert rtype not in mapping['support_relations'], \ |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
46 |
'%s relation should not be in support_relations' % rtype |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
return mapping |
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
48 |
# for now, only pyrorql sources have a mapping |
6724
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
for source in repo.sources_by_uri.values(): |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
if not isinstance(source, PyroRQLSource): |
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
continue |
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
52 |
sourceentity = session.entity_from_eid(source.eid) |
6724
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
mapping = load_mapping_file(source) |
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
54 |
# write mapping as entities |
6724
24bf6f181d0e
[pyro source] store pyro source mapping file into the database
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
print 'migrating map for', source |
6944
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
56 |
for etype, write in mapping['support_entities'].items(): |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
57 |
create_entity('CWSourceSchemaConfig', |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
58 |
cw_for_source=sourceentity, |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
59 |
cw_schema=session.entity_from_eid(schema[etype].eid), |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
60 |
options=write and u'write' or None, |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
61 |
ask_confirm=False) |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
62 |
for rtype, write in mapping['support_relations'].items(): |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
63 |
options = [] |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
64 |
if write: |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
65 |
options.append(u'write') |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
66 |
if rtype in mapping['cross_relations']: |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
67 |
options.append(u'maycross') |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
68 |
create_entity('CWSourceSchemaConfig', |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
69 |
cw_for_source=sourceentity, |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
70 |
cw_schema=session.entity_from_eid(schema[rtype].eid), |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
71 |
options=u':'.join(options) or None, |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
72 |
ask_confirm=False) |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
73 |
for rtype in mapping['dont_cross_relations']: |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
74 |
create_entity('CWSourceSchemaConfig', |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
75 |
cw_for_source=source, |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
76 |
cw_schema=session.entity_from_eid(schema[etype].eid), |
0cf10429ad39
[sources] rewrite the way pyrorql mapping are stored in the database so it can be reused for other sources (eg datafeed+cwxml)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6724
diff
changeset
|
77 |
options=u'dontcross') |
6958
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
78 |
# latest update time cwproperty is now a source attribute (latest_retrieval) |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
79 |
pkey = u'sources.%s.latest-update-time' % source.uri |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
80 |
rset = session.execute('Any V WHERE X is CWProperty, X value V, X pkey %(k)s', |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
81 |
{'k': pkey}) |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
82 |
timestamp = int(rset[0][0]) |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
83 |
sourceentity.set_attributes(latest_retrieval=datetime.fromtimestamp(timestamp)) |
861251f125cf
[pyro source] benefit from addition of latest_retrieval on CWSource
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
84 |
session.execute('DELETE CWProperty X WHERE X pkey %(k)s', {'k': pkey}) |