author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Fri, 10 Jun 2011 16:54:20 +0200 | |
changeset 7493 | 3c46b9390871 |
parent 7461 | cb25b62074cc |
child 7527 | ef1e9bc38137 |
permissions | -rw-r--r-- |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# This file is part of CubicWeb. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# any later version. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# details. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
"""datafeed sources: copy data from an external data stream into the system |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
database |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
""" |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
21 |
from __future__ import with_statement |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
22 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
23 |
import urllib2 |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
24 |
import StringIO |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
from datetime import datetime, timedelta |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
26 |
from base64 import b64decode |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
27 |
from cookielib import CookieJar |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
28 |
|
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
29 |
from lxml import etree |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
|
7399
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
31 |
from cubicweb import RegistryNotFound, ObjectNotFound, ValidationError, UnknownEid |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
32 |
from cubicweb.server.sources import AbstractSource |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
from cubicweb.appobject import AppObject |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
|
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
35 |
|
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
class DataFeedSource(AbstractSource): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
copy_based_source = True |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
options = ( |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
('synchronize', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
{'type' : 'yn', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
'default': True, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
'help': ('Is the repository responsible to automatically import ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
'content from this source? ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
'You should say yes unless you don\'t want this behaviour ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
46 |
'or if you use a multiple repositories setup, in which ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
'case you should say yes on one repository, no on others.'), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
'group': 'datafeed-source', 'level': 2, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
}), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
('synchronization-interval', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
{'type' : 'time', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
52 |
'default': '5min', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
'help': ('Interval in seconds between synchronization with the ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
54 |
'external source (default to 5 minutes, must be >= 1 min).'), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
'group': 'datafeed-source', 'level': 2, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
}), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
('delete-entities', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
58 |
{'type' : 'yn', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
59 |
'default': True, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
60 |
'help': ('Should already imported entities not found anymore on the ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
61 |
'external source be deleted?'), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
62 |
'group': 'datafeed-source', 'level': 2, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
63 |
}), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
64 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
65 |
) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
66 |
def __init__(self, repo, source_config, eid=None): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
67 |
AbstractSource.__init__(self, repo, source_config, eid) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
68 |
self.update_config(None, self.check_conf_dict(eid, source_config)) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
69 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
70 |
def check_config(self, source_entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
71 |
"""check configuration of source entity""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
72 |
typedconfig = super(DataFeedSource, self).check_config(source_entity) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
73 |
if typedconfig['synchronization-interval'] < 60: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
74 |
_ = source_entity._cw._ |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
75 |
msg = _('synchronization-interval must be greater than 1 minute') |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
76 |
raise ValidationError(source_entity.eid, {'config': msg}) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
77 |
return typedconfig |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
78 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
79 |
def _entity_update(self, source_entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
80 |
source_entity.complete() |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
81 |
self.parser = source_entity.parser |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
82 |
self.latest_retrieval = source_entity.latest_retrieval |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
83 |
self.urls = [url.strip() for url in source_entity.url.splitlines() |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
84 |
if url.strip()] |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
85 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
86 |
def update_config(self, source_entity, typedconfig): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
87 |
"""update configuration from source entity. `typedconfig` is config |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
88 |
properly typed with defaults set |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
89 |
""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
90 |
self.synchro_interval = timedelta(seconds=typedconfig['synchronization-interval']) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
91 |
if source_entity is not None: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
92 |
self._entity_update(source_entity) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
93 |
self.config = typedconfig |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
94 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
95 |
def init(self, activated, source_entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
96 |
if activated: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
97 |
self._entity_update(source_entity) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
98 |
self.parser = source_entity.parser |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
99 |
self.load_mapping(source_entity._cw) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
100 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
101 |
def _get_parser(self, session, **kwargs): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
102 |
return self.repo.vreg['parsers'].select( |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
103 |
self.parser, session, source=self, **kwargs) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
104 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
105 |
def load_mapping(self, session): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
106 |
self.mapping = {} |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
107 |
self.mapping_idx = {} |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
108 |
try: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
109 |
parser = self._get_parser(session) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
110 |
except (RegistryNotFound, ObjectNotFound): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
111 |
return # no parser yet, don't go further |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
112 |
self._load_mapping(session, parser=parser) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
113 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
114 |
def add_schema_config(self, schemacfg, checkonly=False, parser=None): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
115 |
"""added CWSourceSchemaConfig, modify mapping accordingly""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
116 |
if parser is None: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
117 |
parser = self._get_parser(schemacfg._cw) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
118 |
parser.add_schema_config(schemacfg, checkonly) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
119 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
120 |
def del_schema_config(self, schemacfg, checkonly=False, parser=None): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
121 |
"""deleted CWSourceSchemaConfig, modify mapping accordingly""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
122 |
if parser is None: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
123 |
parser = self._get_parser(schemacfg._cw) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
124 |
parser.del_schema_config(schemacfg, checkonly) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
125 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
126 |
def fresh(self): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
127 |
if self.latest_retrieval is None: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
128 |
return False |
7443
c10164464afc
[datafeed] we should use utc timestamp to avoid pb with local times
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
129 |
return datetime.utcnow() < (self.latest_retrieval + self.synchro_interval) |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
130 |
|
7446
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
131 |
def update_latest_retrieval(self, session): |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
132 |
self.latest_retrieval = datetime.utcnow() |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
133 |
session.execute('SET X latest_retrieval %(date)s WHERE X eid %(x)s', |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
134 |
{'x': self.eid, 'date': self.latest_retrieval}) |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
135 |
|
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
136 |
def acquire_synchronization_lock(self, session): |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
137 |
# XXX race condition until WHERE of SET queries is executed using |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
138 |
# 'SELECT FOR UPDATE' |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
139 |
if not session.execute('SET X synchronizing TRUE WHERE X eid %(x)s, X synchronizing FALSE', |
7461
cb25b62074cc
[datafeed]Â fix stupid error introduced by 'just before qfinish' refactoring...
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7456
diff
changeset
|
140 |
{'x': self.eid}): |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
141 |
self.error('concurrent synchronization detected, skip pull') |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
142 |
session.commit(free_cnxset=False) |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
143 |
return False |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
144 |
session.commit(free_cnxset=False) |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
145 |
return True |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
146 |
|
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
147 |
def release_synchronization_lock(self, session): |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
148 |
session.execute('SET X synchronizing FALSE WHERE X eid %(x)s', |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
149 |
{'x': self.eid}) |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
150 |
session.commit() |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
151 |
|
7351
ed66f236715d
fix _set_relation when no target eids, update datafeed source pull_data arguments to raise on error during tests
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7001
diff
changeset
|
152 |
def pull_data(self, session, force=False, raise_on_error=False): |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
153 |
"""Launch synchronization of the source if needed. |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
154 |
|
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
155 |
This method is responsible to handle commit/rollback on the given |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
156 |
session. |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
157 |
""" |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
158 |
if not force and self.fresh(): |
6972
12aa5cd81ce5
[datafeed] return empty dict when source is fresh avoid crash in the looping task because None returned
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6957
diff
changeset
|
159 |
return {} |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
160 |
if not self.acquire_synchronization_lock(session): |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
161 |
return {} |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
162 |
try: |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
163 |
with session.transaction(free_cnxset=False): |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
164 |
return self._pull_data(session, force, raise_on_error) |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
165 |
finally: |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
166 |
self.release_synchronization_lock(session) |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
167 |
|
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
168 |
def _pull_data(self, session, force=False, raise_on_error=False): |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
169 |
if self.config['delete-entities']: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
170 |
myuris = self.source_cwuris(session) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
171 |
else: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
172 |
myuris = None |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
173 |
parser = self._get_parser(session, sourceuris=myuris) |
7446
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
174 |
if self.process_urls(parser, self.urls, raise_on_error): |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
175 |
self.warning("some error occured, don't attempt to delete entities") |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
176 |
elif self.config['delete-entities'] and myuris: |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
177 |
byetype = {} |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
178 |
for eid, etype in myuris.values(): |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
179 |
byetype.setdefault(etype, []).append(str(eid)) |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
180 |
self.error('delete %s entities %s', self.uri, byetype) |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
181 |
for etype, eids in byetype.iteritems(): |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
182 |
session.execute('DELETE %s X WHERE X eid IN (%s)' |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
183 |
% (etype, ','.join(eids))) |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
184 |
self.update_latest_retrieval(session) |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
185 |
return parser.stats |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
186 |
|
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
187 |
def process_urls(self, parser, urls, raise_on_error=False): |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
188 |
error = False |
7446
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
189 |
for url in urls: |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
190 |
self.info('pulling data from %s', url) |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
191 |
try: |
7385
29f050e39b09
[datafeed] propagate raise_on_error to parser's process method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7379
diff
changeset
|
192 |
if parser.process(url, raise_on_error): |
7001
c53aa19640b2
[sobjects/parsers] on validationerror, skip entity and continue processing feed
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents:
6972
diff
changeset
|
193 |
error = True |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
194 |
except IOError, exc: |
7351
ed66f236715d
fix _set_relation when no target eids, update datafeed source pull_data arguments to raise on error during tests
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7001
diff
changeset
|
195 |
if raise_on_error: |
ed66f236715d
fix _set_relation when no target eids, update datafeed source pull_data arguments to raise on error during tests
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7001
diff
changeset
|
196 |
raise |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
197 |
self.error('could not pull data while processing %s: %s', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
198 |
url, exc) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
199 |
error = True |
7446
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
200 |
return error |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
201 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
202 |
def before_entity_insertion(self, session, lid, etype, eid, sourceparams): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
203 |
"""called by the repository when an eid has been attributed for an |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
204 |
entity stored here but the entity has not been inserted in the system |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
205 |
table yet. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
206 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
207 |
This method must return the an Entity instance representation of this |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
208 |
entity. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
209 |
""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
210 |
entity = super(DataFeedSource, self).before_entity_insertion( |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
211 |
session, lid, etype, eid, sourceparams) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
212 |
entity.cw_edited['cwuri'] = unicode(lid) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
213 |
entity.cw_edited.set_defaults() |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
214 |
sourceparams['parser'].before_entity_copy(entity, sourceparams) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
215 |
# avoid query to search full-text indexed attributes |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
216 |
for attr in entity.e_schema.indexable_attributes(): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
217 |
entity.cw_edited.setdefault(attr, u'') |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
218 |
return entity |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
219 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
220 |
def after_entity_insertion(self, session, lid, entity, sourceparams): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
221 |
"""called by the repository after an entity stored here has been |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
222 |
inserted in the system table. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
223 |
""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
224 |
if session.is_hook_category_activated('integrity'): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
225 |
entity.cw_edited.check(creation=True) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
226 |
self.repo.system_source.add_entity(session, entity) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
227 |
entity.cw_edited.saved = entity._cw_is_saved = True |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
228 |
sourceparams['parser'].after_entity_copy(entity, sourceparams) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
229 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
230 |
def source_cwuris(self, session): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
231 |
sql = ('SELECT extid, eid, type FROM entities, cw_source_relation ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
232 |
'WHERE entities.eid=cw_source_relation.eid_from ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
233 |
'AND cw_source_relation.eid_to=%s' % self.eid) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
234 |
return dict((b64decode(uri), (eid, type)) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
235 |
for uri, eid, type in session.system_sql(sql)) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
236 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
237 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
238 |
class DataFeedParser(AppObject): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
239 |
__registry__ = 'parsers' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
240 |
|
7446
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
241 |
def __init__(self, session, source, sourceuris=None, **kwargs): |
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
242 |
super(DataFeedParser, self).__init__(session, **kwargs) |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
243 |
self.source = source |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
244 |
self.sourceuris = sourceuris |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
245 |
self.stats = {'created': set(), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
246 |
'updated': set()} |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
247 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
248 |
def add_schema_config(self, schemacfg, checkonly=False): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
249 |
"""added CWSourceSchemaConfig, modify mapping accordingly""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
250 |
msg = schemacfg._cw._("this parser doesn't use a mapping") |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
251 |
raise ValidationError(schemacfg.eid, {None: msg}) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
252 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
253 |
def del_schema_config(self, schemacfg, checkonly=False): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
254 |
"""deleted CWSourceSchemaConfig, modify mapping accordingly""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
255 |
msg = schemacfg._cw._("this parser doesn't use a mapping") |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
256 |
raise ValidationError(schemacfg.eid, {None: msg}) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
257 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
258 |
def extid2entity(self, uri, etype, **sourceparams): |
7399
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
259 |
"""return an entity for the given uri. May return None if it should be |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
260 |
skipped |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
261 |
""" |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
262 |
sourceparams['parser'] = self |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
263 |
eid = self.source.extid2eid(str(uri), etype, self._cw, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
264 |
sourceparams=sourceparams) |
7399
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
265 |
if eid < 0: |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
266 |
# entity has been moved away from its original source |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
267 |
# |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
268 |
# Don't give etype to entity_from_eid so we get UnknownEid if the |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
269 |
# entity has been removed |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
270 |
try: |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
271 |
entity = self._cw.entity_from_eid(-eid) |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
272 |
except UnknownEid: |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
273 |
return None |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
274 |
self.notify_updated(entity) # avoid later update from the source's data |
972ed1843bd8
[multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7398
diff
changeset
|
275 |
return entity |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
276 |
if self.sourceuris is not None: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
277 |
self.sourceuris.pop(str(uri), None) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
278 |
return self._cw.entity_from_eid(eid, etype) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
279 |
|
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
280 |
def process(self, url, partialcommit=True): |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
281 |
"""main callback: process the url""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
282 |
raise NotImplementedError |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
283 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
284 |
def before_entity_copy(self, entity, sourceparams): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
285 |
raise NotImplementedError |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
286 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
287 |
def after_entity_copy(self, entity, sourceparams): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
288 |
self.stats['created'].add(entity.eid) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
289 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
290 |
def created_during_pull(self, entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
291 |
return entity.eid in self.stats['created'] |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
292 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
293 |
def updated_during_pull(self, entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
294 |
return entity.eid in self.stats['updated'] |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
295 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
296 |
def notify_updated(self, entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
297 |
return self.stats['updated'].add(entity.eid) |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
298 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
299 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
300 |
class DataFeedXMLParser(DataFeedParser): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
301 |
|
7385
29f050e39b09
[datafeed] propagate raise_on_error to parser's process method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7379
diff
changeset
|
302 |
def process(self, url, raise_on_error=False, partialcommit=True): |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
303 |
"""IDataFeedParser main entry point""" |
7447
d5705c9bbe82
don't crash if we can't fetch data or if xml is malformed
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7446
diff
changeset
|
304 |
try: |
d5705c9bbe82
don't crash if we can't fetch data or if xml is malformed
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7446
diff
changeset
|
305 |
parsed = self.parse(url) |
d5705c9bbe82
don't crash if we can't fetch data or if xml is malformed
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7446
diff
changeset
|
306 |
except Exception, ex: |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
307 |
self.source.error(str(ex)) |
7447
d5705c9bbe82
don't crash if we can't fetch data or if xml is malformed
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7446
diff
changeset
|
308 |
return True |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
309 |
error = False |
7447
d5705c9bbe82
don't crash if we can't fetch data or if xml is malformed
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7446
diff
changeset
|
310 |
for args in parsed: |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
311 |
try: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
312 |
self.process_item(*args) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
313 |
if partialcommit: |
7398
26695dd703d8
[repository api] definitly kill usage of word 'pool' to refer to connections set used by a session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7385
diff
changeset
|
314 |
# commit+set_cnxset instead of commit(free_cnxset=False) to let |
26695dd703d8
[repository api] definitly kill usage of word 'pool' to refer to connections set used by a session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7385
diff
changeset
|
315 |
# other a chance to get our connections set |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
316 |
self._cw.commit() |
7398
26695dd703d8
[repository api] definitly kill usage of word 'pool' to refer to connections set used by a session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7385
diff
changeset
|
317 |
self._cw.set_cnxset() |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
318 |
except ValidationError, exc: |
7385
29f050e39b09
[datafeed] propagate raise_on_error to parser's process method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7379
diff
changeset
|
319 |
if raise_on_error: |
29f050e39b09
[datafeed] propagate raise_on_error to parser's process method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7379
diff
changeset
|
320 |
raise |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
321 |
if partialcommit: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
322 |
self.source.error('Skipping %s because of validation error %s' % (args, exc)) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
323 |
self._cw.rollback() |
7398
26695dd703d8
[repository api] definitly kill usage of word 'pool' to refer to connections set used by a session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7385
diff
changeset
|
324 |
self._cw.set_cnxset() |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
325 |
error = True |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
326 |
else: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
327 |
raise |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
328 |
return error |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
329 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
330 |
def parse(self, url): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
331 |
if url.startswith('http'): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
332 |
from cubicweb.sobjects.parsers import HOST_MAPPING |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
333 |
for mappedurl in HOST_MAPPING: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
334 |
if url.startswith(mappedurl): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
335 |
url = url.replace(mappedurl, HOST_MAPPING[mappedurl], 1) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
336 |
break |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
337 |
self.source.info('GET %s', url) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
338 |
stream = _OPENER.open(url) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
339 |
elif url.startswith('file://'): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
340 |
stream = open(url[7:]) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
341 |
else: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
342 |
stream = StringIO.StringIO(url) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
343 |
return self.parse_etree(etree.parse(stream).getroot()) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
344 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
345 |
def parse_etree(self, document): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
346 |
return [(document,)] |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
347 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
348 |
def process_item(self, *args): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
349 |
raise NotImplementedError |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
350 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
351 |
# use a cookie enabled opener to use session cookie if any |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
352 |
_OPENER = urllib2.build_opener() |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
353 |
try: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
354 |
from logilab.common import urllib2ext |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
355 |
_OPENER.add_handler(urllib2ext.HTTPGssapiAuthHandler()) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
356 |
except ImportError: # python-kerberos not available |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
357 |
pass |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
358 |
_OPENER.add_handler(urllib2.HTTPCookieProcessor(CookieJar())) |