author | Philippe Pepiot <philippe.pepiot@logilab.fr> |
Fri, 10 May 2019 16:04:37 +0200 | |
changeset 12593 | c30f8dc81f22 |
parent 12567 | 26744ad37953 |
child 12828 | dadbd4148a44 |
permissions | -rw-r--r-- |
11138
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
1 |
# copyright 2010-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# This file is part of CubicWeb. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# any later version. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# details. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
"""datafeed sources: copy data from an external data stream into the system |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
database |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
""" |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
21 |
|
12346
b3f45d96a179
[py37] `async` is now a reserved keyword
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
12149
diff
changeset
|
22 |
from warnings import warn |
10757 | 23 |
from io import BytesIO |
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
24 |
from os.path import exists |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
from datetime import datetime, timedelta |
11138
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
26 |
from functools import partial |
12567
26744ad37953
Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
12508
diff
changeset
|
27 |
from http.cookiejar import CookieJar |
26744ad37953
Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
12508
diff
changeset
|
28 |
from urllib.parse import urlparse |
26744ad37953
Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
12508
diff
changeset
|
29 |
from urllib.request import Request, build_opener, HTTPCookieProcessor |
26744ad37953
Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
12508
diff
changeset
|
30 |
from urllib.error import HTTPError |
10603
65ad6980976e
[py3k] import URL mangling functions using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10581
diff
changeset
|
31 |
|
11042
079b32f4cd0d
[datafeed] use tz-aware datetime objects
Julien Cristau <julien.cristau@logilab.fr>
parents:
10914
diff
changeset
|
32 |
from pytz import utc |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
33 |
from lxml import etree |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
|
12149
649100470733
[sources] Stop translating validation error
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
12148
diff
changeset
|
35 |
from cubicweb import ObjectNotFound, ValidationError, SourceException, _ |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
from cubicweb.server.sources import AbstractSource |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
from cubicweb.appobject import AppObject |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
|
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
39 |
|
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
class DataFeedSource(AbstractSource): |
7552
82dde8276a5b
[datafeed, entities] url for entities from a datafeed source should be on their origin site. Closes #1769391
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7548
diff
changeset
|
41 |
use_cwuri_as_url = True |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
options = ( |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
('synchronize', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
{'type' : 'yn', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
46 |
'default': True, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
'help': ('Is the repository responsible to automatically import ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
'content from this source? ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
'You should say yes unless you don\'t want this behaviour ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
'or if you use a multiple repositories setup, in which ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
'case you should say yes on one repository, no on others.'), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
52 |
'group': 'datafeed-source', 'level': 2, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
}), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
54 |
('synchronization-interval', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
{'type' : 'time', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
'default': '5min', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
'help': ('Interval in seconds between synchronization with the ' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
58 |
'external source (default to 5 minutes, must be >= 1 min).'), |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
59 |
'group': 'datafeed-source', 'level': 2, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
60 |
}), |
7921
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
61 |
('max-lock-lifetime', |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
62 |
{'type' : 'time', |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
63 |
'default': '1h', |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
64 |
'help': ('Maximum time allowed for a synchronization to be run. ' |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
65 |
'Exceeded that time, the synchronization will be considered ' |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
66 |
'as having failed and not properly released the lock, hence ' |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
67 |
'it won\'t be considered'), |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
68 |
'group': 'datafeed-source', 'level': 2, |
a93e2ed5877a
[datafeed] add max-lifetime for concurrent synchronization lock (closes #1908676)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7731
diff
changeset
|
69 |
}), |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
70 |
('delete-entities', |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
71 |
{'type' : 'yn', |
8430
5bee87a14bb1
fix ldap removal handling in ldapfeed (closes #2376625 and #2385133)
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8429
diff
changeset
|
72 |
'default': False, |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
73 |
'help': ('Should already imported entities not found anymore on the ' |
11758
3f81636a75db
[datafeed] Drop entity deletion handling in the default source / parser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11757
diff
changeset
|
74 |
'external source be deleted? Handling of this parameter ' |
3f81636a75db
[datafeed] Drop entity deletion handling in the default source / parser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11757
diff
changeset
|
75 |
"will depend on source's parser."), |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
76 |
'group': 'datafeed-source', 'level': 2, |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
77 |
}), |
7995
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
78 |
('logs-lifetime', |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
79 |
{'type': 'time', |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
80 |
'default': '10d', |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
81 |
'help': ('Time before logs from datafeed imports are deleted.'), |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
82 |
'group': 'datafeed-source', 'level': 2, |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
83 |
}), |
9182
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
84 |
('http-timeout', |
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
85 |
{'type': 'time', |
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
86 |
'default': '1min', |
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
87 |
'help': ('Timeout of HTTP GET requests, when synchronizing a source.'), |
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
88 |
'group': 'datafeed-source', 'level': 2, |
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
89 |
}), |
9822
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
90 |
('use-cwuri-as-url', |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
91 |
{'type': 'yn', |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
92 |
'default': None, # explicitly unset |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
93 |
'help': ('Use cwuri (i.e. external URL) for link to the entity ' |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
94 |
'instead of its local URL.'), |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
95 |
'group': 'datafeed-source', 'level': 1, |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
96 |
}), |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
97 |
) |
8430
5bee87a14bb1
fix ldap removal handling in ldapfeed (closes #2376625 and #2385133)
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8429
diff
changeset
|
98 |
|
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
99 |
def check_config(self, source_entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
100 |
"""check configuration of source entity""" |
8674
001c1592060a
[repo sources] move handling of source's url into abstract source as this becomes shared by most sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8573
diff
changeset
|
101 |
typed_config = super(DataFeedSource, self).check_config(source_entity) |
001c1592060a
[repo sources] move handling of source's url into abstract source as this becomes shared by most sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8573
diff
changeset
|
102 |
if typed_config['synchronization-interval'] < 60: |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
103 |
msg = _('synchronization-interval must be greater than 1 minute') |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
104 |
raise ValidationError(source_entity.eid, {'config': msg}) |
8674
001c1592060a
[repo sources] move handling of source's url into abstract source as this becomes shared by most sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8573
diff
changeset
|
105 |
return typed_config |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
106 |
|
12148
79160d54662e
[sources] Simplify source's init method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
12146
diff
changeset
|
107 |
def init(self, source_entity): |
79160d54662e
[sources] Simplify source's init method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
12146
diff
changeset
|
108 |
super(DataFeedSource, self).init(source_entity) |
12143
a446124bcf3c
[server] Drop update_config method of source
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11775
diff
changeset
|
109 |
self.parser_id = source_entity.parser |
12144
f54286c1cef5
[server] Inline _entity_update method into init method of AbstractSource
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
12143
diff
changeset
|
110 |
self.latest_retrieval = source_entity.latest_retrieval |
12143
a446124bcf3c
[server] Drop update_config method of source
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11775
diff
changeset
|
111 |
typed_config = self.config |
8674
001c1592060a
[repo sources] move handling of source's url into abstract source as this becomes shared by most sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8573
diff
changeset
|
112 |
self.synchro_interval = timedelta(seconds=typed_config['synchronization-interval']) |
001c1592060a
[repo sources] move handling of source's url into abstract source as this becomes shared by most sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8573
diff
changeset
|
113 |
self.max_lock_lifetime = timedelta(seconds=typed_config['max-lock-lifetime']) |
9182
75493f6ca586
[datafeed] add a timeout config option (closes #2745677)
David Douard <david.douard@logilab.fr>
parents:
8695
diff
changeset
|
114 |
self.http_timeout = typed_config['http-timeout'] |
9822
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
115 |
# if typed_config['use-cwuri-as-url'] is set, we have to update |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
116 |
# use_cwuri_as_url attribute and public configuration dictionary |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
117 |
# accordingly |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
118 |
if typed_config['use-cwuri-as-url'] is not None: |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
119 |
self.use_cwuri_as_url = typed_config['use-cwuri-as-url'] |
4a118bfd6ab4
[datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9746
diff
changeset
|
120 |
self.public_config['use-cwuri-as-url'] = self.use_cwuri_as_url |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
121 |
|
9879
21278eb03bbf
[datafeed sources] finish the session -> cnx switch
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
9860
diff
changeset
|
122 |
def _get_parser(self, cnx, **kwargs): |
10454
20f45a9b385c
[datafeed] give an error message if a source is missing a parser id
Julien Cristau <julien.cristau@logilab.fr>
parents:
10143
diff
changeset
|
123 |
if self.parser_id is None: |
20f45a9b385c
[datafeed] give an error message if a source is missing a parser id
Julien Cristau <julien.cristau@logilab.fr>
parents:
10143
diff
changeset
|
124 |
self.warning('No parser defined on source %r', self) |
20f45a9b385c
[datafeed] give an error message if a source is missing a parser id
Julien Cristau <julien.cristau@logilab.fr>
parents:
10143
diff
changeset
|
125 |
raise ObjectNotFound() |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
126 |
return self.repo.vreg['parsers'].select( |
9879
21278eb03bbf
[datafeed sources] finish the session -> cnx switch
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
9860
diff
changeset
|
127 |
self.parser_id, cnx, source=self, **kwargs) |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
128 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
129 |
def fresh(self): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
130 |
if self.latest_retrieval is None: |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
131 |
return False |
11042
079b32f4cd0d
[datafeed] use tz-aware datetime objects
Julien Cristau <julien.cristau@logilab.fr>
parents:
10914
diff
changeset
|
132 |
return datetime.now(tz=utc) < (self.latest_retrieval + self.synchro_interval) |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
133 |
|
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
134 |
def update_latest_retrieval(self, cnx): |
11042
079b32f4cd0d
[datafeed] use tz-aware datetime objects
Julien Cristau <julien.cristau@logilab.fr>
parents:
10914
diff
changeset
|
135 |
self.latest_retrieval = datetime.now(tz=utc) |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
136 |
cnx.execute('SET X latest_retrieval %(date)s WHERE X eid %(x)s', |
9879
21278eb03bbf
[datafeed sources] finish the session -> cnx switch
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
9860
diff
changeset
|
137 |
{'x': self.eid, 'date': self.latest_retrieval}) |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
138 |
cnx.commit() |
7446
6fba86efdd09
[datafeed] extract some methods from pull_data to ease writing custom datafeed sources
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7444
diff
changeset
|
139 |
|
11345
27b98f3cceae
[datafeed] attempt to acquire synchronization lock even when force is given
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11255
diff
changeset
|
140 |
def acquire_synchronization_lock(self, cnx): |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
141 |
# XXX race condition until WHERE of SET queries is executed using |
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
142 |
# 'SELECT FOR UPDATE' |
11042
079b32f4cd0d
[datafeed] use tz-aware datetime objects
Julien Cristau <julien.cristau@logilab.fr>
parents:
10914
diff
changeset
|
143 |
now = datetime.now(tz=utc) |
11345
27b98f3cceae
[datafeed] attempt to acquire synchronization lock even when force is given
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11255
diff
changeset
|
144 |
maxdt = now - self.max_lock_lifetime |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
145 |
if not cnx.execute( |
11125
e717da3dc164
c-c source-sync now actually force synchronization
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11057
diff
changeset
|
146 |
'SET X in_synchronization %(now)s WHERE X eid %(x)s, ' |
e717da3dc164
c-c source-sync now actually force synchronization
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11057
diff
changeset
|
147 |
'X in_synchronization NULL OR X in_synchronization < %(maxdt)s', |
e717da3dc164
c-c source-sync now actually force synchronization
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11057
diff
changeset
|
148 |
{'x': self.eid, 'now': now, 'maxdt': maxdt}): |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
149 |
cnx.commit() |
11345
27b98f3cceae
[datafeed] attempt to acquire synchronization lock even when force is given
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11255
diff
changeset
|
150 |
raise SourceException("a concurrent synchronization is already running") |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
151 |
cnx.commit() |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
152 |
|
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
153 |
def release_synchronization_lock(self, cnx): |
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
154 |
cnx.execute('SET X in_synchronization NULL WHERE X eid %(x)s', |
9879
21278eb03bbf
[datafeed sources] finish the session -> cnx switch
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
9860
diff
changeset
|
155 |
{'x': self.eid}) |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
156 |
cnx.commit() |
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
157 |
|
12346
b3f45d96a179
[py37] `async` is now a reserved keyword
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
12149
diff
changeset
|
158 |
def pull_data(self, cnx, force=False, raise_on_error=False, sync=True, **kwargs):
    """Launch synchronization of the source if needed.

    If `sync` is false, the method immediately returns a dictionary containing the
    import log's eid, and the actual synchronization is done asynchronously. If `sync`
    is true, return some import statistics (e.g. number of created and updated
    entities).

    An empty dictionary is returned when nothing is done, i.e. when `force` is false
    and either the source is fresh or the synchronization lock could not be acquired.
    When `force` is true, a failure to acquire the lock is propagated as a
    `SourceException`.

    The deprecated `async` keyword argument is still accepted through `kwargs`: when
    given (and not None), `sync` is replaced by its negation.

    This method is responsible to handle commit/rollback on the given connection.
    """
    if not force and self.fresh():
        return {}
    try:
        self.acquire_synchronization_lock(cnx)
    except SourceException as exc:
        if force:
            raise
        self.error(str(exc))
        return {}
    try:
        if kwargs.get('async') is not None:
            # stacklevel=2 attributes the warning to the caller still using `async`,
            # which is the code that has to be fixed
            warn('[3.27] `async` is reserved keyword in py3.7 use `sync` param instead',
                 DeprecationWarning, stacklevel=2)
            sync = not kwargs['async']
        if sync:
            return self._pull_data(cnx, force, raise_on_error)
        else:
            return self._async_pull_data(cnx, force, raise_on_error)
    finally:
        cnx.rollback()  # rollback first in case there is some dirty transaction remaining
        # NOTE(review): when `sync` is false the lock is released as soon as the
        # asynchronous task has been scheduled, presumably before it has completed --
        # confirm the lock is not expected to cover the asynchronous synchronization.
        self.release_synchronization_lock(cnx)
7456
c54038622fc9
[datafeed] use a boolean flag on CWSource to ensure we don't have concurrent synchronizations. Closes #1725690
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7447
diff
changeset
|
188 |
|
11138
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
189 |
def _async_pull_data(self, cnx, force, raise_on_error): |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
190 |
import_log = cnx.create_entity('CWDataImport', cw_import_of=self) |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
191 |
cnx.commit() # commit the import log creation before starting the synchronize task |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
192 |
|
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
193 |
def _synchronize_source(repo, source_eid, import_log_eid): |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
194 |
with repo.internal_cnx() as cnx: |
12146
d540defa0591
[server] Add source_by_eid and source_by_uri methods to repository
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
12144
diff
changeset
|
195 |
source = repo.source_by_eid(source_eid) |
11138
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
196 |
source._pull_data(cnx, force, raise_on_error, import_log_eid=import_log_eid) |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
197 |
|
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
198 |
sync = partial(_synchronize_source, cnx.repo, self.eid, import_log.eid) |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
199 |
cnx.repo.threaded_task(sync) |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
200 |
return {'import_log_eid': import_log.eid} |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
201 |
|
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
202 |
def _pull_data(self, cnx, force=False, raise_on_error=False, import_log_eid=None): |
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
203 |
importlog = self.init_import_log(cnx, import_log_eid) |
10454
20f45a9b385c
[datafeed] give an error message if a source is missing a parser id
Julien Cristau <julien.cristau@logilab.fr>
parents:
10143
diff
changeset
|
204 |
try: |
11758
3f81636a75db
[datafeed] Drop entity deletion handling in the default source / parser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11757
diff
changeset
|
205 |
parser = self._get_parser(cnx, import_log=importlog) |
10454
20f45a9b385c
[datafeed] give an error message if a source is missing a parser id
Julien Cristau <julien.cristau@logilab.fr>
parents:
10143
diff
changeset
|
206 |
except ObjectNotFound: |
11740
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
207 |
msg = 'failed to load parser for %s' |
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
208 |
importlog.record_error(msg % ('source "%s"' % self.uri)) |
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
209 |
self.error(msg, self) |
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
210 |
stats = {} |
8430
5bee87a14bb1
fix ldap removal handling in ldapfeed (closes #2376625 and #2385133)
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8429
diff
changeset
|
211 |
else: |
11740
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
212 |
if parser.process_urls(self.urls, raise_on_error): |
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
213 |
self.warning("some error occurred, don't attempt to delete entities") |
dabbb2a4a493
[datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
11345
diff
changeset
|
214 |
stats = parser.stats |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
215 |
self.update_latest_retrieval(cnx) |
7995
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
216 |
if stats.get('created'): |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
217 |
importlog.record_info('added %s entities' % len(stats['created'])) |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
218 |
if stats.get('updated'): |
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
219 |
importlog.record_info('updated %s entities' % len(stats['updated'])) |
9746
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
220 |
importlog.write_log(cnx, end_timestamp=self.latest_retrieval) |
81b56897a377
[datafeed] update datafeed internals to use connection instead of session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9665
diff
changeset
|
221 |
cnx.commit() |
7995
9a9f35ef418c
Record a log of datafeed source imports (closes #2026097)
Julien Cristau <julien.cristau@logilab.fr>
parents:
7950
diff
changeset
|
222 |
return stats |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
223 |
|
11138
78c8e64f3cef
[sources] synchronize source asynchronously when started from the UI
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11129
diff
changeset
|
224 |
def init_import_log(self, cnx, import_log_eid=None, **kwargs):
    """Return an initialized import log entity for a synchronization starting now.

    When `import_log_eid` is given, the matching entity is fetched and updated;
    otherwise a new `CWDataImport` entity linked to this source is created. Either
    way its `start_timestamp` is set to the current time (in the `utc` timezone)
    together with any extra attribute given in `kwargs`, and the connection is
    committed before the entity's `init()` method is called.
    """
    if import_log_eid is not None:
        import_log = cnx.entity_from_eid(import_log_eid)
        started_at = datetime.now(tz=utc)
        import_log.cw_set(start_timestamp=started_at, **kwargs)
    else:
        started_at = datetime.now(tz=utc)
        import_log = cnx.create_entity('CWDataImport', cw_import_of=self,
                                       start_timestamp=started_at, **kwargs)
    cnx.commit()  # make changes visible
    import_log.init()
    return import_log
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
235 |
|
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
236 |
|
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
237 |
class DataFeedParser(AppObject): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
238 |
__registry__ = 'parsers' |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
239 |
|
11758
3f81636a75db
[datafeed] Drop entity deletion handling in the default source / parser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11757
diff
changeset
|
240 |
def __init__(self, cnx, source, import_log=None):
    """Bind the parser to `cnx`, to the `source` it feeds and to an optional import log.

    `stats` starts with empty 'created', 'updated' and 'checked' sets.
    """
    super(DataFeedParser, self).__init__(cnx)
    self.import_log = import_log
    self.source = source
    self.stats = {key: set() for key in ('created', 'updated', 'checked')}
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
245 |
|
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
246 |
def normalize_url(self, url):
    """Return `url` rewritten according to `cubicweb.sobjects.URL_MAPPING`.

    The first key of that dictionary which `url` starts with is replaced by the
    associated value; when no key matches, `url` is returned unchanged.

    This allows to redirect from one host to another, which may be useful for
    example in case of a test instance using production data, while you don't
    want to load the external source nor to hack your `/etc/hosts` file.
    """
    # local import mandatory, it's available after registration
    from cubicweb.sobjects import URL_MAPPING
    for prefix, replacement in URL_MAPPING.items():
        if url.startswith(prefix):
            return url.replace(prefix, replacement, 1)
    return url
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
261 |
|
10516
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
262 |
def retrieve_url(self, url):
    """Return stream linked by the given url:

    * file:// URLs are opened from the local filesystem
    * any other url is normalized first (see :meth:`normalize_url`)
    * http(s) URLs are then fetched
    * other will be considered as plain content (encoded as UTF-8), useful for
      testing purpose

    Before falling back to the module's opener, it will try to find a cwclientlib
    config entry (if available) and use it as requester.

    Except for the http(s) fallback, which returns the opener's response as is,
    the stream is wrapped in an `URLLibResponseAdapter`.
    """
    purl = urlparse(url)
    if purl.scheme == 'file':
        # strip the leading 'file://' to get the filesystem path
        return URLLibResponseAdapter(open(url[7:]), url)

    url = self.normalize_url(url)

    # first, try to use cwclientlib if it's available and if the
    # url matches a configuration entry in ~/.config/cwclientlibrc
    try:
        from cwclientlib import cwproxy_for
        cnx = cwproxy_for(url)
        cnx.timeout = self.source.http_timeout
        self.source.info('Using cwclientlib for %s', url)
        resp = cnx.get(url)
        resp.raise_for_status()
        return URLLibResponseAdapter(BytesIO(resp.content), url)
    except (ImportError, ValueError, EnvironmentError) as exc:
        # ImportError: not available
        # ValueError: no config entry found
        # EnvironmentError: no cwclientlib config file found
        self.source.debug(str(exc))

    # no chance with cwclientlib, fall back to former implementation
    # (`purl` was parsed before normalization, hence the scheme tested here is
    # the one of the url as given by the caller)
    if purl.scheme in ('http', 'https'):
        self.source.info('GET %s', url)
        req = Request(url)
        return _OPENER.open(req, timeout=self.source.http_timeout)

    # url is probably plain content; UTF-8 is a superset of ASCII, so content
    # that used to be accepted yields the same bytes while non-ASCII content no
    # longer raises UnicodeEncodeError
    return URLLibResponseAdapter(BytesIO(url.encode('utf-8')), url)
9823
258d2f9f7d39
[datafeed parser] factor out retrieve_url method from DataFeedXMLParser.parse
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9822
diff
changeset
|
302 |
|
11251
b66a8c3eebeb
[datafeed] move process_urls to the parser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11151
diff
changeset
|
303 |
def process_urls(self, urls, raise_on_error=False):
    """Process each url of `urls` and tell whether some error occurred.

    :param urls: iterable of urls, each one being handed to :meth:`process`
    :param raise_on_error: when true, any exception raised while processing an
      url is propagated instead of being recorded
    :return: True if, for at least one url, :meth:`process` returned a true value
      or raised an exception, else False

    When `raise_on_error` is false, a failure is recorded in the import log if
    there is one (`import_log` may be None); without import log the failure is
    logged through `self.exception` so it is never silently dropped.
    """
    error = False
    for url in urls:
        self.info('pulling data from %s', url)
        try:
            if self.process(url, raise_on_error):
                error = True
        except Exception as exc:
            if raise_on_error:
                raise
            io_failure = isinstance(exc, IOError)
            if io_failure:
                msg = 'could not pull data while processing %s: %s' % (url, exc)
            else:
                msg = str(exc)
            has_import_log = self.import_log is not None
            if has_import_log:
                self.import_log.record_error(msg)
            # I/O failures are only recorded in the import log when there is one;
            # any other failure is always logged with its traceback
            if not io_failure or not has_import_log:
                self.exception('error while processing %s: %s',
                               url, exc)
            error = True
    return error
b66a8c3eebeb
[datafeed] move process_urls to the parser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11151
diff
changeset
|
325 |
|
8409
79534887943e
[datafeed] fix/finish cleanup started by auc in 8393:c25b96ae4f8a: parser.process prototytpe is (url, raise_on_error=False). Drop partialcommit argument which were never specified
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8408
diff
changeset
|
326 |
def process(self, url, raise_on_error=False): |
6957
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
327 |
"""main callback: process the url""" |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
328 |
raise NotImplementedError |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
329 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
330 |
def created_during_pull(self, entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
331 |
return entity.eid in self.stats['created'] |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
332 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
333 |
def updated_during_pull(self, entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
334 |
return entity.eid in self.stats['updated'] |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
335 |
|
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
336 |
def notify_updated(self, entity): |
ffda12be2e9f
[repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
337 |
return self.stats['updated'].add(entity.eid) |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
338 |
|
8435
5064b6e0d6f4
[datafeed] correctly distinguish checked/updated
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8434
diff
changeset
|
339 |
def notify_checked(self, entity): |
5064b6e0d6f4
[datafeed] correctly distinguish checked/updated
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8434
diff
changeset
|
340 |
return self.stats['checked'].add(entity.eid) |
5064b6e0d6f4
[datafeed] correctly distinguish checked/updated
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8434
diff
changeset
|
341 |
|
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
342 |
def is_deleted(self, extid, etype, eid): |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
343 |
"""return True if the entity of given external id, entity type and eid |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
344 |
is actually deleted. Always return True by default, put more sensible |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
345 |
stuff in sub-classes. |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
346 |
""" |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
347 |
return True |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
348 |
|
8188
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
349 |
def update_if_necessary(self, entity, attrs): |
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
350 |
entity.complete(tuple(attrs)) |
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
351 |
# check modification date and compare attribute values to only update |
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
352 |
# what's actually needed |
8435
5064b6e0d6f4
[datafeed] correctly distinguish checked/updated
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8434
diff
changeset
|
353 |
self.notify_checked(entity) |
8188
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
354 |
mdate = attrs.get('modification_date') |
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
355 |
if not mdate or mdate > entity.modification_date: |
10662
10942ed172de
[py3k] dict.iteritems → dict.items
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10611
diff
changeset
|
356 |
attrs = dict( (k, v) for k, v in attrs.items() |
8188
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
357 |
if v != getattr(entity, k)) |
1867e252e487
[repository] ldap-feed source. Closes #2086984
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8187
diff
changeset
|
358 |
if attrs: |
8483
4ba11607d84a
[entity api] unify set_attributes / set_relations into a cw_set method. Closes #2423719
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8435
diff
changeset
|
359 |
entity.cw_set(**attrs) |
8434
39c5bb4dcc59
[ldapfeed] do not crash on ldap user deletion + pull + already deactivated users, cleanups (closes #2392933)
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
8430
diff
changeset
|
360 |
self.notify_updated(entity) |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
361 |
|
8547
f23ac525ddd1
[datafeed] properly call hooks for inlined relations on entity creation. Closes #2481156
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8529
diff
changeset
|
362 |
|
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
363 |
class DataFeedXMLParser(DataFeedParser): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
364 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
365 |
def parse(self, url): |
9823
258d2f9f7d39
[datafeed parser] factor out retrieve_url method from DataFeedXMLParser.parse
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9822
diff
changeset
|
366 |
stream = self.retrieve_url(url) |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
367 |
return self.parse_etree(etree.parse(stream).getroot()) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
368 |
|
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
369 |
def parse_etree(self, document): |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
370 |
return [(document,)] |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
371 |
|
10089
6346f53c85f1
[datafeed] Add a raise_on_error parameter to DataFeedSource.extid2entity
Denis Laxalde <denis.laxalde@logilab.fr>
parents:
9990
diff
changeset
|
372 |
def process_item(self, *args, **kwargs): |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
373 |
raise NotImplementedError |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
374 |
|
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
375 |
def is_deleted(self, extid, etype, eid): |
10551
1182f5f16a3d
[datafeed] fix typo in DataFeedXMLParser.is_deleted (closes #5729755)
David Douard <david.douard@logilab.fr>
parents:
10532
diff
changeset
|
376 |
if extid.startswith('file://'): |
1182f5f16a3d
[datafeed] fix typo in DataFeedXMLParser.is_deleted (closes #5729755)
David Douard <david.douard@logilab.fr>
parents:
10532
diff
changeset
|
377 |
return exists(extid[7:]) |
10516
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
378 |
|
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
379 |
url = self.normalize_url(extid) |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
380 |
# first, try to use cwclientlib if it's available and if the |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
381 |
# url matches a configuration entry in ~/.config/cwclientlibrc |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
382 |
try: |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
383 |
from cwclientlib import cwproxy_for |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
384 |
# parse url again since it has been normalized |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
385 |
cnx = cwproxy_for(url) |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
386 |
cnx.timeout = self.source.http_timeout |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
387 |
self.source.info('Using cwclientlib for checking %s' % url) |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
388 |
return cnx.get(url).status_code == 404 |
10532
2cc74c688eb9
[datafeed] also catch EnvironmentError when trying to load the cwclientlib config file
David Douard <david.douard@logilab.fr>
parents:
10522
diff
changeset
|
389 |
except (ImportError, ValueError, EnvironmentError) as exc: |
10516
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
390 |
# ImportError: not available |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
391 |
# ValueError: no config entry found |
10532
2cc74c688eb9
[datafeed] also catch EnvironmentError when trying to load the cwclientlib config file
David Douard <david.douard@logilab.fr>
parents:
10522
diff
changeset
|
392 |
# EnvironmentError: no cwclientlib config file found |
10516
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
393 |
self.source.debug(str(exc)) |
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
394 |
|
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
395 |
# no chance with cwclientlib, fall back to former implementation |
10603
65ad6980976e
[py3k] import URL mangling functions using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10581
diff
changeset
|
396 |
if urlparse(url).scheme in ('http', 'https'): |
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
397 |
try: |
10516
4c59409220b6
[datafeed] allow to use cwclientlib for datafeed's queries (closes #5456849)
David Douard <david.douard@logilab.fr>
parents:
10143
diff
changeset
|
398 |
_OPENER.open(url, timeout=self.source.http_timeout) |
10610
d53b9c157f99
[py3k] import urllib2 from six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10603
diff
changeset
|
399 |
except HTTPError as ex: |
8187
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
400 |
if ex.code == 404: |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
401 |
return True |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
402 |
return False |
981f6e487788
[datafeed] set delete-entities=yes is now safer, by checking each entity actually seems deleted. Closes #2165381
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8069
diff
changeset
|
403 |
|
9824
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
404 |
|
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
405 |
class URLLibResponseAdapter(object): |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
406 |
"""Thin wrapper to be used to fake a value returned by urllib2.urlopen""" |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
407 |
def __init__(self, stream, url, code=200): |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
408 |
self._stream = stream |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
409 |
self._url = url |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
410 |
self.code = code |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
411 |
|
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
412 |
def read(self, *args): |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
413 |
return self._stream.read(*args) |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
414 |
|
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
415 |
def geturl(self): |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
416 |
return self._url |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
417 |
|
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
418 |
def getcode(self): |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
419 |
return self.code |
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
420 |
|
30183ecf5c61
[datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
9823
diff
changeset
|
421 |
|
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
422 |
# use a cookie enabled opener to use session cookie if any |
10610
d53b9c157f99
[py3k] import urllib2 from six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10603
diff
changeset
|
423 |
_OPENER = build_opener() |
7378
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
424 |
try: |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
425 |
from logilab.common import urllib2ext |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
426 |
_OPENER.add_handler(urllib2ext.HTTPGssapiAuthHandler()) |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
427 |
except ImportError: # python-kerberos not available |
86a1ae289f05
[datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
7351
diff
changeset
|
428 |
pass |
10610
d53b9c157f99
[py3k] import urllib2 from six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10603
diff
changeset
|
429 |
_OPENER.add_handler(HTTPCookieProcessor(CookieJar())) |