29 } |
29 } |
30 } |
30 } |
31 |
31 |
32 """ |
32 """ |
33 |
33 |
34 import urllib2 |
|
35 import StringIO |
|
36 import os.path as osp |
34 import os.path as osp |
37 from cookielib import CookieJar |
|
38 from datetime import datetime, timedelta |
35 from datetime import datetime, timedelta |
39 |
|
40 from lxml import etree |
|
41 |
36 |
42 from logilab.common.date import todate, totime |
37 from logilab.common.date import todate, totime |
43 from logilab.common.textutils import splitstrip, text_to_dict |
38 from logilab.common.textutils import splitstrip, text_to_dict |
44 |
39 |
45 from yams.constraints import BASE_CONVERTERS |
40 from yams.constraints import BASE_CONVERTERS |
69 return totime(datetime.strptime(ustr, '%H:%M:%S')) |
64 return totime(datetime.strptime(ustr, '%H:%M:%S')) |
70 DEFAULT_CONVERTERS['Time'] = convert_time |
65 DEFAULT_CONVERTERS['Time'] = convert_time |
def convert_interval(ustr):
    """Convert a string holding a whole number of seconds into a timedelta.

    :param ustr: textual representation of an integer number of seconds
    :return: the matching `datetime.timedelta`
    :raise ValueError: if `ustr` can't be parsed as an integer
    """
    # an interval is a duration: build a timedelta. The former call to
    # `time(seconds=...)` referenced a name this module never imports.
    return timedelta(seconds=int(ustr))
73 DEFAULT_CONVERTERS['Interval'] = convert_interval |
68 DEFAULT_CONVERTERS['Interval'] = convert_interval |
74 |
|
# Module-level urllib2 opener; `parse` uses it to fetch remote documents.
# Its handlers are registered once, when this module is imported.
75 # use a cookie enabled opener to use session cookie if any |

76 _OPENER = urllib2.build_opener() |

77 try: |

# optional handler: only registered when urllib2ext can be imported
78 from logilab.common import urllib2ext |

79 _OPENER.add_handler(urllib2ext.HTTPGssapiAuthHandler()) |

80 except ImportError: # python-kerberos not available |

81 pass |

# cookies received through this opener are kept in an in-memory CookieJar
82 _OPENER.add_handler(urllib2.HTTPCookieProcessor(CookieJar())) |
|
83 |
69 |
84 def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS): |
70 def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS): |
85 typeddict = {} |
71 typeddict = {} |
86 for rschema in eschema.subject_relations(): |
72 for rschema in eschema.subject_relations(): |
87 if rschema.final and rschema in stringdict: |
73 if rschema.final and rschema in stringdict: |
136 if not 'linkattr' in options: |
122 if not 'linkattr' in options: |
137 msg = _("'%s' action requires 'linkattr' option") % action |
123 msg = _("'%s' action requires 'linkattr' option") % action |
138 raise ValidationError(eid, {rn('options', 'subject'): msg}) |
124 raise ValidationError(eid, {rn('options', 'subject'): msg}) |
139 |
125 |
140 |
126 |
141 class CWEntityXMLParser(datafeed.DataFeedParser): |
127 class CWEntityXMLParser(datafeed.DataFeedXMLParser): |
142 """datafeed parser for the 'xml' entity view""" |
128 """datafeed parser for the 'xml' entity view""" |
143 __regid__ = 'cw.entityxml' |
129 __regid__ = 'cw.entityxml' |
144 |
130 |
145 action_options = { |
131 action_options = { |
146 'copy': _check_no_option, |
132 'copy': _check_no_option, |
147 'link-or-create': _check_linkattr_option, |
133 'link-or-create': _check_linkattr_option, |
148 'link': _check_linkattr_option, |
134 'link': _check_linkattr_option, |
149 } |
135 } |
|
136 parse_etree = staticmethod(_parse_entity_etree) |
|
137 |
150 |
138 |
151 def __init__(self, *args, **kwargs): |
139 def __init__(self, *args, **kwargs): |
152 super(CWEntityXMLParser, self).__init__(*args, **kwargs) |
140 super(CWEntityXMLParser, self).__init__(*args, **kwargs) |
153 self.action_methods = { |
141 self.action_methods = { |
154 'copy': self.related_copy, |
142 'copy': self.related_copy, |
206 if not rules: |
194 if not rules: |
207 del self.source.mapping[etype][(rtype, role, action)] |
195 del self.source.mapping[etype][(rtype, role, action)] |
208 |
196 |
209 # import handling ########################################################## |
197 # import handling ########################################################## |
210 |
198 |
def process(self, url, partialcommit=True):
    """IDataFeedParser main entry point.

    Feed every ``(item, rels)`` pair obtained from ``self.parse(url)`` to
    ``self.process_item``.

    :param url: value handed over to ``self.parse``
    :param partialcommit: when true, commit after each successfully
      processed item and, if an item raises `ValidationError`, log it, roll
      back and go on with the next item. When false, nothing is committed
      here and a `ValidationError` is propagated to the caller.
    :return: True if at least one item has been skipped because of a
      validation error, else False
    """
    # XXX suppression support according to source configuration. If set, get
    # all cwuri of entities from this source, and compare with newly
    # imported ones
    error = False
    for item, rels in self.parse(url):
        # grab the uri first: process_item pops it from `item` and we need
        # it for the error message
        cwuri = item['cwuri']
        try:
            self.process_item(item, rels)
            if partialcommit:
                # commit+set_pool instead of commit(reset_pool=False) to
                # give others a chance to get our pool
                self._cw.commit()
                self._cw.set_pool()
        except ValidationError as exc:
            if not partialcommit:
                # caller is in charge of the transaction, let it decide
                raise
            # logger-style arguments, as done for the 'GET %s' message
            self.source.error('Skipping %s because of validation error %s',
                              cwuri, exc)
            self._cw.rollback()
            self._cw.set_pool()
            error = True
    return error
|
235 |
|
def parse(self, url):
    """Parse an XML export of entities and return what
    `_parse_entity_etree` produces for its root element.

    :param url: either a location starting with 'http', which is fetched
      through the module-level opener after applying the first matching
      prefix rewrite found in `HOST_MAPPING`, or any other string, which is
      read as the XML document itself
    """
    if not url.startswith('http'):
        # not an http(s) location: the string is the document content
        stream = StringIO.StringIO(url)
    else:
        for mappedurl in HOST_MAPPING:
            if url.startswith(mappedurl):
                # only rewrite the leading occurrence, first match wins
                url = url.replace(mappedurl, HOST_MAPPING[mappedurl], 1)
                break
        self.source.info('GET %s', url)
        stream = _OPENER.open(url)
    try:
        # the tree is fully built once etree.parse returns, so the stream
        # can be released before the elements are walked
        root = etree.parse(stream).getroot()
    finally:
        # don't leak the HTTP response (or the in-memory buffer)
        stream.close()
    return _parse_entity_etree(root)
|
247 |
201 |
248 def process_item(self, item, rels): |
202 def process_item(self, item, rels): |
249 entity = self.extid2entity(str(item.pop('cwuri')), item.pop('cwtype'), |
203 entity = self.extid2entity(str(item.pop('cwuri')), item.pop('cwtype'), |
250 item=item) |
204 item=item) |
251 if not (self.created_during_pull(entity) or self.updated_during_pull(entity)): |
205 if not (self.created_during_pull(entity) or self.updated_during_pull(entity)): |