1 # copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
|
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
3 # |
|
4 # This file is part of CubicWeb. |
|
5 # |
|
6 # CubicWeb is free software: you can redistribute it and/or modify it under the |
|
7 # terms of the GNU Lesser General Public License as published by the Free |
|
8 # Software Foundation, either version 2.1 of the License, or (at your option) |
|
9 # any later version. |
|
10 # |
|
11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
|
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
|
14 # details. |
|
15 # |
|
16 # You should have received a copy of the GNU Lesser General Public License along |
|
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
|
18 """datafeed parser for xml generated by cubicweb |
|
19 |
|
20 Example of mapping for CWEntityXMLParser:: |
|
21 |
|
22 {u'CWUser': { # EntityType |
|
23 (u'in_group', u'subject', u'link'): [ # (rtype, role, action) |
|
24 (u'CWGroup', {u'linkattr': u'name'})], # -> rules = [(EntityType, options), ...] |
|
25 (u'tags', u'object', u'link-or-create'): [ # (...) |
|
26 (u'Tag', {u'linkattr': u'name'})], # -> ... |
|
27 (u'use_email', u'subject', u'copy'): [ # (...) |
|
28 (u'EmailAddress', {})] # -> ... |
|
29 } |
|
30 } |
|
31 |
|
32 """ |
|
33 |
|
34 import os.path as osp |
|
35 from datetime import datetime, timedelta, time |
|
36 from urllib import urlencode |
|
37 from cgi import parse_qs # in urlparse with python >= 2.6 |
|
38 |
|
39 from logilab.common.date import todate, totime |
|
40 from logilab.common.textutils import splitstrip, text_to_dict |
|
41 from logilab.common.decorators import classproperty |
|
42 |
|
43 from yams.constraints import BASE_CONVERTERS |
|
44 from yams.schema import role_name as rn |
|
45 |
|
46 from cubicweb import ValidationError, RegistryException, typed_eid |
|
47 from cubicweb.view import Component |
|
48 from cubicweb.server.sources import datafeed |
|
49 from cubicweb.server.hook import match_rtype |
|
50 |
|
# XXX see cubicweb.cwvreg.YAMS_TO_PY
# XXX see cubicweb.web.views.xmlrss.SERIALIZERS
# Table mapping an attribute type name to the callable used to turn the string
# found in the xml into a python value. It starts as a copy of yams'
# BASE_CONVERTERS, so the entries set below do not alter the original table.
DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
DEFAULT_CONVERTERS['String'] = unicode
# password values are encoded to utf8 byte strings
DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')
|
def convert_date(ustr):
    """Parse a '%Y-%m-%d' formatted string and return the result of
    `todate` applied to the parsed datetime.
    """
    parsed = datetime.strptime(ustr, '%Y-%m-%d')
    return todate(parsed)
|
# 'Date' values are parsed by convert_date
DEFAULT_CONVERTERS['Date'] = convert_date
|
def convert_datetime(ustr):
    """Parse a '%Y-%m-%d %H:%M:%S' formatted string into a datetime.

    Anything following the first '.' (assumed to be microseconds, as in
    '%Y-%m-%d %H:%M:%S.mmmmmm') is ignored.
    """
    # splitting a string holding no '.' simply gives back the whole string
    without_fraction = ustr.split('.', 1)[0]
    return datetime.strptime(without_fraction, '%Y-%m-%d %H:%M:%S')
|
# 'Datetime' and 'TZDatetime' values share the same converter
DEFAULT_CONVERTERS['Datetime'] = convert_datetime
# XXX handle timezone, though this will be enough as TZDatetime are
# serialized without time zone by default (UTC time). See
# cw.web.views.xmlrss.SERIALIZERS.
DEFAULT_CONVERTERS['TZDatetime'] = convert_datetime
|
def convert_time(ustr):
    """Parse a '%H:%M:%S' formatted string and return the result of
    `totime` applied to the parsed datetime.
    """
    parsed = datetime.strptime(ustr, '%H:%M:%S')
    return totime(parsed)
|
# 'Time' and 'TZTime' values share the same converter
DEFAULT_CONVERTERS['Time'] = convert_time
DEFAULT_CONVERTERS['TZTime'] = convert_time
|
def convert_interval(ustr):
    """Convert a number of seconds, given as a string, into a timedelta.

    `datetime.time` accepts no `seconds` argument (its keyword is `second`
    and it describes a time of day, not a duration), hence a `timedelta` is
    built here.

    Raises ValueError if `ustr` can't be converted to an integer.
    """
    return timedelta(seconds=int(ustr))
|
# 'Interval' values are parsed by convert_interval
DEFAULT_CONVERTERS['Interval'] = convert_interval
|
75 |
|
def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
    """Return a {attribute name: python value} dictionary built from
    `stringdict`.

    Only final subject relations of `eschema` found in `stringdict` are
    considered, 'eid', 'cwuri', 'cwtype' and 'cwsource' excepted. Values which
    are not None are converted using the callable found in `converters` for
    the attribute's type, None values are kept as is.
    """
    ignored = ('eid', 'cwuri', 'cwtype', 'cwsource')
    typeddict = {}
    for rschema in eschema.subject_relations():
        if not (rschema.final and rschema in stringdict):
            continue
        if rschema in ignored:
            continue
        attrtype = eschema.destination(rschema)
        rawvalue = stringdict[rschema]
        if rawvalue is None:
            typeddict[rschema.type] = None
        else:
            typeddict[rschema.type] = converters[attrtype](rawvalue)
    return typeddict
|
88 |
|
def rtype_role_rql(rtype, role):
    """Return the rql snippet relating X, restricted by a '%(x)s' eid
    argument, to Y through `rtype`.

    X is the object of the relation when `role` is 'object', its subject for
    any other value.
    """
    if role == 'object':
        template = 'Y %s X WHERE X eid %%(x)s'
    else:
        template = 'X %s Y WHERE X eid %%(x)s'
    return template % rtype
|
94 |
|
95 |
|
class CWEntityXMLParser(datafeed.DataFeedXMLParser):
    """datafeed parser for the 'xml' entity view

    Most of the logic is delegated to the following components:

    * an "item builder" component, turning an etree xml node into a specific
      python dictionary representing an entity

    * "action" components, selected given an entity, a relation and its role in
      the relation, and responsible to link the entity to given related items
      (eg dictionary)

    So the parser is only doing the gluing service and the connection to the
    source.
    """
    __regid__ = 'cw.entityxml'

    def __init__(self, *args, **kwargs):
        super(CWEntityXMLParser, self).__init__(*args, **kwargs)
        # cache of (item, rels) tuples fetched by `complete_item`, by cwuri
        self._parsed_urls = {}
        # eids of entities already handled by `process_item`
        self._processed_entities = set()

    def select_linker(self, action, rtype, role, entity=None):
        """return the component registered as 'cw.entityxml.action.<action>',
        selected for the given relation type, role and entity.

        Raise RegistryException with an 'Unknown action' message if the
        selection fails.
        """
        try:
            return self._cw.vreg['components'].select(
                'cw.entityxml.action.%s' % action, self._cw, entity=entity,
                rtype=rtype, role=role, parser=self)
        except RegistryException:
            raise RegistryException('Unknown action %s' % action)

    def list_actions(self):
        """return the sorted list of action names, taken from the first class
        registered under each 'cw.entityxml.action.*' identifier
        """
        reg = self._cw.vreg['components']
        return sorted(clss[0].action for rid, clss in reg.iteritems()
                      if rid.startswith('cw.entityxml.action.'))

    # mapping handling #########################################################

    def add_schema_config(self, schemacfg, checkonly=False):
        """added CWSourceSchemaConfig, modify mapping accordingly

        The configuration's options must hold a 'role' ('subject' or 'object')
        and an 'action'; remaining options are checked by the action's linker.
        ValidationError is raised on any problem. The source's mapping is left
        untouched if `checkonly` is true.
        """
        _ = self._cw._
        try:
            rtype = schemacfg.schema.rtype.name
        except AttributeError:
            msg = _("entity and relation types can't be mapped, only attributes "
                    "or relations")
            raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg})
        if schemacfg.options:
            options = text_to_dict(schemacfg.options)
        else:
            options = {}
        try:
            role = options.pop('role')
            if role not in ('subject', 'object'):
                # handled as a missing role
                raise KeyError
        except KeyError:
            msg = _('"role=subject" or "role=object" must be specified in options')
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
        try:
            action = options.pop('action')
            linker = self.select_linker(action, rtype, role)
            linker.check_options(options, schemacfg.eid)
        except KeyError:
            msg = _('"action" must be specified in options; allowed values are '
                    '%s') % ', '.join(self.list_actions())
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
        except RegistryException:
            msg = _('allowed values for "action" are %s') % ', '.join(self.list_actions())
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
        if not checkonly:
            if role == 'subject':
                etype = schemacfg.schema.stype.name
                ttype = schemacfg.schema.otype.name
            else:
                etype = schemacfg.schema.otype.name
                ttype = schemacfg.schema.stype.name
            etyperules = self.source.mapping.setdefault(etype, {})
            etyperules.setdefault((rtype, role, action), []).append(
                (ttype, options) )
            # remember what has been added so it can be found back on deletion
            self.source.mapping_idx[schemacfg.eid] = (
                etype, rtype, role, action, ttype)

    def del_schema_config(self, schemacfg, checkonly=False):
        """deleted CWSourceSchemaConfig, modify mapping accordingly

        Rules targeting the configuration's target type are removed from the
        source's mapping, as well as the whole (rtype, role, action) entry once
        no rule is left for it. As for `add_schema_config`, the mapping is left
        untouched if `checkonly` is true.

        Raise KeyError if the configuration is unknown to the mapping index.
        """
        etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid]
        etyperules = self.source.mapping[etype]
        rulekey = (rtype, role, action)
        rules = etyperules[rulekey]
        if checkonly:
            return
        remaining = [rule for rule in rules if rule[0] != ttype]
        if remaining:
            # update the list held by the mapping, rebinding a local name
            # would leave the deleted rule in there
            rules[:] = remaining
        else:
            del etyperules[rulekey]

    # import handling ##########################################################

    def process(self, url, raise_on_error=False, partialcommit=True):
        """IDataFeedParser main entry point"""
        if url.startswith('http'): # XXX similar loose test as in parse of sources.datafeed
            url = self.complete_url(url)
        super(CWEntityXMLParser, self).process(url, raise_on_error, partialcommit)

    def parse_etree(self, parent):
        """yield what's built by the 'cw.entityxml.item-builder' component for
        each child node of `parent`
        """
        for node in list(parent):
            builder = self._cw.vreg['components'].select(
                'cw.entityxml.item-builder', self._cw, node=node,
                parser=self)
            yield builder.build_item()

    def process_item(self, item, rels):
        """
        item and rels are what's returned by the item builder `build_item` method:

        * `item` is an {attribute: value} dictionary
        * `rels` is for relations and structured as
           {role: {relation: [(related item, related rels)...]}

        Return the matching entity, or None if `extid2entity` gave none. An
        entity is processed only once per parser instance.
        """
        entity = self.extid2entity(str(item['cwuri']), item['cwtype'],
                                   cwsource=item['cwsource'], item=item)
        if entity is None:
            return None
        if entity.eid in self._processed_entities:
            return entity
        self._processed_entities.add(entity.eid)
        if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
            self.notify_updated(entity)
            attrs = extract_typed_attrs(entity.e_schema, item)
            # check modification date and compare attribute values to only
            # update what's actually needed
            entity.complete(tuple(attrs))
            mdate = attrs.get('modification_date')
            if not mdate or mdate > entity.modification_date:
                attrs = dict( (k, v) for k, v in attrs.iteritems()
                              if v != getattr(entity, k))
                if attrs:
                    entity.set_attributes(**attrs)
        self.process_relations(entity, rels)
        return entity

    def process_relations(self, entity, rels):
        """apply each mapping rule defined for the entity's type, using the
        linker selected for the rule's action. Missing relations and unknown
        actions are recorded as errors in the import log.
        """
        etype = entity.__regid__
        for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems():
            try:
                related_items = rels[role][rtype]
            except KeyError:
                self.import_log.record_error('relation %s-%s not found in xml export of %s'
                                             % (rtype, role, etype))
                continue
            try:
                linker = self.select_linker(action, rtype, role, entity)
            except RegistryException:
                self.import_log.record_error('no linker for action %s' % action)
            else:
                linker.link_items(related_items, rules)

    def before_entity_copy(self, entity, sourceparams):
        """IDataFeedParser callback"""
        attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
        entity.cw_edited.update(attrs)

    def complete_url(self, url, etype=None, known_relations=None):
        """append to the url's query string information about relation that should
        be included in the resulting xml, according to source mapping.

        If etype is not specified, try to guess it using the last path part of
        the url, i.e. the format used by default in cubicweb to map all entities
        of a given type as in 'http://mysite.org/EntityType'.

        If `known_relations` is given, it should be a dictionary of already
        known relations, so they don't get queried again. It is expected to be
        structured as {role: {relation: ...}}.
        """
        try:
            url, qs = url.split('?', 1)
        except ValueError:
            qs = ''
        params = parse_qs(qs)
        if not 'vid' in params:
            params['vid'] = ['xml']
        if etype is None:
            try:
                etype = url.rsplit('/', 1)[1]
            except (ValueError, IndexError):
                # no '/' in the url: indexing the split result raises
                # IndexError
                return url + '?' + self._cw.build_url_params(**params)
            try:
                etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
            except KeyError:
                return url + '?' + self._cw.build_url_params(**params)
        relations = params.setdefault('relation', [])
        for rtype, role, _action in self.source.mapping.get(etype, ()):
            # look for the relation under its actual role, not under a literal
            # 'role' key which is never found in there
            if known_relations and rtype in known_relations.get(role, ()):
                continue
            reldef = '%s-%s' % (rtype, role)
            if not reldef in relations:
                relations.append(reldef)
        return url + '?' + self._cw.build_url_params(**params)

    def complete_item(self, item, rels):
        """return the (item, rels) tuple obtained by parsing the xml found at
        the item's cwuri, completed according to the source's mapping.

        Results are cached by cwuri. Relations already known through `rels`
        are merged into the fetched ones.
        """
        try:
            return self._parsed_urls[item['cwuri']]
        except KeyError:
            itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels)
            item_rels = list(self.parse(itemurl))
            assert len(item_rels) == 1, 'url %s expected to bring back one '\
                   'and only one entity, got %s' % (itemurl, len(item_rels))
            self._parsed_urls[item['cwuri']] = item_rels[0]
            if rels:
                # XXX (do it better) merge relations
                new_rels = item_rels[0][1]
                for role in ('subject', 'object'):
                    known = rels.get(role)
                    if known:
                        # setdefault so known relations are kept even if no
                        # relation has been fetched for this role
                        new_rels.setdefault(role, {}).update(known)
            return item_rels[0]
|
302 |
|
303 |
|
class CWEntityXMLItemBuilder(Component):
    """component turning an etree xml node into the dictionaries describing
    an entity, on behalf of the given parser
    """
    __regid__ = 'cw.entityxml.item-builder'

    def __init__(self, _cw, parser, node, **kwargs):
        super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs)
        self.node = node
        self.parser = parser

    def build_item(self):
        """parse a XML document node and return two dictionaries defining (part
        of) an entity:

        - {attribute: value}
        - {role: {relation: [(related item, related rels)...]}
        """
        node = self.node
        item = dict(node.attrib.items())
        item['cwtype'] = unicode(node.tag)
        item.setdefault('cwsource', None)
        try:
            raw_eid = item['eid']
        except KeyError:
            # cw < 3.11 compat mode XXX
            item['eid'] = typed_eid(node.find('eid').text)
            item['cwuri'] = node.find('cwuri').text
        else:
            item['eid'] = typed_eid(raw_eid)
        rels = {}
        for child in node:
            role = child.get('role')
            if role:
                # relation: children of the node are the related items
                role_rels = rels.setdefault(role, {})
                role_rels.setdefault(child.tag, []).extend(
                    self.parser.parse_etree(child))
                continue
            # attribute, None when the tag is empty
            if child.text:
                item[child.tag] = unicode(child.text)
            else:
                item[child.tag] = None
        return item, rels
|
343 |
|
344 |
|
class CWEntityXMLActionCopy(Component):
    """implementation of cubicweb entity xml parser's 'copy' action

    Takes no option.
    """
    __regid__ = 'cw.entityxml.action.copy'

    def __init__(self, _cw, parser, rtype, role, entity=None, **kwargs):
        super(CWEntityXMLActionCopy, self).__init__(_cw, **kwargs)
        self.entity = entity
        self.role = role
        self.rtype = rtype
        self.parser = parser

    @classproperty
    def action(cls):
        """name of the action, i.e. what follows the last dot of __regid__"""
        return cls.__regid__.rsplit('.', 1)[-1]

    def check_options(self, options, eid):
        """raise ValidationError if any option is given"""
        self._check_no_options(options, eid)

    def _check_no_options(self, options, eid, msg=None):
        """raise ValidationError on entity `eid` if `options` is not empty,
        using `msg` or a default message if not specified
        """
        if not options:
            return
        if msg is None:
            msg = self._cw._("'%s' action doesn't take any options") % self.action
        raise ValidationError(eid, {rn('options', 'subject'): msg})

    def link_items(self, others, rules):
        """complete and import related items whose type is targeted by
        `rules`, then set the relation to the resulting entities, or clear it
        if there is none
        """
        assert not any(x[1] for x in rules), "'copy' action takes no option"
        ttypes = frozenset(rule[0] for rule in rules)
        local_eids = []
        for item, rels in others:
            if item['cwtype'] not in ttypes:
                continue
            item, rels = self.parser.complete_item(item, rels)
            other_entity = self.parser.process_item(item, rels)
            if other_entity is None:
                continue
            local_eids.append(other_entity.eid)
        if local_eids:
            self._set_relation(local_eids)
        else:
            self._clear_relation(ttypes)

    def _clear_relation(self, ttypes):
        """delete relations from the entity to entities of the given types,
        unless the entity has been created during the pull
        """
        if self.parser.created_during_pull(self.entity):
            return
        typenames = ','.join(ttypes)
        if len(ttypes) > 1:
            typerestr = ', Y is IN(%s)' % typenames
        else:
            typerestr = ', Y is %s' % typenames
        rql = 'DELETE ' + rtype_role_rql(self.rtype, self.role) + typerestr
        self._cw.execute(rql, {'x': self.entity.eid})

    def _set_relation(self, eids):
        """delete relations to entities whose eid isn't in `eids`, then add
        relations to those of `eids` not related yet
        """
        assert eids
        rtype = self.rtype
        rqlbase = rtype_role_rql(rtype, self.role)
        eidstr = ','.join([str(eid) for eid in eids])
        delete_rql = 'DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr)
        self._cw.execute(delete_rql, {'x': self.entity.eid})
        if self.role == 'object':
            set_template = 'SET %s, Y eid IN (%s), NOT Y %s X'
        else:
            set_template = 'SET %s, Y eid IN (%s), NOT X %s Y'
        set_rql = set_template % (rqlbase, eidstr, rtype)
        self._cw.execute(set_rql, {'x': self.entity.eid})
|
408 |
|
409 |
|
class CWEntityXMLActionLink(CWEntityXMLActionCopy):
    """implementation of cubicweb entity xml parser's 'link' action

    requires a 'linkattr' option to control search of the linked entity.
    """
    __regid__ = 'cw.entityxml.action.link'

    # when true, an entity is created if no target is found
    create_when_not_found = False

    def check_options(self, options, eid):
        """raise ValidationError if there is no 'linkattr' option"""
        if 'linkattr' in options:
            return
        msg = self._cw._("'%s' action requires 'linkattr' option") % self.action
        raise ValidationError(eid, {rn('options', 'subject'): msg})

    def link_items(self, others, rules):
        """link the entity to the items of `others` for each rule, searching
        targets using the attributes listed by the rule's 'linkattr' option
        """
        for ttype, options in rules:
            linkattr = options.get('linkattr', '')
            self._related_link(ttype, others, splitstrip(linkattr))

    def _related_link(self, ttype, others, searchattrs):
        """search a local entity for each item of type `ttype` in `others`,
        then set the relation to what has been found, or clear it if nothing
        was. Items which can't be resolved are recorded as errors in the
        import log.
        """
        log = self.parser.import_log
        local_eids = []
        for item, rels in others:
            if item['cwtype'] != ttype:
                continue
            if not all(attr in item for attr in searchattrs):
                # some search attribute is missing, fetch the full item
                item, rels = self.parser.complete_item(item, rels)
                if not all(attr in item for attr in searchattrs):
                    log.record_error('missing attribute, got %s expected keys %s'
                                     % (item, searchattrs))
                    continue
            # XXX str() needed with python < 2.6
            kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
            targets = self._find_entities(item, kwargs)
            found = len(targets)
            if found == 1:
                entity = targets[0]
            elif not targets and self.create_when_not_found:
                entity = self._cw.create_entity(item['cwtype'], **kwargs)
            elif found > 1:
                log.record_error('ambiguous link: found %s entity %s with attributes %s'
                                 % (found, item['cwtype'], kwargs))
                continue
            else:
                log.record_error('can not find %s entity with attributes %s'
                                 % (item['cwtype'], kwargs))
                continue
            local_eids.append(entity.eid)
            self.parser.process_relations(entity, rels)
        if local_eids:
            self._set_relation(local_eids)
        else:
            self._clear_relation((ttype,))

    def _find_entities(self, item, kwargs):
        """return a tuple of entities of the item's type matching `kwargs`"""
        matching = self._cw.find_entities(item['cwtype'], **kwargs)
        return tuple(matching)
|
467 |
|
468 |
|
class CWEntityXMLActionLinkInState(CWEntityXMLActionLink):
    """custom implementation of cubicweb entity xml parser's 'link' action for
    in_state relation
    """
    __select__ = match_rtype('in_state')

    def check_options(self, options, eid):
        """raise ValidationError unless 'name' is one of the attributes listed
        by the 'linkattr' option
        """
        super(CWEntityXMLActionLinkInState, self).check_options(options, eid)
        # check against attribute names as split by `link_items`: testing the
        # raw string would also accept e.g. 'linkattr=fullname'
        if 'name' not in splitstrip(options['linkattr']):
            msg = self._cw._("'%s' action for in_state relation should at least have 'linkattr=name' option") % self.action
            raise ValidationError(eid, {rn('options', 'subject'): msg})

    def _find_entities(self, item, kwargs):
        """return a tuple holding the state named as the item in the entity's
        current workflow, or an empty tuple if there is no such state
        """
        assert 'name' in item # XXX else, complete_item
        state_name = item['name']
        wf = self.entity.cw_adapt_to('IWorkflowable').current_workflow
        state = wf.state_by_name(state_name)
        if state is None:
            return ()
        return (state,)
|
489 |
|
490 |
|
class CWEntityXMLActionLinkOrCreate(CWEntityXMLActionLink):
    """implementation of cubicweb entity xml parser's 'link-or-create' action

    requires a 'linkattr' option to control search of the linked entity.

    The only difference with the parent class is the `create_when_not_found`
    flag, set to true here.
    """
    __regid__ = 'cw.entityxml.action.link-or-create'
    # overrides the parent class' value (False)
    create_when_not_found = True
|
498 |
|
499 |
|
def registration_callback(vreg):
    """Register this module's objects and (re)load the URL_MAPPING global.

    URL_MAPPING is reset to an empty dictionary, then set to the result of
    evaluating the 'urlmapping.py' file of the instance's home directory, if
    there is one.
    """
    global URL_MAPPING
    vreg.register_all(globals().values(), __name__)
    URL_MAPPING = {}
    if vreg.config.apphome:
        url_mapping_file = osp.join(vreg.config.apphome, 'urlmapping.py')
        if osp.exists(url_mapping_file):
            # NOTE(review): eval() runs whatever python expression the file
            # holds, so that file must be as trusted as the application's code
            stream = open(url_mapping_file)
            try:
                URL_MAPPING = eval(stream.read())
            finally:
                # don't leak the file descriptor, even if evaluation fails
                stream.close()
            vreg.info('using url mapping %s from %s', URL_MAPPING, url_mapping_file)
|