--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sobjects/cwxmlparser.py Tue Jan 31 21:43:24 2012 +0100
@@ -0,0 +1,487 @@
+# copyright 2010-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+#
+# This file is part of CubicWeb.
+#
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
+"""datafeed parser for xml generated by cubicweb
+
+Example of mapping for CWEntityXMLParser::
+
+ {u'CWUser': { # EntityType
+ (u'in_group', u'subject', u'link'): [ # (rtype, role, action)
+ (u'CWGroup', {u'linkattr': u'name'})], # -> rules = [(EntityType, options), ...]
+ (u'tags', u'object', u'link-or-create'): [ # (...)
+ (u'Tag', {u'linkattr': u'name'})], # -> ...
+ (u'use_email', u'subject', u'copy'): [ # (...)
+ (u'EmailAddress', {})] # -> ...
+ }
+ }
+
+"""
+
+from datetime import datetime, timedelta, time
+from urllib import urlencode
+from cgi import parse_qs # in urlparse with python >= 2.6
+
+from logilab.common.date import todate, totime
+from logilab.common.textutils import splitstrip, text_to_dict
+from logilab.common.decorators import classproperty
+
+from yams.constraints import BASE_CONVERTERS
+from yams.schema import role_name as rn
+
+from cubicweb import ValidationError, RegistryException, typed_eid
+from cubicweb.view import Component
+from cubicweb.server.sources import datafeed
+from cubicweb.server.hook import match_rtype
+
+# XXX see cubicweb.cwvreg.YAMS_TO_PY
+# XXX see cubicweb.web.views.xmlrss.SERIALIZERS
+DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
+DEFAULT_CONVERTERS['String'] = unicode
+DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')
+def convert_date(ustr):
+ return todate(datetime.strptime(ustr, '%Y-%m-%d'))
+DEFAULT_CONVERTERS['Date'] = convert_date
+def convert_datetime(ustr):
+ if '.' in ustr: # assume %Y-%m-%d %H:%M:%S.mmmmmm
+ ustr = ustr.split('.',1)[0]
+ return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S')
+DEFAULT_CONVERTERS['Datetime'] = convert_datetime
+# XXX handle timezone, though this will be enough as TZDatetime are
+# serialized without time zone by default (UTC time). See
+# cw.web.views.xmlrss.SERIALIZERS.
+DEFAULT_CONVERTERS['TZDatetime'] = convert_datetime
+def convert_time(ustr):
+ return totime(datetime.strptime(ustr, '%H:%M:%S'))
+DEFAULT_CONVERTERS['Time'] = convert_time
+DEFAULT_CONVERTERS['TZTime'] = convert_time
+def convert_interval(ustr):
+ return time(seconds=int(ustr))
+DEFAULT_CONVERTERS['Interval'] = convert_interval
+
+def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
+ typeddict = {}
+ for rschema in eschema.subject_relations():
+ if rschema.final and rschema in stringdict:
+ if rschema in ('eid', 'cwuri', 'cwtype', 'cwsource'):
+ continue
+ attrtype = eschema.destination(rschema)
+ value = stringdict[rschema]
+ if value is not None:
+ value = converters[attrtype](value)
+ typeddict[rschema.type] = value
+ return typeddict
+
+def rtype_role_rql(rtype, role):
+ if role == 'object':
+ return 'Y %s X WHERE X eid %%(x)s' % rtype
+ else:
+ return 'X %s Y WHERE X eid %%(x)s' % rtype
+
+
+class CWEntityXMLParser(datafeed.DataFeedXMLParser):
+ """datafeed parser for the 'xml' entity view
+
+ Most of the logic is delegated to the following components:
+
+ * an "item builder" component, turning an etree xml node into a specific
+ python dictionnary representing an entity
+
+ * "action" components, selected given an entity, a relation and its role in
+ the relation, and responsible to link the entity to given related items
+ (eg dictionnary)
+
+ So the parser is only doing the gluing service and the connection to the
+ source.
+ """
+ __regid__ = 'cw.entityxml'
+
+ def __init__(self, *args, **kwargs):
+ super(CWEntityXMLParser, self).__init__(*args, **kwargs)
+ self._parsed_urls = {}
+ self._processed_entities = set()
+
+ def select_linker(self, action, rtype, role, entity=None):
+ try:
+ return self._cw.vreg['components'].select(
+ 'cw.entityxml.action.%s' % action, self._cw, entity=entity,
+ rtype=rtype, role=role, parser=self)
+ except RegistryException:
+ raise RegistryException('Unknown action %s' % action)
+
+ def list_actions(self):
+ reg = self._cw.vreg['components']
+ return sorted(clss[0].action for rid, clss in reg.iteritems()
+ if rid.startswith('cw.entityxml.action.'))
+
+ # mapping handling #########################################################
+
+ def add_schema_config(self, schemacfg, checkonly=False):
+ """added CWSourceSchemaConfig, modify mapping accordingly"""
+ _ = self._cw._
+ try:
+ rtype = schemacfg.schema.rtype.name
+ except AttributeError:
+ msg = _("entity and relation types can't be mapped, only attributes "
+ "or relations")
+ raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg})
+ if schemacfg.options:
+ options = text_to_dict(schemacfg.options)
+ else:
+ options = {}
+ try:
+ role = options.pop('role')
+ if role not in ('subject', 'object'):
+ raise KeyError
+ except KeyError:
+ msg = _('"role=subject" or "role=object" must be specified in options')
+ raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
+ try:
+ action = options.pop('action')
+ linker = self.select_linker(action, rtype, role)
+ linker.check_options(options, schemacfg.eid)
+ except KeyError:
+ msg = _('"action" must be specified in options; allowed values are '
+ '%s') % ', '.join(self.list_actions())
+ raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
+ except RegistryException:
+ msg = _('allowed values for "action" are %s') % ', '.join(self.list_actions())
+ raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
+ if not checkonly:
+ if role == 'subject':
+ etype = schemacfg.schema.stype.name
+ ttype = schemacfg.schema.otype.name
+ else:
+ etype = schemacfg.schema.otype.name
+ ttype = schemacfg.schema.stype.name
+ etyperules = self.source.mapping.setdefault(etype, {})
+ etyperules.setdefault((rtype, role, action), []).append(
+ (ttype, options) )
+ self.source.mapping_idx[schemacfg.eid] = (
+ etype, rtype, role, action, ttype)
+
+ def del_schema_config(self, schemacfg, checkonly=False):
+ """deleted CWSourceSchemaConfig, modify mapping accordingly"""
+ etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid]
+ rules = self.source.mapping[etype][(rtype, role, action)]
+ rules = [x for x in rules if not x[0] == ttype]
+ if not rules:
+ del self.source.mapping[etype][(rtype, role, action)]
+
+ # import handling ##########################################################
+
+ def process(self, url, raise_on_error=False, partialcommit=True):
+ """IDataFeedParser main entry point"""
+ if url.startswith('http'): # XXX similar loose test as in parse of sources.datafeed
+ url = self.complete_url(url)
+ super(CWEntityXMLParser, self).process(url, raise_on_error, partialcommit)
+
+ def parse_etree(self, parent):
+ for node in list(parent):
+ builder = self._cw.vreg['components'].select(
+ 'cw.entityxml.item-builder', self._cw, node=node,
+ parser=self)
+ yield builder.build_item()
+
+ def process_item(self, item, rels):
+ """
+ item and rels are what's returned by the item builder `build_item` method:
+
+ * `item` is an {attribute: value} dictionary
+ * `rels` is for relations and structured as
+ {role: {relation: [(related item, related rels)...]}
+ """
+ entity = self.extid2entity(str(item['cwuri']), item['cwtype'],
+ cwsource=item['cwsource'], item=item)
+ if entity is None:
+ return None
+ if entity.eid in self._processed_entities:
+ return entity
+ self._processed_entities.add(entity.eid)
+ if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
+ attrs = extract_typed_attrs(entity.e_schema, item)
+ self.update_if_necessary(entity, attrs)
+ self.process_relations(entity, rels)
+ return entity
+
+ def process_relations(self, entity, rels):
+ etype = entity.__regid__
+ for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems():
+ try:
+ related_items = rels[role][rtype]
+ except KeyError:
+ self.import_log.record_error('relation %s-%s not found in xml export of %s'
+ % (rtype, role, etype))
+ continue
+ try:
+ linker = self.select_linker(action, rtype, role, entity)
+ except RegistryException:
+ self.import_log.record_error('no linker for action %s' % action)
+ else:
+ linker.link_items(related_items, rules)
+
+ def before_entity_copy(self, entity, sourceparams):
+ """IDataFeedParser callback"""
+ attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
+ entity.cw_edited.update(attrs)
+
+ def complete_url(self, url, etype=None, known_relations=None):
+ """append to the url's query string information about relation that should
+ be included in the resulting xml, according to source mapping.
+
+ If etype is not specified, try to guess it using the last path part of
+ the url, i.e. the format used by default in cubicweb to map all entities
+ of a given type as in 'http://mysite.org/EntityType'.
+
+ If `known_relations` is given, it should be a dictionary of already
+ known relations, so they don't get queried again.
+ """
+ try:
+ url, qs = url.split('?', 1)
+ except ValueError:
+ qs = ''
+ params = parse_qs(qs)
+ if not 'vid' in params:
+ params['vid'] = ['xml']
+ if etype is None:
+ try:
+ etype = url.rsplit('/', 1)[1]
+ except ValueError:
+ return url + '?' + self._cw.build_url_params(**params)
+ try:
+ etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
+ except KeyError:
+ return url + '?' + self._cw.build_url_params(**params)
+ relations = params.setdefault('relation', [])
+ for rtype, role, _ in self.source.mapping.get(etype, ()):
+ if known_relations and rtype in known_relations.get('role', ()):
+ continue
+ reldef = '%s-%s' % (rtype, role)
+ if not reldef in relations:
+ relations.append(reldef)
+ return url + '?' + self._cw.build_url_params(**params)
+
+ def complete_item(self, item, rels):
+ try:
+ return self._parsed_urls[item['cwuri']]
+ except KeyError:
+ itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels)
+ item_rels = list(self.parse(itemurl))
+ assert len(item_rels) == 1, 'url %s expected to bring back one '\
+ 'and only one entity, got %s' % (itemurl, len(item_rels))
+ self._parsed_urls[item['cwuri']] = item_rels[0]
+ if rels:
+ # XXX (do it better) merge relations
+ new_rels = item_rels[0][1]
+ new_rels.get('subject', {}).update(rels.get('subject', {}))
+ new_rels.get('object', {}).update(rels.get('object', {}))
+ return item_rels[0]
+
+
+class CWEntityXMLItemBuilder(Component):
+ __regid__ = 'cw.entityxml.item-builder'
+
+ def __init__(self, _cw, parser, node, **kwargs):
+ super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs)
+ self.parser = parser
+ self.node = node
+
+ def build_item(self):
+ """parse a XML document node and return two dictionaries defining (part
+ of) an entity:
+
+ - {attribute: value}
+ - {role: {relation: [(related item, related rels)...]}
+ """
+ node = self.node
+ item = dict(node.attrib.items())
+ item['cwtype'] = unicode(node.tag)
+ item.setdefault('cwsource', None)
+ try:
+ item['eid'] = typed_eid(item['eid'])
+ except KeyError:
+ # cw < 3.11 compat mode XXX
+ item['eid'] = typed_eid(node.find('eid').text)
+ item['cwuri'] = node.find('cwuri').text
+ rels = {}
+ for child in node:
+ role = child.get('role')
+ if role:
+ # relation
+ related = rels.setdefault(role, {}).setdefault(child.tag, [])
+ related += self.parser.parse_etree(child)
+ elif child.text:
+ # attribute
+ item[child.tag] = unicode(child.text)
+ else:
+ # None attribute (empty tag)
+ item[child.tag] = None
+ return item, rels
+
+
+class CWEntityXMLActionCopy(Component):
+ """implementation of cubicweb entity xml parser's'copy' action
+
+ Takes no option.
+ """
+ __regid__ = 'cw.entityxml.action.copy'
+
+ def __init__(self, _cw, parser, rtype, role, entity=None, **kwargs):
+ super(CWEntityXMLActionCopy, self).__init__(_cw, **kwargs)
+ self.parser = parser
+ self.rtype = rtype
+ self.role = role
+ self.entity = entity
+
+ @classproperty
+ def action(cls):
+ return cls.__regid__.rsplit('.', 1)[-1]
+
+ def check_options(self, options, eid):
+ self._check_no_options(options, eid)
+
+ def _check_no_options(self, options, eid, msg=None):
+ if options:
+ if msg is None:
+ msg = self._cw._("'%s' action doesn't take any options") % self.action
+ raise ValidationError(eid, {rn('options', 'subject'): msg})
+
+ def link_items(self, others, rules):
+ assert not any(x[1] for x in rules), "'copy' action takes no option"
+ ttypes = frozenset([x[0] for x in rules])
+ eids = [] # local eids
+ for item, rels in others:
+ if item['cwtype'] in ttypes:
+ item, rels = self.parser.complete_item(item, rels)
+ other_entity = self.parser.process_item(item, rels)
+ if other_entity is not None:
+ eids.append(other_entity.eid)
+ if eids:
+ self._set_relation(eids)
+ else:
+ self._clear_relation(ttypes)
+
+ def _clear_relation(self, ttypes):
+ if not self.parser.created_during_pull(self.entity):
+ if len(ttypes) > 1:
+ typerestr = ', Y is IN(%s)' % ','.join(ttypes)
+ else:
+ typerestr = ', Y is %s' % ','.join(ttypes)
+ self._cw.execute('DELETE ' + rtype_role_rql(self.rtype, self.role) + typerestr,
+ {'x': self.entity.eid})
+
+ def _set_relation(self, eids):
+ assert eids
+ rtype = self.rtype
+ rqlbase = rtype_role_rql(rtype, self.role)
+ eidstr = ','.join(str(eid) for eid in eids)
+ self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr),
+ {'x': self.entity.eid})
+ if self.role == 'object':
+ rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rqlbase, eidstr, rtype)
+ else:
+ rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rqlbase, eidstr, rtype)
+ self._cw.execute(rql, {'x': self.entity.eid})
+
+
+class CWEntityXMLActionLink(CWEntityXMLActionCopy):
+ """implementation of cubicweb entity xml parser's'link' action
+
+ requires a 'linkattr' option to control search of the linked entity.
+ """
+ __regid__ = 'cw.entityxml.action.link'
+
+ def check_options(self, options, eid):
+ if not 'linkattr' in options:
+ msg = self._cw._("'%s' action requires 'linkattr' option") % self.action
+ raise ValidationError(eid, {rn('options', 'subject'): msg})
+
+ create_when_not_found = False
+
+ def link_items(self, others, rules):
+ for ttype, options in rules:
+ searchattrs = splitstrip(options.get('linkattr', ''))
+ self._related_link(ttype, others, searchattrs)
+
+ def _related_link(self, ttype, others, searchattrs):
+ def issubset(x,y):
+ return all(z in y for z in x)
+ eids = [] # local eids
+ log = self.parser.import_log
+ for item, rels in others:
+ if item['cwtype'] != ttype:
+ continue
+ if not issubset(searchattrs, item):
+ item, rels = self.parser.complete_item(item, rels)
+ if not issubset(searchattrs, item):
+ log.record_error('missing attribute, got %s expected keys %s'
+ % (item, searchattrs))
+ continue
+ # XXX str() needed with python < 2.6
+ kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
+ targets = self._find_entities(item, kwargs)
+ if len(targets) == 1:
+ entity = targets[0]
+ elif not targets and self.create_when_not_found:
+ entity = self._cw.create_entity(item['cwtype'], **kwargs)
+ else:
+ if len(targets) > 1:
+ log.record_error('ambiguous link: found %s entity %s with attributes %s'
+ % (len(targets), item['cwtype'], kwargs))
+ else:
+ log.record_error('can not find %s entity with attributes %s'
+ % (item['cwtype'], kwargs))
+ continue
+ eids.append(entity.eid)
+ self.parser.process_relations(entity, rels)
+ if eids:
+ self._set_relation(eids)
+ else:
+ self._clear_relation((ttype,))
+
+ def _find_entities(self, item, kwargs):
+ return tuple(self._cw.find_entities(item['cwtype'], **kwargs))
+
+
+class CWEntityXMLActionLinkInState(CWEntityXMLActionLink):
+ """custom implementation of cubicweb entity xml parser's'link' action for
+ in_state relation
+ """
+ __select__ = match_rtype('in_state')
+
+ def check_options(self, options, eid):
+ super(CWEntityXMLActionLinkInState, self).check_options(options, eid)
+ if not 'name' in options['linkattr']:
+ msg = self._cw._("'%s' action for in_state relation should at least have 'linkattr=name' option") % self.action
+ raise ValidationError(eid, {rn('options', 'subject'): msg})
+
+ def _find_entities(self, item, kwargs):
+ assert 'name' in item # XXX else, complete_item
+ state_name = item['name']
+ wf = self.entity.cw_adapt_to('IWorkflowable').current_workflow
+ state = wf.state_by_name(state_name)
+ if state is None:
+ return ()
+ return (state,)
+
+
+class CWEntityXMLActionLinkOrCreate(CWEntityXMLActionLink):
+ """implementation of cubicweb entity xml parser's'link-or-create' action
+
+ requires a 'linkattr' option to control search of the linked entity.
+ """
+ __regid__ = 'cw.entityxml.action.link-or-create'
+ create_when_not_found = True