sobjects/parsers.py
changeset 6960 822f2530570d
child 6963 5774d4ba4306
equal deleted inserted replaced
6959:037a0277db0a 6960:822f2530570d
       
     1 # copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     3 #
       
     4 # This file is part of CubicWeb.
       
     5 #
       
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
       
     7 # terms of the GNU Lesser General Public License as published by the Free
       
     8 # Software Foundation, either version 2.1 of the License, or (at your option)
       
     9 # any later version.
       
    10 #
       
    11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT
       
    12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
       
    13 # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
       
    14 # details.
       
    15 #
       
    16 # You should have received a copy of the GNU Lesser General Public License along
       
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
       
    18 """datafeed parser for xml generated by cubicweb"""
       
    19 
       
    20 import urllib2
       
    21 import StringIO
       
    22 from cookielib import CookieJar
       
    23 from datetime import datetime, timedelta
       
    24 
       
    25 from lxml import etree
       
    26 
       
    27 from logilab.common.date import todate, totime
       
    28 from logilab.common.textutils import splitstrip, text_to_dict
       
    29 
       
    30 from yams.constraints import BASE_CONVERTERS
       
    31 from yams.schema import role_name as rn
       
    32 
       
    33 from cubicweb import ValidationError, typed_eid
       
    34 from cubicweb.server.sources import datafeed
       
    35 
       
    36 
       
# Functions converting the text of an exported attribute back to a typed
# value, indexed by attribute type name. Work on a copy so that yams'
# BASE_CONVERTERS is left untouched.
# see cubicweb.web.views.xmlrss.SERIALIZERS
DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
DEFAULT_CONVERTERS['String'] = unicode
# password values are turned into utf8 encoded byte strings
DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')
       
    41 def convert_date(ustr):
       
    42     return todate(datetime.strptime(ustr, '%Y-%m-%d'))
       
# converter used for attributes of type 'Date'
DEFAULT_CONVERTERS['Date'] = convert_date
       
    44 def convert_datetime(ustr):
       
    45     return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S')
       
# converter used for attributes of type 'Datetime'
DEFAULT_CONVERTERS['Datetime'] = convert_datetime
       
    47 def convert_time(ustr):
       
    48     return totime(datetime.strptime(ustr, '%H:%M:%S'))
       
# converter used for attributes of type 'Time'
DEFAULT_CONVERTERS['Time'] = convert_time
       
    50 def convert_interval(ustr):
       
    51     return time(seconds=int(ustr))
       
# converter used for attributes of type 'Interval'
DEFAULT_CONVERTERS['Interval'] = convert_interval
       
    53 
       
# Module wide url opener. It handles cookies through a CookieJar, so that a
# session cookie received in a response, if any, is sent back with later
# requests.
_OPENER = urllib2.build_opener(urllib2.HTTPCookieProcessor(CookieJar()))
       
    56 
       
    57 def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
       
    58     typeddict = {}
       
    59     for rschema in eschema.subject_relations():
       
    60         if rschema.final and rschema in stringdict:
       
    61             if rschema == 'eid':
       
    62                 continue
       
    63             attrtype = eschema.destination(rschema)
       
    64             typeddict[rschema.type] = converters[attrtype](stringdict[rschema])
       
    65     return typeddict
       
    66 
       
    67 def _entity_etree(parent):
       
    68     for node in list(parent):
       
    69         item = {'cwtype': unicode(node.tag),
       
    70                 'cwuri': node.attrib['cwuri'],
       
    71                 'eid': typed_eid(node.attrib['eid']),
       
    72                 }
       
    73         rels = {}
       
    74         for child in node:
       
    75             role = child.get('role')
       
    76             if child.get('role'):
       
    77                 # relation
       
    78                 related = rels.setdefault(role, {}).setdefault(child.tag, [])
       
    79                 related += [ritem for ritem, _ in _entity_etree(child)]
       
    80             else:
       
    81                 # attribute
       
    82                 item[child.tag] = unicode(child.text)
       
    83         yield item, rels
       
    84 
       
    85 def build_search_rql(etype, attrs):
       
    86     restrictions = []
       
    87     for attr in attrs:
       
    88         restrictions.append('X %(attr)s %%(%(attr)s)s' % {'attr': attr})
       
    89     return 'Any X WHERE X is %s, %s' % (etype, ','.join(restrictions))
       
    90 
       
    91 def rtype_role_rql(rtype, role):
       
    92     if role == 'object':
       
    93         return 'Y %s X WHERE X eid %%(x)s' % rtype
       
    94     else:
       
    95         return 'X %s Y WHERE X eid %%(x)s' % rtype
       
    96 
       
    97 
       
    98 def _check_no_option(action, options, eid, _):
       
    99     if options:
       
   100         msg = _("'%s' action doesn't take any options") % action
       
   101         raise ValidationError(eid, {rn('options', 'subject'): msg})
       
   102 
       
   103 def _check_linkattr_option(action, options, eid, _):
       
   104     if not 'linkattr' in options:
       
   105         msg = _("'%s' action require 'linkattr' option") % action
       
   106         raise ValidationError(eid, {rn('options', 'subject'): msg})
       
   107 
       
   108 
       
   109 class CWEntityXMLParser(datafeed.DataFeedParser):
       
   110     """datafeed parser for the 'xml' entity view"""
       
   111     __regid__ = 'cw.entityxml'
       
   112 
       
   113     action_options = {
       
   114         'copy': _check_no_option,
       
   115         'link-or-create': _check_linkattr_option,
       
   116         'link': _check_linkattr_option,
       
   117         }
       
   118 
       
   119     def __init__(self, *args, **kwargs):
       
   120         super(CWEntityXMLParser, self).__init__(*args, **kwargs)
       
   121         self.action_methods = {
       
   122             'copy': self.related_copy,
       
   123             'link-or-create': self.related_link_or_create,
       
   124             'link': self.related_link,
       
   125             }
       
   126 
       
   127     # mapping handling #########################################################
       
   128 
       
   129     def add_schema_config(self, schemacfg, checkonly=False):
       
   130         """added CWSourceSchemaConfig, modify mapping accordingly"""
       
   131         _ = self._cw._
       
   132         try:
       
   133             rtype = schemacfg.schema.rtype.name
       
   134         except AttributeError:
       
   135             msg = _("entity and relation types can't be mapped, only attributes "
       
   136                     "or relations")
       
   137             raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg})
       
   138         if schemacfg.options:
       
   139             options = text_to_dict(schemacfg.options)
       
   140         else:
       
   141             options = {}
       
   142         try:
       
   143             role = options.pop('role')
       
   144             if role not in ('subject', 'object'):
       
   145                 raise KeyError
       
   146         except KeyError:
       
   147             msg = _('"role=subject" or "role=object" must be specified in options')
       
   148             raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
       
   149         try:
       
   150             action = options.pop('action')
       
   151             self.action_options[action](action, options, schemacfg.eid, _)
       
   152         except KeyError:
       
   153             msg = _('"action" must be specified in options; allowed values are '
       
   154                     '%s') % ', '.join(self.action_methods)
       
   155             raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
       
   156         if not checkonly:
       
   157             if role == 'subject':
       
   158                 etype = schemacfg.schema.stype.name
       
   159                 ttype = schemacfg.schema.otype.name
       
   160             else:
       
   161                 etype = schemacfg.schema.otype.name
       
   162                 ttype = schemacfg.schema.stype.name
       
   163             etyperules = self.source.mapping.setdefault(etype, {})
       
   164             etyperules.setdefault((rtype, role, action), []).append(
       
   165                 (ttype, options) )
       
   166             self.source.mapping_idx[schemacfg.eid] = (
       
   167                 etype, rtype, role, action, ttype)
       
   168 
       
   169     def del_schema_config(self, schemacfg, checkonly=False):
       
   170         """deleted CWSourceSchemaConfig, modify mapping accordingly"""
       
   171         etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid]
       
   172         rules = self.source.mapping[etype][(rtype, role, action)]
       
   173         rules = [x for x in rules if not x[0] == ttype]
       
   174         if not rules:
       
   175             del self.source.mapping[etype][(rtype, role, action)]
       
   176 
       
   177     # import handling ##########################################################
       
   178 
       
   179     def process(self, url, partialcommit=True):
       
   180         """IDataFeedParser main entry point"""
       
   181         # XXX suppression support according to source configuration. If set, get
       
   182         # all cwuri of entities from this source, and compare with newly
       
   183         # imported ones
       
   184         for item, rels in self.parse(url):
       
   185             self.process_item(item, rels)
       
   186             if partialcommit:
       
   187                 # commit+set_pool instead of commit(reset_pool=False) to let
       
   188                 # other a chance to get our pool
       
   189                 self._cw.commit()
       
   190                 self._cw.set_pool()
       
   191 
       
   192     def parse(self, url):
       
   193         if not url.startswith('http'):
       
   194             stream = StringIO.StringIO(url)
       
   195         else:
       
   196             self.source.info('GET %s', url)
       
   197             stream = _OPENER.open(url)
       
   198         return _entity_etree(etree.parse(stream).getroot())
       
   199 
       
   200     def process_one(self, url):
       
   201         # XXX assert len(root.children) == 1
       
   202         for item, rels in self.parse(url):
       
   203             return self.process_item(item, rels)
       
   204 
       
   205     def process_item(self, item, rels):
       
   206         entity = self.extid2entity(str(item.pop('cwuri')),
       
   207                                    item.pop('cwtype'),
       
   208                                    item=item)
       
   209         if not (self.created_during_pull(entity)
       
   210                 or self.updated_during_pull(entity)):
       
   211             self.notify_updated(entity)
       
   212             item.pop('eid')
       
   213             # XXX check modification date
       
   214             attrs = extract_typed_attrs(entity.e_schema, item)
       
   215             entity.set_attributes(**attrs)
       
   216         for (rtype, role, action), rules in self.source.mapping.get(entity.__regid__, {}).iteritems():
       
   217             try:
       
   218                 rel = rels[role][rtype]
       
   219             except KeyError:
       
   220                 self.source.error('relation %s-%s doesn\'t seem exported in %s xml',
       
   221                                   rtype, role, entity.__regid__)
       
   222                 continue
       
   223             try:
       
   224                 actionmethod = self.action_methods[action]
       
   225             except KeyError:
       
   226                 raise Exception('Unknown action %s' % action)
       
   227             actionmethod(entity, rtype, role, rel, rules)
       
   228         return entity
       
   229 
       
   230     def before_entity_copy(self, entity, sourceparams):
       
   231         """IDataFeedParser callback"""
       
   232         attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
       
   233         entity.cw_edited.update(attrs)
       
   234 
       
   235     def related_copy(self, entity, rtype, role, value, rules):
       
   236         """implementation of 'copy' action
       
   237 
       
   238         Takes no option.
       
   239         """
       
   240         assert not any(x[1] for x in rules), "'copy' action takes no option"
       
   241         ttypes = set([x[0] for x in rules])
       
   242         value = [item for item in value if item['cwtype'] in ttypes]
       
   243         eids = [] # local eids
       
   244         if not value:
       
   245             self._clear_relation(entity, rtype, role, ttypes)
       
   246             return
       
   247         for item in value:
       
   248             eids.append(self.process_one(self._complete_url(item)).eid)
       
   249         self._set_relation(entity, rtype, role, eids)
       
   250 
       
   251     def related_link(self, entity, rtype, role, value, rules):
       
   252         """implementation of 'link' action
       
   253 
       
   254         requires an options to control search of the linked entity.
       
   255         """
       
   256         for ttype, options in rules:
       
   257             assert 'linkattr' in options, (
       
   258                 "'link-or-create' action require a list of attributes used to "
       
   259                 "search if the entity already exists")
       
   260             self._related_link(entity, rtype, role, ttype, value, [options['linkattr']],
       
   261                                self._log_not_found)
       
   262 
       
   263     def related_link_or_create(self, entity, rtype, role, value, rules):
       
   264         """implementation of 'link-or-create' action
       
   265 
       
   266         requires an options to control search of the linked entity.
       
   267         """
       
   268         for ttype, options in rules:
       
   269             assert 'linkattr' in options, (
       
   270                 "'link-or-create' action require a list of attributes used to "
       
   271                 "search if the entity already exists")
       
   272             self._related_link(entity, rtype, role, ttype, value, [options['linkattr']],
       
   273                                self._create_not_found)
       
   274 
       
   275     def _log_not_found(self, entity, rtype, role, ritem, searchvalues):
       
   276         self.source.error('can find %s entity with attributes %s',
       
   277                           ritem['cwtype'], searchvalues)
       
   278 
       
   279     def _create_not_found(self, entity, rtype, role, ritem, searchvalues):
       
   280         return self._cw.create_entity(ritem['cwtype'], **searchvalues).eid
       
   281 
       
   282     def _related_link(self, entity, rtype, role, ttype, value, searchattrs,
       
   283                       notfound_callback):
       
   284         eids = [] # local eids
       
   285         for item in value:
       
   286             if item['cwtype'] != ttype:
       
   287                 continue
       
   288             if not all(attr in item for attr in searchattrs):
       
   289                 # need to fetch related entity's xml
       
   290                 ritems = list(self.parse(self._complete_url(item, False)))
       
   291                 assert len(ritems) == 1, 'unexpected xml'
       
   292                 ritem = ritems[0][0] # list of 2-uples
       
   293                 assert all(attr in ritem for attr in searchattrs), \
       
   294                        'missing attribute, got %s expected keys %s' % (item, searchattrs)
       
   295             else:
       
   296                 ritem = item
       
   297             kwargs = dict((attr, ritem[attr]) for attr in searchattrs)
       
   298             rql = build_search_rql(item['cwtype'], kwargs)
       
   299             rset = self._cw.execute(rql, kwargs)
       
   300             if rset:
       
   301                 assert len(rset) == 1
       
   302                 eids.append(rset[0][0])
       
   303             else:
       
   304                 eid = notfound_callback(entity, rtype, role, ritem, kwargs)
       
   305                 if eid is not None:
       
   306                     eids.append(eid)
       
   307         if not eids:
       
   308             self._clear_relation(entity, rtype, role, (ttype,))
       
   309         else:
       
   310             self._set_relation(entity, rtype, role, eids)
       
   311 
       
   312     def _complete_url(self, item, add_relations=True):
       
   313         itemurl = item['cwuri'] + '?vid=xml'
       
   314         for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
       
   315             itemurl += '&relation=%s_%s' % (rtype, role)
       
   316         return itemurl
       
   317 
       
   318     def _clear_relation(self, entity, rtype, role, ttypes):
       
   319         if entity.eid not in self.stats['created']:
       
   320             if len(ttypes) > 1:
       
   321                 typerestr = ', Y is IN(%s)' % ','.join(ttypes)
       
   322             else:
       
   323                 typerestr = ', Y is %s' % ','.join(ttypes)
       
   324             self._cw.execute('DELETE ' + rtype_role_rql(rtype, role) + typerestr,
       
   325                              {'x': entity.eid})
       
   326 
       
   327     def _set_relation(self, entity, rtype, role, eids):
       
   328         eidstr = ','.join(str(eid) for eid in eids)
       
   329         rql = rtype_role_rql(rtype, role)
       
   330         self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rql, eidstr),
       
   331                          {'x': entity.eid})
       
   332         if role == 'object':
       
   333             rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rql, eidstr, rtype)
       
   334         else:
       
   335             rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rql, eidstr, rtype)
       
   336         self._cw.execute(rql, {'x': entity.eid})