sobjects/parsers.py
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 08 Jun 2011 17:10:39 +0200
changeset 7474 7dc405ad7bf3
parent 7470 c3fc72ee720a
child 7478 9e213becdcf4
permissions -rw-r--r--
[datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
#
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
# This file is part of CubicWeb.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
#
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
# CubicWeb is free software: you can redistribute it and/or modify it under the
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# terms of the GNU Lesser General Public License as published by the Free
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# Software Foundation, either version 2.1 of the License, or (at your option)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# any later version.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
#
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# details.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
#
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
# You should have received a copy of the GNU Lesser General Public License along
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    18
"""datafeed parser for xml generated by cubicweb
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    19
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    20
Example of mapping for CWEntityXMLParser::
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    21
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    22
  {u'CWUser': {                                        # EntityType
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    23
      (u'in_group', u'subject', u'link'): [            # (rtype, role, action)
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    24
          (u'CWGroup', {u'linkattr': u'name'})],       #   -> rules = [(EntityType, options), ...]
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    25
      (u'tags', u'object', u'link-or-create'): [       # (...)
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    26
          (u'Tag', {u'linkattr': u'name'})],           #   -> ...
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    27
      (u'use_email', u'subject', u'copy'): [           # (...)
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    28
          (u'EmailAddress', {})]                       #   -> ...
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    29
      }
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    30
   }
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    31
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    32
"""
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
6963
5774d4ba4306 [datafeed] introduce a host mapping so dev instance may transparently redirect request to another host than the actual's one
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6960
diff changeset
    34
import os.path as osp
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
from datetime import datetime, timedelta
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
from logilab.common.date import todate, totime
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
from logilab.common.textutils import splitstrip, text_to_dict
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
from yams.constraints import BASE_CONVERTERS
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
from yams.schema import role_name as rn
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    42
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    43
from cubicweb import ValidationError, typed_eid
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
from cubicweb.server.sources import datafeed
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    45
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    46
# XXX see cubicweb.cwvreg.YAMS_TO_PY
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    47
# XXX see cubicweb.web.views.xmlrss.SERIALIZERS
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    48
DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    49
DEFAULT_CONVERTERS['String'] = unicode
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
def convert_date(ustr):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
    return todate(datetime.strptime(ustr, '%Y-%m-%d'))
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
DEFAULT_CONVERTERS['Date'] = convert_date
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    54
def convert_datetime(ustr):
7002
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    55
    if '.' in ustr: # assume %Y-%m-%d %H:%M:%S.mmmmmm
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    56
        ustr = ustr.split('.',1)[0]
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
    return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S')
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    58
DEFAULT_CONVERTERS['Datetime'] = convert_datetime
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
def convert_time(ustr):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
    return totime(datetime.strptime(ustr, '%H:%M:%S'))
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
DEFAULT_CONVERTERS['Time'] = convert_time
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
def convert_interval(ustr):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
    return time(seconds=int(ustr))
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
DEFAULT_CONVERTERS['Interval'] = convert_interval
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    66
def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
    typeddict = {}
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
    for rschema in eschema.subject_relations():
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
        if rschema.final and rschema in stringdict:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    70
            if rschema == 'eid':
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    71
                continue
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
            attrtype = eschema.destination(rschema)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
            typeddict[rschema.type] = converters[attrtype](stringdict[rschema])
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    74
    return typeddict
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    75
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    76
def _parse_entity_etree(parent):
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
    for node in list(parent):
7002
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    78
        try:
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    79
            item = {'cwtype': unicode(node.tag),
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    80
                    'cwuri': node.attrib['cwuri'],
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    81
                    'eid': typed_eid(node.attrib['eid']),
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    82
                    }
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    83
        except KeyError:
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    84
            # cw < 3.11 compat mode XXX
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    85
            item = {'cwtype': unicode(node.tag),
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    86
                    'cwuri': node.find('cwuri').text,
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    87
                    'eid': typed_eid(node.find('eid').text),
29f085f6177b [sobjects/parsers] compatibility with xml view of cw <= 3.10
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7001
diff changeset
    88
                    }
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    89
        rels = {}
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    90
        for child in node:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    91
            role = child.get('role')
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    92
            if role:
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    93
                # relation
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    94
                related = rels.setdefault(role, {}).setdefault(child.tag, [])
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
    95
                related += [ritem for ritem, _ in _parse_entity_etree(child)]
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
            else:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    97
                # attribute
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
                item[child.tag] = unicode(child.text)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
        yield item, rels
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
def rtype_role_rql(rtype, role):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
    if role == 'object':
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   103
        return 'Y %s X WHERE X eid %%(x)s' % rtype
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   104
    else:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   105
        return 'X %s Y WHERE X eid %%(x)s' % rtype
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   106
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   107
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   108
def _check_no_option(action, options, eid, _):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   109
    if options:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   110
        msg = _("'%s' action doesn't take any options") % action
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   111
        raise ValidationError(eid, {rn('options', 'subject'): msg})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   112
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
def _check_linkattr_option(action, options, eid, _):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   114
    if not 'linkattr' in options:
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   115
        msg = _("'%s' action requires 'linkattr' option") % action
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   116
        raise ValidationError(eid, {rn('options', 'subject'): msg})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
7378
86a1ae289f05 [datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7354
diff changeset
   119
class CWEntityXMLParser(datafeed.DataFeedXMLParser):
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   120
    """datafeed parser for the 'xml' entity view"""
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   121
    __regid__ = 'cw.entityxml'
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   122
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   123
    action_options = {
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   124
        'copy': _check_no_option,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   125
        'link-or-create': _check_linkattr_option,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   126
        'link': _check_linkattr_option,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   127
        }
7378
86a1ae289f05 [datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7354
diff changeset
   128
    parse_etree = staticmethod(_parse_entity_etree)
86a1ae289f05 [datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7354
diff changeset
   129
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   130
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   131
    def __init__(self, *args, **kwargs):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   132
        super(CWEntityXMLParser, self).__init__(*args, **kwargs)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   133
        self.action_methods = {
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   134
            'copy': self.related_copy,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   135
            'link-or-create': self.related_link_or_create,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   136
            'link': self.related_link,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   137
            }
7474
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   138
        self._parsed_urls = {}
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   139
        self._processed_entities = set()
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   140
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   141
    # mapping handling #########################################################
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   142
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   143
    def add_schema_config(self, schemacfg, checkonly=False):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   144
        """added CWSourceSchemaConfig, modify mapping accordingly"""
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   145
        _ = self._cw._
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
        try:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   147
            rtype = schemacfg.schema.rtype.name
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   148
        except AttributeError:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   149
            msg = _("entity and relation types can't be mapped, only attributes "
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   150
                    "or relations")
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   151
            raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   152
        if schemacfg.options:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
            options = text_to_dict(schemacfg.options)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   154
        else:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   155
            options = {}
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   156
        try:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   157
            role = options.pop('role')
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   158
            if role not in ('subject', 'object'):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   159
                raise KeyError
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   160
        except KeyError:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   161
            msg = _('"role=subject" or "role=object" must be specified in options')
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   162
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   163
        try:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   164
            action = options.pop('action')
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   165
            self.action_options[action](action, options, schemacfg.eid, _)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
        except KeyError:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
            msg = _('"action" must be specified in options; allowed values are '
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
                    '%s') % ', '.join(self.action_methods)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   169
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   170
        if not checkonly:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   171
            if role == 'subject':
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   172
                etype = schemacfg.schema.stype.name
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   173
                ttype = schemacfg.schema.otype.name
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   174
            else:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   175
                etype = schemacfg.schema.otype.name
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   176
                ttype = schemacfg.schema.stype.name
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   177
            etyperules = self.source.mapping.setdefault(etype, {})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
            etyperules.setdefault((rtype, role, action), []).append(
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
                (ttype, options) )
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   180
            self.source.mapping_idx[schemacfg.eid] = (
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   181
                etype, rtype, role, action, ttype)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   182
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   183
    def del_schema_config(self, schemacfg, checkonly=False):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   184
        """deleted CWSourceSchemaConfig, modify mapping accordingly"""
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   185
        etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid]
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   186
        rules = self.source.mapping[etype][(rtype, role, action)]
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   187
        rules = [x for x in rules if not x[0] == ttype]
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   188
        if not rules:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   189
            del self.source.mapping[etype][(rtype, role, action)]
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   190
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   191
    # import handling ##########################################################
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   192
7378
86a1ae289f05 [datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7354
diff changeset
   193
    # XXX suppression support according to source configuration. If set, get all
86a1ae289f05 [datafeed] extract a generic DataFeedXMLParser from CWEntityXMLParser
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7354
diff changeset
   194
    # cwuri of entities from this source, and compare with newly imported ones
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   195
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   196
    def process_item(self, item, rels):
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   197
        entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
                                   item=item)
7399
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   199
        if entity is None:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   200
            return None
7474
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   201
        if entity.eid in self._processed_entities:
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   202
            return entity
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   203
        self._processed_entities.add(entity.eid)
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   204
        if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   205
            self.notify_updated(entity)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   206
            item.pop('eid')
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   207
            # XXX check modification date
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   208
            attrs = extract_typed_attrs(entity.e_schema, item)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
            entity.set_attributes(**attrs)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   210
        for (rtype, role, action), rules in self.source.mapping.get(entity.__regid__, {}).iteritems():
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   211
            try:
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   212
                related_items = rels[role][rtype]
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   213
            except KeyError:
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   214
                self.source.error('relation %s-%s not found in xml export of %s',
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   215
                                  rtype, role, entity.__regid__)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   216
                continue
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   217
            try:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   218
                actionmethod = self.action_methods[action]
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   219
            except KeyError:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   220
                raise Exception('Unknown action %s' % action)
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   221
            actionmethod(entity, rtype, role, related_items, rules)
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   222
        return entity
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   223
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   224
    def before_entity_copy(self, entity, sourceparams):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   225
        """IDataFeedParser callback"""
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   226
        attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   227
        entity.cw_edited.update(attrs)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   228
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   229
    def related_copy(self, entity, rtype, role, others, rules):
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   230
        """implementation of 'copy' action
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   231
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   232
        Takes no option.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   233
        """
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   234
        assert not any(x[1] for x in rules), "'copy' action takes no option"
7399
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   235
        ttypes = frozenset([x[0] for x in rules])
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   236
        eids = [] # local eids
7399
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   237
        for item in others:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   238
            if item['cwtype'] in ttypes:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   239
                item, _rels = self._complete_item(item)
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   240
                other_entity = self.process_item(item, [])
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   241
                if other_entity is not None:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   242
                    eids.append(other_entity.eid)
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   243
        if eids:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   244
            self._set_relation(entity, rtype, role, eids)
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   245
        else:
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   246
            self._clear_relation(entity, rtype, role, ttypes)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   247
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   248
    def related_link(self, entity, rtype, role, others, rules):
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   249
        """implementation of 'link' action
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   250
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   251
        requires an options to control search of the linked entity.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   252
        """
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   253
        for ttype, options in rules:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   254
            assert 'linkattr' in options, (
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   255
                "'link' action requires a list of attributes used to "
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   256
                "search if the entity already exists")
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   257
            self._related_link(entity, rtype, role, ttype, others, [options['linkattr']],
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   258
                               create_when_not_found=False)
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   259
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   260
    def related_link_or_create(self, entity, rtype, role, others, rules):
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   261
        """implementation of 'link-or-create' action
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   262
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   263
        requires an options to control search of the linked entity.
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   264
        """
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   265
        for ttype, options in rules:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   266
            assert 'linkattr' in options, (
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   267
                "'link-or-create' action requires a list of attributes used to "
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   268
                "search if the entity already exists")
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   269
            self._related_link(entity, rtype, role, ttype, others, [options['linkattr']],
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   270
                               create_when_not_found=True)
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   271
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   272
    def _related_link(self, entity, rtype, role, ttype, others, searchattrs,
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   273
                      create_when_not_found):
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   274
        def issubset(x,y):
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   275
            return all(z in y for z in x)
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   276
        eids = [] # local eids
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   277
        for item in others:
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   278
            if item['cwtype'] != ttype:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   279
                continue
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   280
            if not issubset(searchattrs, item):
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   281
                item, _rels = self._complete_item(item, False)
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   282
                if not issubset(searchattrs, item):
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   283
                    self.source.error('missing attribute, got %s expected keys %s'
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   284
                                      % item, searchattrs)
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   285
                    continue
7470
c3fc72ee720a [parsers] fix python2.5 compat problem introduced by 7466:395c966a1cb3
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7466
diff changeset
   286
            kwargs = dict((str(attr), item[attr]) for attr in searchattrs) # XXX str() needed with python < 2.6
7466
395c966a1cb3 [parsers] remove build_search_rql() that duplicates find_entities()
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7399
diff changeset
   287
            targets = tuple(self._cw.find_entities(item['cwtype'], **kwargs))
395c966a1cb3 [parsers] remove build_search_rql() that duplicates find_entities()
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7399
diff changeset
   288
            if len(targets) > 1:
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   289
                self.source.error('ambiguous link: found %s entity %s with attributes %s',
7466
395c966a1cb3 [parsers] remove build_search_rql() that duplicates find_entities()
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7399
diff changeset
   290
                                  len(targets), item['cwtype'], kwargs)
395c966a1cb3 [parsers] remove build_search_rql() that duplicates find_entities()
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7399
diff changeset
   291
            elif len(targets) == 1:
395c966a1cb3 [parsers] remove build_search_rql() that duplicates find_entities()
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7399
diff changeset
   292
                eids.append(targets[0].eid)
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   293
            elif create_when_not_found:
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   294
                eids.append(self._cw.create_entity(item['cwtype'], **kwargs).eid)
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   295
            else:
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   296
                self.source.error('can not find %s entity with attributes %s',
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   297
                                  item['cwtype'], kwargs)
7399
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   298
        if eids:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   299
            self._set_relation(entity, rtype, role, eids)
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   300
        else:
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   301
            self._clear_relation(entity, rtype, role, (ttype,))
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   302
7354
f627ab500fda [parsers] various refactorings
Nicolas Chauvat <nicolas.chauvat@logilab.fr>
parents: 7351
diff changeset
   303
    def _complete_item(self, item, add_relations=True):
7474
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   304
        try:
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   305
            return self._parsed_urls[(item['cwuri'], add_relations)]
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   306
        except KeyError:
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   307
            itemurl = item['cwuri'] + '?vid=xml'
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   308
            if add_relations:
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   309
                for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   310
                    itemurl += '&relation=%s_%s' % (rtype, role)
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   311
            item_rels = list(self.parse(itemurl))
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   312
            assert len(item_rels) == 1, 'url %s expected to bring back one '\
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   313
                   'and only one entity, got %s' % (itemurl, len(item_rels))
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   314
            self._parsed_urls[(item['cwuri'], add_relations)] = item_rels[0]
7dc405ad7bf3 [datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7470
diff changeset
   315
            return item_rels[0]
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   316
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   317
    def _clear_relation(self, entity, rtype, role, ttypes):
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   318
        if entity.eid not in self.stats['created']:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   319
            if len(ttypes) > 1:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   320
                typerestr = ', Y is IN(%s)' % ','.join(ttypes)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   321
            else:
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   322
                typerestr = ', Y is %s' % ','.join(ttypes)
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   323
            self._cw.execute('DELETE ' + rtype_role_rql(rtype, role) + typerestr,
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   324
                             {'x': entity.eid})
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   325
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
    def _set_relation(self, entity, rtype, role, eids):
7399
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   327
        assert eids
7351
ed66f236715d fix _set_relation when no target eids, update datafeed source pull_data arguments to raise on error during tests
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7002
diff changeset
   328
        rqlbase = rtype_role_rql(rtype, role)
7399
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   329
        eidstr = ','.join(str(eid) for eid in eids)
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   330
        self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr),
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   331
                         {'x': entity.eid})
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   332
        if role == 'object':
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   333
            rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rqlbase, eidstr, rtype)
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   334
        else:
972ed1843bd8 [multi-sources] support for moving an entity from an external source (closes #343818)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7378
diff changeset
   335
            rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rqlbase, eidstr, rtype)
6960
822f2530570d [datafeed] add parser to import cubicweb xml
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   336
        self._cw.execute(rql, {'x': entity.eid})
6963
5774d4ba4306 [datafeed] introduce a host mapping so dev instance may transparently redirect request to another host than the actual's one
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6960
diff changeset
   337
5774d4ba4306 [datafeed] introduce a host mapping so dev instance may transparently redirect request to another host than the actual's one
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6960
diff changeset
   338
def registration_callback(vreg):
6970
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   339
    vreg.register_all(globals().values(), __name__)
6963
5774d4ba4306 [datafeed] introduce a host mapping so dev instance may transparently redirect request to another host than the actual's one
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6960
diff changeset
   340
    global HOST_MAPPING
6970
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   341
    HOST_MAPPING = {}
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   342
    if vreg.config.apphome:
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   343
        host_mapping_file = osp.join(vreg.config.apphome, 'hostmapping.py')
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   344
        if osp.exists(host_mapping_file):
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   345
            HOST_MAPPING = eval(file(host_mapping_file).read())
a6ccbfbacf3d [parser host mapping] take care to cases where apphome is None (c-c i18ncube command for instance)
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6963
diff changeset
   346
            vreg.info('using host mapping %s from %s', HOST_MAPPING, host_mapping_file)