sobjects/parsers.py
changeset 7378 86a1ae289f05
parent 7354 f627ab500fda
child 7399 972ed1843bd8
equal deleted inserted replaced
7377:d8083b2ae4d6 7378:86a1ae289f05
    29       }
    29       }
    30    }
    30    }
    31 
    31 
    32 """
    32 """
    33 
    33 
    34 import urllib2
       
    35 import StringIO
       
    36 import os.path as osp
    34 import os.path as osp
    37 from cookielib import CookieJar
       
    38 from datetime import datetime, timedelta
    35 from datetime import datetime, timedelta
    39 
       
    40 from lxml import etree
       
    41 
    36 
    42 from logilab.common.date import todate, totime
    37 from logilab.common.date import todate, totime
    43 from logilab.common.textutils import splitstrip, text_to_dict
    38 from logilab.common.textutils import splitstrip, text_to_dict
    44 
    39 
    45 from yams.constraints import BASE_CONVERTERS
    40 from yams.constraints import BASE_CONVERTERS
    69     return totime(datetime.strptime(ustr, '%H:%M:%S'))
    64     return totime(datetime.strptime(ustr, '%H:%M:%S'))
    70 DEFAULT_CONVERTERS['Time'] = convert_time
    65 DEFAULT_CONVERTERS['Time'] = convert_time
    71 def convert_interval(ustr):
    66 def convert_interval(ustr):
    72     return time(seconds=int(ustr))
    67     return time(seconds=int(ustr))
    73 DEFAULT_CONVERTERS['Interval'] = convert_interval
    68 DEFAULT_CONVERTERS['Interval'] = convert_interval
    74 
       
    75 # use a cookie enabled opener to use session cookie if any
       
    76 _OPENER = urllib2.build_opener()
       
    77 try:
       
    78     from logilab.common import urllib2ext
       
    79     _OPENER.add_handler(urllib2ext.HTTPGssapiAuthHandler())
       
    80 except ImportError: # python-kerberos not available
       
    81     pass
       
    82 _OPENER.add_handler(urllib2.HTTPCookieProcessor(CookieJar()))
       
    83 
    69 
    84 def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
    70 def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
    85     typeddict = {}
    71     typeddict = {}
    86     for rschema in eschema.subject_relations():
    72     for rschema in eschema.subject_relations():
    87         if rschema.final and rschema in stringdict:
    73         if rschema.final and rschema in stringdict:
   136     if not 'linkattr' in options:
   122     if not 'linkattr' in options:
   137         msg = _("'%s' action requires 'linkattr' option") % action
   123         msg = _("'%s' action requires 'linkattr' option") % action
   138         raise ValidationError(eid, {rn('options', 'subject'): msg})
   124         raise ValidationError(eid, {rn('options', 'subject'): msg})
   139 
   125 
   140 
   126 
   141 class CWEntityXMLParser(datafeed.DataFeedParser):
   127 class CWEntityXMLParser(datafeed.DataFeedXMLParser):
   142     """datafeed parser for the 'xml' entity view"""
   128     """datafeed parser for the 'xml' entity view"""
   143     __regid__ = 'cw.entityxml'
   129     __regid__ = 'cw.entityxml'
   144 
   130 
   145     action_options = {
   131     action_options = {
   146         'copy': _check_no_option,
   132         'copy': _check_no_option,
   147         'link-or-create': _check_linkattr_option,
   133         'link-or-create': _check_linkattr_option,
   148         'link': _check_linkattr_option,
   134         'link': _check_linkattr_option,
   149         }
   135         }
       
   136     parse_etree = staticmethod(_parse_entity_etree)
       
   137 
   150 
   138 
   151     def __init__(self, *args, **kwargs):
   139     def __init__(self, *args, **kwargs):
   152         super(CWEntityXMLParser, self).__init__(*args, **kwargs)
   140         super(CWEntityXMLParser, self).__init__(*args, **kwargs)
   153         self.action_methods = {
   141         self.action_methods = {
   154             'copy': self.related_copy,
   142             'copy': self.related_copy,
   206         if not rules:
   194         if not rules:
   207             del self.source.mapping[etype][(rtype, role, action)]
   195             del self.source.mapping[etype][(rtype, role, action)]
   208 
   196 
   209     # import handling ##########################################################
   197     # import handling ##########################################################
   210 
   198 
   211     def process(self, url, partialcommit=True):
   199     # XXX suppression support according to source configuration. If set, get all
   212         """IDataFeedParser main entry point"""
   200     # cwuri of entities from this source, and compare with newly imported ones
   213         # XXX suppression support according to source configuration. If set, get
       
   214         # all cwuri of entities from this source, and compare with newly
       
   215         # imported ones
       
   216         error = False
       
   217         for item, rels in self.parse(url):
       
   218             cwuri = item['cwuri']
       
   219             try:
       
   220                 self.process_item(item, rels)
       
   221                 if partialcommit:
       
   222                     # commit+set_pool instead of commit(reset_pool=False) to let
       
   223                     # other a chance to get our pool
       
   224                     self._cw.commit()
       
   225                     self._cw.set_pool()
       
   226             except ValidationError, exc:
       
   227                 if partialcommit:
       
   228                     self.source.error('Skipping %s because of validation error %s' % (cwuri, exc))
       
   229                     self._cw.rollback()
       
   230                     self._cw.set_pool()
       
   231                     error = True
       
   232                 else:
       
   233                     raise
       
   234         return error
       
   235 
       
   236     def parse(self, url):
       
   237         if not url.startswith('http'):
       
   238             stream = StringIO.StringIO(url)
       
   239         else:
       
   240             for mappedurl in HOST_MAPPING:
       
   241                 if url.startswith(mappedurl):
       
   242                     url = url.replace(mappedurl, HOST_MAPPING[mappedurl], 1)
       
   243                     break
       
   244             self.source.info('GET %s', url)
       
   245             stream = _OPENER.open(url)
       
   246         return _parse_entity_etree(etree.parse(stream).getroot())
       
   247 
   201 
   248     def process_item(self, item, rels):
   202     def process_item(self, item, rels):
   249         entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
   203         entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
   250                                    item=item)
   204                                    item=item)
   251         if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
   205         if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):