sobjects/test/unittest_parsers.py
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 31 Jan 2012 21:43:24 +0100
changeset 8188 1867e252e487
parent 7957 5da7ede69063
permissions -rw-r--r--
[repository] ldap-feed source. Closes #2086984 datafeed based source which copy a subtree of the ldap directory into the system database. Authentication still go through ldap though. Pros: * don't need temporary tables and such for multi-sources RQL queries execution * much more flexible to enhance / configure behaviour (you simply have to replace the parser) * run better when ldap isn't reachable Cons: * no more 'on the fly' discovery of users (though a user authenticating itself will be automatically added if it doesn't exist in the db yet) * synchronization may be heavy if there are a lot of users A new cw.server.ldaputils containing code in common between former ldapuser and new ldapfeed sources has been introduced. Also ldapuser source now uses url instead of custom host/protocol option so it looks like a datafeed source (could be improved).

# copyright 2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import with_statement

from datetime import datetime

from cubicweb.devtools.testlib import CubicWebTC

from cubicweb.sobjects.parsers import CWEntityXMLParser

orig_parse = CWEntityXMLParser.parse

def parse(self, url):
    try:
        url = RELATEDXML[url.split('?')[0]]
    except KeyError:
        pass
    return orig_parse(self, url)

def setUpModule():
    CWEntityXMLParser.parse = parse

def tearDownModule():
    CWEntityXMLParser.parse = orig_parse


BASEXML = ''.join(u'''
<rset size="1">
 <CWUser eid="5" cwuri="http://pouet.org/5" cwsource="system">
  <login>sthenault</login>
  <upassword>toto</upassword>
  <last_login_time>2011-01-25 14:14:06</last_login_time>
  <creation_date>2010-01-22 10:27:59</creation_date>
  <modification_date>2011-01-25 14:14:06</modification_date>
  <use_email role="subject">
    <EmailAddress cwuri="http://pouet.org/6" eid="6"/>
  </use_email>
  <in_group role="subject">
    <CWGroup cwuri="http://pouet.org/7" eid="7"/>
    <CWGroup cwuri="http://pouet.org/8" eid="8"/>
  </in_group>
  <tags role="object">
    <Tag cwuri="http://pouet.org/9" eid="9"/>
    <Tag cwuri="http://pouet.org/10" eid="10"/>
  </tags>
  <in_state role="subject">
    <State cwuri="http://pouet.org/11" eid="11" name="activated"/>
  </in_state>
 </CWUser>
</rset>
'''.splitlines())

RELATEDXML = {
    'http://pouet.org/6': u'''
<rset size="1">
 <EmailAddress eid="6" cwuri="http://pouet.org/6">
  <address>syt@logilab.fr</address>
  <modification_date>2010-04-13 14:35:56</modification_date>
  <creation_date>2010-04-13 14:35:56</creation_date>
  <tags role="object">
    <Tag cwuri="http://pouet.org/9" eid="9"/>
  </tags>
 </EmailAddress>
</rset>
''',
    'http://pouet.org/7': u'''
<rset size="1">
 <CWGroup eid="7" cwuri="http://pouet.org/7">
  <name>users</name>
  <tags role="object">
    <Tag cwuri="http://pouet.org/9" eid="9"/>
  </tags>
 </CWGroup>
</rset>
''',
    'http://pouet.org/8': u'''
<rset size="1">
 <CWGroup eid="8" cwuri="http://pouet.org/8">
  <name>unknown</name>
 </CWGroup>
</rset>
''',
    'http://pouet.org/9': u'''
<rset size="1">
 <Tag eid="9" cwuri="http://pouet.org/9">
  <name>hop</name>
 </Tag>
</rset>
''',
    'http://pouet.org/10': u'''
<rset size="1">
 <Tag eid="10" cwuri="http://pouet.org/10">
  <name>unknown</name>
 </Tag>
</rset>
''',
    }


OTHERXML = ''.join(u'''
<rset size="1">
 <CWUser eid="5" cwuri="http://pouet.org/5" cwsource="myfeed">
  <login>sthenault</login>
  <upassword>toto</upassword>
  <last_login_time>2011-01-25 14:14:06</last_login_time>
  <creation_date>2010-01-22 10:27:59</creation_date>
  <modification_date>2011-01-25 14:14:06</modification_date>
  <in_group role="subject">
    <CWGroup cwuri="http://pouet.org/7" eid="7"/>
  </in_group>
 </CWUser>
</rset>
'''.splitlines()
)


class CWEntityXMLParserTC(CubicWebTC):
    """/!\ this test use a pre-setup database /!\, if you modify above xml,
    REMOVE THE DATABASE TEMPLATE else it won't be considered
    """
    test_db_id = 'xmlparser'
    @classmethod
    def pre_setup_database(cls, session, config):
        myfeed = session.create_entity('CWSource', name=u'myfeed', type=u'datafeed',
                                   parser=u'cw.entityxml', url=BASEXML)
        myotherfeed = session.create_entity('CWSource', name=u'myotherfeed', type=u'datafeed',
                                            parser=u'cw.entityxml', url=OTHERXML)
        session.commit()
        myfeed.init_mapping([(('CWUser', 'use_email', '*'),
                              u'role=subject\naction=copy'),
                             (('CWUser', 'in_group', '*'),
                              u'role=subject\naction=link\nlinkattr=name'),
                             (('CWUser', 'in_state', '*'),
                              u'role=subject\naction=link\nlinkattr=name'),
                             (('*', 'tags', '*'),
                              u'role=object\naction=link-or-create\nlinkattr=name'),
                            ])
        myotherfeed.init_mapping([(('CWUser', 'in_group', '*'),
                                   u'role=subject\naction=link\nlinkattr=name'),
                                  (('CWUser', 'in_state', '*'),
                                   u'role=subject\naction=link\nlinkattr=name'),
                                  ])
        session.create_entity('Tag', name=u'hop')

    def test_complete_url(self):
        dfsource = self.repo.sources_by_uri['myfeed']
        parser = dfsource._get_parser(self.session)
        self.assertEqual(parser.complete_url('http://www.cubicweb.org/CWUser'),
                         'http://www.cubicweb.org/CWUser?relation=tags-object&relation=in_group-subject&relation=in_tate-subject&relation=use_email-subject&vid=xml')
        self.assertEqual(parser.complete_url('http://www.cubicweb.org/cwuser'),
                         'http://www.cubicweb.org/cwuser?relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject&vid=xml')
        self.assertEqual(parser.complete_url('http://www.cubicweb.org/cwuser?vid=rdf&relation=hop'),
                         'http://www.cubicweb.org/cwuser?relation=hop&relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject&vid=rdf')
        self.assertEqual(parser.complete_url('http://www.cubicweb.org/?rql=cwuser&vid=rdf&relation=hop'),
                         'http://www.cubicweb.org/?rql=cwuser&relation=hop&vid=rdf')
        self.assertEqual(parser.complete_url('http://www.cubicweb.org/?rql=cwuser&relation=hop'),
                         'http://www.cubicweb.org/?rql=cwuser&relation=hop&vid=xml')


    def test_actions(self):
        dfsource = self.repo.sources_by_uri['myfeed']
        self.assertEqual(dfsource.mapping,
                         {u'CWUser': {
                             (u'in_group', u'subject', u'link'): [
                                 (u'CWGroup', {u'linkattr': u'name'})],
                             (u'in_state', u'subject', u'link'): [
                                 (u'State', {u'linkattr': u'name'})],
                             (u'tags', u'object', u'link-or-create'): [
                                 (u'Tag', {u'linkattr': u'name'})],
                             (u'use_email', u'subject', u'copy'): [
                                 (u'EmailAddress', {})]
                             },
                          u'CWGroup': {
                             (u'tags', u'object', u'link-or-create'): [
                                 (u'Tag', {u'linkattr': u'name'})],
                             },
                          u'EmailAddress': {
                             (u'tags', u'object', u'link-or-create'): [
                                 (u'Tag', {u'linkattr': u'name'})],
                             },
                          })
        session = self.repo.internal_session(safe=True)
        stats = dfsource.pull_data(session, force=True, raise_on_error=True)
        self.assertEqual(sorted(stats.keys()), ['created', 'updated'])
        self.assertEqual(len(stats['created']), 2)
        self.assertEqual(stats['updated'], set())

        user = self.execute('CWUser X WHERE X login "sthenault"').get_entity(0, 0)
        self.assertEqual(user.creation_date, datetime(2010, 01, 22, 10, 27, 59))
        self.assertEqual(user.modification_date, datetime(2011, 01, 25, 14, 14, 06))
        self.assertEqual(user.cwuri, 'http://pouet.org/5')
        self.assertEqual(user.cw_source[0].name, 'myfeed')
        self.assertEqual(user.absolute_url(), 'http://pouet.org/5')
        self.assertEqual(len(user.use_email), 1)
        # copy action
        email = user.use_email[0]
        self.assertEqual(email.address, 'syt@logilab.fr')
        self.assertEqual(email.cwuri, 'http://pouet.org/6')
        self.assertEqual(email.absolute_url(), 'http://pouet.org/6')
        self.assertEqual(email.cw_source[0].name, 'myfeed')
        self.assertEqual(len(email.reverse_tags), 1)
        self.assertEqual(email.reverse_tags[0].name, 'hop')
        # link action
        self.assertFalse(self.execute('CWGroup X WHERE X name "unknown"'))
        groups = sorted([g.name for g in user.in_group])
        self.assertEqual(groups, ['users'])
        group = user.in_group[0]
        self.assertEqual(len(group.reverse_tags), 1)
        self.assertEqual(group.reverse_tags[0].name, 'hop')
        # link or create action
        tags = set([(t.name, t.cwuri.replace(str(t.eid), ''), t.cw_source[0].name)
                    for t in user.reverse_tags])
        self.assertEqual(tags, set((('hop', 'http://testing.fr/cubicweb/', 'system'),
                                    ('unknown', 'http://testing.fr/cubicweb/', 'system')))
                         )
        session.set_cnxset()
        with session.security_enabled(read=False): # avoid Unauthorized due to password selection
            stats = dfsource.pull_data(session, force=True, raise_on_error=True)
        self.assertEqual(stats['created'], set())
        self.assertEqual(len(stats['updated']), 2)
        self.repo._type_source_cache.clear()
        self.repo._extid_cache.clear()
        session.set_cnxset()
        with session.security_enabled(read=False): # avoid Unauthorized due to password selection
            stats = dfsource.pull_data(session, force=True, raise_on_error=True)
        self.assertEqual(stats['created'], set())
        self.assertEqual(len(stats['updated']), 2)
        session.commit()

        # test move to system source
        self.sexecute('SET X cw_source S WHERE X eid %(x)s, S name "system"', {'x': email.eid})
        self.commit()
        rset = self.sexecute('EmailAddress X WHERE X address "syt@logilab.fr"')
        self.assertEqual(len(rset), 1)
        e = rset.get_entity(0, 0)
        self.assertEqual(e.eid, email.eid)
        self.assertEqual(e.cw_metainformation(), {'source': {'type': u'native', 'uri': u'system',
                                                             'use-cwuri-as-url': False},
                                                  'type': 'EmailAddress',
                                                  'extid': None})
        self.assertEqual(e.cw_source[0].name, 'system')
        self.assertEqual(e.reverse_use_email[0].login, 'sthenault')
        self.commit()
        # test everything is still fine after source synchronization
        session.set_cnxset()
        with session.security_enabled(read=False): # avoid Unauthorized due to password selection
            stats = dfsource.pull_data(session, force=True, raise_on_error=True)
        rset = self.sexecute('EmailAddress X WHERE X address "syt@logilab.fr"')
        self.assertEqual(len(rset), 1)
        e = rset.get_entity(0, 0)
        self.assertEqual(e.eid, email.eid)
        self.assertEqual(e.cw_metainformation(), {'source': {'type': u'native', 'uri': u'system',
                                                             'use-cwuri-as-url': False},
                                                  'type': 'EmailAddress',
                                                  'extid': None})
        self.assertEqual(e.cw_source[0].name, 'system')
        self.assertEqual(e.reverse_use_email[0].login, 'sthenault')
        session.commit()

        # test delete entity
        e.cw_delete()
        self.commit()
        # test everything is still fine after source synchronization
        session.set_cnxset()
        with session.security_enabled(read=False): # avoid Unauthorized due to password selection
            stats = dfsource.pull_data(session, force=True, raise_on_error=True)
        rset = self.sexecute('EmailAddress X WHERE X address "syt@logilab.fr"')
        self.assertEqual(len(rset), 0)
        rset = self.sexecute('Any X WHERE X use_email E, X login "sthenault"')
        self.assertEqual(len(rset), 0)

    def test_external_entity(self):
        dfsource = self.repo.sources_by_uri['myotherfeed']
        session = self.repo.internal_session(safe=True)
        stats = dfsource.pull_data(session, force=True, raise_on_error=True)
        user = self.execute('CWUser X WHERE X login "sthenault"').get_entity(0, 0)
        self.assertEqual(user.creation_date, datetime(2010, 01, 22, 10, 27, 59))
        self.assertEqual(user.modification_date, datetime(2011, 01, 25, 14, 14, 06))
        self.assertEqual(user.cwuri, 'http://pouet.org/5')
        self.assertEqual(user.cw_source[0].name, 'myfeed')

    def test_noerror_missing_fti_attribute(self):
        dfsource = self.repo.sources_by_uri['myfeed']
        session = self.repo.internal_session(safe=True)
        parser = dfsource._get_parser(session)
        dfsource.process_urls(parser, ['''
<rset size="1">
 <Card eid="50" cwuri="http://pouet.org/50" cwsource="system">
  <title>how-to</title>
 </Card>
</rset>
'''], raise_on_error=True)

    def test_noerror_unspecified_date(self):
        dfsource = self.repo.sources_by_uri['myfeed']
        session = self.repo.internal_session(safe=True)
        parser = dfsource._get_parser(session)
        dfsource.process_urls(parser, ['''
<rset size="1">
 <Card eid="50" cwuri="http://pouet.org/50" cwsource="system">
  <title>how-to</title>
  <content>how-to</content>
  <synopsis>how-to</synopsis>
  <creation_date/>
 </Card>
</rset>
'''], raise_on_error=True)

if __name__ == '__main__':
    from logilab.common.testlib import unittest_main
    unittest_main()