# HG changeset patch
# User Sylvain Thénault
# Date 1328042604 -3600
# Node ID 1867e252e487ceb2ca336084fc31dcc6ed4a3eef
# Parent 981f6e487788247b173ab997c874f0e9790da130
[repository] ldap-feed source. Closes #2086984

A datafeed-based source which copies a subtree of the ldap directory into the
system database. Authentication still goes through ldap though.

Pros:

* no need for temporary tables and such to execute multi-source RQL queries

* much more flexible to enhance / configure behaviour (you simply have to
  replace the parser)

* behaves better when the ldap server isn't reachable

Cons:

* no more 'on the fly' discovery of users (though a user authenticating itself
  will be automatically added if it doesn't exist in the db yet)

* synchronization may be heavy if there are a lot of users

A new cw.server.ldaputils module, holding the code shared between the former
ldapuser source and the new ldapfeed source, has been introduced. Also, the
ldapuser source now uses a url option instead of the custom host/protocol
options, so that it looks like a datafeed source (could be improved).

diff -r 981f6e487788 -r 1867e252e487 __pkginfo__.py
--- a/__pkginfo__.py Thu Feb 02 14:30:07 2012 +0100
+++ b/__pkginfo__.py Tue Jan 31 21:43:24 2012 +0100
@@ -1,5 +1,5 @@
 # pylint: disable=W0622,C0103
-# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
 #
 # This file is part of CubicWeb.
@@ -22,7 +22,7 @@
 modname = distname = "cubicweb"
 
-numversion = (3, 14, 2)
+numversion = (3, 15, 0)
 version = '.'.join(str(num) for num in numversion)
 
 description = "a repository of entities / relations for knowledge management"
diff -r 981f6e487788 -r 1867e252e487 devtools/__init__.py
--- a/devtools/__init__.py Thu Feb 02 14:30:07 2012 +0100
+++ b/devtools/__init__.py Tue Jan 31 21:43:24 2012 +0100
@@ -480,8 +480,8 @@
         session = repo._sessions[cnx.sessionid]
         session.set_cnxset()
         _commit = session.commit
-        def keep_cnxset_commit():
-            _commit(free_cnxset=False)
+        def keep_cnxset_commit(free_cnxset=False):
+            _commit(free_cnxset=free_cnxset)
         session.commit = keep_cnxset_commit
         pre_setup_func(session, self.config)
         session.commit()
diff -r 981f6e487788 -r 1867e252e487 doc/3.15.rst
--- a/doc/3.15.rst Thu Feb 02 14:30:07 2012 +0100
+++ b/doc/3.15.rst Tue Jan 31 21:43:24 2012 +0100
@@ -10,6 +10,8 @@
 Unintrusive API changes
 -----------------------
+* new 'ldapfeed' source type, designed to replace 'ldapuser' source with
+  data-feed (i.e. copy based) source ideas.
 RQL
diff -r 981f6e487788 -r 1867e252e487 misc/migration/3.15.0_Any.py
--- a/misc/migration/3.15.0_Any.py Thu Feb 02 14:30:07 2012 +0100
+++ b/misc/migration/3.15.0_Any.py Tue Jan 31 21:43:24 2012 +0100
@@ -1,1 +1,10 @@
 sync_schema_props_perms('EmailAddress')
+
+for source in rql('CWSource X WHERE X type "ldapuser"').entities():
+    config = source.dictconfig
+    host = config.pop('host', 'ldap')
+    protocol = config.pop('protocol', 'ldap')
+    source.set_attributes(url='%s://%s' % (protocol, host))
+    source.update_config(skip_unknown=True, **config)
+
+commit()
diff -r 981f6e487788 -r 1867e252e487 misc/scripts/ldapuser2ldapfeed.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/scripts/ldapuser2ldapfeed.py Tue Jan 31 21:43:24 2012 +0100
@@ -0,0 +1,76 @@
+"""turn a pyro source into a datafeed source
+
+Once this script is run, execute c-c db-check to cleanup relation tables.
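+
+The script is meant to be run from a cubicweb-ctl shell session, along the
+lines of the following (instance and source names are placeholders, adapt them
+to your setup; the source name is given after the '--' separator):
+
+  cubicweb-ctl shell myinstance misc/scripts/ldapuser2ldapfeed.py -- myldapsource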
+""" +import sys + +try: + source_name, = __args__ + source = repo.sources_by_uri[source_name] +except ValueError: + print('you should specify the source name as script argument (i.e. after --' + ' on the command line)') + sys.exit(1) +except KeyError: + print '%s is not an active source' % source_name + sys.exit(1) + +# check source is reachable before doing anything +if not source.get_connection().cnx: + print '%s is not reachable. Fix this before running this script' % source_name + sys.exit(1) + +raw_input('Ensure you have shutdown all instances of this application before continuing.' + ' Type enter when ready.') + +system_source = repo.system_source + +from datetime import datetime +from cubicweb.server.edition import EditedEntity + + +session.mode = 'write' # hold on the connections set + +print '******************** backport entity content ***************************' + +todelete = {} +for entity in rql('Any X WHERE X cw_source S, S eid %(s)s', {'s': source.eid}).entities(): + etype = entity.__regid__ + if not source.support_entity(etype): + print "source doesn't support %s, delete %s" % (etype, entity.eid) + else: + try: + entity.complete() + except Exception: + print '%s %s much probably deleted, delete it (extid %s)' % ( + etype, entity.eid, entity.cw_metainformation()['extid']) + else: + print 'get back', etype, entity.eid + entity.cw_edited = EditedEntity(entity, **entity.cw_attr_cache) + if not entity.creation_date: + entity.cw_edited['creation_date'] = datetime.now() + if not entity.modification_date: + entity.cw_edited['modification_date'] = datetime.now() + if not entity.upassword: + entity.cw_edited['upassword'] = u'' + if not entity.cwuri: + entity.cw_edited['cwuri'] = '%s/?dn=%s' % ( + source.urls[0], entity.cw_metainformation()['extid']) + print entity.cw_edited + system_source.add_entity(session, entity) + sql("UPDATE entities SET source='system' " + "WHERE eid=%(eid)s", {'eid': entity.eid}) + continue + todelete.setdefault(etype, []).append(entity) + +# only cleanup entities table, remaining stuff should be cleaned by a c-c +# db-check to be run after this script +for entities in todelete.values(): + system_source.delete_info_multi(session, entities, source_name) + + +source_ent = rql('CWSource S WHERE S eid %(s)s', {'s': source.eid}).get_entity(0, 0) +source_ent.set_attributes(type=u"ldapfeed", parser=u"ldapfeed") + + +commit() diff -r 981f6e487788 -r 1867e252e487 server/__init__.py --- a/server/__init__.py Thu Feb 02 14:30:07 2012 +0100 +++ b/server/__init__.py Tue Jan 31 21:43:24 2012 +0100 @@ -271,7 +271,8 @@ # available sources registry SOURCE_TYPES = {'native': LazyObject('cubicweb.server.sources.native', 'NativeSQLSource'), - 'pyrorql': LazyObject('cubicweb.server.sources.pyrorql', 'PyroRQLSource'), + 'datafeed': LazyObject('cubicweb.server.sources.datafeed', 'DataFeedSource'), + 'ldapfeed': LazyObject('cubicweb.server.sources.ldapfeed', 'LDAPFeedSource'), 'ldapuser': LazyObject('cubicweb.server.sources.ldapuser', 'LDAPUserSource'), - 'datafeed': LazyObject('cubicweb.server.sources.datafeed', 'DataFeedSource'), + 'pyrorql': LazyObject('cubicweb.server.sources.pyrorql', 'PyroRQLSource'), } diff -r 981f6e487788 -r 1867e252e487 server/ldaputils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/ldaputils.py Tue Jan 31 21:43:24 2012 +0100 @@ -0,0 +1,362 @@ +# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. 
+# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see . +"""cubicweb utilities for ldap sources + +Part of the code is coming form Zope's LDAPUserFolder + +Copyright (c) 2004 Jens Vagelpohl. +All Rights Reserved. + +This software is subject to the provisions of the Zope Public License, +Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. +THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED +WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS +FOR A PARTICULAR PURPOSE. +""" + +from __future__ import division # XXX why? + +import ldap +from ldap.ldapobject import ReconnectLDAPObject +from ldap.filter import filter_format +from ldapurl import LDAPUrl + +from cubicweb import ValidationError, AuthenticationError +from cubicweb.server.sources import ConnectionWrapper + +_ = unicode + +# search scopes +BASE = ldap.SCOPE_BASE +ONELEVEL = ldap.SCOPE_ONELEVEL +SUBTREE = ldap.SCOPE_SUBTREE + +# map ldap protocol to their standard port +PROTO_PORT = {'ldap': 389, + 'ldaps': 636, + 'ldapi': None, + } + + +class LDAPSourceMixIn(object): + """a mix-in for LDAP based source""" + options = ( + ('auth-mode', + {'type' : 'choice', + 'default': 'simple', + 'choices': ('simple', 'cram_md5', 'digest_md5', 'gssapi'), + 'help': 'authentication mode used to authenticate user to the ldap.', + 'group': 'ldap-source', 'level': 3, + }), + ('auth-realm', + {'type' : 'string', + 'default': None, + 'help': 'realm to use when using gssapi/kerberos authentication.', + 'group': 'ldap-source', 'level': 3, + }), + + ('data-cnx-dn', + {'type' : 'string', + 'default': '', + 'help': 'user dn to use to open data connection to the ldap (eg used \ +to respond to rql queries). Leave empty for anonymous bind', + 'group': 'ldap-source', 'level': 1, + }), + ('data-cnx-password', + {'type' : 'string', + 'default': '', + 'help': 'password to use to open data connection to the ldap (eg used to respond to rql queries). 
Leave empty for anonymous bind.', + 'group': 'ldap-source', 'level': 1, + }), + + ('user-base-dn', + {'type' : 'string', + 'default': 'ou=People,dc=logilab,dc=fr', + 'help': 'base DN to lookup for users', + 'group': 'ldap-source', 'level': 1, + }), + ('user-scope', + {'type' : 'choice', + 'default': 'ONELEVEL', + 'choices': ('BASE', 'ONELEVEL', 'SUBTREE'), + 'help': 'user search scope (valid values: "BASE", "ONELEVEL", "SUBTREE")', + 'group': 'ldap-source', 'level': 1, + }), + ('user-classes', + {'type' : 'csv', + 'default': ('top', 'posixAccount'), + 'help': 'classes of user (with Active Directory, you want to say "user" here)', + 'group': 'ldap-source', 'level': 1, + }), + ('user-filter', + {'type': 'string', + 'default': '', + 'help': 'additional filters to be set in the ldap query to find valid users', + 'group': 'ldap-source', 'level': 2, + }), + ('user-login-attr', + {'type' : 'string', + 'default': 'uid', + 'help': 'attribute used as login on authentication (with Active Directory, you want to use "sAMAccountName" here)', + 'group': 'ldap-source', 'level': 1, + }), + ('user-default-group', + {'type' : 'csv', + 'default': ('users',), + 'help': 'name of a group in which ldap users will be by default. \ +You can set multiple groups by separating them by a comma.', + 'group': 'ldap-source', 'level': 1, + }), + ('user-attrs-map', + {'type' : 'named', + 'default': {'uid': 'login', 'gecos': 'email'}, + 'help': 'map from ldap user attributes to cubicweb attributes (with Active Directory, you want to use sAMAccountName:login,mail:email,givenName:firstname,sn:surname)', + 'group': 'ldap-source', 'level': 1, + }), + + ) + + _conn = None + + def _entity_update(self, source_entity): + if self.urls: + if len(self.urls) > 1: + raise ValidationError(source_entity, {'url': _('can only have one url')}) + try: + protocol, hostport = self.urls[0].split('://') + except ValueError: + raise ValidationError(source_entity, {'url': _('badly formatted url')}) + if protocol not in PROTO_PORT: + raise ValidationError(source_entity, {'url': _('unsupported protocol')}) + + def update_config(self, source_entity, typedconfig): + """update configuration from source entity. 
`typedconfig` is config + properly typed with defaults set + """ + self.authmode = typedconfig['auth-mode'] + self._authenticate = getattr(self, '_auth_%s' % self.authmode) + self.cnx_dn = typedconfig['data-cnx-dn'] + self.cnx_pwd = typedconfig['data-cnx-password'] + self.user_base_dn = str(typedconfig['user-base-dn']) + self.user_base_scope = globals()[typedconfig['user-scope']] + self.user_login_attr = typedconfig['user-login-attr'] + self.user_default_groups = typedconfig['user-default-group'] + self.user_attrs = typedconfig['user-attrs-map'] + self.user_rev_attrs = {'eid': 'dn'} + for ldapattr, cwattr in self.user_attrs.items(): + self.user_rev_attrs[cwattr] = ldapattr + self.base_filters = [filter_format('(%s=%s)', ('objectClass', o)) + for o in typedconfig['user-classes']] + if typedconfig['user-filter']: + self.base_filters.append(typedconfig['user-filter']) + self._conn = None + + def connection_info(self): + assert len(self.urls) == 1, self.urls + protocol, hostport = self.urls[0].split('://') + if protocol != 'ldapi' and not ':' in hostport: + hostport = '%s:%s' % (hostport, PROTO_PORT[protocol]) + return protocol, hostport + + def get_connection(self): + """open and return a connection to the source""" + if self._conn is None: + try: + self._connect() + except Exception: + self.exception('unable to connect to ldap') + return ConnectionWrapper(self._conn) + + def authenticate(self, session, login, password=None, **kwargs): + """return CWUser eid for the given login/password if this account is + defined in this source, else raise `AuthenticationError` + + two queries are needed since passwords are stored crypted, so we have + to fetch the salt first + """ + self.info('ldap authenticate %s', login) + if not password: + # On Windows + ADAM this would have succeeded (!!!) + # You get Authenticated as: 'NT AUTHORITY\ANONYMOUS LOGON'. 
+ # we really really don't want that + raise AuthenticationError() + searchfilter = [filter_format('(%s=%s)', (self.user_login_attr, login))] + searchfilter.extend(self.base_filters) + searchstr = '(&%s)' % ''.join(searchfilter) + # first search the user + try: + user = self._search(session, self.user_base_dn, + self.user_base_scope, searchstr)[0] + except IndexError: + # no such user + raise AuthenticationError() + # check password by establishing a (unused) connection + try: + self._connect(user, password) + except ldap.LDAPError, ex: + # Something went wrong, most likely bad credentials + self.info('while trying to authenticate %s: %s', user, ex) + raise AuthenticationError() + except Exception: + self.error('while trying to authenticate %s', user, exc_info=True) + raise AuthenticationError() + eid = self.repo.extid2eid(self, user['dn'], 'CWUser', session) + if eid < 0: + # user has been moved away from this source + raise AuthenticationError() + return eid + + def object_exists_in_ldap(self, dn): + cnx = self.get_connection().cnx #session.cnxset.connection(self.uri).cnx + if cnx is None: + return True # ldap unreachable, suppose it exists + try: + cnx.search_s(base, scope, searchstr, attrs) + except ldap.PARTIAL_RESULTS: + pass + except ldap.NO_SUCH_OBJECT: + return False + return True + + def _connect(self, user=None, userpwd=None): + protocol, hostport = self.connection_info() + self.info('connecting %s://%s as %s', protocol, hostport, + user and user['dn'] or 'anonymous') + # don't require server certificate when using ldaps (will + # enable self signed certs) + ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER) + url = LDAPUrl(urlscheme=protocol, hostport=hostport) + conn = ReconnectLDAPObject(url.initializeUrl()) + # Set the protocol version - version 3 is preferred + try: + conn.set_option(ldap.OPT_PROTOCOL_VERSION, ldap.VERSION3) + except ldap.LDAPError: # Invalid protocol version, fall back safely + conn.set_option(ldap.OPT_PROTOCOL_VERSION, ldap.VERSION2) + # Deny auto-chasing of referrals to be safe, we handle them instead + #try: + # connection.set_option(ldap.OPT_REFERRALS, 0) + #except ldap.LDAPError: # Cannot set referrals, so do nothing + # pass + #conn.set_option(ldap.OPT_NETWORK_TIMEOUT, conn_timeout) + #conn.timeout = op_timeout + # Now bind with the credentials given. Let exceptions propagate out. 
+ if user is None: + # no user specified, we want to initialize the 'data' connection, + assert self._conn is None + self._conn = conn + # XXX always use simple bind for data connection + if not self.cnx_dn: + conn.simple_bind_s(self.cnx_dn, self.cnx_pwd) + else: + self._authenticate(conn, {'dn': self.cnx_dn}, self.cnx_pwd) + else: + # user specified, we want to check user/password, no need to return + # the connection which will be thrown out + self._authenticate(conn, user, userpwd) + return conn + + def _auth_simple(self, conn, user, userpwd): + conn.simple_bind_s(user['dn'], userpwd) + + def _auth_cram_md5(self, conn, user, userpwd): + from ldap import sasl + auth_token = sasl.cram_md5(user['dn'], userpwd) + conn.sasl_interactive_bind_s('', auth_token) + + def _auth_digest_md5(self, conn, user, userpwd): + from ldap import sasl + auth_token = sasl.digest_md5(user['dn'], userpwd) + conn.sasl_interactive_bind_s('', auth_token) + + def _auth_gssapi(self, conn, user, userpwd): + # print XXX not proper sasl/gssapi + import kerberos + if not kerberos.checkPassword(user[self.user_login_attr], userpwd): + raise Exception('BAD login / mdp') + #from ldap import sasl + #conn.sasl_interactive_bind_s('', sasl.gssapi()) + + def _search(self, session, base, scope, + searchstr='(objectClass=*)', attrs=()): + """make an ldap query""" + self.debug('ldap search %s %s %s %s %s', self.uri, base, scope, + searchstr, list(attrs)) + # XXX for now, we do not have connections set support for LDAP, so + # this is always self._conn + cnx = self.get_connection().cnx #session.cnxset.connection(self.uri).cnx + if cnx is None: + # cant connect to server + msg = session._("can't connect to source %s, some data may be missing") + session.set_shared_data('sources_error', msg % self.uri) + return [] + try: + res = cnx.search_s(base, scope, searchstr, attrs) + except ldap.PARTIAL_RESULTS: + res = cnx.result(all=0)[1] + except ldap.NO_SUCH_OBJECT: + self.info('ldap NO SUCH OBJECT %s %s %s', base, scope, searchstr) + self._process_no_such_object(session, base) + return [] + # except ldap.REFERRAL, e: + # cnx = self.handle_referral(e) + # try: + # res = cnx.search_s(base, scope, searchstr, attrs) + # except ldap.PARTIAL_RESULTS: + # res_type, res = cnx.result(all=0) + result = [] + for rec_dn, rec_dict in res: + # When used against Active Directory, "rec_dict" may not be + # be a dictionary in some cases (instead, it can be a list) + # + # An example of a useless "res" entry that can be ignored + # from AD is + # (None, ['ldap://ForestDnsZones.PORTAL.LOCAL/DC=ForestDnsZones,DC=PORTAL,DC=LOCAL']) + # This appears to be some sort of internal referral, but + # we can't handle it, so we need to skip over it. 
+ try: + items = rec_dict.iteritems() + except AttributeError: + continue + else: + itemdict = self._process_ldap_item(rec_dn, items) + result.append(itemdict) + #print '--->', result + self.debug('ldap built results %s', len(result)) + return result + + def _process_ldap_item(self, dn, iterator): + """Turn an ldap received item into a proper dict.""" + itemdict = {'dn': dn} + for key, value in iterator: + if not isinstance(value, str): + try: + for i in range(len(value)): + value[i] = unicode(value[i], 'utf8') + except Exception: + pass + if isinstance(value, list) and len(value) == 1: + itemdict[key] = value = value[0] + return itemdict + + def _process_no_such_object(self, session, dn): + """Some search return NO_SUCH_OBJECT error, handle this (usually because + an object whose dn is no more existent in ldap as been encountered). + + Do nothing by default, let sub-classes handle that. + """ diff -r 981f6e487788 -r 1867e252e487 server/sources/datafeed.py --- a/server/sources/datafeed.py Thu Feb 02 14:30:07 2012 +0100 +++ b/server/sources/datafeed.py Tue Jan 31 21:43:24 2012 +0100 @@ -103,6 +103,7 @@ if url.strip()] else: self.urls = [] + def update_config(self, source_entity, typedconfig): """update configuration from source entity. `typedconfig` is config properly typed with defaults set @@ -290,7 +291,7 @@ 'updated': set()} def normalize_url(self, url): - from cubicweb.sobjects.parsers import URL_MAPPING + from cubicweb.sobjects import URL_MAPPING # available after registration for mappedurl in URL_MAPPING: if url.startswith(mappedurl): return url.replace(mappedurl, URL_MAPPING[mappedurl], 1) @@ -374,6 +375,19 @@ """ return True + def update_if_necessary(self, entity, attrs): + self.notify_updated(entity) + entity.complete(tuple(attrs)) + # check modification date and compare attribute values to only update + # what's actually needed + mdate = attrs.get('modification_date') + if not mdate or mdate > entity.modification_date: + attrs = dict( (k, v) for k, v in attrs.iteritems() + if v != getattr(entity, k)) + if attrs: + entity.set_attributes(**attrs) + + class DataFeedXMLParser(DataFeedParser): def process(self, url, raise_on_error=False, partialcommit=True): diff -r 981f6e487788 -r 1867e252e487 server/sources/ldapfeed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/sources/ldapfeed.py Tue Jan 31 21:43:24 2012 +0100 @@ -0,0 +1,45 @@ +# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see . +"""cubicweb ldap feed source + +unlike ldapuser source, this source is copy based and will import ldap content +(beside passwords for authentication) into the system source. 
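+
+A source of this type can for instance be declared from a migration script or
+cubicweb-ctl shell along these lines (source name, url, dn and attribute
+mapping below are purely illustrative; see the ldap-source options for the
+full list of settings):
+
+  create_entity('CWSource', name=u'myldap', type=u'ldapfeed', parser=u'ldapfeed',
+                url=u'ldap://ldap.example.org',
+                config=u'user-base-dn=ou=People,dc=example,dc=org\nuser-attrs-map=uid:login,mail:email')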
+""" + +from cubicweb.server.sources import datafeed +from cubicweb.server import ldaputils + + +class LDAPFeedSource(ldaputils.LDAPSourceMixIn, + datafeed.DataFeedSource): + """LDAP feed source""" + use_cwuri_as_url = True + + options = datafeed.DataFeedSource.options + ldaputils.LDAPSourceMixIn.options + + def update_config(self, source_entity, typedconfig): + """update configuration from source entity. `typedconfig` is config + properly typed with defaults set + """ + datafeed.DataFeedSource.update_config(self, source_entity, typedconfig) + ldaputils.LDAPSourceMixIn.update_config(self, source_entity, typedconfig) + + def _entity_update(self, source_entity): + datafeed.DataFeedSource._entity_update(self, source_entity) + ldaputils.LDAPSourceMixIn._entity_update(self, source_entity) + diff -r 981f6e487788 -r 1867e252e487 server/sources/ldapuser.py --- a/server/sources/ldapuser.py Thu Feb 02 14:30:07 2012 +0100 +++ b/server/sources/ldapuser.py Tue Jan 31 21:43:24 2012 +0100 @@ -1,4 +1,4 @@ -# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This file is part of CubicWeb. @@ -18,33 +18,20 @@ """cubicweb ldap user source this source is for now limited to a read-only CWUser source - -Part of the code is coming form Zope's LDAPUserFolder - -Copyright (c) 2004 Jens Vagelpohl. -All Rights Reserved. - -This software is subject to the provisions of the Zope Public License, -Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. -THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED -WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS -FOR A PARTICULAR PURPOSE. """ from __future__ import division from base64 import b64decode import ldap -from ldap.ldapobject import ReconnectLDAPObject -from ldap.filter import filter_format, escape_filter_chars -from ldapurl import LDAPUrl +from ldap.filter import escape_filter_chars from rql.nodes import Relation, VariableRef, Constant, Function -from cubicweb import AuthenticationError, UnknownEid, RepositoryError +from cubicweb import UnknownEid, RepositoryError +from cubicweb.server import ldaputils from cubicweb.server.utils import cartesian_product from cubicweb.server.sources import (AbstractSource, TrFunc, GlobTrFunc, - ConnectionWrapper, TimedCache) + TimedCache) # search scopes BASE = ldap.SCOPE_BASE @@ -58,97 +45,11 @@ } -class LDAPUserSource(AbstractSource): +class LDAPUserSource(ldaputils.LDAPSourceMixIn, AbstractSource): """LDAP read-only CWUser source""" support_entities = {'CWUser': False} - options = ( - ('host', - {'type' : 'string', - 'default': 'ldap', - 'help': 'ldap host. 
It may contains port information using \ -: notation.', - 'group': 'ldap-source', 'level': 1, - }), - ('protocol', - {'type' : 'choice', - 'default': 'ldap', - 'choices': ('ldap', 'ldaps', 'ldapi'), - 'help': 'ldap protocol (allowed values: ldap, ldaps, ldapi)', - 'group': 'ldap-source', 'level': 1, - }), - ('auth-mode', - {'type' : 'choice', - 'default': 'simple', - 'choices': ('simple', 'cram_md5', 'digest_md5', 'gssapi'), - 'help': 'authentication mode used to authenticate user to the ldap.', - 'group': 'ldap-source', 'level': 3, - }), - ('auth-realm', - {'type' : 'string', - 'default': None, - 'help': 'realm to use when using gssapi/kerberos authentication.', - 'group': 'ldap-source', 'level': 3, - }), - - ('data-cnx-dn', - {'type' : 'string', - 'default': '', - 'help': 'user dn to use to open data connection to the ldap (eg used \ -to respond to rql queries). Leave empty for anonymous bind', - 'group': 'ldap-source', 'level': 1, - }), - ('data-cnx-password', - {'type' : 'string', - 'default': '', - 'help': 'password to use to open data connection to the ldap (eg used to respond to rql queries). Leave empty for anonymous bind.', - 'group': 'ldap-source', 'level': 1, - }), - - ('user-base-dn', - {'type' : 'string', - 'default': 'ou=People,dc=logilab,dc=fr', - 'help': 'base DN to lookup for users', - 'group': 'ldap-source', 'level': 1, - }), - ('user-scope', - {'type' : 'choice', - 'default': 'ONELEVEL', - 'choices': ('BASE', 'ONELEVEL', 'SUBTREE'), - 'help': 'user search scope (valid values: "BASE", "ONELEVEL", "SUBTREE")', - 'group': 'ldap-source', 'level': 1, - }), - ('user-classes', - {'type' : 'csv', - 'default': ('top', 'posixAccount'), - 'help': 'classes of user (with Active Directory, you want to say "user" here)', - 'group': 'ldap-source', 'level': 1, - }), - ('user-filter', - {'type': 'string', - 'default': '', - 'help': 'additional filters to be set in the ldap query to find valid users', - 'group': 'ldap-source', 'level': 2, - }), - ('user-login-attr', - {'type' : 'string', - 'default': 'uid', - 'help': 'attribute used as login on authentication (with Active Directory, you want to use "sAMAccountName" here)', - 'group': 'ldap-source', 'level': 1, - }), - ('user-default-group', - {'type' : 'csv', - 'default': ('users',), - 'help': 'name of a group in which ldap users will be by default. \ -You can set multiple groups by separating them by a comma.', - 'group': 'ldap-source', 'level': 1, - }), - ('user-attrs-map', - {'type' : 'named', - 'default': {'uid': 'login', 'gecos': 'email'}, - 'help': 'map from ldap user attributes to cubicweb attributes (with Active Directory, you want to use sAMAccountName:login,mail:email,givenName:firstname,sn:surname)', - 'group': 'ldap-source', 'level': 1, - }), + options = ldaputils.LDAPSourceMixIn.options + ( ('synchronization-interval', {'type' : 'time', @@ -168,35 +69,32 @@ def __init__(self, repo, source_config, eid=None): AbstractSource.__init__(self, repo, source_config, eid) - self.update_config(None, self.check_conf_dict(eid, source_config)) - self._conn = None + self.update_config(None, self.check_conf_dict(eid, source_config, + fail_if_unknown=False)) + + def _entity_update(self, source_entity): + # XXX copy from datafeed source + if source_entity.url: + self.urls = [url.strip() for url in source_entity.url.splitlines() + if url.strip()] + else: + self.urls = [] + # /end XXX + ldaputils.LDAPSourceMixIn._entity_update(self, source_entity) def update_config(self, source_entity, typedconfig): """update configuration from source entity. 
`typedconfig` is config properly typed with defaults set """ - self.host = typedconfig['host'] - self.protocol = typedconfig['protocol'] - self.authmode = typedconfig['auth-mode'] - self._authenticate = getattr(self, '_auth_%s' % self.authmode) - self.cnx_dn = typedconfig['data-cnx-dn'] - self.cnx_pwd = typedconfig['data-cnx-password'] - self.user_base_dn = str(typedconfig['user-base-dn']) - self.user_base_scope = globals()[typedconfig['user-scope']] - self.user_login_attr = typedconfig['user-login-attr'] - self.user_default_groups = typedconfig['user-default-group'] - self.user_attrs = typedconfig['user-attrs-map'] - self.user_rev_attrs = {'eid': 'dn'} - for ldapattr, cwattr in self.user_attrs.items(): - self.user_rev_attrs[cwattr] = ldapattr - self.base_filters = [filter_format('(%s=%s)', ('objectClass', o)) - for o in typedconfig['user-classes']] - if typedconfig['user-filter']: - self.base_filters.append(typedconfig['user-filter']) + ldaputils.LDAPSourceMixIn.update_config(self, source_entity, typedconfig) self._interval = typedconfig['synchronization-interval'] self._cache_ttl = max(71, typedconfig['cache-life-time']) self.reset_caches() - self._conn = None + # XXX copy from datafeed source + if source_entity is not None: + self._entity_update(source_entity) + self.config = typedconfig + # /end XXX def reset_caches(self): """method called during test to reset potential source caches""" @@ -207,21 +105,24 @@ """method called by the repository once ready to handle request""" if activated: self.info('ldap init') + self._entity_update(source_entity) # set minimum period of 5min 1s (the additional second is to # minimize resonnance effet) - self.repo.looping_task(max(301, self._interval), self.synchronize) + if self.user_rev_attrs['email']: + self.repo.looping_task(max(301, self._interval), self.synchronize) self.repo.looping_task(self._cache_ttl // 10, self._query_cache.clear_expired) def synchronize(self): + self.pull_data(self.repo.internal_session()) + + def pull_data(self, session, force=False, raise_on_error=False): """synchronize content known by this repository with content in the external repository """ self.info('synchronizing ldap source %s', self.uri) - try: - ldap_emailattr = self.user_rev_attrs['email'] - except KeyError: - return # no email in ldap, we're done + ldap_emailattr = self.user_rev_attrs['email'] + assert ldap_emailattr session = self.repo.internal_session() execute = session.execute try: @@ -268,54 +169,6 @@ session.commit() session.close() - def get_connection(self): - """open and return a connection to the source""" - if self._conn is None: - try: - self._connect() - except Exception: - self.exception('unable to connect to ldap:') - return ConnectionWrapper(self._conn) - - def authenticate(self, session, login, password=None, **kwargs): - """return CWUser eid for the given login/password if this account is - defined in this source, else raise `AuthenticationError` - - two queries are needed since passwords are stored crypted, so we have - to fetch the salt first - """ - self.info('ldap authenticate %s', login) - if not password: - # On Windows + ADAM this would have succeeded (!!!) - # You get Authenticated as: 'NT AUTHORITY\ANONYMOUS LOGON'. 
- # we really really don't want that - raise AuthenticationError() - searchfilter = [filter_format('(%s=%s)', (self.user_login_attr, login))] - searchfilter.extend(self.base_filters) - searchstr = '(&%s)' % ''.join(searchfilter) - # first search the user - try: - user = self._search(session, self.user_base_dn, - self.user_base_scope, searchstr)[0] - except IndexError: - # no such user - raise AuthenticationError() - # check password by establishing a (unused) connection - try: - self._connect(user, password) - except ldap.LDAPError, ex: - # Something went wrong, most likely bad credentials - self.info('while trying to authenticate %s: %s', user, ex) - raise AuthenticationError() - except Exception: - self.error('while trying to authenticate %s', user, exc_info=True) - raise AuthenticationError() - eid = self.repo.extid2eid(self, user['dn'], 'CWUser', session) - if eid < 0: - # user has been moved away from this source - raise AuthenticationError() - return eid - def ldap_name(self, var): if var.stinfo['relations']: relname = iter(var.stinfo['relations']).next().r_type @@ -459,127 +312,18 @@ #print '--> ldap result', result return result - - def _connect(self, user=None, userpwd=None): - if self.protocol == 'ldapi': - hostport = self.host - elif not ':' in self.host: - hostport = '%s:%s' % (self.host, PROTO_PORT[self.protocol]) - else: - hostport = self.host - self.info('connecting %s://%s as %s', self.protocol, hostport, - user and user['dn'] or 'anonymous') - # don't require server certificate when using ldaps (will - # enable self signed certs) - ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER) - url = LDAPUrl(urlscheme=self.protocol, hostport=hostport) - conn = ReconnectLDAPObject(url.initializeUrl()) - # Set the protocol version - version 3 is preferred - try: - conn.set_option(ldap.OPT_PROTOCOL_VERSION, ldap.VERSION3) - except ldap.LDAPError: # Invalid protocol version, fall back safely - conn.set_option(ldap.OPT_PROTOCOL_VERSION, ldap.VERSION2) - # Deny auto-chasing of referrals to be safe, we handle them instead - #try: - # connection.set_option(ldap.OPT_REFERRALS, 0) - #except ldap.LDAPError: # Cannot set referrals, so do nothing - # pass - #conn.set_option(ldap.OPT_NETWORK_TIMEOUT, conn_timeout) - #conn.timeout = op_timeout - # Now bind with the credentials given. Let exceptions propagate out. 
- if user is None: - # no user specified, we want to initialize the 'data' connection, - assert self._conn is None - self._conn = conn - # XXX always use simple bind for data connection - if not self.cnx_dn: - conn.simple_bind_s(self.cnx_dn, self.cnx_pwd) - else: - self._authenticate(conn, {'dn': self.cnx_dn}, self.cnx_pwd) - else: - # user specified, we want to check user/password, no need to return - # the connection which will be thrown out - self._authenticate(conn, user, userpwd) - return conn - - def _auth_simple(self, conn, user, userpwd): - conn.simple_bind_s(user['dn'], userpwd) - - def _auth_cram_md5(self, conn, user, userpwd): - from ldap import sasl - auth_token = sasl.cram_md5(user['dn'], userpwd) - conn.sasl_interactive_bind_s('', auth_token) - - def _auth_digest_md5(self, conn, user, userpwd): - from ldap import sasl - auth_token = sasl.digest_md5(user['dn'], userpwd) - conn.sasl_interactive_bind_s('', auth_token) + def _process_ldap_item(self, dn, iterator): + itemdict = super(LDAPUserSource, self)._process_ldap_item(dn, iterator) + self._cache[dn] = itemdict + return itemdict - def _auth_gssapi(self, conn, user, userpwd): - # print XXX not proper sasl/gssapi - import kerberos - if not kerberos.checkPassword(user[self.user_login_attr], userpwd): - raise Exception('BAD login / mdp') - #from ldap import sasl - #conn.sasl_interactive_bind_s('', sasl.gssapi()) - - def _search(self, session, base, scope, - searchstr='(objectClass=*)', attrs=()): - """make an ldap query""" - self.debug('ldap search %s %s %s %s %s', self.uri, base, scope, - searchstr, list(attrs)) - # XXX for now, we do not have connections set support for LDAP, so - # this is always self._conn - cnx = session.cnxset.connection(self.uri).cnx - try: - res = cnx.search_s(base, scope, searchstr, attrs) - except ldap.PARTIAL_RESULTS: - res = cnx.result(all=0)[1] - except ldap.NO_SUCH_OBJECT: - self.info('ldap NO SUCH OBJECT') - eid = self.repo.extid2eid(self, base, 'CWUser', session, insert=False) - if eid: - self.warning('deleting ldap user with eid %s and dn %s', - eid, base) - entity = session.entity_from_eid(eid, 'CWUser') - self.repo.delete_info(session, entity, self.uri) - self.reset_caches() - return [] - # except ldap.REFERRAL, e: - # cnx = self.handle_referral(e) - # try: - # res = cnx.search_s(base, scope, searchstr, attrs) - # except ldap.PARTIAL_RESULTS: - # res_type, res = cnx.result(all=0) - result = [] - for rec_dn, rec_dict in res: - # When used against Active Directory, "rec_dict" may not be - # be a dictionary in some cases (instead, it can be a list) - # An example of a useless "res" entry that can be ignored - # from AD is - # (None, ['ldap://ForestDnsZones.PORTAL.LOCAL/DC=ForestDnsZones,DC=PORTAL,DC=LOCAL']) - # This appears to be some sort of internal referral, but - # we can't handle it, so we need to skip over it. - try: - items = rec_dict.items() - except AttributeError: - # 'items' not found on rec_dict, skip - continue - for key, value in items: # XXX syt: huuum ? 
- if not isinstance(value, str): - try: - for i in range(len(value)): - value[i] = unicode(value[i], 'utf8') - except Exception: - pass - if isinstance(value, list) and len(value) == 1: - rec_dict[key] = value = value[0] - rec_dict['dn'] = rec_dn - self._cache[rec_dn] = rec_dict - result.append(rec_dict) - #print '--->', result - self.debug('ldap built results %s', len(result)) - return result + def _process_no_such_object(self, session, dn): + eid = self.repo.extid2eid(self, dn, 'CWUser', session, insert=False) + if eid: + self.warning('deleting ldap user with eid %s and dn %s', eid, dn) + entity = session.entity_from_eid(eid, 'CWUser') + self.repo.delete_info(session, entity, self.uri) + self.reset_caches() def before_entity_insertion(self, session, lid, etype, eid, sourceparams): """called by the repository when an eid has been attributed for an @@ -604,13 +348,13 @@ self.debug('ldap after entity insertion') super(LDAPUserSource, self).after_entity_insertion( session, lid, entity, sourceparams) - dn = lid for group in self.user_default_groups: session.execute('SET X in_group G WHERE X eid %(x)s, G name %(group)s', {'x': entity.eid, 'group': group}) # search for existant email first try: - emailaddr = self._cache[dn][self.user_rev_attrs['email']] + # lid = dn + emailaddr = self._cache[lid][self.user_rev_attrs['email']] except KeyError: return if isinstance(emailaddr, list): @@ -632,6 +376,7 @@ """delete an entity from the source""" raise RepositoryError('this source is read only') + def _insert_email(session, emailaddr, ueid): session.execute('INSERT EmailAddress X: X address %(addr)s, U primary_email X ' 'WHERE U eid %(x)s', {'addr': emailaddr, 'x': ueid}) diff -r 981f6e487788 -r 1867e252e487 server/test/unittest_ldapuser.py --- a/server/test/unittest_ldapuser.py Thu Feb 02 14:30:07 2012 +0100 +++ b/server/test/unittest_ldapuser.py Tue Jan 31 21:43:24 2012 +0100 @@ -1,4 +1,4 @@ -# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This file is part of CubicWeb. 
@@ -25,6 +25,8 @@ from socket import socket, error as socketerror from logilab.common.testlib import TestCase, unittest_main, mock_object, Tags + +from cubicweb import AuthenticationError from cubicweb.devtools.testlib import CubicWebTC from cubicweb.devtools.repotest import RQLGeneratorTC from cubicweb.devtools.httptest import get_available_port @@ -32,15 +34,14 @@ from cubicweb.server.sources.ldapuser import * -CONFIG = u'''host=%s -user-base-dn=ou=People,dc=cubicweb,dc=test +CONFIG = u'''user-base-dn=ou=People,dc=cubicweb,dc=test user-scope=ONELEVEL user-classes=top,posixAccount user-login-attr=uid user-default-group=users user-attrs-map=gecos:email,uid:login ''' - +URL = None def setUpModule(*args): create_slapd_configuration(LDAPUserSourceTC.config) @@ -49,7 +50,7 @@ terminate_slapd() def create_slapd_configuration(config): - global slapd_process, CONFIG + global slapd_process, URL basedir = join(config.apphome, "ldapdb") slapdconf = join(config.apphome, "slapd.conf") confin = file(join(config.apphome, "slapd.conf.in")).read() @@ -78,7 +79,7 @@ else: raise EnvironmentError('Cannot start slapd with cmdline="%s" (from directory "%s")' % (" ".join(cmdline), os.getcwd())) - CONFIG = CONFIG % host + URL = u'ldap://%s' % host def terminate_slapd(): global slapd_process @@ -93,23 +94,92 @@ print "DONE" del slapd_process -class LDAPUserSourceTC(CubicWebTC): + + + +class LDAPFeedSourceTC(CubicWebTC): + test_db_id = 'ldap-feed' + + @classmethod + def pre_setup_database(cls, session, config): + session.create_entity('CWSource', name=u'ldapuser', type=u'ldapfeed', parser=u'ldapfeed', + url=URL, config=CONFIG) + session.commit() + isession = session.repo.internal_session() + lfsource = isession.repo.sources_by_uri['ldapuser'] + stats = lfsource.pull_data(isession, force=True, raise_on_error=True) + + def setUp(self): + super(LDAPFeedSourceTC, self).setUp() + # ldap source url in the database may use a different port as the one + # just attributed + lfsource = self.repo.sources_by_uri['ldapuser'] + lfsource.urls = [URL] + + def assertMetadata(self, entity): + self.assertTrue(entity.creation_date) + self.assertTrue(entity.modification_date) + + def test_authenticate(self): + source = self.repo.sources_by_uri['ldapuser'] + self.session.set_cnxset() + self.assertRaises(AuthenticationError, + source.authenticate, self.session, 'toto', 'toto') + + def test_base(self): + # check a known one + rset = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'}) + e = rset.get_entity(0, 0) + self.assertEqual(e.login, 'syt') + e.complete() + self.assertMetadata(e) + self.assertEqual(e.firstname, None) + self.assertEqual(e.surname, None) + self.assertEqual(e.in_group[0].name, 'users') + self.assertEqual(e.owned_by[0].login, 'syt') + self.assertEqual(e.created_by, ()) + self.assertEqual(e.primary_email[0].address, 'Sylvain Thenault') + # email content should be indexed on the user + rset = self.sexecute('CWUser X WHERE X has_text "thenault"') + self.assertEqual(rset.rows, [[e.eid]]) + + def test_copy_to_system_source(self): + source = self.repo.sources_by_uri['ldapuser'] + eid = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'})[0][0] + self.sexecute('SET X cw_source S WHERE X eid %(x)s, S name "system"', {'x': eid}) + self.commit() + source.reset_caches() + rset = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'}) + self.assertEqual(len(rset), 1) + e = rset.get_entity(0, 0) + self.assertEqual(e.eid, eid) + self.assertEqual(e.cw_metainformation(), {'source': {'type': 
u'native', 'uri': u'system', 'use-cwuri-as-url': False}, + 'type': 'CWUser', + 'extid': None}) + self.assertEqual(e.cw_source[0].name, 'system') + self.assertTrue(e.creation_date) + self.assertTrue(e.modification_date) + # XXX test some password has been set + source.pull_data(self.session) + rset = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'}) + self.assertEqual(len(rset), 1) + + +class LDAPUserSourceTC(LDAPFeedSourceTC): test_db_id = 'ldap-user' tags = CubicWebTC.tags | Tags(('ldap')) @classmethod def pre_setup_database(cls, session, config): session.create_entity('CWSource', name=u'ldapuser', type=u'ldapuser', - config=CONFIG) + url=URL, config=CONFIG) session.commit() # XXX keep it there session.execute('CWUser U') - def test_authenticate(self): - source = self.repo.sources_by_uri['ldapuser'] - self.session.set_cnxset() - self.assertRaises(AuthenticationError, - source.authenticate, self.session, 'toto', 'toto') + def assertMetadata(self, entity): + self.assertEqual(entity.creation_date, None) + self.assertEqual(entity.modification_date, None) def test_synchronize(self): source = self.repo.sources_by_uri['ldapuser'] @@ -121,8 +191,7 @@ e = rset.get_entity(0, 0) self.assertEqual(e.login, 'syt') e.complete() - self.assertEqual(e.creation_date, None) - self.assertEqual(e.modification_date, None) + self.assertMetadata(e) self.assertEqual(e.firstname, None) self.assertEqual(e.surname, None) self.assertEqual(e.in_group[0].name, 'users') @@ -347,27 +416,6 @@ rset = cu.execute('Any F WHERE X has_text "iaminguestsgrouponly", X firstname F') self.assertEqual(rset.rows, [[None]]) - def test_copy_to_system_source(self): - source = self.repo.sources_by_uri['ldapuser'] - eid = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'})[0][0] - self.sexecute('SET X cw_source S WHERE X eid %(x)s, S name "system"', {'x': eid}) - self.commit() - source.reset_caches() - rset = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'}) - self.assertEqual(len(rset), 1) - e = rset.get_entity(0, 0) - self.assertEqual(e.eid, eid) - self.assertEqual(e.cw_metainformation(), {'source': {'type': u'native', 'uri': u'system', 'use-cwuri-as-url': False}, - 'type': 'CWUser', - 'extid': None}) - self.assertEqual(e.cw_source[0].name, 'system') - self.assertTrue(e.creation_date) - self.assertTrue(e.modification_date) - # XXX test some password has been set - source.synchronize() - rset = self.sexecute('CWUser X WHERE X login %(login)s', {'login': 'syt'}) - self.assertEqual(len(rset), 1) - def test_nonregr1(self): self.sexecute('Any X,AA ORDERBY AA DESC WHERE E eid %(x)s, E owned_by X, ' 'X modification_date AA', @@ -403,7 +451,6 @@ 'OR (EXISTS(U in_group H, ME in_group H, NOT H name "users")), U login UL, U is CWUser)', {'x': self.session.user.eid}) - class GlobTrFuncTC(TestCase): def test_count(self): @@ -438,6 +485,7 @@ res = trfunc.apply([[1, 2], [2, 4], [3, 6], [1, 5]]) self.assertEqual(res, [[1, 5], [2, 4], [3, 6]]) + class RQL2LDAPFilterTC(RQLGeneratorTC): tags = RQLGeneratorTC.tags | Tags(('ldap')) diff -r 981f6e487788 -r 1867e252e487 sobjects/__init__.py --- a/sobjects/__init__.py Thu Feb 02 14:30:07 2012 +0100 +++ b/sobjects/__init__.py Tue Jan 31 21:43:24 2012 +0100 @@ -1,4 +1,4 @@ -# copyright 2003-2010 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This file is part of CubicWeb. 
@@ -15,6 +15,16 @@ # # You should have received a copy of the GNU Lesser General Public License along # with CubicWeb. If not, see . -"""server side objects +"""server side objects""" + +import os.path as osp -""" +def registration_callback(vreg): + vreg.register_all(globals().values(), __name__) + global URL_MAPPING + URL_MAPPING = {} + if vreg.config.apphome: + url_mapping_file = osp.join(vreg.config.apphome, 'urlmapping.py') + if osp.exists(url_mapping_file): + URL_MAPPING = eval(file(url_mapping_file).read()) + vreg.info('using url mapping %s from %s', URL_MAPPING, url_mapping_file) diff -r 981f6e487788 -r 1867e252e487 sobjects/cwxmlparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sobjects/cwxmlparser.py Tue Jan 31 21:43:24 2012 +0100 @@ -0,0 +1,487 @@ +# copyright 2010-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see . +"""datafeed parser for xml generated by cubicweb + +Example of mapping for CWEntityXMLParser:: + + {u'CWUser': { # EntityType + (u'in_group', u'subject', u'link'): [ # (rtype, role, action) + (u'CWGroup', {u'linkattr': u'name'})], # -> rules = [(EntityType, options), ...] + (u'tags', u'object', u'link-or-create'): [ # (...) + (u'Tag', {u'linkattr': u'name'})], # -> ... + (u'use_email', u'subject', u'copy'): [ # (...) + (u'EmailAddress', {})] # -> ... + } + } + +""" + +from datetime import datetime, timedelta, time +from urllib import urlencode +from cgi import parse_qs # in urlparse with python >= 2.6 + +from logilab.common.date import todate, totime +from logilab.common.textutils import splitstrip, text_to_dict +from logilab.common.decorators import classproperty + +from yams.constraints import BASE_CONVERTERS +from yams.schema import role_name as rn + +from cubicweb import ValidationError, RegistryException, typed_eid +from cubicweb.view import Component +from cubicweb.server.sources import datafeed +from cubicweb.server.hook import match_rtype + +# XXX see cubicweb.cwvreg.YAMS_TO_PY +# XXX see cubicweb.web.views.xmlrss.SERIALIZERS +DEFAULT_CONVERTERS = BASE_CONVERTERS.copy() +DEFAULT_CONVERTERS['String'] = unicode +DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8') +def convert_date(ustr): + return todate(datetime.strptime(ustr, '%Y-%m-%d')) +DEFAULT_CONVERTERS['Date'] = convert_date +def convert_datetime(ustr): + if '.' in ustr: # assume %Y-%m-%d %H:%M:%S.mmmmmm + ustr = ustr.split('.',1)[0] + return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S') +DEFAULT_CONVERTERS['Datetime'] = convert_datetime +# XXX handle timezone, though this will be enough as TZDatetime are +# serialized without time zone by default (UTC time). See +# cw.web.views.xmlrss.SERIALIZERS. 
+DEFAULT_CONVERTERS['TZDatetime'] = convert_datetime +def convert_time(ustr): + return totime(datetime.strptime(ustr, '%H:%M:%S')) +DEFAULT_CONVERTERS['Time'] = convert_time +DEFAULT_CONVERTERS['TZTime'] = convert_time +def convert_interval(ustr): + return time(seconds=int(ustr)) +DEFAULT_CONVERTERS['Interval'] = convert_interval + +def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS): + typeddict = {} + for rschema in eschema.subject_relations(): + if rschema.final and rschema in stringdict: + if rschema in ('eid', 'cwuri', 'cwtype', 'cwsource'): + continue + attrtype = eschema.destination(rschema) + value = stringdict[rschema] + if value is not None: + value = converters[attrtype](value) + typeddict[rschema.type] = value + return typeddict + +def rtype_role_rql(rtype, role): + if role == 'object': + return 'Y %s X WHERE X eid %%(x)s' % rtype + else: + return 'X %s Y WHERE X eid %%(x)s' % rtype + + +class CWEntityXMLParser(datafeed.DataFeedXMLParser): + """datafeed parser for the 'xml' entity view + + Most of the logic is delegated to the following components: + + * an "item builder" component, turning an etree xml node into a specific + python dictionnary representing an entity + + * "action" components, selected given an entity, a relation and its role in + the relation, and responsible to link the entity to given related items + (eg dictionnary) + + So the parser is only doing the gluing service and the connection to the + source. + """ + __regid__ = 'cw.entityxml' + + def __init__(self, *args, **kwargs): + super(CWEntityXMLParser, self).__init__(*args, **kwargs) + self._parsed_urls = {} + self._processed_entities = set() + + def select_linker(self, action, rtype, role, entity=None): + try: + return self._cw.vreg['components'].select( + 'cw.entityxml.action.%s' % action, self._cw, entity=entity, + rtype=rtype, role=role, parser=self) + except RegistryException: + raise RegistryException('Unknown action %s' % action) + + def list_actions(self): + reg = self._cw.vreg['components'] + return sorted(clss[0].action for rid, clss in reg.iteritems() + if rid.startswith('cw.entityxml.action.')) + + # mapping handling ######################################################### + + def add_schema_config(self, schemacfg, checkonly=False): + """added CWSourceSchemaConfig, modify mapping accordingly""" + _ = self._cw._ + try: + rtype = schemacfg.schema.rtype.name + except AttributeError: + msg = _("entity and relation types can't be mapped, only attributes " + "or relations") + raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg}) + if schemacfg.options: + options = text_to_dict(schemacfg.options) + else: + options = {} + try: + role = options.pop('role') + if role not in ('subject', 'object'): + raise KeyError + except KeyError: + msg = _('"role=subject" or "role=object" must be specified in options') + raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg}) + try: + action = options.pop('action') + linker = self.select_linker(action, rtype, role) + linker.check_options(options, schemacfg.eid) + except KeyError: + msg = _('"action" must be specified in options; allowed values are ' + '%s') % ', '.join(self.list_actions()) + raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg}) + except RegistryException: + msg = _('allowed values for "action" are %s') % ', '.join(self.list_actions()) + raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg}) + if not checkonly: + if role == 'subject': + etype = 
schemacfg.schema.stype.name + ttype = schemacfg.schema.otype.name + else: + etype = schemacfg.schema.otype.name + ttype = schemacfg.schema.stype.name + etyperules = self.source.mapping.setdefault(etype, {}) + etyperules.setdefault((rtype, role, action), []).append( + (ttype, options) ) + self.source.mapping_idx[schemacfg.eid] = ( + etype, rtype, role, action, ttype) + + def del_schema_config(self, schemacfg, checkonly=False): + """deleted CWSourceSchemaConfig, modify mapping accordingly""" + etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid] + rules = self.source.mapping[etype][(rtype, role, action)] + rules = [x for x in rules if not x[0] == ttype] + if not rules: + del self.source.mapping[etype][(rtype, role, action)] + + # import handling ########################################################## + + def process(self, url, raise_on_error=False, partialcommit=True): + """IDataFeedParser main entry point""" + if url.startswith('http'): # XXX similar loose test as in parse of sources.datafeed + url = self.complete_url(url) + super(CWEntityXMLParser, self).process(url, raise_on_error, partialcommit) + + def parse_etree(self, parent): + for node in list(parent): + builder = self._cw.vreg['components'].select( + 'cw.entityxml.item-builder', self._cw, node=node, + parser=self) + yield builder.build_item() + + def process_item(self, item, rels): + """ + item and rels are what's returned by the item builder `build_item` method: + + * `item` is an {attribute: value} dictionary + * `rels` is for relations and structured as + {role: {relation: [(related item, related rels)...]} + """ + entity = self.extid2entity(str(item['cwuri']), item['cwtype'], + cwsource=item['cwsource'], item=item) + if entity is None: + return None + if entity.eid in self._processed_entities: + return entity + self._processed_entities.add(entity.eid) + if not (self.created_during_pull(entity) or self.updated_during_pull(entity)): + attrs = extract_typed_attrs(entity.e_schema, item) + self.update_if_necessary(entity, attrs) + self.process_relations(entity, rels) + return entity + + def process_relations(self, entity, rels): + etype = entity.__regid__ + for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems(): + try: + related_items = rels[role][rtype] + except KeyError: + self.import_log.record_error('relation %s-%s not found in xml export of %s' + % (rtype, role, etype)) + continue + try: + linker = self.select_linker(action, rtype, role, entity) + except RegistryException: + self.import_log.record_error('no linker for action %s' % action) + else: + linker.link_items(related_items, rules) + + def before_entity_copy(self, entity, sourceparams): + """IDataFeedParser callback""" + attrs = extract_typed_attrs(entity.e_schema, sourceparams['item']) + entity.cw_edited.update(attrs) + + def complete_url(self, url, etype=None, known_relations=None): + """append to the url's query string information about relation that should + be included in the resulting xml, according to source mapping. + + If etype is not specified, try to guess it using the last path part of + the url, i.e. the format used by default in cubicweb to map all entities + of a given type as in 'http://mysite.org/EntityType'. + + If `known_relations` is given, it should be a dictionary of already + known relations, so they don't get queried again. 
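+
+        For instance, with the CWUser mapping shown in the module docstring, a
+        url such as 'http://mysite.org/CWUser' would be completed into
+        something like the following (parameter order and encoding may vary):
+
+          'http://mysite.org/CWUser?vid=xml&relation=in_group-subject&relation=tags-object&relation=use_email-subject'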
+ """ + try: + url, qs = url.split('?', 1) + except ValueError: + qs = '' + params = parse_qs(qs) + if not 'vid' in params: + params['vid'] = ['xml'] + if etype is None: + try: + etype = url.rsplit('/', 1)[1] + except ValueError: + return url + '?' + self._cw.build_url_params(**params) + try: + etype = self._cw.vreg.case_insensitive_etypes[etype.lower()] + except KeyError: + return url + '?' + self._cw.build_url_params(**params) + relations = params.setdefault('relation', []) + for rtype, role, _ in self.source.mapping.get(etype, ()): + if known_relations and rtype in known_relations.get('role', ()): + continue + reldef = '%s-%s' % (rtype, role) + if not reldef in relations: + relations.append(reldef) + return url + '?' + self._cw.build_url_params(**params) + + def complete_item(self, item, rels): + try: + return self._parsed_urls[item['cwuri']] + except KeyError: + itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels) + item_rels = list(self.parse(itemurl)) + assert len(item_rels) == 1, 'url %s expected to bring back one '\ + 'and only one entity, got %s' % (itemurl, len(item_rels)) + self._parsed_urls[item['cwuri']] = item_rels[0] + if rels: + # XXX (do it better) merge relations + new_rels = item_rels[0][1] + new_rels.get('subject', {}).update(rels.get('subject', {})) + new_rels.get('object', {}).update(rels.get('object', {})) + return item_rels[0] + + +class CWEntityXMLItemBuilder(Component): + __regid__ = 'cw.entityxml.item-builder' + + def __init__(self, _cw, parser, node, **kwargs): + super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs) + self.parser = parser + self.node = node + + def build_item(self): + """parse a XML document node and return two dictionaries defining (part + of) an entity: + + - {attribute: value} + - {role: {relation: [(related item, related rels)...]} + """ + node = self.node + item = dict(node.attrib.items()) + item['cwtype'] = unicode(node.tag) + item.setdefault('cwsource', None) + try: + item['eid'] = typed_eid(item['eid']) + except KeyError: + # cw < 3.11 compat mode XXX + item['eid'] = typed_eid(node.find('eid').text) + item['cwuri'] = node.find('cwuri').text + rels = {} + for child in node: + role = child.get('role') + if role: + # relation + related = rels.setdefault(role, {}).setdefault(child.tag, []) + related += self.parser.parse_etree(child) + elif child.text: + # attribute + item[child.tag] = unicode(child.text) + else: + # None attribute (empty tag) + item[child.tag] = None + return item, rels + + +class CWEntityXMLActionCopy(Component): + """implementation of cubicweb entity xml parser's'copy' action + + Takes no option. 
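+
+    A mapping rule using this action looks like the following entry, taken
+    from the module docstring example:
+
+        (u'use_email', u'subject', u'copy'): [(u'EmailAddress', {})]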
+ """ + __regid__ = 'cw.entityxml.action.copy' + + def __init__(self, _cw, parser, rtype, role, entity=None, **kwargs): + super(CWEntityXMLActionCopy, self).__init__(_cw, **kwargs) + self.parser = parser + self.rtype = rtype + self.role = role + self.entity = entity + + @classproperty + def action(cls): + return cls.__regid__.rsplit('.', 1)[-1] + + def check_options(self, options, eid): + self._check_no_options(options, eid) + + def _check_no_options(self, options, eid, msg=None): + if options: + if msg is None: + msg = self._cw._("'%s' action doesn't take any options") % self.action + raise ValidationError(eid, {rn('options', 'subject'): msg}) + + def link_items(self, others, rules): + assert not any(x[1] for x in rules), "'copy' action takes no option" + ttypes = frozenset([x[0] for x in rules]) + eids = [] # local eids + for item, rels in others: + if item['cwtype'] in ttypes: + item, rels = self.parser.complete_item(item, rels) + other_entity = self.parser.process_item(item, rels) + if other_entity is not None: + eids.append(other_entity.eid) + if eids: + self._set_relation(eids) + else: + self._clear_relation(ttypes) + + def _clear_relation(self, ttypes): + if not self.parser.created_during_pull(self.entity): + if len(ttypes) > 1: + typerestr = ', Y is IN(%s)' % ','.join(ttypes) + else: + typerestr = ', Y is %s' % ','.join(ttypes) + self._cw.execute('DELETE ' + rtype_role_rql(self.rtype, self.role) + typerestr, + {'x': self.entity.eid}) + + def _set_relation(self, eids): + assert eids + rtype = self.rtype + rqlbase = rtype_role_rql(rtype, self.role) + eidstr = ','.join(str(eid) for eid in eids) + self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr), + {'x': self.entity.eid}) + if self.role == 'object': + rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rqlbase, eidstr, rtype) + else: + rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rqlbase, eidstr, rtype) + self._cw.execute(rql, {'x': self.entity.eid}) + + +class CWEntityXMLActionLink(CWEntityXMLActionCopy): + """implementation of cubicweb entity xml parser's'link' action + + requires a 'linkattr' option to control search of the linked entity. 
+ """ + __regid__ = 'cw.entityxml.action.link' + + def check_options(self, options, eid): + if not 'linkattr' in options: + msg = self._cw._("'%s' action requires 'linkattr' option") % self.action + raise ValidationError(eid, {rn('options', 'subject'): msg}) + + create_when_not_found = False + + def link_items(self, others, rules): + for ttype, options in rules: + searchattrs = splitstrip(options.get('linkattr', '')) + self._related_link(ttype, others, searchattrs) + + def _related_link(self, ttype, others, searchattrs): + def issubset(x,y): + return all(z in y for z in x) + eids = [] # local eids + log = self.parser.import_log + for item, rels in others: + if item['cwtype'] != ttype: + continue + if not issubset(searchattrs, item): + item, rels = self.parser.complete_item(item, rels) + if not issubset(searchattrs, item): + log.record_error('missing attribute, got %s expected keys %s' + % (item, searchattrs)) + continue + # XXX str() needed with python < 2.6 + kwargs = dict((str(attr), item[attr]) for attr in searchattrs) + targets = self._find_entities(item, kwargs) + if len(targets) == 1: + entity = targets[0] + elif not targets and self.create_when_not_found: + entity = self._cw.create_entity(item['cwtype'], **kwargs) + else: + if len(targets) > 1: + log.record_error('ambiguous link: found %s entity %s with attributes %s' + % (len(targets), item['cwtype'], kwargs)) + else: + log.record_error('can not find %s entity with attributes %s' + % (item['cwtype'], kwargs)) + continue + eids.append(entity.eid) + self.parser.process_relations(entity, rels) + if eids: + self._set_relation(eids) + else: + self._clear_relation((ttype,)) + + def _find_entities(self, item, kwargs): + return tuple(self._cw.find_entities(item['cwtype'], **kwargs)) + + +class CWEntityXMLActionLinkInState(CWEntityXMLActionLink): + """custom implementation of cubicweb entity xml parser's'link' action for + in_state relation + """ + __select__ = match_rtype('in_state') + + def check_options(self, options, eid): + super(CWEntityXMLActionLinkInState, self).check_options(options, eid) + if not 'name' in options['linkattr']: + msg = self._cw._("'%s' action for in_state relation should at least have 'linkattr=name' option") % self.action + raise ValidationError(eid, {rn('options', 'subject'): msg}) + + def _find_entities(self, item, kwargs): + assert 'name' in item # XXX else, complete_item + state_name = item['name'] + wf = self.entity.cw_adapt_to('IWorkflowable').current_workflow + state = wf.state_by_name(state_name) + if state is None: + return () + return (state,) + + +class CWEntityXMLActionLinkOrCreate(CWEntityXMLActionLink): + """implementation of cubicweb entity xml parser's'link-or-create' action + + requires a 'linkattr' option to control search of the linked entity. + """ + __regid__ = 'cw.entityxml.action.link-or-create' + create_when_not_found = True diff -r 981f6e487788 -r 1867e252e487 sobjects/ldapparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sobjects/ldapparser.py Tue Jan 31 21:43:24 2012 +0100 @@ -0,0 +1,99 @@ +# copyright 2011-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. 
+# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see . +"""cubicweb ldap feed source + +unlike ldapuser source, this source is copy based and will import ldap content +(beside passwords for authentication) into the system source. +""" +from base64 import b64decode + +from logilab.common.decorators import cached + +from cubicweb.server.sources import datafeed + +class DataFeedlDAPParser(datafeed.DataFeedParser): + __regid__ = 'ldapfeed' + + def process(self, url, raise_on_error=False, partialcommit=True): + """IDataFeedParser main entry point""" + source = self.source + searchstr = '(&%s)' % ''.join(source.base_filters) + try: + ldap_emailattr = source.user_rev_attrs['email'] + except KeyError: + ldap_emailattr = None + for userdict in source._search(self._cw, source.user_base_dn, + source.user_base_scope, searchstr): + entity = self.extid2entity(userdict['dn'], 'CWUser', **userdict) + if not self.created_during_pull(entity): + self.notify_updated(entity) + attrs = dict( (k, v) for k, v in userdict.iteritems() + if not k in ('dn', 'email') ) + self.update_if_necessary(entity, attrs) + self._process_email(entity, userdict) + + def before_entity_copy(self, entity, sourceparams): + if entity.__regid__ == 'EmailAddress': + entity.cw_edited['address'] = sourceparams['address'] + else: + for ldapattr, cwattr in self.source.user_attrs.iteritems(): + if cwattr != 'email': + entity.cw_edited[cwattr] = sourceparams[ldapattr] + return entity + + def after_entity_copy(self, entity, sourceparams): + super(DataFeedlDAPParser, self).after_entity_copy(entity, sourceparams) + if entity.__regid__ == 'EmailAddress': + return + groups = [self._get_group(n) for n in self.source.user_default_groups] + entity.set_relations(in_group=groups) + self._process_email(entity, sourceparams) + + def is_deleted(self, extid, etype, eid): + try: + extid, _ = extid.rsplit('@@', 1) + except ValueError: + pass + return self.source.object_exists_in_ldap(extid) + + def _process_email(self, entity, userdict): + try: + emailaddrs = userdict[self.source.user_rev_attrs['email']] + except KeyError: + return # no email for that user, nothing to do + if not isinstance(emailaddrs, list): + emailaddrs = [emailaddrs] + for emailaddr in emailaddrs: + # search for existant email first, may be coming from another source + rset = self._cw.execute('EmailAddress X WHERE X address %(addr)s', + {'addr': emailaddr}) + if not rset: + # not found, create it. first forge an external id + emailextid = userdict['dn'] + '@@' + emailaddr + email = self.extid2entity(emailextid, 'EmailAddress', + address=emailaddr) + if entity.primary_email: + entity.set_relations(use_email=email) + else: + entity.set_relations(primary_email=email) + # XXX else check use_email relation? + + @cached + def _get_group(self, name): + return self._cw.execute('Any X WHERE X is CWGroup, X name %(name)s', + {'name': name}).get_entity(0, 0) diff -r 981f6e487788 -r 1867e252e487 sobjects/parsers.py --- a/sobjects/parsers.py Thu Feb 02 14:30:07 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,508 +0,0 @@ -# copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
-# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr -# -# This file is part of CubicWeb. -# -# CubicWeb is free software: you can redistribute it and/or modify it under the -# terms of the GNU Lesser General Public License as published by the Free -# Software Foundation, either version 2.1 of the License, or (at your option) -# any later version. -# -# CubicWeb is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -# details. -# -# You should have received a copy of the GNU Lesser General Public License along -# with CubicWeb. If not, see . -"""datafeed parser for xml generated by cubicweb - -Example of mapping for CWEntityXMLParser:: - - {u'CWUser': { # EntityType - (u'in_group', u'subject', u'link'): [ # (rtype, role, action) - (u'CWGroup', {u'linkattr': u'name'})], # -> rules = [(EntityType, options), ...] - (u'tags', u'object', u'link-or-create'): [ # (...) - (u'Tag', {u'linkattr': u'name'})], # -> ... - (u'use_email', u'subject', u'copy'): [ # (...) - (u'EmailAddress', {})] # -> ... - } - } - -""" - -import os.path as osp -from datetime import datetime, timedelta, time -from urllib import urlencode -from cgi import parse_qs # in urlparse with python >= 2.6 - -from logilab.common.date import todate, totime -from logilab.common.textutils import splitstrip, text_to_dict -from logilab.common.decorators import classproperty - -from yams.constraints import BASE_CONVERTERS -from yams.schema import role_name as rn - -from cubicweb import ValidationError, RegistryException, typed_eid -from cubicweb.view import Component -from cubicweb.server.sources import datafeed -from cubicweb.server.hook import match_rtype - -# XXX see cubicweb.cwvreg.YAMS_TO_PY -# XXX see cubicweb.web.views.xmlrss.SERIALIZERS -DEFAULT_CONVERTERS = BASE_CONVERTERS.copy() -DEFAULT_CONVERTERS['String'] = unicode -DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8') -def convert_date(ustr): - return todate(datetime.strptime(ustr, '%Y-%m-%d')) -DEFAULT_CONVERTERS['Date'] = convert_date -def convert_datetime(ustr): - if '.' in ustr: # assume %Y-%m-%d %H:%M:%S.mmmmmm - ustr = ustr.split('.',1)[0] - return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S') -DEFAULT_CONVERTERS['Datetime'] = convert_datetime -# XXX handle timezone, though this will be enough as TZDatetime are -# serialized without time zone by default (UTC time). See -# cw.web.views.xmlrss.SERIALIZERS. 
-DEFAULT_CONVERTERS['TZDatetime'] = convert_datetime -def convert_time(ustr): - return totime(datetime.strptime(ustr, '%H:%M:%S')) -DEFAULT_CONVERTERS['Time'] = convert_time -DEFAULT_CONVERTERS['TZTime'] = convert_time -def convert_interval(ustr): - return time(seconds=int(ustr)) -DEFAULT_CONVERTERS['Interval'] = convert_interval - -def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS): - typeddict = {} - for rschema in eschema.subject_relations(): - if rschema.final and rschema in stringdict: - if rschema in ('eid', 'cwuri', 'cwtype', 'cwsource'): - continue - attrtype = eschema.destination(rschema) - value = stringdict[rschema] - if value is not None: - value = converters[attrtype](value) - typeddict[rschema.type] = value - return typeddict - -def rtype_role_rql(rtype, role): - if role == 'object': - return 'Y %s X WHERE X eid %%(x)s' % rtype - else: - return 'X %s Y WHERE X eid %%(x)s' % rtype - - -class CWEntityXMLParser(datafeed.DataFeedXMLParser): - """datafeed parser for the 'xml' entity view - - Most of the logic is delegated to the following components: - - * an "item builder" component, turning an etree xml node into a specific - python dictionnary representing an entity - - * "action" components, selected given an entity, a relation and its role in - the relation, and responsible to link the entity to given related items - (eg dictionnary) - - So the parser is only doing the gluing service and the connection to the - source. - """ - __regid__ = 'cw.entityxml' - - def __init__(self, *args, **kwargs): - super(CWEntityXMLParser, self).__init__(*args, **kwargs) - self._parsed_urls = {} - self._processed_entities = set() - - def select_linker(self, action, rtype, role, entity=None): - try: - return self._cw.vreg['components'].select( - 'cw.entityxml.action.%s' % action, self._cw, entity=entity, - rtype=rtype, role=role, parser=self) - except RegistryException: - raise RegistryException('Unknown action %s' % action) - - def list_actions(self): - reg = self._cw.vreg['components'] - return sorted(clss[0].action for rid, clss in reg.iteritems() - if rid.startswith('cw.entityxml.action.')) - - # mapping handling ######################################################### - - def add_schema_config(self, schemacfg, checkonly=False): - """added CWSourceSchemaConfig, modify mapping accordingly""" - _ = self._cw._ - try: - rtype = schemacfg.schema.rtype.name - except AttributeError: - msg = _("entity and relation types can't be mapped, only attributes " - "or relations") - raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg}) - if schemacfg.options: - options = text_to_dict(schemacfg.options) - else: - options = {} - try: - role = options.pop('role') - if role not in ('subject', 'object'): - raise KeyError - except KeyError: - msg = _('"role=subject" or "role=object" must be specified in options') - raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg}) - try: - action = options.pop('action') - linker = self.select_linker(action, rtype, role) - linker.check_options(options, schemacfg.eid) - except KeyError: - msg = _('"action" must be specified in options; allowed values are ' - '%s') % ', '.join(self.list_actions()) - raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg}) - except RegistryException: - msg = _('allowed values for "action" are %s') % ', '.join(self.list_actions()) - raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg}) - if not checkonly: - if role == 'subject': - etype = 
schemacfg.schema.stype.name - ttype = schemacfg.schema.otype.name - else: - etype = schemacfg.schema.otype.name - ttype = schemacfg.schema.stype.name - etyperules = self.source.mapping.setdefault(etype, {}) - etyperules.setdefault((rtype, role, action), []).append( - (ttype, options) ) - self.source.mapping_idx[schemacfg.eid] = ( - etype, rtype, role, action, ttype) - - def del_schema_config(self, schemacfg, checkonly=False): - """deleted CWSourceSchemaConfig, modify mapping accordingly""" - etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid] - rules = self.source.mapping[etype][(rtype, role, action)] - rules = [x for x in rules if not x[0] == ttype] - if not rules: - del self.source.mapping[etype][(rtype, role, action)] - - # import handling ########################################################## - - def process(self, url, raise_on_error=False, partialcommit=True): - """IDataFeedParser main entry point""" - if url.startswith('http'): # XXX similar loose test as in parse of sources.datafeed - url = self.complete_url(url) - super(CWEntityXMLParser, self).process(url, raise_on_error, partialcommit) - - def parse_etree(self, parent): - for node in list(parent): - builder = self._cw.vreg['components'].select( - 'cw.entityxml.item-builder', self._cw, node=node, - parser=self) - yield builder.build_item() - - def process_item(self, item, rels): - """ - item and rels are what's returned by the item builder `build_item` method: - - * `item` is an {attribute: value} dictionary - * `rels` is for relations and structured as - {role: {relation: [(related item, related rels)...]} - """ - entity = self.extid2entity(str(item['cwuri']), item['cwtype'], - cwsource=item['cwsource'], item=item) - if entity is None: - return None - if entity.eid in self._processed_entities: - return entity - self._processed_entities.add(entity.eid) - if not (self.created_during_pull(entity) or self.updated_during_pull(entity)): - self.notify_updated(entity) - attrs = extract_typed_attrs(entity.e_schema, item) - # check modification date and compare attribute values to only - # update what's actually needed - entity.complete(tuple(attrs)) - mdate = attrs.get('modification_date') - if not mdate or mdate > entity.modification_date: - attrs = dict( (k, v) for k, v in attrs.iteritems() - if v != getattr(entity, k)) - if attrs: - entity.set_attributes(**attrs) - self.process_relations(entity, rels) - return entity - - def process_relations(self, entity, rels): - etype = entity.__regid__ - for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems(): - try: - related_items = rels[role][rtype] - except KeyError: - self.import_log.record_error('relation %s-%s not found in xml export of %s' - % (rtype, role, etype)) - continue - try: - linker = self.select_linker(action, rtype, role, entity) - except RegistryException: - self.import_log.record_error('no linker for action %s' % action) - else: - linker.link_items(related_items, rules) - - def before_entity_copy(self, entity, sourceparams): - """IDataFeedParser callback""" - attrs = extract_typed_attrs(entity.e_schema, sourceparams['item']) - entity.cw_edited.update(attrs) - - def complete_url(self, url, etype=None, known_relations=None): - """append to the url's query string information about relation that should - be included in the resulting xml, according to source mapping. - - If etype is not specified, try to guess it using the last path part of - the url, i.e. 
the format used by default in cubicweb to map all entities - of a given type as in 'http://mysite.org/EntityType'. - - If `known_relations` is given, it should be a dictionary of already - known relations, so they don't get queried again. - """ - try: - url, qs = url.split('?', 1) - except ValueError: - qs = '' - params = parse_qs(qs) - if not 'vid' in params: - params['vid'] = ['xml'] - if etype is None: - try: - etype = url.rsplit('/', 1)[1] - except ValueError: - return url + '?' + self._cw.build_url_params(**params) - try: - etype = self._cw.vreg.case_insensitive_etypes[etype.lower()] - except KeyError: - return url + '?' + self._cw.build_url_params(**params) - relations = params.setdefault('relation', []) - for rtype, role, _ in self.source.mapping.get(etype, ()): - if known_relations and rtype in known_relations.get('role', ()): - continue - reldef = '%s-%s' % (rtype, role) - if not reldef in relations: - relations.append(reldef) - return url + '?' + self._cw.build_url_params(**params) - - def complete_item(self, item, rels): - try: - return self._parsed_urls[item['cwuri']] - except KeyError: - itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels) - item_rels = list(self.parse(itemurl)) - assert len(item_rels) == 1, 'url %s expected to bring back one '\ - 'and only one entity, got %s' % (itemurl, len(item_rels)) - self._parsed_urls[item['cwuri']] = item_rels[0] - if rels: - # XXX (do it better) merge relations - new_rels = item_rels[0][1] - new_rels.get('subject', {}).update(rels.get('subject', {})) - new_rels.get('object', {}).update(rels.get('object', {})) - return item_rels[0] - - -class CWEntityXMLItemBuilder(Component): - __regid__ = 'cw.entityxml.item-builder' - - def __init__(self, _cw, parser, node, **kwargs): - super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs) - self.parser = parser - self.node = node - - def build_item(self): - """parse a XML document node and return two dictionaries defining (part - of) an entity: - - - {attribute: value} - - {role: {relation: [(related item, related rels)...]} - """ - node = self.node - item = dict(node.attrib.items()) - item['cwtype'] = unicode(node.tag) - item.setdefault('cwsource', None) - try: - item['eid'] = typed_eid(item['eid']) - except KeyError: - # cw < 3.11 compat mode XXX - item['eid'] = typed_eid(node.find('eid').text) - item['cwuri'] = node.find('cwuri').text - rels = {} - for child in node: - role = child.get('role') - if role: - # relation - related = rels.setdefault(role, {}).setdefault(child.tag, []) - related += self.parser.parse_etree(child) - elif child.text: - # attribute - item[child.tag] = unicode(child.text) - else: - # None attribute (empty tag) - item[child.tag] = None - return item, rels - - -class CWEntityXMLActionCopy(Component): - """implementation of cubicweb entity xml parser's'copy' action - - Takes no option. 
- """ - __regid__ = 'cw.entityxml.action.copy' - - def __init__(self, _cw, parser, rtype, role, entity=None, **kwargs): - super(CWEntityXMLActionCopy, self).__init__(_cw, **kwargs) - self.parser = parser - self.rtype = rtype - self.role = role - self.entity = entity - - @classproperty - def action(cls): - return cls.__regid__.rsplit('.', 1)[-1] - - def check_options(self, options, eid): - self._check_no_options(options, eid) - - def _check_no_options(self, options, eid, msg=None): - if options: - if msg is None: - msg = self._cw._("'%s' action doesn't take any options") % self.action - raise ValidationError(eid, {rn('options', 'subject'): msg}) - - def link_items(self, others, rules): - assert not any(x[1] for x in rules), "'copy' action takes no option" - ttypes = frozenset([x[0] for x in rules]) - eids = [] # local eids - for item, rels in others: - if item['cwtype'] in ttypes: - item, rels = self.parser.complete_item(item, rels) - other_entity = self.parser.process_item(item, rels) - if other_entity is not None: - eids.append(other_entity.eid) - if eids: - self._set_relation(eids) - else: - self._clear_relation(ttypes) - - def _clear_relation(self, ttypes): - if not self.parser.created_during_pull(self.entity): - if len(ttypes) > 1: - typerestr = ', Y is IN(%s)' % ','.join(ttypes) - else: - typerestr = ', Y is %s' % ','.join(ttypes) - self._cw.execute('DELETE ' + rtype_role_rql(self.rtype, self.role) + typerestr, - {'x': self.entity.eid}) - - def _set_relation(self, eids): - assert eids - rtype = self.rtype - rqlbase = rtype_role_rql(rtype, self.role) - eidstr = ','.join(str(eid) for eid in eids) - self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr), - {'x': self.entity.eid}) - if self.role == 'object': - rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rqlbase, eidstr, rtype) - else: - rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rqlbase, eidstr, rtype) - self._cw.execute(rql, {'x': self.entity.eid}) - - -class CWEntityXMLActionLink(CWEntityXMLActionCopy): - """implementation of cubicweb entity xml parser's'link' action - - requires a 'linkattr' option to control search of the linked entity. 
- """ - __regid__ = 'cw.entityxml.action.link' - - def check_options(self, options, eid): - if not 'linkattr' in options: - msg = self._cw._("'%s' action requires 'linkattr' option") % self.action - raise ValidationError(eid, {rn('options', 'subject'): msg}) - - create_when_not_found = False - - def link_items(self, others, rules): - for ttype, options in rules: - searchattrs = splitstrip(options.get('linkattr', '')) - self._related_link(ttype, others, searchattrs) - - def _related_link(self, ttype, others, searchattrs): - def issubset(x,y): - return all(z in y for z in x) - eids = [] # local eids - log = self.parser.import_log - for item, rels in others: - if item['cwtype'] != ttype: - continue - if not issubset(searchattrs, item): - item, rels = self.parser.complete_item(item, rels) - if not issubset(searchattrs, item): - log.record_error('missing attribute, got %s expected keys %s' - % (item, searchattrs)) - continue - # XXX str() needed with python < 2.6 - kwargs = dict((str(attr), item[attr]) for attr in searchattrs) - targets = self._find_entities(item, kwargs) - if len(targets) == 1: - entity = targets[0] - elif not targets and self.create_when_not_found: - entity = self._cw.create_entity(item['cwtype'], **kwargs) - else: - if len(targets) > 1: - log.record_error('ambiguous link: found %s entity %s with attributes %s' - % (len(targets), item['cwtype'], kwargs)) - else: - log.record_error('can not find %s entity with attributes %s' - % (item['cwtype'], kwargs)) - continue - eids.append(entity.eid) - self.parser.process_relations(entity, rels) - if eids: - self._set_relation(eids) - else: - self._clear_relation((ttype,)) - - def _find_entities(self, item, kwargs): - return tuple(self._cw.find_entities(item['cwtype'], **kwargs)) - - -class CWEntityXMLActionLinkInState(CWEntityXMLActionLink): - """custom implementation of cubicweb entity xml parser's'link' action for - in_state relation - """ - __select__ = match_rtype('in_state') - - def check_options(self, options, eid): - super(CWEntityXMLActionLinkInState, self).check_options(options, eid) - if not 'name' in options['linkattr']: - msg = self._cw._("'%s' action for in_state relation should at least have 'linkattr=name' option") % self.action - raise ValidationError(eid, {rn('options', 'subject'): msg}) - - def _find_entities(self, item, kwargs): - assert 'name' in item # XXX else, complete_item - state_name = item['name'] - wf = self.entity.cw_adapt_to('IWorkflowable').current_workflow - state = wf.state_by_name(state_name) - if state is None: - return () - return (state,) - - -class CWEntityXMLActionLinkOrCreate(CWEntityXMLActionLink): - """implementation of cubicweb entity xml parser's'link-or-create' action - - requires a 'linkattr' option to control search of the linked entity. 
- """ - __regid__ = 'cw.entityxml.action.link-or-create' - create_when_not_found = True - - -def registration_callback(vreg): - vreg.register_all(globals().values(), __name__) - global URL_MAPPING - URL_MAPPING = {} - if vreg.config.apphome: - url_mapping_file = osp.join(vreg.config.apphome, 'urlmapping.py') - if osp.exists(url_mapping_file): - URL_MAPPING = eval(file(url_mapping_file).read()) - vreg.info('using url mapping %s from %s', URL_MAPPING, url_mapping_file) diff -r 981f6e487788 -r 1867e252e487 sobjects/test/unittest_parsers.py --- a/sobjects/test/unittest_parsers.py Thu Feb 02 14:30:07 2012 +0100 +++ b/sobjects/test/unittest_parsers.py Tue Jan 31 21:43:24 2012 +0100 @@ -162,7 +162,7 @@ dfsource = self.repo.sources_by_uri['myfeed'] parser = dfsource._get_parser(self.session) self.assertEqual(parser.complete_url('http://www.cubicweb.org/CWUser'), - 'http://www.cubicweb.org/CWUser?relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject&vid=xml') + 'http://www.cubicweb.org/CWUser?relation=tags-object&relation=in_group-subject&relation=in_tate-subject&relation=use_email-subject&vid=xml') self.assertEqual(parser.complete_url('http://www.cubicweb.org/cwuser'), 'http://www.cubicweb.org/cwuser?relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject&vid=xml') self.assertEqual(parser.complete_url('http://www.cubicweb.org/cwuser?vid=rdf&relation=hop'),