[repo] optimize massive insertion/deletion by using the new set_operation function
Idea is that on massive insertion, cost of handling the list of operation
become non negligeable, so we should minimize the number of operations in
that list.
The set_operation function ease usage of operation associated to data in
session.transaction_data, and we only add the operation when data set isn't
initialized yet, else we simply add data to the set. The operation then
simply process accumulated data.
"""CubicWeb web client application object
:organization: Logilab
:copyright: 2001-2010 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses
"""
__docformat__ = "restructuredtext en"
import sys
from time import clock, time
from logilab.common.deprecation import deprecated
from rql import BadRQLQuery
from cubicweb import set_log_methods, cwvreg
from cubicweb import (
ValidationError, Unauthorized, AuthenticationError, NoSelectableObject,
RepositoryError, CW_EVENT_MANAGER)
from cubicweb.web import LOGGER, component
from cubicweb.web import (
StatusResponse, DirectResponse, Redirect, NotFound,
RemoteCallFailed, ExplicitLogin, InvalidSession, RequestError)
# make session manager available through a global variable so the debug view can
# print information about web session
SESSION_MANAGER = None
class AbstractSessionManager(component.Component):
"""manage session data associated to a session identifier"""
__regid__ = 'sessionmanager'
def __init__(self, vreg):
self.session_time = vreg.config['http-session-time'] or None
assert self.session_time is None or self.session_time > 0
self.cleanup_session_time = vreg.config['cleanup-session-time'] or 43200
assert self.cleanup_session_time > 0
self.cleanup_anon_session_time = vreg.config['cleanup-anonymous-session-time'] or 120
assert self.cleanup_anon_session_time > 0
if self.session_time:
assert self.cleanup_session_time < self.session_time
assert self.cleanup_anon_session_time < self.session_time
self.authmanager = vreg['components'].select('authmanager', vreg=vreg)
def clean_sessions(self):
"""cleanup sessions which has not been unused since a given amount of
time. Return the number of sessions which have been closed.
"""
self.debug('cleaning http sessions')
closed, total = 0, 0
for session in self.current_sessions():
no_use_time = (time() - session.last_usage_time)
total += 1
if session.anonymous_connection:
if no_use_time >= self.cleanup_anon_session_time:
self.close_session(session)
closed += 1
elif no_use_time >= self.cleanup_session_time:
self.close_session(session)
closed += 1
return closed, total - closed
def has_expired(self, session):
"""return True if the web session associated to the session is expired
"""
return not (self.session_time is None or
time() < session.last_usage_time + self.session_time)
def current_sessions(self):
"""return currently open sessions"""
raise NotImplementedError()
def get_session(self, req, sessionid):
"""return existing session for the given session identifier"""
raise NotImplementedError()
def open_session(self, req):
"""open and return a new session for the given request
:raise ExplicitLogin: if authentication is required
"""
raise NotImplementedError()
def close_session(self, session):
"""close session on logout or on invalid session detected (expired out,
corrupted...)
"""
raise NotImplementedError()
class AbstractAuthenticationManager(component.Component):
"""authenticate user associated to a request and check session validity"""
id = 'authmanager'
vreg = None # XXX necessary until property for deprecation warning is on appobject
def __init__(self, vreg):
self.vreg = vreg
def authenticate(self, req):
"""authenticate user and return corresponding user object
:raise ExplicitLogin: if authentication is required (no authentication
info found or wrong user/password)
"""
raise NotImplementedError()
class CookieSessionHandler(object):
"""a session handler using a cookie to store the session identifier
:cvar SESSION_VAR:
string giving the name of the variable used to store the session
identifier
"""
SESSION_VAR = '__session'
def __init__(self, appli):
self.vreg = appli.vreg
self.session_manager = self.vreg['components'].select('sessionmanager',
vreg=self.vreg)
global SESSION_MANAGER
SESSION_MANAGER = self.session_manager
if not 'last_login_time' in self.vreg.schema:
self._update_last_login_time = lambda x: None
if self.vreg.config.mode != 'test':
# don't try to reset session manager during test, this leads to
# weird failures when running multiple tests
CW_EVENT_MANAGER.bind('after-registry-reload',
self.reset_session_manager)
def reset_session_manager(self):
data = self.session_manager.dump_data()
self.session_manager = self.vreg['components'].select('sessionmanager',
vreg=self.vreg)
self.session_manager.restore_data(data)
global SESSION_MANAGER
SESSION_MANAGER = self.session_manager
def clean_sessions(self):
"""cleanup sessions which has not been unused since a given amount of
time
"""
self.session_manager.clean_sessions()
def set_session(self, req):
"""associate a session to the request
Session id is searched from :
- # form variable
- cookie
if no session id is found, open a new session for the connected user
or request authentification as needed
:raise Redirect: if authentication has occured and succeed
"""
assert req.cnx is None # at this point no cnx should be set on the request
cookie = req.get_cookie()
try:
sessionid = str(cookie[self.SESSION_VAR].value)
except KeyError: # no session cookie
session = self.open_session(req)
else:
try:
session = self.get_session(req, sessionid)
except InvalidSession:
try:
session = self.open_session(req)
except ExplicitLogin:
req.remove_cookie(cookie, self.SESSION_VAR)
raise
# remember last usage time for web session tracking
session.last_usage_time = time()
def get_session(self, req, sessionid):
return self.session_manager.get_session(req, sessionid)
def open_session(self, req):
session = self.session_manager.open_session(req)
cookie = req.get_cookie()
cookie[self.SESSION_VAR] = session.sessionid
req.set_cookie(cookie, self.SESSION_VAR, maxage=None)
# remember last usage time for web session tracking
session.last_usage_time = time()
if not session.anonymous_connection:
self._postlogin(req)
return session
def _update_last_login_time(self, req):
try:
req.execute('SET X last_login_time NOW WHERE X eid %(x)s',
{'x' : req.user.eid}, 'x')
req.cnx.commit()
except (RepositoryError, Unauthorized):
# ldap user are not writeable for instance
req.cnx.rollback()
except:
req.cnx.rollback()
raise
def _postlogin(self, req):
"""postlogin: the user has been authenticated, redirect to the original
page (index by default) with a welcome message
"""
# Update last connection date
# XXX: this should be in a post login hook in the repository, but there
# we can't differentiate actual login of automatic session
# reopening. Is it actually a problem?
self._update_last_login_time(req)
args = req.form
for forminternal_key in ('__form_id', '__domid', '__errorurl'):
args.pop(forminternal_key, None)
args['__message'] = req._('welcome %s !') % req.user.login
if 'vid' in req.form:
args['vid'] = req.form['vid']
if 'rql' in req.form:
args['rql'] = req.form['rql']
path = req.relative_path(False)
if path == 'login':
path = 'view'
raise Redirect(req.build_url(path, **args))
def logout(self, req, goto_url):
"""logout from the instance by cleaning the session and raising
`AuthenticationError`
"""
self.session_manager.close_session(req.cnx)
req.remove_cookie(req.get_cookie(), self.SESSION_VAR)
raise AuthenticationError(url=goto_url)
class CubicWebPublisher(object):
"""the publisher is a singleton hold by the web frontend, and is responsible
to publish HTTP request.
"""
def __init__(self, config, debug=None,
session_handler_fact=CookieSessionHandler,
vreg=None):
self.info('starting web instance from %s', config.apphome)
if vreg is None:
vreg = cwvreg.CubicWebVRegistry(config, debug=debug)
self.vreg = vreg
# connect to the repository and get instance's schema
self.repo = config.repository(vreg)
if not vreg.initialized:
self.config.init_cubes(self.repo.get_cubes())
vreg.init_properties(self.repo.properties())
vreg.set_schema(self.repo.get_schema())
# set the correct publish method
if config['query-log-file']:
from threading import Lock
self._query_log = open(config['query-log-file'], 'a')
self.publish = self.log_publish
self._logfile_lock = Lock()
else:
self._query_log = None
self.publish = self.main_publish
# instantiate session and url resolving helpers
self.session_handler = session_handler_fact(self)
self.set_urlresolver()
CW_EVENT_MANAGER.bind('after-registry-reload', self.set_urlresolver)
def set_urlresolver(self):
self.url_resolver = self.vreg['components'].select('urlpublisher',
vreg=self.vreg)
def connect(self, req):
"""return a connection for a logged user object according to existing
sessions (i.e. a new connection may be created or an already existing
one may be reused
"""
self.session_handler.set_session(req)
# publish methods #########################################################
def log_publish(self, path, req):
"""wrapper around _publish to log all queries executed for a given
accessed path
"""
try:
return self.main_publish(path, req)
finally:
cnx = req.cnx
self._logfile_lock.acquire()
try:
try:
result = ['\n'+'*'*80]
result.append(req.url())
result += ['%s %s -- (%.3f sec, %.3f CPU sec)' % q for q in cnx.executed_queries]
cnx.executed_queries = []
self._query_log.write('\n'.join(result).encode(req.encoding))
self._query_log.flush()
except Exception:
self.exception('error while logging queries')
finally:
self._logfile_lock.release()
@deprecated("[3.4] use vreg['controllers'].select(...)")
def select_controller(self, oid, req):
try:
return self.vreg['controllers'].select(oid, req=req, appli=self)
except NoSelectableObject:
raise Unauthorized(req._('not authorized'))
def main_publish(self, path, req):
"""method called by the main publisher to process <path>
should return a string containing the resulting page or raise a
`NotFound` exception
:type path: str
:param path: the path part of the url to publish
:type req: `web.Request`
:param req: the request object
:rtype: str
:return: the result of the pusblished url
"""
path = path or 'view'
# don't log form values they may contains sensitive information
self.info('publish "%s" (form params: %s)', path, req.form.keys())
# remove user callbacks on a new request (except for json controllers
# to avoid callbacks being unregistered before they could be called)
tstart = clock()
try:
try:
ctrlid, rset = self.url_resolver.process(req, path)
try:
controller = self.vreg['controllers'].select(ctrlid, req,
appli=self)
except NoSelectableObject:
raise Unauthorized(req._('not authorized'))
req.update_search_state()
result = controller.publish(rset=rset)
if req.cnx is not None:
# req.cnx is None if anonymous aren't allowed and we are
# displaying the cookie authentication form
req.cnx.commit()
except (StatusResponse, DirectResponse):
req.cnx.commit()
raise
except Redirect:
# redirect is raised by edit controller when everything went fine,
# so try to commit
try:
txuuid = req.cnx.commit()
if txuuid is not None:
msg = u'<span class="undo">[<a href="%s">%s</a>]</span>' %(
req.build_url('undo', txuuid=txuuid), req._('undo'))
req.append_to_redirect_message(msg)
except ValidationError, ex:
self.validation_error_handler(req, ex)
except Unauthorized, ex:
req.data['errmsg'] = req._('You\'re not authorized to access this page. '
'If you think you should, please contact the site administrator.')
self.error_handler(req, ex, tb=False)
except Exception, ex:
self.error_handler(req, ex, tb=True)
else:
# delete validation errors which may have been previously set
if '__errorurl' in req.form:
req.del_session_data(req.form['__errorurl'])
raise
except (AuthenticationError, NotFound, RemoteCallFailed):
raise
except ValidationError, ex:
self.validation_error_handler(req, ex)
except (Unauthorized, BadRQLQuery, RequestError), ex:
self.error_handler(req, ex, tb=False)
except Exception, ex:
self.error_handler(req, ex, tb=True)
finally:
if req.cnx is not None:
try:
req.cnx.rollback()
except:
pass # ignore rollback error at this point
self.info('query %s executed in %s sec', req.relative_path(), clock() - tstart)
return result
def validation_error_handler(self, req, ex):
ex.errors = dict((k, v) for k, v in ex.errors.items())
if '__errorurl' in req.form:
forminfo = {'error': ex,
'values': req.form,
'eidmap': req.data.get('eidmap', {})
}
req.set_session_data(req.form['__errorurl'], forminfo)
# XXX form session key / __error_url should be differentiated:
# session key is 'url + #<form dom id', though we usually don't want
# the browser to move to the form since it hides the global
# messages.
raise Redirect(req.form['__errorurl'].rsplit('#', 1)[0])
self.error_handler(req, ex, tb=False)
def error_handler(self, req, ex, tb=False):
excinfo = sys.exc_info()
self.exception(repr(ex))
req.set_header('Cache-Control', 'no-cache')
req.remove_header('Etag')
req.reset_message()
req.reset_headers()
if req.json_request:
raise RemoteCallFailed(unicode(ex))
try:
req.data['ex'] = ex
if tb:
req.data['excinfo'] = excinfo
req.form['vid'] = 'error'
errview = self.vreg['views'].select('error', req)
template = self.main_template_id(req)
content = self.vreg['views'].main_template(req, template, view=errview)
except:
content = self.vreg['views'].main_template(req, 'error-template')
raise StatusResponse(500, content)
def need_login_content(self, req):
return self.vreg['views'].main_template(req, 'login')
def loggedout_content(self, req):
return self.vreg['views'].main_template(req, 'loggedout')
def notfound_content(self, req):
req.form['vid'] = '404'
view = self.vreg['views'].select('404', req)
template = self.main_template_id(req)
return self.vreg['views'].main_template(req, template, view=view)
def main_template_id(self, req):
template = req.form.get('__template', req.property_value('ui.main-template'))
if template not in self.vreg['views']:
template = 'main-template'
return template
set_log_methods(CubicWebPublisher, LOGGER)
set_log_methods(CookieSessionHandler, LOGGER)