diff -r 058bb3dc685f -r 0b59724cb3f2 utils.py --- a/utils.py Mon Jan 04 18:40:30 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,716 +0,0 @@ -# copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved. -# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr -# -# This file is part of CubicWeb. -# -# CubicWeb is free software: you can redistribute it and/or modify it under the -# terms of the GNU Lesser General Public License as published by the Free -# Software Foundation, either version 2.1 of the License, or (at your option) -# any later version. -# -# CubicWeb is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -# details. -# -# You should have received a copy of the GNU Lesser General Public License along -# with CubicWeb. If not, see . -"""Some utilities for CubicWeb server/clients.""" - -from __future__ import division - -__docformat__ = "restructuredtext en" - -import decimal -import datetime -import random -import re -import json - -from operator import itemgetter -from inspect import getargspec -from itertools import repeat -from uuid import uuid4 -from warnings import warn -from threading import Lock -from logging import getLogger - -from six import text_type -from six.moves.urllib.parse import urlparse - -from logilab.mtconverter import xml_escape -from logilab.common.deprecation import deprecated -from logilab.common.date import ustrftime - -_MARKER = object() - -# initialize random seed from current time -random.seed() - -def admincnx(appid): - from cubicweb.cwconfig import CubicWebConfiguration - from cubicweb.server.repository import Repository - from cubicweb.server.utils import TasksManager - config = CubicWebConfiguration.config_for(appid) - - login = config.default_admin_config['login'] - password = config.default_admin_config['password'] - - repo = Repository(config, TasksManager()) - session = repo.new_session(login, password=password) - return session.new_cnx() - - -def make_uid(key=None): - """Return a unique identifier string. - - if specified, `key` is used to prefix the generated uid so it can be used - for instance as a DOM id or as sql table name. - - See uuid.uuid4 documentation for the shape of the generated identifier, but - this is basically a 32 bits hexadecimal string. - """ - if key is None: - return uuid4().hex - return str(key) + uuid4().hex - - -def support_args(callable, *argnames): - """return true if the callable support given argument names""" - if isinstance(callable, type): - callable = callable.__init__ - argspec = getargspec(callable) - if argspec[2]: - return True - for argname in argnames: - if argname not in argspec[0]: - return False - return True - - -class wrap_on_write(object): - """ Sometimes it is convenient to NOT write some container element - if it happens that there is nothing to be written within, - but this cannot be known beforehand. - Hence one can do this: - - .. sourcecode:: python - - with wrap_on_write(w, '

', '

') as wow: - component.render_stuff(wow) - """ - def __init__(self, w, tag, closetag=None): - self.written = False - self.tag = text_type(tag) - self.closetag = closetag - self.w = w - - def __enter__(self): - return self - - def __call__(self, data): - if self.written is False: - self.w(self.tag) - self.written = True - self.w(data) - - def __exit__(self, exctype, value, traceback): - if self.written is True: - if self.closetag: - self.w(text_type(self.closetag)) - else: - self.w(self.tag.replace('<', '' % ( - id(self), self._item, self._size) - def __len__(self): - return self._size - def __iter__(self): - return repeat(self._item, self._size) - def __getitem__(self, index): - if isinstance(index, slice): - # XXX could be more efficient, but do we bother? - return ([self._item] * self._size)[index] - return self._item - def __delitem__(self, idc): - assert self._size > 0 - self._size -= 1 - def __add__(self, other): - if isinstance(other, RepeatList): - if other._item == self._item: - return RepeatList(self._size + other._size, self._item) - return ([self._item] * self._size) + other[:] - return ([self._item] * self._size) + other - def __radd__(self, other): - if isinstance(other, RepeatList): - if other._item == self._item: - return RepeatList(self._size + other._size, self._item) - return other[:] + ([self._item] * self._size) - return other[:] + ([self._item] * self._size) - def __eq__(self, other): - if isinstance(other, RepeatList): - return other._size == self._size and other._item == self._item - return self[:] == other - def __ne__(self, other): - return not (self == other) - def __hash__(self): - raise NotImplementedError - def pop(self, i): - self._size -= 1 - - -class UStringIO(list): - """a file wrapper which automatically encode unicode string to an encoding - specifed in the constructor - """ - - def __init__(self, tracewrites=False, *args, **kwargs): - self.tracewrites = tracewrites - super(UStringIO, self).__init__(*args, **kwargs) - - def __bool__(self): - return True - - __nonzero__ = __bool__ - - def write(self, value): - assert isinstance(value, text_type), u"unicode required not %s : %s"\ - % (type(value).__name__, repr(value)) - if self.tracewrites: - from traceback import format_stack - stack = format_stack(None)[:-1] - escaped_stack = xml_escape(json_dumps(u'\n'.join(stack))) - escaped_html = xml_escape(value).replace('\n', '
\n') - tpl = u'%s' - value = tpl % (escaped_stack, escaped_html) - self.append(value) - - def getvalue(self): - return u''.join(self) - - def __repr__(self): - return '<%s at %#x>' % (self.__class__.__name__, id(self)) - - -class HTMLHead(UStringIO): - """wraps HTML header's stream - - Request objects use a HTMLHead instance to ease adding of - javascripts and stylesheets - """ - js_unload_code = u'''if (typeof(pageDataUnloaded) == 'undefined') { - jQuery(window).unload(unloadPageData); - pageDataUnloaded = true; -}''' - script_opening = u'' - - def __init__(self, req, *args, **kwargs): - super(HTMLHead, self).__init__(*args, **kwargs) - self.jsvars = [] - self.jsfiles = [] - self.cssfiles = [] - self.ie_cssfiles = [] - self.post_inlined_scripts = [] - self.pagedata_unload = False - self._cw = req - self.datadir_url = req.datadir_url - - def add_raw(self, rawheader): - self.write(rawheader) - - def define_var(self, var, value, override=True): - """adds a javascript var declaration / assginment in the header - - :param var: the variable name - :param value: the variable value (as a raw python value, - it will be jsonized later) - :param override: if False, don't set the variable value if the variable - is already defined. Default is True. - """ - self.jsvars.append( (var, value, override) ) - - def add_post_inline_script(self, content): - self.post_inlined_scripts.append(content) - - def add_onload(self, jscode): - self.add_post_inline_script(u"""$(cw).one('server-response', function(event) { -%s});""" % jscode) - - - def add_js(self, jsfile): - """adds `jsfile` to the list of javascripts used in the webpage - - This function checks if the file has already been added - :param jsfile: the script's URL - """ - if jsfile not in self.jsfiles: - self.jsfiles.append(jsfile) - - def add_css(self, cssfile, media='all'): - """adds `cssfile` to the list of javascripts used in the webpage - - This function checks if the file has already been added - :param cssfile: the stylesheet's URL - """ - if (cssfile, media) not in self.cssfiles: - self.cssfiles.append( (cssfile, media) ) - - def add_ie_css(self, cssfile, media='all', iespec=u'[if lt IE 8]'): - """registers some IE specific CSS""" - if (cssfile, media, iespec) not in self.ie_cssfiles: - self.ie_cssfiles.append( (cssfile, media, iespec) ) - - def add_unload_pagedata(self): - """registers onunload callback to clean page data on server""" - if not self.pagedata_unload: - self.post_inlined_scripts.append(self.js_unload_code) - self.pagedata_unload = True - - def concat_urls(self, urls): - """concatenates urls into one url usable by Apache mod_concat - - This method returns the url without modifying it if there is only - one element in the list - :param urls: list of local urls/filenames to concatenate - """ - if len(urls) == 1: - return urls[0] - len_prefix = len(self.datadir_url) - concated = u','.join(url[len_prefix:] for url in urls) - return (u'%s??%s' % (self.datadir_url, concated)) - - def group_urls(self, urls_spec): - """parses urls_spec in order to generate concatenated urls - for js and css includes - - This method checks if the file is local and if it shares options - with direct neighbors - :param urls_spec: entire list of urls/filenames to inspect - """ - concatable = [] - prev_islocal = False - prev_key = None - for url, key in urls_spec: - islocal = url.startswith(self.datadir_url) - if concatable and (islocal != prev_islocal or key != prev_key): - yield (self.concat_urls(concatable), prev_key) - del concatable[:] - if not islocal: - yield (url, key) - else: - concatable.append(url) - prev_islocal = islocal - prev_key = key - if concatable: - yield (self.concat_urls(concatable), prev_key) - - - def getvalue(self, skiphead=False): - """reimplement getvalue to provide a consistent (and somewhat browser - optimzed cf. http://stevesouders.com/cuzillion) order in external - resources declaration - """ - w = self.write - # 1/ variable declaration if any - if self.jsvars: - if skiphead: - w(u'') - else: - w(self.script_opening) - for var, value, override in self.jsvars: - vardecl = u'%s = %s;' % (var, json.dumps(value)) - if not override: - vardecl = (u'if (typeof %s == "undefined") {%s}' % - (var, vardecl)) - w(vardecl + u'\n') - if skiphead: - w(u'') - else: - w(self.script_closing) - # 2/ css files - ie_cssfiles = ((x, (y, z)) for x, y, z in self.ie_cssfiles) - if self.datadir_url and self._cw.vreg.config['concat-resources']: - cssfiles = self.group_urls(self.cssfiles) - ie_cssfiles = self.group_urls(ie_cssfiles) - jsfiles = (x for x, _ in self.group_urls((x, None) for x in self.jsfiles)) - else: - cssfiles = self.cssfiles - jsfiles = self.jsfiles - for cssfile, media in cssfiles: - w(u'\n' % - (media, xml_escape(cssfile))) - # 3/ ie css if necessary - if self.ie_cssfiles: # use self.ie_cssfiles because `ie_cssfiles` is a genexp - for cssfile, (media, iespec) in ie_cssfiles: - w(u' \n') - # 4/ js files - for jsfile in jsfiles: - if skiphead: - # Don't insert \n' % - xml_escape(jsfile)) - # 5/ post inlined scripts (i.e. scripts depending on other JS files) - if self.post_inlined_scripts: - if skiphead: - for script in self.post_inlined_scripts: - w(u'') - w(xml_escape(script)) - w(u'') - else: - w(self.script_opening) - w(u'\n\n'.join(self.post_inlined_scripts)) - w(self.script_closing) - # at the start of this function, the parent UStringIO may already have - # data in it, so we can't w(u'\n') at the top. Instead, we create - # a temporary UStringIO to get the same debugging output formatting - # if debugging is enabled. - headtag = UStringIO(tracewrites=self.tracewrites) - if not skiphead: - headtag.write(u'\n') - w(u'\n') - return headtag.getvalue() + super(HTMLHead, self).getvalue() - - -class HTMLStream(object): - """represents a HTML page. - - This is used my main templates so that HTML headers can be added - at any time during the page generation. - - HTMLStream uses the (U)StringIO interface to be compliant with - existing code. - """ - - def __init__(self, req): - self.tracehtml = req.tracehtml - # stream for - self.head = req.html_headers - # main stream - self.body = UStringIO(tracewrites=req.tracehtml) - # this method will be assigned to self.w in views - self.write = self.body.write - self.doctype = u'' - self._htmlattrs = [('lang', req.lang)] - # keep main_stream's reference on req for easier text/html demoting - req.main_stream = self - - @deprecated('[3.17] there are no namespaces in html, xhtml is not served any longer') - def add_namespace(self, prefix, uri): - pass - - @deprecated('[3.17] there are no namespaces in html, xhtml is not served any longer') - def set_namespaces(self, namespaces): - pass - - def add_htmlattr(self, attrname, attrvalue): - self._htmlattrs.append( (attrname, attrvalue) ) - - def set_htmlattrs(self, attrs): - self._htmlattrs = attrs - - def set_doctype(self, doctype, reset_xmldecl=None): - self.doctype = doctype - if reset_xmldecl is not None: - warn('[3.17] xhtml is no more supported', - DeprecationWarning, stacklevel=2) - - @property - def htmltag(self): - attrs = ' '.join('%s="%s"' % (attr, xml_escape(value)) - for attr, value in self._htmlattrs) - if attrs: - return '' % attrs - return '' - - def getvalue(self): - """writes HTML headers, closes tag and writes HTML body""" - if self.tracehtml: - css = u'\n'.join((u'span {', - u' font-family: monospace;', - u' word-break: break-all;', - u' word-wrap: break-word;', - u'}', - u'span:hover {', - u' color: red;', - u' text-decoration: underline;', - u'}')) - style = u'\n' % css - return (u'\n' - + u'\n\n%s\n\n' % style - + u'\n' - + u'' + xml_escape(self.doctype) + u'
' - + u'' + xml_escape(self.htmltag) + u'
' - + self.head.getvalue() - + self.body.getvalue() - + u'' + xml_escape(u'') + u'' - + u'\n') - return u'%s\n%s\n%s\n%s\n' % (self.doctype, - self.htmltag, - self.head.getvalue(), - self.body.getvalue()) - - -class CubicWebJsonEncoder(json.JSONEncoder): - """define a json encoder to be able to encode yams std types""" - - def default(self, obj): - if hasattr(obj, '__json_encode__'): - return obj.__json_encode__() - if isinstance(obj, datetime.datetime): - return ustrftime(obj, '%Y/%m/%d %H:%M:%S') - elif isinstance(obj, datetime.date): - return ustrftime(obj, '%Y/%m/%d') - elif isinstance(obj, datetime.time): - return obj.strftime('%H:%M:%S') - elif isinstance(obj, datetime.timedelta): - return (obj.days * 24 * 60 * 60) + obj.seconds - elif isinstance(obj, decimal.Decimal): - return float(obj) - try: - return json.JSONEncoder.default(self, obj) - except TypeError: - # we never ever want to fail because of an unknown type, - # just return None in those cases. - return None - -def json_dumps(value, **kwargs): - return json.dumps(value, cls=CubicWebJsonEncoder, **kwargs) - - -class JSString(str): - """use this string sub class in values given to :func:`js_dumps` to - insert raw javascript chain in some JSON string - """ - -def _dict2js(d, predictable=False): - if predictable: - it = sorted(d.items()) - else: - it = d.items() - res = [key + ': ' + js_dumps(val, predictable) - for key, val in it] - return '{%s}' % ', '.join(res) - -def _list2js(l, predictable=False): - return '[%s]' % ', '.join([js_dumps(val, predictable) for val in l]) - -def js_dumps(something, predictable=False): - """similar as :func:`json_dumps`, except values which are instances of - :class:`JSString` are expected to be valid javascript and will be output - as is - - >>> js_dumps({'hop': JSString('$.hop'), 'bar': None}, predictable=True) - '{bar: null, hop: $.hop}' - >>> js_dumps({'hop': '$.hop'}) - '{hop: "$.hop"}' - >>> js_dumps({'hip': {'hop': JSString('momo')}}) - '{hip: {hop: momo}}' - """ - if isinstance(something, dict): - return _dict2js(something, predictable) - if isinstance(something, list): - return _list2js(something, predictable) - if isinstance(something, JSString): - return something - return json_dumps(something, sort_keys=predictable) - -PERCENT_IN_URLQUOTE_RE = re.compile(r'%(?=[0-9a-fA-F]{2})') -def js_href(javascript_code): - """Generate a "javascript: ..." string for an href attribute. - - Some % which may be interpreted in a href context will be escaped. - - In an href attribute, url-quotes-looking fragments are interpreted before - being given to the javascript engine. Valid url quotes are in the form - ``%xx`` with xx being a byte in hexadecimal form. This means that ``%toto`` - will be unaltered but ``%babar`` will be mangled because ``ba`` is the - hexadecimal representation of 186. - - >>> js_href('alert("babar");') - 'javascript: alert("babar");' - >>> js_href('alert("%babar");') - 'javascript: alert("%25babar");' - >>> js_href('alert("%toto %babar");') - 'javascript: alert("%toto %25babar");' - >>> js_href('alert("%1337%");') - 'javascript: alert("%251337%");' - """ - return 'javascript: ' + PERCENT_IN_URLQUOTE_RE.sub(r'%25', javascript_code) - - -def parse_repo_uri(uri): - """ transform a command line uri into a (protocol, hostport, appid), e.g: - -> 'inmemory', None, '' - inmemory:// -> 'inmemory', None, '' - """ - parseduri = urlparse(uri) - scheme = parseduri.scheme - if scheme == '': - return ('inmemory', None, parseduri.path) - if scheme == 'inmemory': - return (scheme, None, parseduri.netloc) - raise NotImplementedError('URI protocol not implemented for `%s`' % uri) - - - -logger = getLogger('cubicweb.utils') - -class QueryCache(object): - """ a minimalist dict-like object to be used by the querier - and native source (replaces lgc.cache for this very usage) - - To be efficient it must be properly used. The usage patterns are - quite specific to its current clients. - - The ceiling value should be sufficiently high, else it will be - ruthlessly inefficient (there will be warnings when this happens). - A good (high enough) value can only be set on a per-application - value. A default, reasonnably high value is provided but tuning - e.g `rql-cache-size` can certainly help. - - There are two kinds of elements to put in this cache: - * frequently used elements - * occasional elements - - The former should finish in the _permanent structure after some - warmup. - - Occasional elements can be buggy requests (server-side) or - end-user (web-ui provided) requests. These have to be cleaned up - when they fill the cache, without evicting the useful, frequently - used entries. - """ - # quite arbitrary, but we want to never - # immortalize some use-a-little query - _maxlevel = 15 - - def __init__(self, ceiling=3000): - self._max = ceiling - # keys belonging forever to this cache - self._permanent = set() - # mapping of key (that can get wiped) to getitem count - self._transient = {} - self._data = {} - self._lock = Lock() - - def __len__(self): - with self._lock: - return len(self._data) - - def __getitem__(self, k): - with self._lock: - if k in self._permanent: - return self._data[k] - v = self._transient.get(k, _MARKER) - if v is _MARKER: - self._transient[k] = 1 - return self._data[k] - if v > self._maxlevel: - self._permanent.add(k) - self._transient.pop(k, None) - else: - self._transient[k] += 1 - return self._data[k] - - def __setitem__(self, k, v): - with self._lock: - if len(self._data) >= self._max: - self._try_to_make_room() - self._data[k] = v - - def pop(self, key, default=_MARKER): - with self._lock: - try: - if default is _MARKER: - return self._data.pop(key) - return self._data.pop(key, default) - finally: - if key in self._permanent: - self._permanent.remove(key) - else: - self._transient.pop(key, None) - - def clear(self): - with self._lock: - self._clear() - - def _clear(self): - self._permanent = set() - self._transient = {} - self._data = {} - - def _try_to_make_room(self): - current_size = len(self._data) - items = sorted(self._transient.items(), key=itemgetter(1)) - level = 0 - for k, v in items: - self._data.pop(k, None) - self._transient.pop(k, None) - if v > level: - datalen = len(self._data) - if datalen == 0: - return - if (current_size - datalen) / datalen > .1: - break - level = v - else: - # we removed cruft but everything is permanent - if len(self._data) >= self._max: - logger.warning('Cache %s is full.' % id(self)) - self._clear() - - def _usage_report(self): - with self._lock: - return {'itemcount': len(self._data), - 'transientcount': len(self._transient), - 'permanentcount': len(self._permanent)} - - def popitem(self): - raise NotImplementedError() - - def setdefault(self, key, default=None): - raise NotImplementedError() - - def update(self, other): - raise NotImplementedError()