diff -r 058bb3dc685f -r 0b59724cb3f2 cubicweb/server/sources/storages.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cubicweb/server/sources/storages.py Sat Jan 16 13:48:51 2016 +0100 @@ -0,0 +1,288 @@ +# copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see . +"""custom storages for the system source""" + +import os +import sys +from os import unlink, path as osp +from contextlib import contextmanager +import tempfile + +from six import PY2, PY3, text_type, binary_type + +from logilab.common import nullobject + +from yams.schema import role_name + +from cubicweb import Binary, ValidationError +from cubicweb.server import hook +from cubicweb.server.edition import EditedEntity + + +def set_attribute_storage(repo, etype, attr, storage): + repo.system_source.set_storage(etype, attr, storage) + +def unset_attribute_storage(repo, etype, attr): + repo.system_source.unset_storage(etype, attr) + + +class Storage(object): + """abstract storage + + * If `source_callback` is true (by default), the callback will be run during + query result process of fetched attribute's value and should have the + following prototype:: + + callback(self, source, cnx, value) + + where `value` is the value actually stored in the backend. None values + will be skipped (eg callback won't be called). + + * if `source_callback` is false, the callback will be run during sql + generation when some attribute with a custom storage is accessed and + should have the following prototype:: + + callback(self, generator, relation, linkedvar) + + where `generator` is the sql generator, `relation` the current rql syntax + tree relation and linkedvar the principal syntax tree variable holding the + attribute. + """ + is_source_callback = True + + def callback(self, *args): + """see docstring for prototype, which vary according to is_source_callback + """ + raise NotImplementedError() + + def entity_added(self, entity, attr): + """an entity using this storage for attr has been added""" + raise NotImplementedError() + def entity_updated(self, entity, attr): + """an entity using this storage for attr has been updatded""" + raise NotImplementedError() + def entity_deleted(self, entity, attr): + """an entity using this storage for attr has been deleted""" + raise NotImplementedError() + def migrate_entity(self, entity, attribute): + """migrate an entity attribute to the storage""" + raise NotImplementedError() + +# TODO +# * make it configurable without code +# * better file path attribution +# * handle backup/restore + +def uniquify_path(dirpath, basename): + """return a file descriptor and unique file name for `basename` in `dirpath` + """ + path = basename.replace(osp.sep, '-') + base, ext = osp.splitext(path) + return tempfile.mkstemp(prefix=base, suffix=ext, dir=dirpath) + +@contextmanager +def fsimport(cnx): + present = 'fs_importing' in cnx.transaction_data + old_value = cnx.transaction_data.get('fs_importing') + cnx.transaction_data['fs_importing'] = True + yield + if present: + cnx.transaction_data['fs_importing'] = old_value + else: + del cnx.transaction_data['fs_importing'] + + +_marker = nullobject() + + +class BytesFileSystemStorage(Storage): + """store Bytes attribute value on the file system""" + def __init__(self, defaultdir, fsencoding=_marker, wmode=0o444): + if PY3: + if not isinstance(defaultdir, text_type): + raise TypeError('defaultdir must be a unicode object in python 3') + if fsencoding is not _marker: + raise ValueError('fsencoding is no longer supported in python 3') + else: + self.fsencoding = fsencoding or 'utf-8' + if isinstance(defaultdir, text_type): + defaultdir = defaultdir.encode(fsencoding) + self.default_directory = defaultdir + # extra umask to use when creating file + # 0444 as in "only allow read bit in permission" + self._wmode = wmode + + def _writecontent(self, fd, binary): + """write the content of a binary in readonly file + + As the bfss never alters an existing file it does not prevent it from + working as intended. This is a better safe than sorry approach. + """ + os.fchmod(fd, self._wmode) + fileobj = os.fdopen(fd, 'wb') + binary.to_file(fileobj) + fileobj.close() + + + def callback(self, source, cnx, value): + """sql generator callback when some attribute with a custom storage is + accessed + """ + fpath = source.binary_to_str(value) + try: + return Binary.from_file(fpath) + except EnvironmentError as ex: + source.critical("can't open %s: %s", value, ex) + return None + + def entity_added(self, entity, attr): + """an entity using this storage for attr has been added""" + if entity._cw.transaction_data.get('fs_importing'): + binary = Binary.from_file(entity.cw_edited[attr].getvalue()) + entity._cw_dont_cache_attribute(attr, repo_side=True) + else: + binary = entity.cw_edited.pop(attr) + fd, fpath = self.new_fs_path(entity, attr) + # bytes storage used to store file's path + binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8')) + entity.cw_edited.edited_attribute(attr, binary_obj) + self._writecontent(fd, binary) + AddFileOp.get_instance(entity._cw).add_data(fpath) + return binary + + def entity_updated(self, entity, attr): + """an entity using this storage for attr has been updated""" + # get the name of the previous file containing the value + oldpath = self.current_fs_path(entity, attr) + if entity._cw.transaction_data.get('fs_importing'): + # If we are importing from the filesystem, the file already exists. + # We do not need to create it but we need to fetch the content of + # the file as the actual content of the attribute + fpath = entity.cw_edited[attr].getvalue() + entity._cw_dont_cache_attribute(attr, repo_side=True) + assert fpath is not None + binary = Binary.from_file(fpath) + else: + # We must store the content of the attributes + # into a file to stay consistent with the behaviour of entity_add. + # Moreover, the BytesFileSystemStorage expects to be able to + # retrieve the current value of the attribute at anytime by reading + # the file on disk. To be able to rollback things, use a new file + # and keep the old one that will be removed on commit if everything + # went ok. + # + # fetch the current attribute value in memory + binary = entity.cw_edited.pop(attr) + if binary is None: + fpath = None + else: + # Get filename for it + fd, fpath = self.new_fs_path(entity, attr) + # write attribute value on disk + self._writecontent(fd, binary) + # Mark the new file as added during the transaction. + # The file will be removed on rollback + AddFileOp.get_instance(entity._cw).add_data(fpath) + # reinstall poped value + if fpath is None: + entity.cw_edited.edited_attribute(attr, None) + else: + # register the new location for the file. + binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8')) + entity.cw_edited.edited_attribute(attr, binary_obj) + if oldpath is not None and oldpath != fpath: + # Mark the old file as useless so the file will be removed at + # commit. + DeleteFileOp.get_instance(entity._cw).add_data(oldpath) + return binary + + def entity_deleted(self, entity, attr): + """an entity using this storage for attr has been deleted""" + fpath = self.current_fs_path(entity, attr) + if fpath is not None: + DeleteFileOp.get_instance(entity._cw).add_data(fpath) + + def new_fs_path(self, entity, attr): + # We try to get some hint about how to name the file using attribute's + # name metadata, so we use the real file name and extension when + # available. Keeping the extension is useful for example in the case of + # PIL processing that use filename extension to detect content-type, as + # well as providing more understandable file names on the fs. + if PY2: + attr = attr.encode('ascii') + basename = [str(entity.eid), attr] + name = entity.cw_attr_metadata(attr, 'name') + if name is not None: + basename.append(name.encode(self.fsencoding) if PY2 else name) + fd, fspath = uniquify_path(self.default_directory, + '_'.join(basename)) + if fspath is None: + msg = entity._cw._('failed to uniquify path (%s, %s)') % ( + self.default_directory, '_'.join(basename)) + raise ValidationError(entity.eid, {role_name(attr, 'subject'): msg}) + assert isinstance(fspath, str) # bytes on py2, unicode on py3 + return fd, fspath + + def current_fs_path(self, entity, attr): + """return the current fs_path of the attribute, or None is the attr is + not stored yet. + """ + sysource = entity._cw.repo.system_source + cu = sysource.doexec(entity._cw, + 'SELECT cw_%s FROM cw_%s WHERE cw_eid=%s' % ( + attr, entity.cw_etype, entity.eid)) + rawvalue = cu.fetchone()[0] + if rawvalue is None: # no previous value + return None + fspath = sysource._process_value(rawvalue, cu.description[0], + binarywrap=binary_type) + if PY3: + fspath = fspath.decode('utf-8') + assert isinstance(fspath, str) # bytes on py2, unicode on py3 + return fspath + + def migrate_entity(self, entity, attribute): + """migrate an entity attribute to the storage""" + entity.cw_edited = EditedEntity(entity, **entity.cw_attr_cache) + self.entity_added(entity, attribute) + cnx = entity._cw + source = cnx.repo.system_source + attrs = source.preprocess_entity(entity) + sql = source.sqlgen.update('cw_' + entity.cw_etype, attrs, + ['cw_eid']) + source.doexec(cnx, sql, attrs) + entity.cw_edited = None + + +class AddFileOp(hook.DataOperationMixIn, hook.Operation): + def rollback_event(self): + for filepath in self.get_data(): + assert isinstance(filepath, str) # bytes on py2, unicode on py3 + try: + unlink(filepath) + except Exception as ex: + self.error("can't remove %s: %s" % (filepath, ex)) + +class DeleteFileOp(hook.DataOperationMixIn, hook.Operation): + def postcommit_event(self): + for filepath in self.get_data(): + assert isinstance(filepath, str) # bytes on py2, unicode on py3 + try: + unlink(filepath) + except Exception as ex: + self.error("can't remove %s: %s" % (filepath, ex))