# HG changeset patch # User RĂ©mi Cardona # Date 1442570052 -7200 # Node ID b261d90149d01eca6061df622b786850ac9e93dc # Parent e7eafadbeb15791a98b50160a1ceca9ab6626d6d [server] Port BFSS to py3k The BFSS API changes in python 3: * 'defaultdir' MUST be a unicode object * 'fsencoding' MUST NOT be set In python 2, fsencoding handles both the encoding of file paths on the file system (utf-8 by default, but the system may actually be using something else) and the encoding of file paths that will be stored in the database. So in python 3, we wipe the slate clean: * rely on sys.getfilesystemencoding() to convert unicode objects to bytes * always encode paths to utf-8 for storage in the database Caveat emptor / here be dragons: * sys.getfilesystemencoding() depends on the current locale, which therefore MUST be set properly * when migrating an existing instance from py2 to py3, one MAY need to reencode file paths stored in the database diff -r e7eafadbeb15 -r b261d90149d0 server/sources/storages.py --- a/server/sources/storages.py Thu Sep 17 15:35:26 2015 +0200 +++ b/server/sources/storages.py Fri Sep 18 11:54:12 2015 +0200 @@ -23,6 +23,10 @@ from contextlib import contextmanager import tempfile +from six import PY2, PY3, text_type, binary_type + +from logilab.common import nullobject + from yams.schema import role_name from cubicweb import Binary, ValidationError @@ -103,13 +107,22 @@ del cnx.transaction_data['fs_importing'] +_marker = nullobject() + + class BytesFileSystemStorage(Storage): """store Bytes attribute value on the file system""" - def __init__(self, defaultdir, fsencoding='utf-8', wmode=0o444): - if type(defaultdir) is unicode: - defaultdir = defaultdir.encode(fsencoding) + def __init__(self, defaultdir, fsencoding=_marker, wmode=0o444): + if PY3: + if not isinstance(defaultdir, text_type): + raise TypeError('defaultdir must be a unicode object in python 3') + if fsencoding is not _marker: + raise ValueError('fsencoding is no longer supported in python 3') + else: + self.fsencoding = fsencoding or 'utf-8' + if isinstance(defaultdir, text_type): + defaultdir = defaultdir.encode(fsencoding) self.default_directory = defaultdir - self.fsencoding = fsencoding # extra umask to use when creating file # 0444 as in "only allow read bit in permission" self._wmode = wmode @@ -145,7 +158,8 @@ binary = entity.cw_edited.pop(attr) fd, fpath = self.new_fs_path(entity, attr) # bytes storage used to store file's path - entity.cw_edited.edited_attribute(attr, Binary(fpath)) + binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8')) + entity.cw_edited.edited_attribute(attr, binary_obj) self._writecontent(fd, binary) AddFileOp.get_instance(entity._cw).add_data(fpath) return binary @@ -187,7 +201,8 @@ entity.cw_edited.edited_attribute(attr, None) else: # register the new location for the file. - entity.cw_edited.edited_attribute(attr, Binary(fpath)) + binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8')) + entity.cw_edited.edited_attribute(attr, binary_obj) if oldpath is not None and oldpath != fpath: # Mark the old file as useless so the file will be removed at # commit. @@ -206,16 +221,19 @@ # available. Keeping the extension is useful for example in the case of # PIL processing that use filename extension to detect content-type, as # well as providing more understandable file names on the fs. + if PY2: + attr = attr.encode('ascii') basename = [str(entity.eid), attr] name = entity.cw_attr_metadata(attr, 'name') if name is not None: - basename.append(name.encode(self.fsencoding)) + basename.append(name.encode(self.fsencoding) if PY2 else name) fd, fspath = uniquify_path(self.default_directory, '_'.join(basename)) if fspath is None: msg = entity._cw._('failed to uniquify path (%s, %s)') % ( self.default_directory, '_'.join(basename)) raise ValidationError(entity.eid, {role_name(attr, 'subject'): msg}) + assert isinstance(fspath, str) # bytes on py2, unicode on py3 return fd, fspath def current_fs_path(self, entity, attr): @@ -229,8 +247,12 @@ rawvalue = cu.fetchone()[0] if rawvalue is None: # no previous value return None - return sysource._process_value(rawvalue, cu.description[0], - binarywrap=str) + fspath = sysource._process_value(rawvalue, cu.description[0], + binarywrap=binary_type) + if PY3: + fspath = fspath.decode('utf-8') + assert isinstance(fspath, str) # bytes on py2, unicode on py3 + return fspath def migrate_entity(self, entity, attribute): """migrate an entity attribute to the storage""" @@ -248,15 +270,17 @@ class AddFileOp(hook.DataOperationMixIn, hook.Operation): def rollback_event(self): for filepath in self.get_data(): + assert isinstance(filepath, str) # bytes on py2, unicode on py3 try: unlink(filepath) except Exception as ex: - self.error('cant remove %s: %s' % (filepath, ex)) + self.error("can't remove %s: %s" % (filepath, ex)) class DeleteFileOp(hook.DataOperationMixIn, hook.Operation): def postcommit_event(self): for filepath in self.get_data(): + assert isinstance(filepath, str) # bytes on py2, unicode on py3 try: unlink(filepath) except Exception as ex: - self.error('cant remove %s: %s' % (filepath, ex)) + self.error("can't remove %s: %s" % (filepath, ex)) diff -r e7eafadbeb15 -r b261d90149d0 server/test/unittest_storage.py --- a/server/test/unittest_storage.py Thu Sep 17 15:35:26 2015 +0200 +++ b/server/test/unittest_storage.py Fri Sep 18 11:54:12 2015 +0200 @@ -17,12 +17,15 @@ # with CubicWeb. If not, see . """unit tests for module cubicweb.server.sources.storages""" +from six import PY2 + from logilab.common.testlib import unittest_main, tag, Tags from cubicweb.devtools.testlib import CubicWebTC from glob import glob import os import os.path as osp +import sys import shutil import tempfile @@ -57,12 +60,14 @@ def setup_database(self): self.tempdir = tempfile.mkdtemp() bfs_storage = storages.BytesFileSystemStorage(self.tempdir) + self.bfs_storage = bfs_storage storages.set_attribute_storage(self.repo, 'File', 'data', bfs_storage) storages.set_attribute_storage(self.repo, 'BFSSTestable', 'opt_attr', bfs_storage) def tearDown(self): super(StorageTC, self).tearDown() storages.unset_attribute_storage(self.repo, 'File', 'data') + del self.bfs_storage shutil.rmtree(self.tempdir) @@ -73,8 +78,8 @@ def fspath(self, cnx, entity): fspath = cnx.execute('Any fspath(D) WHERE F eid %(f)s, F data D', - {'f': entity.eid})[0][0] - return fspath.getvalue() + {'f': entity.eid})[0][0].getvalue() + return fspath if PY2 else fspath.decode('utf-8') def test_bfss_wrong_fspath_usage(self): with self.admin_access.repo_cnx() as cnx: