[server] Port BFSS to py3k
The BFSS API changes in python 3:
* 'defaultdir' MUST be a unicode object
* 'fsencoding' MUST NOT be set
In python 2, fsencoding handles both the encoding of file paths on the
file system (utf-8 by default, but the system may actually be using
something else) and the encoding of file paths that will be stored in
the database.
So in python 3, we wipe the slate clean:
* rely on sys.getfilesystemencoding() to convert unicode objects to
bytes
* always encode paths to utf-8 for storage in the database
Caveat emptor / here be dragons:
* sys.getfilesystemencoding() depends on the current locale, which
therefore MUST be set properly
* when migrating an existing instance from py2 to py3, one MAY need
to reencode file paths stored in the database
--- a/server/sources/storages.py Thu Sep 17 15:35:26 2015 +0200
+++ b/server/sources/storages.py Fri Sep 18 11:54:12 2015 +0200
@@ -23,6 +23,10 @@
from contextlib import contextmanager
import tempfile
+from six import PY2, PY3, text_type, binary_type
+
+from logilab.common import nullobject
+
from yams.schema import role_name
from cubicweb import Binary, ValidationError
@@ -103,13 +107,22 @@
del cnx.transaction_data['fs_importing']
+_marker = nullobject()
+
+
class BytesFileSystemStorage(Storage):
"""store Bytes attribute value on the file system"""
- def __init__(self, defaultdir, fsencoding='utf-8', wmode=0o444):
- if type(defaultdir) is unicode:
- defaultdir = defaultdir.encode(fsencoding)
+ def __init__(self, defaultdir, fsencoding=_marker, wmode=0o444):
+ if PY3:
+ if not isinstance(defaultdir, text_type):
+ raise TypeError('defaultdir must be a unicode object in python 3')
+ if fsencoding is not _marker:
+ raise ValueError('fsencoding is no longer supported in python 3')
+ else:
+ self.fsencoding = fsencoding or 'utf-8'
+ if isinstance(defaultdir, text_type):
+ defaultdir = defaultdir.encode(fsencoding)
self.default_directory = defaultdir
- self.fsencoding = fsencoding
# extra umask to use when creating file
# 0444 as in "only allow read bit in permission"
self._wmode = wmode
@@ -145,7 +158,8 @@
binary = entity.cw_edited.pop(attr)
fd, fpath = self.new_fs_path(entity, attr)
# bytes storage used to store file's path
- entity.cw_edited.edited_attribute(attr, Binary(fpath))
+ binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8'))
+ entity.cw_edited.edited_attribute(attr, binary_obj)
self._writecontent(fd, binary)
AddFileOp.get_instance(entity._cw).add_data(fpath)
return binary
@@ -187,7 +201,8 @@
entity.cw_edited.edited_attribute(attr, None)
else:
# register the new location for the file.
- entity.cw_edited.edited_attribute(attr, Binary(fpath))
+ binary_obj = Binary(fpath if PY2 else fpath.encode('utf-8'))
+ entity.cw_edited.edited_attribute(attr, binary_obj)
if oldpath is not None and oldpath != fpath:
# Mark the old file as useless so the file will be removed at
# commit.
@@ -206,16 +221,19 @@
# available. Keeping the extension is useful for example in the case of
# PIL processing that use filename extension to detect content-type, as
# well as providing more understandable file names on the fs.
+ if PY2:
+ attr = attr.encode('ascii')
basename = [str(entity.eid), attr]
name = entity.cw_attr_metadata(attr, 'name')
if name is not None:
- basename.append(name.encode(self.fsencoding))
+ basename.append(name.encode(self.fsencoding) if PY2 else name)
fd, fspath = uniquify_path(self.default_directory,
'_'.join(basename))
if fspath is None:
msg = entity._cw._('failed to uniquify path (%s, %s)') % (
self.default_directory, '_'.join(basename))
raise ValidationError(entity.eid, {role_name(attr, 'subject'): msg})
+ assert isinstance(fspath, str) # bytes on py2, unicode on py3
return fd, fspath
def current_fs_path(self, entity, attr):
@@ -229,8 +247,12 @@
rawvalue = cu.fetchone()[0]
if rawvalue is None: # no previous value
return None
- return sysource._process_value(rawvalue, cu.description[0],
- binarywrap=str)
+ fspath = sysource._process_value(rawvalue, cu.description[0],
+ binarywrap=binary_type)
+ if PY3:
+ fspath = fspath.decode('utf-8')
+ assert isinstance(fspath, str) # bytes on py2, unicode on py3
+ return fspath
def migrate_entity(self, entity, attribute):
"""migrate an entity attribute to the storage"""
@@ -248,15 +270,17 @@
class AddFileOp(hook.DataOperationMixIn, hook.Operation):
def rollback_event(self):
for filepath in self.get_data():
+ assert isinstance(filepath, str) # bytes on py2, unicode on py3
try:
unlink(filepath)
except Exception as ex:
- self.error('cant remove %s: %s' % (filepath, ex))
+ self.error("can't remove %s: %s" % (filepath, ex))
class DeleteFileOp(hook.DataOperationMixIn, hook.Operation):
def postcommit_event(self):
for filepath in self.get_data():
+ assert isinstance(filepath, str) # bytes on py2, unicode on py3
try:
unlink(filepath)
except Exception as ex:
- self.error('cant remove %s: %s' % (filepath, ex))
+ self.error("can't remove %s: %s" % (filepath, ex))
--- a/server/test/unittest_storage.py Thu Sep 17 15:35:26 2015 +0200
+++ b/server/test/unittest_storage.py Fri Sep 18 11:54:12 2015 +0200
@@ -17,12 +17,15 @@
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""unit tests for module cubicweb.server.sources.storages"""
+from six import PY2
+
from logilab.common.testlib import unittest_main, tag, Tags
from cubicweb.devtools.testlib import CubicWebTC
from glob import glob
import os
import os.path as osp
+import sys
import shutil
import tempfile
@@ -57,12 +60,14 @@
def setup_database(self):
self.tempdir = tempfile.mkdtemp()
bfs_storage = storages.BytesFileSystemStorage(self.tempdir)
+ self.bfs_storage = bfs_storage
storages.set_attribute_storage(self.repo, 'File', 'data', bfs_storage)
storages.set_attribute_storage(self.repo, 'BFSSTestable', 'opt_attr', bfs_storage)
def tearDown(self):
super(StorageTC, self).tearDown()
storages.unset_attribute_storage(self.repo, 'File', 'data')
+ del self.bfs_storage
shutil.rmtree(self.tempdir)
@@ -73,8 +78,8 @@
def fspath(self, cnx, entity):
fspath = cnx.execute('Any fspath(D) WHERE F eid %(f)s, F data D',
- {'f': entity.eid})[0][0]
- return fspath.getvalue()
+ {'f': entity.eid})[0][0].getvalue()
+ return fspath if PY2 else fspath.decode('utf-8')
def test_bfss_wrong_fspath_usage(self):
with self.admin_access.repo_cnx() as cnx: