[py3k] unicode vs str vs bytes vs the world
author Rémi Cardona <remi.cardona@logilab.fr>
Wed, 16 Sep 2015 15:17:42 +0200
changeset 10689 49a62b8f6d43
parent 10688 fa29f3628a1b
child 10690 c6290d727c0c
[py3k] unicode vs str vs bytes vs the world
cwvreg.py
devtools/testlib.py
entity.py
hooks/integrity.py
schema.py
uilib.py
utils.py
--- a/cwvreg.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/cwvreg.py	Wed Sep 16 15:17:42 2015 +0200
@@ -28,6 +28,8 @@
 from datetime import datetime, date, time, timedelta
 from functools import reduce
 
+from six import text_type, binary_type
+
 from logilab.common.decorators import cached, clear_cache
 from logilab.common.deprecation import deprecated, class_deprecated
 from logilab.common.modutils import cleanup_sys_modules
@@ -221,9 +223,9 @@
         """
         obj = self.select(oid, req, rset=rset, **kwargs)
         res = obj.render(**kwargs)
-        if isinstance(res, unicode):
+        if isinstance(res, text_type):
             return res.encode(req.encoding)
-        assert isinstance(res, str)
+        assert isinstance(res, binary_type)
         return res
 
     def possible_views(self, req, rset=None, **kwargs):
--- a/devtools/testlib.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/devtools/testlib.py	Wed Sep 16 15:17:42 2015 +0200
@@ -961,11 +961,11 @@
         if content_type is None:
             content_type = 'text/html'
         if content_type in ('text/html', 'application/xhtml+xml') and output:
-            if output.startswith('<!DOCTYPE html>'):
+            if output.startswith(b'<!DOCTYPE html>'):
                 # only check XML well-formness since HTMLValidator isn't html5
                 # compatible and won't like various other extensions
                 default_validator = htmlparser.XMLSyntaxValidator
-            elif output.startswith('<?xml'):
+            elif output.startswith(b'<?xml'):
                 default_validator = htmlparser.DTDValidator
             else:
                 default_validator = htmlparser.HTMLValidator
@@ -1006,7 +1006,7 @@
                 str_exc = str(exc)
             except Exception:
                 str_exc = 'undisplayable exception'
-            msg += str_exc
+            msg += str_exc.encode(sys.getdefaultencoding(), 'replace')
             if content is not None:
                 position = getattr(exc, "position", (0,))[0]
                 if position:
--- a/entity.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/entity.py	Wed Sep 16 15:17:42 2015 +0200
@@ -22,7 +22,7 @@
 from warnings import warn
 from functools import partial
 
-from six import string_types, integer_types
+from six import text_type, string_types, integer_types
 from six.moves import range
 
 from logilab.common.decorators import cached
@@ -60,7 +60,7 @@
     """return True if value can be used at the end of a Rest URL path"""
     if value is None:
         return False
-    value = unicode(value)
+    value = text_type(value)
     # the check for ?, /, & are to prevent problems when running
     # behind Apache mod_proxy
     if value == u'' or u'?' in value or u'/' in value or u'&' in value:
@@ -680,8 +680,8 @@
         if path is None:
             # fallback url: <base-url>/<eid> url is used as cw entities uri,
             # prefer it to <base-url>/<etype>/eid/<eid>
-            return unicode(value)
-        return '%s/%s' % (path, self._cw.url_quote(value))
+            return text_type(value)
+        return u'%s/%s' % (path, self._cw.url_quote(value))
 
     def cw_attr_metadata(self, attr, metadata):
         """return a metadata for an attribute (None if unspecified)"""
--- a/hooks/integrity.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/hooks/integrity.py	Wed Sep 16 15:17:42 2015 +0200
@@ -24,6 +24,8 @@
 
 from threading import Lock
 
+from six import text_type
+
 from cubicweb import validation_error, neg_role
 from cubicweb.schema import (META_RTYPES, WORKFLOW_RTYPES,
                              RQLConstraint, RQLUniqueConstraint)
@@ -292,7 +294,7 @@
                     value = edited[attr]
                 except KeyError:
                     continue # no text to tidy
-                if isinstance(value, unicode): # filter out None and Binary
+                if isinstance(value, text_type): # filter out None and Binary
                     if getattr(entity, str(metaattr)) == 'text/html':
                         edited[attr] = soup2xhtml(value, self._cw.encoding)
 
--- a/schema.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/schema.py	Wed Sep 16 15:17:42 2015 +0200
@@ -25,7 +25,7 @@
 from logging import getLogger
 from warnings import warn
 
-from six import string_types, add_metaclass
+from six import text_type, string_types, add_metaclass
 from six.moves import range
 
 from logilab.common import tempattr
@@ -557,9 +557,9 @@
         key = key + '_' + form
     # ensure unicode
     if context is not None:
-        return unicode(req.pgettext(context, key))
+        return text_type(req.pgettext(context, key))
     else:
-        return unicode(req._(key))
+        return text_type(req._(key))
 
 
 # Schema objects definition ###################################################
--- a/uilib.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/uilib.py	Wed Sep 16 15:17:42 2015 +0200
@@ -28,7 +28,7 @@
 import re
 from io import StringIO
 
-from six import string_types, integer_types
+from six import PY3, text_type, binary_type, string_types, integer_types
 
 from logilab.mtconverter import xml_escape, html_unescape
 from logilab.common.date import ustrftime
@@ -64,7 +64,7 @@
     return value
 
 def print_int(value, req, props, displaytime=True):
-    return unicode(value)
+    return text_type(value)
 
 def print_date(value, req, props, displaytime=True):
     return ustrftime(value, req.property_value('ui.date-format'))
@@ -124,7 +124,7 @@
     return req._('no')
 
 def print_float(value, req, props, displaytime=True):
-    return unicode(req.property_value('ui.float-format') % value)
+    return text_type(req.property_value('ui.float-format') % value) # XXX cast needed ?
 
 PRINTERS = {
     'Bytes': print_bytes,
@@ -391,7 +391,7 @@
                               'img', 'area', 'input', 'col'))
 
 def sgml_attributes(attrs):
-    return u' '.join(u'%s="%s"' % (attr, xml_escape(unicode(value)))
+    return u' '.join(u'%s="%s"' % (attr, xml_escape(text_type(value)))
                      for attr, value in sorted(attrs.items())
                      if value is not None)
 
@@ -409,7 +409,7 @@
         value += u' ' + sgml_attributes(attrs)
     if content:
         if escapecontent:
-            content = xml_escape(unicode(content))
+            content = xml_escape(text_type(content))
         value += u'>%s</%s>' % (content, tag)
     else:
         if tag in HTML4_EMPTY_TAGS:
@@ -438,8 +438,8 @@
     stream = StringIO() #UStringIO() don't want unicode assertion
     formater.format(layout, stream)
     res = stream.getvalue()
-    if isinstance(res, str):
-        res = unicode(res, 'UTF8')
+    if isinstance(res, binary_type):
+        res = res.decode('UTF8')
     return res
 
 # traceback formatting ########################################################
@@ -447,14 +447,17 @@
 import traceback
 
 def exc_message(ex, encoding):
-    try:
-        excmsg = unicode(ex)
-    except Exception:
+    if PY3:
+        excmsg = str(ex)
+    else:
         try:
-            excmsg = unicode(str(ex), encoding, 'replace')
+            excmsg = unicode(ex)
         except Exception:
-            excmsg = unicode(repr(ex), encoding, 'replace')
-    exctype = unicode(ex.__class__.__name__)
+            try:
+                excmsg = unicode(str(ex), encoding, 'replace')
+            except Exception:
+                excmsg = unicode(repr(ex), encoding, 'replace')
+    exctype = ex.__class__.__name__
     return u'%s: %s' % (exctype, excmsg)
 
 
@@ -541,7 +544,7 @@
     def writerow(self, row):
         csvrow = []
         for elt in row:
-            if isinstance(elt, unicode):
+            if isinstance(elt, text_type):
                 csvrow.append(elt.encode(self.encoding))
             else:
                 csvrow.append(str(elt))
--- a/utils.py	Wed Sep 16 13:57:21 2015 +0200
+++ b/utils.py	Wed Sep 16 15:17:42 2015 +0200
@@ -101,7 +101,7 @@
     """
     def __init__(self, w, tag, closetag=None):
         self.written = False
-        self.tag = unicode(tag)
+        self.tag = text_type(tag)
         self.closetag = closetag
         self.w = w
 
@@ -117,7 +117,7 @@
     def __exit__(self, exctype, value, traceback):
         if self.written is True:
             if self.closetag:
-                self.w(unicode(self.closetag))
+                self.w(text_type(self.closetag))
             else:
                 self.w(self.tag.replace('<', '</', 1))