[py3k] unicode vs str vs bytes vs the world
author Rémi Cardona <remi.cardona@logilab.fr>
Wed, 16 Sep 2015 17:22:41 +0200
changeset 10696 4ba4be5553cf
parent 10695 321b99973b69
child 10697 c99130d18356
[py3k] unicode vs str vs bytes vs the world
devtools/htmlparser.py
web/formfields.py
web/views/formrenderers.py
--- a/devtools/htmlparser.py	Wed Sep 16 17:07:26 2015 +0200
+++ b/devtools/htmlparser.py	Wed Sep 16 17:22:41 2015 +0200
@@ -33,7 +33,7 @@
 
 ERR_COUNT = 0
 
-_REM_SCRIPT_RGX = re.compile(r"<script[^>]*>.*?</script>", re.U|re.M|re.I|re.S)
+_REM_SCRIPT_RGX = re.compile(br"<script[^>]*>.*?</script>", re.M|re.I|re.S)
 def _remove_script_tags(data):
     """Remove the script (usually javascript) tags to help the lxml
     XMLParser / HTMLParser do their job. Without that, they choke on
@@ -70,7 +70,7 @@
     #
     # using that, we'll miss most actual validation error we want to
     # catch. For now, use dumb regexp
-    return _REM_SCRIPT_RGX.sub('', data)
+    return _REM_SCRIPT_RGX.sub(b'', data)
 
 
 class Validator(object):
--- a/web/formfields.py	Wed Sep 16 17:07:26 2015 +0200
+++ b/web/formfields.py	Wed Sep 16 17:22:41 2015 +0200
@@ -66,7 +66,7 @@
 from warnings import warn
 from datetime import datetime, timedelta
 
-from six import string_types
+from six import text_type, string_types
 
 from logilab.mtconverter import xml_escape
 from logilab.common import nullobject
@@ -281,7 +281,7 @@
             return u''
         if value is True:
             return u'1'
-        return unicode(value)
+        return text_type(value)
 
     def get_widget(self, form):
         """return the widget instance associated to this field"""
@@ -796,7 +796,7 @@
             if data:
                 encoding = self.encoding(form)
                 try:
-                    form.formvalues[(self, form)] = unicode(data.getvalue(), encoding)
+                    form.formvalues[(self, form)] = data.getvalue().decode(encoding)
                 except UnicodeError:
                     pass
                 else:
@@ -817,7 +817,7 @@
 
     def _process_form_value(self, form):
         value = form._cw.form.get(self.input_name(form))
-        if isinstance(value, unicode):
+        if isinstance(value, text_type):
             # file modified using a text widget
             return Binary(value.encode(self.encoding(form)))
         return super(EditableFileField, self)._process_form_value(form)
@@ -948,7 +948,7 @@
     def format_single_value(self, req, value):
         if value:
             value = format_time(value.days * 24 * 3600 + value.seconds)
-            return unicode(value)
+            return text_type(value)
         return u''
 
     def example_format(self, req):
@@ -995,7 +995,7 @@
             try:
                 value = form._cw.parse_datetime(value, self.etype)
             except ValueError as ex:
-                raise ProcessFormError(unicode(ex))
+                raise ProcessFormError(text_type(ex))
         return value
 
 
@@ -1085,7 +1085,7 @@
         linkedto = form.linked_to.get((self.name, self.role))
         if linkedto:
             buildent = form._cw.entity_from_eid
-            return [(buildent(eid).view('combobox'), unicode(eid))
+            return [(buildent(eid).view('combobox'), text_type(eid))
                     for eid in linkedto]
         return []
 
@@ -1097,7 +1097,7 @@
         # vocabulary doesn't include current values, add them
         if form.edited_entity.has_eid():
             rset = form.edited_entity.related(self.name, self.role)
-            vocab += [(e.view('combobox'), unicode(e.eid))
+            vocab += [(e.view('combobox'), text_type(e.eid))
                       for e in rset.entities()]
         return vocab
 
@@ -1131,11 +1131,11 @@
             if entity.eid in done:
                 continue
             done.add(entity.eid)
-            res.append((entity.view('combobox'), unicode(entity.eid)))
+            res.append((entity.view('combobox'), text_type(entity.eid)))
         return res
 
     def format_single_value(self, req, value):
-        return unicode(value)
+        return text_type(value)
 
     def process_form_value(self, form):
         """process posted form and return correctly typed value"""
--- a/web/views/formrenderers.py	Wed Sep 16 17:07:26 2015 +0200
+++ b/web/views/formrenderers.py	Wed Sep 16 17:22:41 2015 +0200
@@ -37,6 +37,8 @@
 
 from warnings import warn
 
+from six import text_type
+
 from logilab.mtconverter import xml_escape
 from logilab.common.registry import yes
 
@@ -119,7 +121,7 @@
             data.insert(0, errormsg)
         # NOTE: we call unicode because `tag` objects may be found within data
         #       e.g. from the cwtags library
-        w(''.join(unicode(x) for x in data))
+        w(''.join(text_type(x) for x in data))
 
     def render_content(self, w, form, values):
         if self.display_progress_div: