--- a/devtools/htmlparser.py Wed Sep 16 17:07:26 2015 +0200
+++ b/devtools/htmlparser.py Wed Sep 16 17:22:41 2015 +0200
@@ -33,7 +33,7 @@
ERR_COUNT = 0
-_REM_SCRIPT_RGX = re.compile(r"<script[^>]*>.*?</script>", re.U|re.M|re.I|re.S)
+_REM_SCRIPT_RGX = re.compile(br"<script[^>]*>.*?</script>", re.M|re.I|re.S)
def _remove_script_tags(data):
"""Remove the script (usually javascript) tags to help the lxml
XMLParser / HTMLParser do their job. Without that, they choke on
@@ -70,7 +70,7 @@
#
# using that, we'll miss most actual validation error we want to
# catch. For now, use dumb regexp
- return _REM_SCRIPT_RGX.sub('', data)
+ return _REM_SCRIPT_RGX.sub(b'', data)
class Validator(object):