[dataimport] new ignore_errors argument to ucsvreader, default to False. Closes #2547200
author: Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 03 Jan 2013 23:27:56 +0100
changeset 8637 e16561083d84
parent 8632 fa044b9157d7
child 8644 97202ea671e4
[dataimport] new ignore_errors argument to ucsvreader, default to False. Closes #2547200
dataimport.py
--- a/dataimport.py	Thu Jan 03 23:10:50 2013 +0100
+++ b/dataimport.py	Thu Jan 03 23:27:56 2013 +0100
@@ -124,15 +124,40 @@
     print ' %s rows imported' % rowcount
 
 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
-               skipfirst=False):
+               skipfirst=False, ignore_errors=False):
     """A csv reader that accepts files with any encoding and outputs unicode
     strings
+
+    If `skipfirst` is true, the first row (typically a header) is dropped.
+
+    If `ignore_errors` is true, rows for which the csv module raises
+    `csv.Error` are skipped instead of aborting the iteration (this also
+    applies to the first row when `skipfirst` is set).
     """
     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
-    if skipfirst:
-        it.next()
-    for row in it:
-        yield [item.decode(encoding) for item in row]
+    if not ignore_errors:
+        if skipfirst:
+            it.next()
+        for row in it:
+            yield [item.decode(encoding) for item in row]
+    else:
+        if skipfirst:
+            # Drop the header row; a malformed header is ignored as well
+            try:
+                it.next()
+            except csv.Error:
+                pass
+        # Safe version, that can cope with error in CSV file
+        while True:
+            try:
+                row = it.next()
+            # End of CSV, break
+            except StopIteration:
+                break
+            # Error in CSV, ignore line and continue
+            except csv.Error:
+                continue
+            yield [item.decode(encoding) for item in row]
 
 def callfunc_every(func, number, iterable):
     """yield items of `iterable` one by one and call function `func`