[dataimport] Prevent ucsvreader from skipping the first line when ignore_errors is True (closes #3705791) stable
author Rémi Cardona <remi.cardona@logilab.fr>
Thu, 03 Apr 2014 15:36:33 +0200
branch stable
changeset 9695 aa982b7c3f2a
parent 9694 c90107199dea
child 9696 4de6e73f580d
[dataimport] Prevent ucsvreader from skipping the first line when ignore_errors is True (closes #3705791)
dataimport.py
test/unittest_dataimport.py
--- a/dataimport.py	Thu Apr 03 15:30:37 2014 +0200
+++ b/dataimport.py	Thu Apr 03 15:36:33 2014 +0200
@@ -143,11 +143,11 @@
             if not skip_empty or any(decoded):
                 yield decoded
     else:
-        # Skip first line
-        try:
-            row = it.next()
-        except csv.Error:
-            pass
+        if skipfirst:
+            try:
+                row = it.next()
+            except csv.Error:
+                pass
         # Safe version, that can cope with error in CSV file
         while True:
             try:
--- a/test/unittest_dataimport.py	Thu Apr 03 15:30:37 2014 +0200
+++ b/test/unittest_dataimport.py	Thu Apr 03 15:36:33 2014 +0200
@@ -1,6 +1,8 @@
 from StringIO import StringIO
 from logilab.common.testlib import TestCase, unittest_main
 from cubicweb import dataimport
+
+
 class UcsvreaderTC(TestCase):
 
     def test_empty_lines_skipped(self):
@@ -21,6 +23,34 @@
                           ],
                          list(dataimport.ucsvreader(stream, skip_empty=False)))
 
+    def test_skip_first(self):
+        stream = StringIO('a,b,c,d,\n'
+                          '1,2,3,4,\n')
+        reader = dataimport.ucsvreader(stream, skipfirst=True,
+                                       ignore_errors=True)
+        self.assertEqual(list(reader),
+                         [[u'1', u'2', u'3', u'4', u'']])
+
+        stream.seek(0)
+        reader = dataimport.ucsvreader(stream, skipfirst=True,
+                                       ignore_errors=False)
+        self.assertEqual(list(reader),
+                         [[u'1', u'2', u'3', u'4', u'']])
+
+        stream.seek(0)
+        reader = dataimport.ucsvreader(stream, skipfirst=False,
+                                       ignore_errors=True)
+        self.assertEqual(list(reader),
+                         [[u'a', u'b', u'c', u'd', u''],
+                          [u'1', u'2', u'3', u'4', u'']])
+
+        stream.seek(0)
+        reader = dataimport.ucsvreader(stream, skipfirst=False,
+                                       ignore_errors=False)
+        self.assertEqual(list(reader),
+                         [[u'a', u'b', u'c', u'd', u''],
+                          [u'1', u'2', u'3', u'4', u'']])
+
 
 if __name__ == '__main__':
     unittest_main()