[dataimport] Prevent ucsvreader from skipping the first line when ignore_errors is True (closes #3705791)
--- a/dataimport.py Thu Apr 03 15:30:37 2014 +0200
+++ b/dataimport.py Thu Apr 03 15:36:33 2014 +0200
@@ -143,11 +143,11 @@
if not skip_empty or any(decoded):
yield decoded
else:
- # Skip first line
- try:
- row = it.next()
- except csv.Error:
- pass
+ if skipfirst:
+ try:
+ row = it.next()
+ except csv.Error:
+ pass
# Safe version, that can cope with error in CSV file
while True:
try:
--- a/test/unittest_dataimport.py Thu Apr 03 15:30:37 2014 +0200
+++ b/test/unittest_dataimport.py Thu Apr 03 15:36:33 2014 +0200
@@ -1,6 +1,8 @@
from StringIO import StringIO
from logilab.common.testlib import TestCase, unittest_main
from cubicweb import dataimport
+
+
class UcsvreaderTC(TestCase):
def test_empty_lines_skipped(self):
@@ -21,6 +23,34 @@
],
list(dataimport.ucsvreader(stream, skip_empty=False)))
+ def test_skip_first(self):  # checks header-row handling for all four skipfirst / ignore_errors combinations
+ stream = StringIO('a,b,c,d,\n'
+ '1,2,3,4,\n')  # each row ends with a comma, hence the trailing u'' in the expected rows
+ reader = dataimport.ucsvreader(stream, skipfirst=True,
+ ignore_errors=True)
+ self.assertEqual(list(reader),
+ [[u'1', u'2', u'3', u'4', u'']])  # first row dropped
+ # skipfirst=True with ignore_errors=False: same expectation; rewind so the stream is parsed again from the start.
+ stream.seek(0)
+ reader = dataimport.ucsvreader(stream, skipfirst=True,
+ ignore_errors=False)
+ self.assertEqual(list(reader),
+ [[u'1', u'2', u'3', u'4', u'']])
+ # Regression case for #3705791 (per the changeset description): skipfirst=False with ignore_errors=True must keep the first row.
+ stream.seek(0)
+ reader = dataimport.ucsvreader(stream, skipfirst=False,
+ ignore_errors=True)
+ self.assertEqual(list(reader),
+ [[u'a', u'b', u'c', u'd', u''],
+ [u'1', u'2', u'3', u'4', u'']])
+ # skipfirst=False with ignore_errors=False: both rows are expected here as well.
+ stream.seek(0)
+ reader = dataimport.ucsvreader(stream, skipfirst=False,
+ ignore_errors=False)
+ self.assertEqual(list(reader),
+ [[u'a', u'b', u'c', u'd', u''],
+ [u'1', u'2', u'3', u'4', u'']])
+
if __name__ == '__main__':
unittest_main()