# HG changeset patch # User Rémi Cardona # Date 1396532193 -7200 # Node ID aa982b7c3f2a2541895ca31bdbd4041dc3ab13f6 # Parent c90107199dea189bb1052028010a68c6e32d7a72 [dataimport] Prevent ucsvreader from skipping the first line when ignore_errors is True (closes #3705791) diff -r c90107199dea -r aa982b7c3f2a dataimport.py --- a/dataimport.py Thu Apr 03 15:30:37 2014 +0200 +++ b/dataimport.py Thu Apr 03 15:36:33 2014 +0200 @@ -143,11 +143,11 @@ if not skip_empty or any(decoded): yield decoded else: - # Skip first line - try: - row = it.next() - except csv.Error: - pass + if skipfirst: + try: + row = it.next() + except csv.Error: + pass # Safe version, that can cope with error in CSV file while True: try: diff -r c90107199dea -r aa982b7c3f2a test/unittest_dataimport.py --- a/test/unittest_dataimport.py Thu Apr 03 15:30:37 2014 +0200 +++ b/test/unittest_dataimport.py Thu Apr 03 15:36:33 2014 +0200 @@ -1,6 +1,8 @@ from StringIO import StringIO from logilab.common.testlib import TestCase, unittest_main from cubicweb import dataimport + + class UcsvreaderTC(TestCase): def test_empty_lines_skipped(self): @@ -21,6 +23,34 @@ ], list(dataimport.ucsvreader(stream, skip_empty=False))) + def test_skip_first(self): + stream = StringIO('a,b,c,d,\n' + '1,2,3,4,\n') + reader = dataimport.ucsvreader(stream, skipfirst=True, + ignore_errors=True) + self.assertEqual(list(reader), + [[u'1', u'2', u'3', u'4', u'']]) + + stream.seek(0) + reader = dataimport.ucsvreader(stream, skipfirst=True, + ignore_errors=False) + self.assertEqual(list(reader), + [[u'1', u'2', u'3', u'4', u'']]) + + stream.seek(0) + reader = dataimport.ucsvreader(stream, skipfirst=False, + ignore_errors=True) + self.assertEqual(list(reader), + [[u'a', u'b', u'c', u'd', u''], + [u'1', u'2', u'3', u'4', u'']]) + + stream.seek(0) + reader = dataimport.ucsvreader(stream, skipfirst=False, + ignore_errors=False) + self.assertEqual(list(reader), + [[u'a', u'b', u'c', u'd', u''], + [u'1', u'2', u'3', u'4', u'']]) + if __name__ 
== '__main__': unittest_main()