# Reconstructed from the whitespace-collapsed Mercurial changeset
# e16561083d84 (parent fa044b9157d7), "[dataimport] new ignore_errors
# argument to ucsvreader, default to False. Closes #2547200", which patches
# dataimport.py.  This is the post-patch form of the function.
def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
               skipfirst=False, ignore_errors=False):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings.

    :param stream: iterable of lines, handed as-is to ``csv.reader``
    :param encoding: encoding used to decode byte-string cells
    :param separator: field delimiter given to ``csv.reader``
    :param quote: quote character given to ``csv.reader``
    :param skipfirst: when true, the first row is consumed and not yielded
    :param ignore_errors: when true, rows on which ``csv.reader`` raises
      ``csv.Error`` are dropped and reading resumes with the next row; when
      false (the default) the ``csv.Error`` propagates to the caller

    Yields one list of cells per row.  Only ``csv.Error`` is suppressed by
    `ignore_errors`; a decoding failure on a cell still propagates.
    """
    it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
    if skipfirst:
        # The first row is only dropped on request, whatever the value of
        # `ignore_errors` (it used to be dropped unconditionally in
        # ignore_errors mode, losing a data row when skipfirst was False).
        try:
            next(it)
        except StopIteration:
            # Empty input: finish cleanly instead of letting StopIteration
            # escape from the generator body.
            return
        except csv.Error:
            if not ignore_errors:
                raise
    while True:
        try:
            row = next(it)
        except StopIteration:
            # End of CSV
            return
        except csv.Error:
            if not ignore_errors:
                raise
            # Error in CSV, ignore line and continue
            continue
        # Cells are byte strings when csv yields bytes (Python 2 `str`);
        # cells that are already text are passed through untouched.
        yield [item.decode(encoding) if isinstance(item, bytes) else item
               for item in row]