# Patch for dataimport.py: add a `skip_empty` keyword (default True) to
# ucsvreader() and ucsvreader_pb(). When it is true, rows whose decoded cells
# are all empty strings (lines made only of separators) are not yielded.
# Both branches of ucsvreader() yield the row that was already decoded for the
# emptiness test, so each row is decoded exactly once.
diff -r 13ed6de41774 -r 2eac0aa1d3f6 dataimport.py
--- a/dataimport.py	Mon Jul 22 12:08:37 2013 +0200
+++ b/dataimport.py	Wed Jul 24 11:54:35 2013 +0200
@@ -105,8 +105,8 @@
     return i+1
 
 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
-                  skipfirst=False, withpb=True):
-    """same as ucsvreader but a progress bar is displayed as we iter on rows"""
+                  skipfirst=False, withpb=True, skip_empty=True):
+    """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
     if isinstance(stream_or_path, basestring):
         if not osp.exists(stream_or_path):
             raise Exception("file doesn't exists: %s" % stream_or_path)
@@ -118,23 +118,30 @@
         rowcount -= 1
     if withpb:
         pb = shellutils.ProgressBar(rowcount, 50)
-    for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
+    for urow in ucsvreader(stream, encoding, separator, quote,
+                           skipfirst=skipfirst, skip_empty=skip_empty):
         yield urow
         if withpb:
             pb.update()
     print ' %s rows imported' % rowcount
 
 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
-               skipfirst=False, ignore_errors=False):
+               skipfirst=False, ignore_errors=False, skip_empty=True):
     """A csv reader that accepts files with any encoding and outputs unicode
     strings
+
+    if skip_empty (the default), lines without any values specified (only
+    separators) will be skipped. This is useful for Excel exports which may be
+    full of such lines.
     """
     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
     if not ignore_errors:
         if skipfirst:
             it.next()
         for row in it:
-            yield [item.decode(encoding) for item in row]
+            decoded = [item.decode(encoding) for item in row]
+            if not skip_empty or any(decoded):
+                yield decoded
     else:
         # Skip first line
         try:
@@ -151,7 +158,10 @@
             # Error in CSV, ignore line and continue
             except csv.Error:
                 continue
-            yield [item.decode(encoding) for item in row]
+            decoded = [item.decode(encoding) for item in row]
+            if not skip_empty or any(decoded):
+                yield decoded
+
 
 def callfunc_every(func, number, iterable):
     """yield items of `iterable` one by one and call function `func`