--- a/dataimport.py Mon Jul 22 12:08:37 2013 +0200
+++ b/dataimport.py Wed Jul 24 11:54:35 2013 +0200
@@ -105,8 +105,8 @@
return i+1
def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
- skipfirst=False, withpb=True):
- """same as ucsvreader but a progress bar is displayed as we iter on rows"""
+ skipfirst=False, withpb=True, skip_empty=True):
+ """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
if isinstance(stream_or_path, basestring):
if not osp.exists(stream_or_path):
raise Exception("file doesn't exists: %s" % stream_or_path)
@@ -118,23 +118,30 @@
rowcount -= 1
if withpb:
pb = shellutils.ProgressBar(rowcount, 50)
- for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
+ for urow in ucsvreader(stream, encoding, separator, quote,
+ skipfirst=skipfirst, skip_empty=skip_empty):
yield urow
if withpb:
pb.update()
print ' %s rows imported' % rowcount
def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
- skipfirst=False, ignore_errors=False):
+ skipfirst=False, ignore_errors=False, skip_empty=True):
"""A csv reader that accepts files with any encoding and outputs unicode
strings
+
+ if skip_empty (the default), lines without any values specified (only
+ separators) will be skipped. This is useful for Excel exports which may be
+ full of such lines.
"""
it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
if not ignore_errors:
if skipfirst:
it.next()
for row in it:
- yield [item.decode(encoding) for item in row]
+ decoded = [item.decode(encoding) for item in row]
+ if not skip_empty or any(decoded):
+ yield decoded
else:
# Skip first line
try:
@@ -151,7 +158,10 @@
# Error in CSV, ignore line and continue
except csv.Error:
continue
- yield [item.decode(encoding) for item in row]
+ decoded = [item.decode(encoding) for item in row]
+ if not skip_empty or any(decoded):
+ yield decoded
+
def callfunc_every(func, number, iterable):
"""yield items of `iterable` one by one and call function `func`