cubicweb: comparison dataimport.py

equal deleted inserted replaced

-:13ed6de41774
+:2eac0aa1d3f6
 pass
 f.seek(0)
 return i+1
 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
-skipfirst=False, withpb=True):
+skipfirst=False, withpb=True, skip_empty=True):
-"""same as ucsvreader but a progress bar is displayed as we iter on rows"""
+"""same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
 if isinstance(stream_or_path, basestring):
 if not osp.exists(stream_or_path):
 raise Exception("file doesn't exists: %s" % stream_or_path)
 stream = open(stream_or_path)
 else:
 rowcount = count_lines(stream)
 if skipfirst:
 rowcount -= 1
 if withpb:
 pb = shellutils.ProgressBar(rowcount, 50)
-for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
+for urow in ucsvreader(stream, encoding, separator, quote,
+skipfirst=skipfirst, skip_empty=skip_empty):
 yield urow
 if withpb:
 pb.update()
 print ' %s rows imported' % rowcount
 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
-skipfirst=False, ignore_errors=False):
+skipfirst=False, ignore_errors=False, skip_empty=True):
 """A csv reader that accepts files with any encoding and outputs unicode
 strings
+if skip_empty (the default), lines without any values specified (only
+separators) will be skipped. This is useful for Excel exports which may be
+full of such lines.
 """
 it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
 if not ignore_errors:
 if skipfirst:
 it.next()
 for row in it:
-yield [item.decode(encoding) for item in row]
+decoded = [item.decode(encoding) for item in row]
+if not skip_empty or any(decoded):
+yield [item.decode(encoding) for item in row]
 else:
 # Skip first line
 try:
 row = it.next()
 except csv.Error:
 except StopIteration:
 break
 # Error in CSV, ignore line and continue
 except csv.Error:
 continue
-yield [item.decode(encoding) for item in row]
+decoded = [item.decode(encoding) for item in row]
+if not skip_empty or any(decoded):
+yield decoded
 def callfunc_every(func, number, iterable):
 """yield items of `iterable` one by one and call function `func`
 every `number` iterations. Always call function `func` at the end.
 """

branch	stable
changeset 9181	2eac0aa1d3f6
parent 8970	0a1bd0c590e2
child 9361	0542a85fe667
child 9425	d7e8293fa4de