dataimport.py
branch stable
changeset 9181 2eac0aa1d3f6
parent 8970 0a1bd0c590e2
child 9361 0542a85fe667
child 9425 d7e8293fa4de
--- a/dataimport.py	Mon Jul 22 12:08:37 2013 +0200
+++ b/dataimport.py	Wed Jul 24 11:54:35 2013 +0200
@@ -105,8 +105,8 @@
     return i+1
 
 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
-                  skipfirst=False, withpb=True):
-    """same as ucsvreader but a progress bar is displayed as we iter on rows"""
+                  skipfirst=False, withpb=True, skip_empty=True):
+    """Same as :func:`ucsvreader`, but a progress bar is displayed while iterating over rows."""
     if isinstance(stream_or_path, basestring):
         if not osp.exists(stream_or_path):
             raise Exception("file doesn't exists: %s" % stream_or_path)
@@ -118,23 +118,30 @@
         rowcount -= 1
     if withpb:
         pb = shellutils.ProgressBar(rowcount, 50)
-    for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
+    for urow in ucsvreader(stream, encoding, separator, quote,
+                           skipfirst=skipfirst, skip_empty=skip_empty):
         yield urow
         if withpb:
             pb.update()
     print ' %s rows imported' % rowcount
 
 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
-               skipfirst=False, ignore_errors=False):
+               skipfirst=False, ignore_errors=False, skip_empty=True):
     """A csv reader that accepts files with any encoding and outputs unicode
     strings
+
+    If `skip_empty` is true (the default), rows in which every value is empty
+    (i.e. lines holding only separators) are skipped. This is useful for Excel
+    exports, which may be full of such lines.
     """
     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
     if not ignore_errors:
         if skipfirst:
             it.next()
         for row in it:
-            yield [item.decode(encoding) for item in row]
+            decoded = [item.decode(encoding) for item in row]
+            if not skip_empty or any(decoded):
+                yield [item.decode(encoding) for item in row]
     else:
         # Skip first line
         try:
@@ -151,7 +158,10 @@
             # Error in CSV, ignore line and continue
             except csv.Error:
                 continue
-            yield [item.decode(encoding) for item in row]
+            decoded = [item.decode(encoding) for item in row]
+            if not skip_empty or any(decoded):
+                yield decoded
+
 
 def callfunc_every(func, number, iterable):
     """yield items of `iterable` one by one and call function `func`