dataimport.py
branch stable
changeset 9181 2eac0aa1d3f6
parent 8970 0a1bd0c590e2
child 9361 0542a85fe667
child 9425 d7e8293fa4de
equal deleted inserted replaced
9180:13ed6de41774 9181:2eac0aa1d3f6
   103         pass
   103         pass
   104     f.seek(0)
   104     f.seek(0)
   105     return i+1
   105     return i+1
   106 
   106 
   107 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
   107 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
   108                   skipfirst=False, withpb=True):
   108                   skipfirst=False, withpb=True, skip_empty=True):
   109     """same as ucsvreader but a progress bar is displayed as we iter on rows"""
   109     """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
   110     if isinstance(stream_or_path, basestring):
   110     if isinstance(stream_or_path, basestring):
   111         if not osp.exists(stream_or_path):
   111         if not osp.exists(stream_or_path):
   112             raise Exception("file doesn't exists: %s" % stream_or_path)
   112             raise Exception("file doesn't exists: %s" % stream_or_path)
   113         stream = open(stream_or_path)
   113         stream = open(stream_or_path)
   114     else:
   114     else:
   116     rowcount = count_lines(stream)
   116     rowcount = count_lines(stream)
   117     if skipfirst:
   117     if skipfirst:
   118         rowcount -= 1
   118         rowcount -= 1
   119     if withpb:
   119     if withpb:
   120         pb = shellutils.ProgressBar(rowcount, 50)
   120         pb = shellutils.ProgressBar(rowcount, 50)
   121     for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
   121     for urow in ucsvreader(stream, encoding, separator, quote,
       
   122                            skipfirst=skipfirst, skip_empty=skip_empty):
   122         yield urow
   123         yield urow
   123         if withpb:
   124         if withpb:
   124             pb.update()
   125             pb.update()
   125     print ' %s rows imported' % rowcount
   126     print ' %s rows imported' % rowcount
   126 
   127 
   127 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
   128 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
   128                skipfirst=False, ignore_errors=False):
   129                skipfirst=False, ignore_errors=False, skip_empty=True):
   129     """A csv reader that accepts files with any encoding and outputs unicode
   130     """A csv reader that accepts files with any encoding and outputs unicode
   130     strings
   131     strings
       
   132 
       
   133     if skip_empty (the default), lines without any values specified (only
       
   134     separators) will be skipped. This is useful for Excel exports which may be
       
   135     full of such lines.
   131     """
   136     """
   132     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
   137     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
   133     if not ignore_errors:
   138     if not ignore_errors:
   134         if skipfirst:
   139         if skipfirst:
   135             it.next()
   140             it.next()
   136         for row in it:
   141         for row in it:
   137             yield [item.decode(encoding) for item in row]
   142             decoded = [item.decode(encoding) for item in row]
       
   143             if not skip_empty or any(decoded):
       
   144                 yield [item.decode(encoding) for item in row]
   138     else:
   145     else:
   139         # Skip first line
   146         # Skip first line
   140         try:
   147         try:
   141             row = it.next()
   148             row = it.next()
   142         except csv.Error:
   149         except csv.Error:
   149             except StopIteration:
   156             except StopIteration:
   150                 break
   157                 break
   151             # Error in CSV, ignore line and continue
   158             # Error in CSV, ignore line and continue
   152             except csv.Error:
   159             except csv.Error:
   153                 continue
   160                 continue
   154             yield [item.decode(encoding) for item in row]
   161             decoded = [item.decode(encoding) for item in row]
       
   162             if not skip_empty or any(decoded):
       
   163                 yield decoded
       
   164 
   155 
   165 
   156 def callfunc_every(func, number, iterable):
   166 def callfunc_every(func, number, iterable):
   157     """yield items of `iterable` one by one and call function `func`
   167     """yield items of `iterable` one by one and call function `func`
   158     every `number` iterations. Always call function `func` at the end.
   168     every `number` iterations. Always call function `func` at the end.
   159     """
   169     """