dataimport.py
changeset 10091 09878c2f8621
parent 10078 5eeffcfde1ba
child 10189 0b141ffcdd74
equal deleted inserted replaced
10090:0aebb1c0f849 10091:09878c2f8621
    97     for i, line in enumerate(f):
    97     for i, line in enumerate(f):
    98         pass
    98         pass
    99     f.seek(0)
    99     f.seek(0)
   100     return i+1
   100     return i+1
   101 
   101 
   102 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
   102 def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
   103                   skipfirst=False, withpb=True, skip_empty=True):
   103                   skipfirst=False, withpb=True, skip_empty=True, separator=None,
       
   104                   quote=None):
   104     """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
   105     """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
       
   106     if separator is not None:
       
   107         delimiter = separator
       
   108         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
       
   109     if quote is not None:
       
   110         quotechar = quote
       
   111         warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
   105     if isinstance(stream_or_path, basestring):
   112     if isinstance(stream_or_path, basestring):
   106         if not osp.exists(stream_or_path):
   113         if not osp.exists(stream_or_path):
   107             raise Exception("file doesn't exists: %s" % stream_or_path)
   114             raise Exception("file doesn't exists: %s" % stream_or_path)
   108         stream = open(stream_or_path)
   115         stream = open(stream_or_path)
   109     else:
   116     else:
   111     rowcount = count_lines(stream)
   118     rowcount = count_lines(stream)
   112     if skipfirst:
   119     if skipfirst:
   113         rowcount -= 1
   120         rowcount -= 1
   114     if withpb:
   121     if withpb:
   115         pb = shellutils.ProgressBar(rowcount, 50)
   122         pb = shellutils.ProgressBar(rowcount, 50)
   116     for urow in ucsvreader(stream, encoding, separator, quote,
   123     for urow in ucsvreader(stream, encoding, delimiter, quotechar,
   117                            skipfirst=skipfirst, skip_empty=skip_empty):
   124                            skipfirst=skipfirst, skip_empty=skip_empty):
   118         yield urow
   125         yield urow
   119         if withpb:
   126         if withpb:
   120             pb.update()
   127             pb.update()
   121     print ' %s rows imported' % rowcount
   128     print ' %s rows imported' % rowcount
   122 
   129 
   123 def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
   130 def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"',
   124                skipfirst=False, ignore_errors=False, skip_empty=True):
   131                skipfirst=False, ignore_errors=False, skip_empty=True,
       
   132                separator=None, quote=None):
   125     """A csv reader that accepts files with any encoding and outputs unicode
   133     """A csv reader that accepts files with any encoding and outputs unicode
   126     strings
   134     strings
   127 
   135 
   128     if skip_empty (the default), lines without any values specified (only
   136     if skip_empty (the default), lines without any values specified (only
   129     separators) will be skipped. This is useful for Excel exports which may be
   137     separators) will be skipped. This is useful for Excel exports which may be
   130     full of such lines.
   138     full of such lines.
   131     """
   139     """
   132     it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
   140     if separator is not None:
       
   141         delimiter = separator
       
   142         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
       
   143     if quote is not None:
       
   144         quotechar = quote
       
   145         warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
       
   146     it = iter(csv.reader(stream, delimiter=delimiter, quotechar=quotechar))
   133     if not ignore_errors:
   147     if not ignore_errors:
   134         if skipfirst:
   148         if skipfirst:
   135             it.next()
   149             it.next()
   136         for row in it:
   150         for row in it:
   137             decoded = [item.decode(encoding) for item in row]
   151             decoded = [item.decode(encoding) for item in row]