103 pass |
103 pass |
104 f.seek(0) |
104 f.seek(0) |
105 return i+1 |
105 return i+1 |
106 |
106 |
def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
                  skipfirst=False, withpb=True, skip_empty=True):
    """Same as :func:`ucsvreader` but a progress bar is displayed as we iter
    on rows.

    :param stream_or_path: either a path to a CSV file (a string) or an
      already opened stream. The stream is handed to ``count_lines`` before
      being read, so it has to support whatever ``count_lines`` needs
      (NOTE(review): presumably it must be seekable -- confirm).
    :param encoding: encoding used to decode every cell
    :param separator: CSV field delimiter
    :param quote: CSV quote character
    :param skipfirst: when true, the first line (header) is not yielded and
      is not counted in the progress bar total
    :param withpb: when false, no progress bar is created nor updated
    :param skip_empty: forwarded to :func:`ucsvreader`; rows holding only
      empty values are not yielded

    This is a generator: nothing happens (including the file existence check)
    until iteration starts.

    :raises Exception: if a path is given and the file does not exist
    """
    # remember whether we own the stream, so that we only close what we opened
    opened_here = isinstance(stream_or_path, basestring)
    if opened_here:
        if not osp.exists(stream_or_path):
            raise Exception("file doesn't exists: %s" % stream_or_path)
        stream = open(stream_or_path)
    else:
        stream = stream_or_path
    try:
        rowcount = count_lines(stream)
        if skipfirst:
            rowcount -= 1
        if withpb:
            pb = shellutils.ProgressBar(rowcount, 50)
        for urow in ucsvreader(stream, encoding, separator, quote,
                               skipfirst=skipfirst, skip_empty=skip_empty):
            yield urow
            # the bar is only advanced for rows actually yielded: rows dropped
            # because of `skip_empty` are part of `rowcount` but never reach
            # this point
            if withpb:
                pb.update()
        # parenthesised form prints the same text with or without
        # `from __future__ import print_function`
        print(' %s rows imported' % rowcount)
    finally:
        # runs on exhaustion, on error and when the generator is closed early
        if opened_here:
            stream.close()
126 |
127 |
def ucsvreader(stream, encoding='utf-8', separator=',', quote='"',
               skipfirst=False, ignore_errors=False, skip_empty=True):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings.

    :param stream: iterable of lines, as accepted by :func:`csv.reader`
    :param encoding: encoding used to decode every byte-string cell
    :param separator: CSV field delimiter
    :param quote: CSV quote character
    :param skipfirst: when true, the first row (header) is not yielded
    :param ignore_errors: when true, rows on which the csv module raises
      :exc:`csv.Error` are silently dropped instead of aborting the iteration
    :param skip_empty: if true (the default), lines without any values
      specified (only separators) will be skipped. This is useful for Excel
      exports which may be full of such lines.

    :return: a generator yielding each row as a list of decoded strings
    :raises csv.Error: on malformed input, unless `ignore_errors` is true
    """
    it = iter(csv.reader(stream, delimiter=separator, quotechar=quote))
    if skipfirst:
        try:
            # default value: an empty stream must simply yield nothing rather
            # than leak StopIteration out of the generator
            next(it, None)
        except csv.Error:
            if not ignore_errors:
                raise
    while True:
        try:
            row = next(it)
        # End of CSV, break
        except StopIteration:
            break
        except csv.Error:
            if not ignore_errors:
                raise
            # Error in CSV, ignore line and continue
            continue
        # decode once; items which are already text are passed through as is
        decoded = [item.decode(encoding) if isinstance(item, bytes) else item
                   for item in row]
        if not skip_empty or any(decoded):
            yield decoded
|
164 |
155 |
165 |
156 def callfunc_every(func, number, iterable): |
166 def callfunc_every(func, number, iterable): |
157 """yield items of `iterable` one by one and call function `func` |
167 """yield items of `iterable` one by one and call function `func` |
158 every `number` iterations. Always call function `func` at the end. |
168 every `number` iterations. Always call function `func` at the end. |
159 """ |
169 """ |