equal
deleted
inserted
replaced
14 # details. |
14 # details. |
15 # |
15 # |
16 # You should have received a copy of the GNU Lesser General Public License along |
16 # You should have received a copy of the GNU Lesser General Public License along |
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
18 """Functions to help importing CSV data""" |
18 """Functions to help importing CSV data""" |
19 from __future__ import absolute_import, print_function |
|
20 |
|
21 import codecs |
19 import codecs |
22 import csv as csvmod |
20 import csv as csvmod |
23 |
|
24 from six import PY2, PY3, string_types |
|
25 |
21 |
26 from logilab.common import shellutils |
22 from logilab.common import shellutils |
27 |
23 |
28 |
24 |
29 def count_lines(stream_or_filename): |
25 def count_lines(stream_or_filename): |
30 if isinstance(stream_or_filename, string_types): |
26 if isinstance(stream_or_filename, str): |
31 f = open(stream_or_filename) |
27 f = open(stream_or_filename) |
32 else: |
28 else: |
33 f = stream_or_filename |
29 f = stream_or_filename |
34 f.seek(0) |
30 f.seek(0) |
35 i = 0 # useful if f is an empty file |
31 i = 0 # useful if f is an empty file |
40 |
36 |
41 |
37 |
42 def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"', |
38 def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"', |
43 skipfirst=False, withpb=True, skip_empty=True): |
39 skipfirst=False, withpb=True, skip_empty=True): |
44 """same as :func:`ucsvreader` but a progress bar is displayed as we iterate over rows""" |
40 """same as :func:`ucsvreader` but a progress bar is displayed as we iterate over rows""" |
45 if isinstance(stream_or_path, string_types): |
41 if isinstance(stream_or_path, str): |
46 stream = open(stream_or_path, 'rb') |
42 stream = open(stream_or_path, 'rb') |
47 else: |
43 else: |
48 stream = stream_or_path |
44 stream = stream_or_path |
49 rowcount = count_lines(stream) |
45 rowcount = count_lines(stream) |
50 if skipfirst: |
46 if skipfirst: |
66 |
62 |
67 if skip_empty (the default), lines without any values specified (only |
63 if skip_empty (the default), lines without any values specified (only |
68 separators) will be skipped. This is useful for Excel exports which may be |
64 separators) will be skipped. This is useful for Excel exports which may be |
69 full of such lines. |
65 full of such lines. |
70 """ |
66 """ |
71 if PY3: |
67 stream = codecs.getreader(encoding)(stream) |
72 stream = codecs.getreader(encoding)(stream) |
|
73 it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar)) |
68 it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar)) |
74 if not ignore_errors: |
69 if not ignore_errors: |
75 if skipfirst: |
70 if skipfirst: |
76 next(it) |
71 next(it) |
77 for row in it: |
72 for row in it: |
78 if PY2: |
73 if not skip_empty or any(row): |
79 decoded = [item.decode(encoding) for item in row] |
74 yield row |
80 else: |
|
81 decoded = row |
|
82 if not skip_empty or any(decoded): |
|
83 yield decoded |
|
84 else: |
75 else: |
85 if skipfirst: |
76 if skipfirst: |
86 try: |
77 try: |
87 row = next(it) |
78 row = next(it) |
88 except csvmod.Error: |
79 except csvmod.Error: |
95 except StopIteration: |
86 except StopIteration: |
96 break |
87 break |
97 # Error in CSV, ignore line and continue |
88 # Error in CSV, ignore line and continue |
98 except csvmod.Error: |
89 except csvmod.Error: |
99 continue |
90 continue |
100 if PY2: |
91 if not skip_empty or any(row): |
101 decoded = [item.decode(encoding) for item in row] |
92 yield row |
102 else: |
|
103 decoded = row |
|
104 if not skip_empty or any(decoded): |
|
105 yield decoded |
|