equal
deleted
inserted
replaced
16 # You should have received a copy of the GNU Lesser General Public License along |
16 # You should have received a copy of the GNU Lesser General Public License along |
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
18 """Functions to help importing CSV data""" |
18 """Functions to help importing CSV data""" |
19 from __future__ import absolute_import, print_function |
19 from __future__ import absolute_import, print_function |
20 |
20 |
|
21 import codecs |
21 import csv as csvmod |
22 import csv as csvmod |
22 import warnings |
23 import warnings |
23 import os.path as osp |
24 import os.path as osp |
24 |
25 |
25 from six import string_types |
26 from six import PY2, PY3, string_types |
26 |
27 |
27 from logilab.common import shellutils |
28 from logilab.common import shellutils |
28 |
29 |
29 |
30 |
30 def count_lines(stream_or_filename): |
31 def count_lines(stream_or_filename): |
48 warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead") |
49 warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead") |
49 if quote is not None: |
50 if quote is not None: |
50 quotechar = quote |
51 quotechar = quote |
51 warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead") |
52 warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead") |
52 if isinstance(stream_or_path, string_types): |
53 if isinstance(stream_or_path, string_types): |
53 if not osp.exists(stream_or_path): |
54 stream = open(stream_or_path, 'rb') |
54 raise Exception("file doesn't exists: %s" % stream_or_path) |
|
55 stream = open(stream_or_path) |
|
56 else: |
55 else: |
57 stream = stream_or_path |
56 stream = stream_or_path |
58 rowcount = count_lines(stream) |
57 rowcount = count_lines(stream) |
59 if skipfirst: |
58 if skipfirst: |
60 rowcount -= 1 |
59 rowcount -= 1 |
76 |
75 |
77 if skip_empty (the default), lines without any values specified (only |
76 if skip_empty (the default), lines without any values specified (only |
78 separators) will be skipped. This is useful for Excel exports which may be |
77 separators) will be skipped. This is useful for Excel exports which may be |
79 full of such lines. |
78 full of such lines. |
80 """ |
79 """ |
|
80 if PY3: |
|
81 stream = codecs.getreader(encoding)(stream) |
81 if separator is not None: |
82 if separator is not None: |
82 delimiter = separator |
83 delimiter = separator |
83 warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead") |
84 warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead") |
84 if quote is not None: |
85 if quote is not None: |
85 quotechar = quote |
86 quotechar = quote |
87 it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar)) |
88 it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar)) |
88 if not ignore_errors: |
89 if not ignore_errors: |
89 if skipfirst: |
90 if skipfirst: |
90 next(it) |
91 next(it) |
91 for row in it: |
92 for row in it: |
92 decoded = [item.decode(encoding) for item in row] |
93 if PY2: |
|
94 decoded = [item.decode(encoding) for item in row] |
|
95 else: |
|
96 decoded = row |
93 if not skip_empty or any(decoded): |
97 if not skip_empty or any(decoded): |
94 yield decoded |
98 yield decoded |
95 else: |
99 else: |
96 if skipfirst: |
100 if skipfirst: |
97 try: |
101 try: |
106 except StopIteration: |
110 except StopIteration: |
107 break |
111 break |
108 # Error in CSV, ignore line and continue |
112 # Error in CSV, ignore line and continue |
109 except csvmod.Error: |
113 except csvmod.Error: |
110 continue |
114 continue |
111 decoded = [item.decode(encoding) for item in row] |
115 if PY2: |
|
116 decoded = [item.decode(encoding) for item in row] |
|
117 else: |
|
118 decoded = row |
112 if not skip_empty or any(decoded): |
119 if not skip_empty or any(decoded): |
113 yield decoded |
120 yield decoded |
114 |
121 |