108 yield dict(zip(header, row)) |
111 yield dict(zip(header, row)) |
109 |
112 |
def mk_entity(row, map):
    """Return a dict made from sanitized mapped values.

    `row` is a dict of raw values and `map` a list of ``(src, dest, funcs)``
    triples: the value found under ``row[src]`` is passed through every
    checker of `funcs`, in order, and the result is stored under `dest`.

    An AssertionError naming the faulty source field is raised when a
    checker fails or yields an undetermined value (``None`` or anything
    equal to ``False``), unless the `optional` checker is part of the
    chain, in which case the field is left out of the result instead.

    >>> row = {'myname': u'dupont'}
    >>> map = [('myname', u'name', (capitalize_if_unicase,))]
    >>> mk_entity(row, map)
    {'name': u'Dupont'}
    >>> row = {'myname': u'dupont', 'optnum': u'abc'}
    >>> map = [('myname', u'name', (capitalize_if_unicase,)),
    ...        ('optnum', u'MARKER', (optional, integer))]
    >>> mk_entity(row, map)
    {'name': u'Dupont'}
    """
    res = {}
    assert isinstance(row, dict)
    assert isinstance(map, list)
    for src, dest, funcs in map:
        assert not (required in funcs and optional in funcs), "optional and required checks are exclusive"
        res[dest] = row[src]
        try:
            for func in funcs:
                res[dest] = func(res[dest])
                # NOTE(review): an empty string is not equal to False, so it
                # is kept; 0, 0.0 and False are rejected here -- confirm that
                # this is the intended meaning of "undetermined".
                if res[dest] is None or res[dest]==False:
                    raise AssertionError('undetermined value')
        except AssertionError as err:
            if optional in funcs:
                # Forget this field if exception is coming from optional function
                del res[dest]
            else:
                raise AssertionError('error with "%s" field: %s' % (src, err))
    return res
124 |
146 |
125 |
147 |
126 # user interactions ############################################################ |
148 # user interactions ############################################################ |
127 |
149 |
def capitalize_if_unicase(txt):
    """Capitalize `txt` when it is written in a single case.

    Text that is entirely upper case or entirely lower case is returned
    capitalized; anything else is returned untouched.
    """
    single_case = txt.isupper() or txt.islower()
    return txt.capitalize() if single_case else txt
165 |
187 |
|
def uppercase(txt):
    """Return `txt` converted to upper case."""
    converted = txt.upper()
    return converted
|
190 |
|
def lowercase(txt):
    """Return `txt` converted to lower case."""
    converted = txt.lower()
    return converted
|
193 |
def no_space(txt):
    """Return `txt` with every plain space character removed."""
    space, nothing = ' ', ''
    return txt.replace(space, nothing)
168 |
196 |
def no_uspace(txt):
    """Return `txt` with every non-breaking space (U+00A0) removed."""
    nbsp, nothing = u'\xa0', ''
    return txt.replace(nbsp, nothing)
171 |
199 |
def no_dash(txt):
    """Return `txt` with every ``-`` character removed."""
    dash, nothing = '-', ''
    return txt.replace(dash, nothing)
174 |
202 |
|
def decimal(value):
    """Cast `value` to float, accepting a comma as decimal separator.

    Some locales write decimals with ',' instead of '.', so every comma is
    replaced by a dot before the conversion.

    Raise AssertionError (carrying the underlying error) when the
    resulting text cannot be converted to a float.
    """
    value = value.replace(',', '.')
    try:
        return float(value)
    except Exception as err:
        # conversion failures are reported as AssertionError, like the
        # other checkers of this module
        raise AssertionError(err)
|
212 |
|
def integer(value):
    """Cast `value` to int.

    Raise AssertionError (carrying the underlying error) when the
    conversion fails.
    """
    try:
        return int(value)
    except Exception as err:
        # conversion failures are reported as AssertionError, like the
        # other checkers of this module
        raise AssertionError(err)
|
218 |
|
def strip(txt):
    """Return `txt` without its leading and trailing whitespace."""
    trimmed = txt.strip()
    return trimmed
|
221 |
|
def yesno(value):
    """Return True when `value` starts with 'y', 'o' or '1', ignoring case.

    ('o' presumably stands for the French "oui" -- not established by this
    file.)  Any other text, including the empty string, gives False;
    indexing the first character used to raise IndexError on empty input.
    """
    return value.lower().startswith(('y', 'o', '1'))
|
224 |
|
def isalpha(value):
    """Return `value` when it only holds alphabetic characters.

    Raise AssertionError otherwise (``value.isalpha()`` is false).
    """
    if not value.isalpha():
        raise AssertionError("not all characters in the string alphabetic")
    return value
|
229 |
|
def optional(value):
    """Marker checker returning `value` unchanged.

    Adding it to a checker chain tells `mk_entity` to drop the field
    instead of raising when another checker of the chain fails.
    """
    unchanged = value
    return unchanged
|
233 |
|
def required(value):
    """Return `value` when it is truthy, raise AssertionError otherwise.

    This check is usually found in last position in a checker chain.
    """
    if not value:
        raise AssertionError("required")
    return value
|
242 |
|
@deprecated('use required(value)')
def nonempty(value):
    """Deprecated alias: delegate to `required` and return its result."""
    checked = required(value)
    return checked
|
246 |
|
@deprecated('use integer(value)')
def alldigits(txt):
    """Return `txt` when it only holds digits, an empty unicode string otherwise."""
    return txt if txt.isdigit() else u''
180 |
253 |
181 def strip(txt): |
|
182 return txt.strip() |
|
183 |
|
184 |
254 |
185 # base integrity checking functions ############################################ |
255 # base integrity checking functions ############################################ |
186 |
256 |
187 def check_doubles(buckets): |
257 def check_doubles(buckets): |
188 """Extract the keys that have more than one item in their bucket.""" |
258 """Extract the keys that have more than one item in their bucket.""" |
194 |
264 |
195 |
265 |
196 # object stores ################################################################# |
266 # object stores ################################################################# |
197 |
267 |
198 class ObjectStore(object): |
268 class ObjectStore(object): |
199 """Store objects in memory for faster testing. Will not |
269 """Store objects in memory for *faster* validation (development mode) |
200 enforce the constraints of the schema and hence will miss |
270 |
201 some problems. |
271 But it will not enforce the constraints of the schema and hence will miss some problems |
202 |
272 |
203 >>> store = ObjectStore() |
273 >>> store = ObjectStore() |
204 >>> user = {'login': 'johndoe'} |
274 >>> user = {'login': 'johndoe'} |
205 >>> store.add('CWUser', user) |
275 >>> store.add('CWUser', user) |
206 >>> group = {'name': 'unknown'} |
276 >>> group = {'name': 'unknown'} |
207 >>> store.add('CWUser', group) |
277 >>> store.add('CWUser', group) |
208 >>> store.relate(user['eid'], 'in_group', group['eid']) |
278 >>> store.relate(user['eid'], 'in_group', group['eid']) |
209 """ |
279 """ |
210 |
|
211 def __init__(self): |
280 def __init__(self): |
212 self.items = [] |
281 self.items = [] |
213 self.eids = {} |
282 self.eids = {} |
214 self.types = {} |
283 self.types = {} |
215 self.relations = set() |
284 self.relations = set() |
226 eid = item['eid'] = self._put(type, item) |
295 eid = item['eid'] = self._put(type, item) |
227 self.eids[eid] = item |
296 self.eids[eid] = item |
228 self.types.setdefault(type, []).append(eid) |
297 self.types.setdefault(type, []).append(eid) |
229 |
298 |
def relate(self, eid_from, rtype, eid_to):
    """Record a new relation and return it as a 3-tuple.

    A relation type prefixed with ``reverse_`` is stored the other way
    round and without the prefix:

        relate(1, 'in_group', 2)          gives (1, 'in_group', 2)
        relate(1, 'reverse_in_group', 2)  gives (2, 'in_group', 1)
    """
    prefix = 'reverse_'
    subject, obj = eid_from, eid_to
    if rtype.startswith(prefix):
        # reversed relation: swap both ends and drop the prefix
        subject, obj = obj, subject
        rtype = rtype[len(prefix):]
    relation = (subject, rtype, obj)
    self.relations.add(relation)
    return relation
|
314 |
|
def build_index(self, name, type, func=None):
    """Build the index `name` over every stored entity of the given type.

    `func` computes the index key of an entity dict; when it is missing or
    not callable, entities are indexed by their 'eid' value.  The index
    maps each key to the list of matching eids and is stored in
    ``self.indexes[name]``.  An AssertionError is raised when the
    resulting index is empty.
    """
    if func is None or not callable(func):
        def func(item):
            return item['eid']
    buckets = {}
    for eid in self.types[type]:
        indexed_key = func(self.eids[eid])
        buckets.setdefault(indexed_key, []).append(eid)
    assert buckets, "new index '%s' cannot be empty" % name
    self.indexes[name] = buckets
240 |
323 |
|
def build_rqlindex(self, name, type, key, rql, rql_params=False, func=None):
    """Build an index from the entities returned by an RQL query.

    Every entity of the result set is recorded in ``self.eids`` (as a
    plain dict) and its eid registered under `type` in ``self.types``;
    the index `name` is then built with `build_index`.  Entities are
    indexed by `func` when a callable is given, by their `key` value
    otherwise.

    As stated by the original note, the query should return the eid in
    its first column, e.g.:

        ctl.store.build_rqlindex('index_name', 'users', 'login',
                                 'Any U WHERE U is CWUser')

    Raise AssertionError when no entity is known for `type` once the
    query has been processed.
    """
    rset = self.rql(rql, rql_params or {})
    for entity in rset.entities():
        getattr(entity, key) # autopopulate entity with key attribute
        self.eids[entity.eid] = dict(entity)
        type_eids = self.types.setdefault(type, [])
        if entity.eid not in type_eids:
            type_eids.append(entity.eid)
    # .get(): `type` is never registered when the result set is empty, and
    # the explicit message is more helpful than a bare KeyError
    assert self.types.get(type), "new index type '%s' cannot be empty (0 record found)" % type

    # Build index with the given function, or with the specified key
    if func is None or not callable(func):
        func = lambda x: x[key]
    self.build_index(name, type, func)
|
341 |
|
@deprecated('get_many() deprecated. Use fetch() instead')
def get_many(self, name, key):
    """Deprecated: return what ``fetch(name, key, unique=False)`` returns."""
    matches = self.fetch(name, key, unique=False)
    return matches
243 |
345 |
|
@deprecated('get_one() deprecated. Use fetch(..., unique=True) instead')
def get_one(self, name, key):
    """Deprecated: return what ``fetch(name, key, unique=True)`` returns."""
    match = self.fetch(name, key, unique=True)
    return match
|
349 |
|
def fetch(self, name, key, unique=False, decorator=None):
    """Return the values stored under `key` in the index `name`.

    An unknown key gives an empty list.

    `decorator` is either a callable or an iterable of callables (usually
    lambda functions); each one is applied in turn to the fetched values
    and its result replaces them, e.g. ``decorator=lambda x: x[:1]`` only
    keeps the first value.

    When `unique` is true, exactly one value must remain (AssertionError
    otherwise) and that value is returned instead of the list.
    """
    found = self.indexes[name].get(key, [])
    if decorator is None:
        pipeline = ()
    elif hasattr(decorator, '__iter__'):
        pipeline = decorator
    else:
        pipeline = (decorator,)
    for transform in pipeline:
        found = transform(found)
    if not unique:
        return found
    assert len(found) == 1, u'expected a single one value for key "%s" in index "%s". Got %i' % (key, name, len(found))
    # FIXME maybe it's better to keep an iterator here ?
    return found[0]
248 |
367 |
def find(self, type, key, value):
    """Yield the items registered under `type` whose `key` entry equals `value`.

    The entries of ``self.types[type]`` are used as positions in
    ``self.items``.
    """
    for position in self.types[type]:
        candidate = self.items[position]
        if candidate[key] != value:
            continue
        yield candidate
254 |
373 |
|
def rql(self, *args):
    """Forward `args` to ``self._rql`` and return its result.

    Return None without doing anything when ``self._rql`` is None.
    """
    if self._rql is None:
        return None
    return self._rql(*args)
|
377 |
def checkpoint(self):
    """Do nothing and return None."""
    return None
257 |
380 |
258 |
381 |
259 class RQLObjectStore(ObjectStore): |
382 class RQLObjectStore(ObjectStore): |
260 """ObjectStore that works with an actual RQL repository.""" |
383 """ObjectStore that works with an actual RQL repository (production mode)""" |
261 _rql = None # bw compat |
384 _rql = None # bw compat |
262 |
385 |
263 def __init__(self, session=None, checkpoint=None): |
386 def __init__(self, session=None, checkpoint=None): |
264 ObjectStore.__init__(self) |
387 ObjectStore.__init__(self) |
265 if session is not None: |
388 if session is not None: |
290 def _put(self, type, item): |
413 def _put(self, type, item): |
291 query = ('INSERT %s X: ' % type) + ', '.join(['X %s %%(%s)s' % (key,key) for key in item]) |
414 query = ('INSERT %s X: ' % type) + ', '.join(['X %s %%(%s)s' % (key,key) for key in item]) |
292 return self.rql(query, item)[0][0] |
415 return self.rql(query, item)[0][0] |
293 |
416 |
def relate(self, eid_from, rtype, eid_to):
    """Record the relation in memory, then set it in the repository.

    The parent implementation normalizes the relation first: if a reverse
    relation is found, eids are exchanged.  The normalized
    ``(eid_from, rtype, eid_to)`` tuple is returned, like the parent
    method does.
    """
    relation = super(RQLObjectStore, self).relate(eid_from, rtype, eid_to)
    eid_from, rtype, eid_to = relation
    self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
             {'x': int(eid_from), 'y': int(eid_to)}, ('x', 'y'))
    return relation
|
298 |
422 |
299 |
423 |
300 # the import controller ######################################################## |
424 # the import controller ######################################################## |
301 |
425 |
302 class CWImportController(object): |
426 class CWImportController(object): |
363 if buckets: |
487 if buckets: |
364 err = func(buckets) |
488 err = func(buckets) |
365 if err: |
489 if err: |
366 self.errors[title] = (help, err) |
490 self.errors[title] = (help, err) |
367 self.store.checkpoint() |
491 self.store.checkpoint() |
368 self.tell('\nImport completed: %i entities (%i types), %i relations' |
492 nberrors = sum(len(err[1]) for err in self.errors.values()) |
|
493 self.tell('\nImport completed: %i entities, %i types, %i relations and %i errors' |
369 % (len(self.store.eids), len(self.store.types), |
494 % (len(self.store.eids), len(self.store.types), |
370 len(self.store.relations))) |
495 len(self.store.relations), nberrors)) |
371 nberrors = sum(len(err[1]) for err in self.errors.values()) |
496 if self.errors: |
372 if nberrors: |
497 if self.askerror==2 or (self.askerror and confirm('Display errors ?')): |
373 print '%s errors' % nberrors |
498 from pprint import pformat |
374 if self.errors and self.askerror and confirm('Display errors?'): |
499 for errkey, error in self.errors.items(): |
375 import pprint |
500 self.tell("\n%s (%s): %d\n" % (error[0], errkey, len(error[1]))) |
376 pprint.pprint(self.errors) |
501 self.tell(pformat(sorted(error[1]))) |
377 |
502 |
def get_data(self, key):
    """Return the result of ``self.data.get(key)``."""
    lookup = self.data.get
    return lookup(key)
380 |
505 |
def index(self, name, key, value, unique=False):
    """Append `value` to the entries of `key` in the store index `name`.

    The index and the key entry are created on first use.  If `unique` is
    set to True, a value already present under that key is not appended
    again: only its first occurrence is kept.
    """
    entries = self.store.indexes.setdefault(name, {}).setdefault(key, [])
    if unique and value in entries:
        return
    entries.append(value)
383 |
519 |
def tell(self, msg):
    """Hand `msg` over to the ``self._tell`` callable."""
    emit = self._tell
    emit(msg)
386 |
522 |