# HG changeset patch
# User Sylvain Thénault
# Date 1307545839 -7200
# Node ID 7dc405ad7bf35bdcd833517d7b7a97a9849d7a69
# Parent a164fdf3de5dff775f5f1f4c34fcd95ef6a3f0c3
# Review notes:
#  - hunk 1: initialises two instance attributes, _parsed_urls (dict) and
#    _processed_entities (set)
#  - hunk 2: returns the entity early when its eid is already in
#    _processed_entities, otherwise records the eid and carries on
#  - hunk 3: _complete_item memoizes its result in _parsed_urls under the key
#    (item['cwuri'], add_relations); the assertion now reports the url and
#    the number of items found
# NOTE(review): line breaks and leading indentation were reconstructed from a
# whitespace-collapsed copy of this patch; confirm the context lines against
# the parent revision before applying.
[datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing

diff -r a164fdf3de5d -r 7dc405ad7bf3 sobjects/parsers.py
--- a/sobjects/parsers.py Wed Jun 08 17:08:00 2011 +0200
+++ b/sobjects/parsers.py Wed Jun 08 17:10:39 2011 +0200
@@ -135,6 +135,8 @@
             'link-or-create': self.related_link_or_create,
             'link': self.related_link,
             }
+        self._parsed_urls = {}
+        self._processed_entities = set()
 
     # mapping handling #########################################################
 
@@ -196,6 +198,9 @@
                                    item=item)
         if entity is None:
             return None
+        if entity.eid in self._processed_entities:
+            return entity
+        self._processed_entities.add(entity.eid)
         if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
             self.notify_updated(entity)
             item.pop('eid')
@@ -296,13 +301,18 @@
             self._clear_relation(entity, rtype, role, (ttype,))
 
     def _complete_item(self, item, add_relations=True):
-        itemurl = item['cwuri'] + '?vid=xml'
-        if add_relations:
-            for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
-                itemurl += '&relation=%s_%s' % (rtype, role)
-        item_rels = list(self.parse(itemurl))
-        assert len(item_rels) == 1
-        return item_rels[0]
+        try:
+            return self._parsed_urls[(item['cwuri'], add_relations)]
+        except KeyError:
+            itemurl = item['cwuri'] + '?vid=xml'
+            if add_relations:
+                for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
+                    itemurl += '&relation=%s_%s' % (rtype, role)
+            item_rels = list(self.parse(itemurl))
+            assert len(item_rels) == 1, 'url %s expected to bring back one '\
+                   'and only one entity, got %s' % (itemurl, len(item_rels))
+            self._parsed_urls[(item['cwuri'], add_relations)] = item_rels[0]
+            return item_rels[0]
 
     def _clear_relation(self, entity, rtype, role, ttypes):
         if entity.eid not in self.stats['created']: