[datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 08 Jun 2011 17:10:39 +0200
changeset 7474 7dc405ad7bf3
parent 7473 a164fdf3de5d
child 7476 4be45b64d755
[datafeed cwxml parser] cache processed urls/entities to avoid unnecessary http requests and processing
sobjects/parsers.py
--- a/sobjects/parsers.py	Wed Jun 08 17:08:00 2011 +0200
+++ b/sobjects/parsers.py	Wed Jun 08 17:10:39 2011 +0200
@@ -135,6 +135,8 @@
             'link-or-create': self.related_link_or_create,
             'link': self.related_link,
             }
+        self._parsed_urls = {}
+        self._processed_entities = set()
 
     # mapping handling #########################################################
 
@@ -196,6 +198,9 @@
                                    item=item)
         if entity is None:
             return None
+        if entity.eid in self._processed_entities:
+            return entity
+        self._processed_entities.add(entity.eid)
         if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
             self.notify_updated(entity)
             item.pop('eid')
@@ -296,13 +301,18 @@
             self._clear_relation(entity, rtype, role, (ttype,))
 
     def _complete_item(self, item, add_relations=True):
-        itemurl = item['cwuri'] + '?vid=xml'
-        if add_relations:
-            for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
-                itemurl += '&relation=%s_%s' % (rtype, role)
-        item_rels = list(self.parse(itemurl))
-        assert len(item_rels) == 1
-        return item_rels[0]
+        try:
+            return self._parsed_urls[(item['cwuri'], add_relations)]
+        except KeyError:
+            itemurl = item['cwuri'] + '?vid=xml'
+            if add_relations:
+                for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
+                    itemurl += '&relation=%s_%s' % (rtype, role)
+            item_rels = list(self.parse(itemurl))
+            assert len(item_rels) == 1, 'url %s expected to bring back one '\
+                   'and only one entity, got %s' % (itemurl, len(item_rels))
+            self._parsed_urls[(item['cwuri'], add_relations)] = item_rels[0]
+            return item_rels[0]
 
     def _clear_relation(self, entity, rtype, role, ttypes):
         if entity.eid not in self.stats['created']: