[server] Improve massive deletion of entities
sqlserver is limited on the "IN" array size and produces the error "The query
processor ran out of internal resources and could not produce a query plan".
The exact limit is unclear but the error can occur with > 10000 items in the array.
See https://support.microsoft.com/en-us/kb/288095
Now we process the deletion in batches of 10000; some quick benchmarks show that
there is no performance impact:
delete_in(50000): 46.765182972
delete_in_batch(50000): 46.3242430687
delete_in(100000): 62.2969429493
delete_in_batch(100000): 61.8402349949
--- a/cubicweb/server/sources/native.py Mon Jun 06 15:18:12 2016 +0200
+++ b/cubicweb/server/sources/native.py Tue May 10 16:09:34 2016 +0200
@@ -944,8 +944,16 @@
* remove record from the `entities` table
"""
self.fti_unindex_entities(cnx, entities)
- attrs = {'eid': '(%s)' % ','.join([str(_e.eid) for _e in entities])}
- self.doexec(cnx, self.sqlgen.delete_many('entities', attrs), attrs)
+
+ # sqlserver is limited on the array size, the limit can occur starting
+ # from > 10000 item, so we process by batch of 10000
+ count = len(entities)
+ batch_size = 10000
+ for i in range(0, count, batch_size):
+ in_eid = ",".join(str(entities[index].eid)
+ for index in range(i, min(i + batch_size, count)))
+ attrs = {'eid': '(%s)' % (in_eid,)}
+ self.doexec(cnx, self.sqlgen.delete_many('entities', attrs), attrs)
# undo support #############################################################