[migration/3.18] Idempotency fixes
Try to make partially-upgraded instances migrate properly.
# -*- coding: utf-8 -*-
# copyright 2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

"""This module imports the Diseasome data into a CubicWeb instance.
"""

# Python imports
import sys
import argparse

# Logilab import, for timing
from logilab.common.decorators import timed

# CubicWeb imports
import cubicweb.dataimport as cwdi
from cubes.dataio import dataimport as mcwdi

# Diseasome parser import
import diseasome_parser as parser


# Relation types whose objects are ``ExternalUri`` entities.
_EXTERNAL_URI_RELATIONS = ('classes', 'possible_drugs', 'omim', 'omim_page',
                           'chromosomal_location', 'same_as', 'gene_id',
                           'hgnc_id', 'hgnc_page')

# Relation types whose objects are other imported entities, mapped to the
# entity type of the object.
_ENTITY_RELATIONS = {'subtype_of': 'Disease', 'associated_genes': 'Gene'}

# Name of the store class that needs the bulk-loading specific calls.
_MASSIVE_STORE = 'MassiveObjectStore'


def _is_of_class(instance, class_name):
    """Tell whether ``instance`` is of the class named ``class_name``.

    Only the name of the instance's own class is compared; base classes are
    not considered.

    :param instance: any object.
    :param class_name: the class name to compare with, as a string.
    :returns: ``True`` if the names match, ``False`` otherwise.
    """
    return instance.__class__.__name__ == class_name


@timed
def diseasome_import(session, file_name, store):
    """Main function for importing Diseasome data.

    It uses the Diseasome data parser to get the contents of the
    data from a file, then uses a store for importing the data
    into a CubicWeb instance.

    The import is done in two passes: first every recognized entity is
    created and flushed, then the relations (and the ``ExternalUri``
    entities they point to) are created and flushed.  When the store is a
    ``MassiveObjectStore``, URIs are used in place of eids and the relations
    are converted in the database at the very end.

    Entities whose RDF type is missing are silently skipped; entities whose
    RDF type is not recognized are reported on ``sys.stderr`` and skipped.

    Example::

        diseasome_import(session, 'file_name', Store)

    :param session: session used to look up the already existing
        ``ExternalUri`` entities.
    :param file_name: name of the RDF file handed to the Diseasome parser.
    :param store: the data import store used to create entities and
        relations.
    """
    exturis = dict(session.execute('Any U, X WHERE X is ExternalUri, X uri U'))
    uri_to_eid = {}
    uri_to_etype = {}
    all_relations = {}
    etypes = {('http://www4.wiwiss.fu-berlin.de/'
               'diseasome/resource/diseasome/genes'): 'Gene',
              ('http://www4.wiwiss.fu-berlin.de/'
               'diseasome/resource/diseasome/diseases'): 'Disease'}
    # The kind of store does not change during the import: check it once.
    massive = _is_of_class(store, _MASSIVE_STORE)

    # Read the parsed data
    for entity, relations in parser.entities_from_rdf(file_name,
                                                      ('gene', 'disease')):
        uri = entity.get('cwuri', None)
        types = list(relations.get('types', []))
        if not types:
            continue
        etype = etypes.get(types[0])
        if not etype:
            # ``write`` takes a single string: format the message first.
            sys.stderr.write('Entity type %s not recognized.\n' % types[0])
            sys.stderr.flush()
            # Without a known entity type nothing can be created.
            continue
        if massive:
            found = set(relations)
            for relation in found.intersection(_EXTERNAL_URI_RELATIONS):
                store.init_rtype_table(etype, relation, 'ExternalUri')
            for relation in found.intersection(('subtype_of',)):
                store.init_rtype_table(etype, relation, 'Disease')
            for relation in found.intersection(('associated_genes',)):
                store.init_rtype_table(etype, relation, 'Gene')
        # Create the entities
        ent = store.create_entity(etype, **entity)
        if massive:
            # For the "MassiveObjectStore", the URIs are used as eids.
            uri_to_eid[uri] = uri
            uri_to_etype[uri] = etype
        else:
            uri_to_eid[uri] = ent.eid
            uri_to_etype[uri] = ent.cw_etype
        # Store relations for after
        all_relations[uri] = relations
    # Perform a first commit, of the entities
    store.flush()

    kwargs = {}
    for uri, relations in all_relations.items():
        from_eid = uri_to_eid.get(uri)
        # ``subjtype`` should be initialized if ``SQLGenObjectStore`` is used
        # and there are inlined relations in the schema.
        # If ``subjtype`` is not given, while ``SQLGenObjectStore`` is used
        # and there are inlined relations in the schema, the store
        # tries to infer the type of the subject, but this does not always
        # work, e.g. when there are several object types for the relation.
        # ``subjtype`` is ignored for other stores, or if there are no
        # inlined relations in the schema.
        kwargs['subjtype'] = uri_to_etype.get(uri)
        if not from_eid:
            continue
        for rtype, rels in relations.items():
            if rtype in _EXTERNAL_URI_RELATIONS:
                for rel in list(rels):
                    if rel in exturis:
                        rel_eid = exturis[rel]
                    else:
                        # Create the "ExternalUri" entities, which are the
                        # objects of the relations
                        extu = store.create_entity('ExternalUri', uri=rel)
                        # For the "MassiveObjectStore", the EIDs are
                        # in fact the URIs.
                        rel_eid = rel if massive else extu.eid
                        exturis[rel] = rel_eid
                    # Create the relations that have "ExternalUri"s as objects
                    if massive:
                        store.relate_by_iid(from_eid, rtype, rel_eid)
                    else:
                        store.relate(from_eid, rtype, rel_eid, **kwargs)
            elif rtype in _ENTITY_RELATIONS:
                for rel in list(rels):
                    to_eid = uri_to_eid.get(rel)
                    if not to_eid:
                        sys.stderr.write('Missing entity with URI %s '
                                         'for relation %s\n' % (rel, rtype))
                        sys.stderr.flush()
                        continue
                    # Create relations that have objects of other type
                    # than "ExternalUri"
                    if massive:
                        store.relate_by_iid(from_eid, rtype, to_eid)
                    else:
                        store.relate(from_eid, rtype, to_eid, **kwargs)
    # Perform a second commit, of the "ExternalUri" entities.
    # when the stores in the CubicWeb ``dataimport`` module are used,
    # relations are also committed.
    store.flush()

    # If the ``MassiveObjectStore`` is used, then entity and relation metadata
    # are pushed as well. By metadata we mean information on the creation
    # time and author.
    if massive:
        store.flush_meta_data()
        for relation in ('classes', 'possible_drugs', 'omim', 'omim_page',
                         'chromosomal_location', 'same_as'):
            # Afterwards, relations are actually created in the database.
            store.convert_relations('Disease', relation, 'ExternalUri',
                                    'cwuri', 'uri')
        store.convert_relations('Disease', 'subtype_of', 'Disease',
                                'cwuri', 'cwuri')
        store.convert_relations('Disease', 'associated_genes', 'Gene',
                                'cwuri', 'cwuri')
        for relation in ('gene_id', 'hgnc_id', 'hgnc_page', 'same_as'):
            store.convert_relations('Gene', relation, 'ExternalUri',
                                    'cwuri', 'uri')
        # Clean up temporary tables in the database
        store.cleanup()


if __name__ == '__main__':
    # Change sys.argv so that ``cubicweb-ctl shell`` can work out the options
    # we give to our ``diseasome_import.py`` script.
    sys.argv = [arg for arg in sys.argv[sys.argv.index("--") - 1:]
                if arg != "--"]
    PARSER = argparse.ArgumentParser(description="Import Diseasome data")
    PARSER.add_argument("-df", "--datafile", type=str,
                        help="RDF data file name")
    PARSER.add_argument("-st", "--store", type=str,
                        default="RQLObjectStore",
                        help="data import store")
    ARGS = PARSER.parse_args()
    if not ARGS.datafile:
        sys.exit("Data file not found or not specified")
    FILENAME = ARGS.datafile
    # NOTE(review): ``session`` is not defined in this file; presumably it is
    # injected by ``cubicweb-ctl shell`` -- confirm.
    if ARGS.store in ("RQLObjectStore", "NoHookRQLObjectStore",
                      "SQLGenObjectStore"):
        IMPORT_STORE = getattr(cwdi, ARGS.store)(session)
    elif ARGS.store == "MassiveObjectStore":
        IMPORT_STORE = mcwdi.MassiveObjectStore(session)
    else:
        sys.exit("Import store unknown")
    diseasome_import(session, FILENAME, IMPORT_STORE)