misc/migration/3.10.9_Any.py
author Vladimir Popescu <vladimir.popescu@logilab.fr>
Tue, 12 Mar 2013 18:31:15 +0100
changeset 8836 8a57802d40d3
parent 8694 d901c36bcfce
child 11178 b3d3e23be27b
permissions -rw-r--r--
[cubicweb/doc] Add tutorial on data import in CubicWeb. This involves creating the "tutorials/dataimport" directory structure under "cubicweb/doc" and, inside the "dataimport" directory, putting several files: - a reST file containing the tutorial *per se*; this tutorial addresses the following issues: * creating a CubicWeb schema for representing a given data set (here, the Diseasome RDF data, for illustration purposes); * parsing the data; * importing the data, by using several stores: + the ``RQLObjectStore``, ``NoHookRQLObjectStore`` and ``SQLGenObjectStore`` from the ``dataimport`` module in CubicWeb; + the ``MassiveObjectStore`` from the ``dataimport`` module in the ``dataio`` cube. The tutorial also provides timing benchmarks of the various stores. - a set of Python files illustrating the data import, in the context of Diseasome RDF data parsing: * a Diseasome RDF data parsing module, * a Diseasome data import module, * a CubicWeb schema for representing Diseasome data.

import sys

# Optional clean-up pass: nothing is executed unless the operator confirms.
if confirm('fix some corrupted entities noticed on several instances?'):
    cleanup_queries = (
        # drop CWConstraint entities that no entity is constrained_by
        'DELETE CWConstraint X WHERE NOT E constrained_by X',
        # add is_instance_of wherever `X is Y` holds without it
        'SET X is_instance_of Y WHERE X is Y, NOT X is_instance_of Y',
    )
    for cleanup_query in cleanup_queries:
        rql(cleanup_query)
    # a single commit covers both queries
    commit()

# Optional pass rewriting every cwuri that contains "/eid/": the "/eid"
# substring is removed from the stored value. Work is committed in batches
# of 100 entities, followed by one last commit for the remainder.
if confirm('fix existing cwuri?'):
    from logilab.common.shellutils import progress
    from cubicweb.server.session import hooks_control
    rset = rql('Any X, XC WHERE X cwuri XC, X cwuri ~= "%/eid/%"')
    title = "%i entities to fix" % len(rset)
    nbops = rset.rowcount
    enabled = interactive_mode
    with progress(title=title, nbops=nbops, size=30, enabled=enabled) as pb:
        for i, row in enumerate(rset):
            # only the update itself runs under hooks_control (HOOKS_DENY_ALL
            # mode with the 'integrity' category); commits happen outside it
            with hooks_control(session, session.HOOKS_DENY_ALL, 'integrity'):
                data = {'eid': row[0], 'cwuri': row[1].replace(u'/eid', u'')}
                rql('SET X cwuri %(cwuri)s WHERE X eid %(eid)s', data)
            # count processed entities (1-based) so that batches really hold
            # 100 entities and the reported figure matches what was committed;
            # testing the 0-based index would commit right after the first
            # entity and under-report by one
            done = i + 1
            if not done % 100:
                # commit every 100 entities to limit memory consumption
                pb.text = "%i committed" % done
                commit(ask_confirm=False)
            pb.update()
        # commit whatever is left after the last full batch
        commit(ask_confirm=False)

# The option group changes are applied only when cubicweb's devtools package
# can be imported; the import and both calls share the same ImportError guard.
try:
    from cubicweb import devtools
    for option_name in ('anonymous-user', 'anonymous-password'):
        option_group_changed(option_name, 'main', 'web')
except ImportError:
    # cubicweb-dev unavailable, nothing needed
    pass