dataimport.py
branchstable
changeset 6122 4d2b04b32cdc
parent 5557 1a534c596bff
child 6133 6f3eabbbdf2e
child 6142 8bc6eac1fac1
equal deleted inserted replaced
6121:57dddb6b5913 6122:4d2b04b32cdc
    49          ]
    49          ]
    50 
    50 
    51   GENERATORS.append( (gen_users, CHK) )
    51   GENERATORS.append( (gen_users, CHK) )
    52 
    52 
    53   # create controller
    53   # create controller
    54   ctl = CWImportController(RQLObjectStore(cnx))
    54   if 'cnx' in globals():
       
    55       ctl = CWImportController(RQLObjectStore(cnx))
       
    56   else:
       
    57       print 'debug mode (not connected)'
       
    58       print 'run through cubicweb-ctl shell to access an instance'
       
    59       ctl = CWImportController(ObjectStore())
    55   ctl.askerror = 1
    60   ctl.askerror = 1
    56   ctl.generators = GENERATORS
    61   ctl.generators = GENERATORS
    57   ctl.data['utilisateurs'] = lazytable(utf8csvreader(open('users.csv')))
    62   ctl.data['utilisateurs'] = lazytable(ucsvreader(open('users.csv')))
    58   # run
    63   # run
    59   ctl.run()
    64   ctl.run()
    60 
    65 
    61 .. BUG files with a single column are not parsable
    66 .. BUG files with a single column are not parsable
    62 .. TODO rollback() invocation is not possible yet
    67 .. TODO rollback() invocation is not possible yet
    75 from logilab.common.decorators import cached
    80 from logilab.common.decorators import cached
    76 from logilab.common.deprecation import deprecated
    81 from logilab.common.deprecation import deprecated
    77 
    82 
    78 from cubicweb.server.utils import eschema_eid
    83 from cubicweb.server.utils import eschema_eid
    79 
    84 
    80 def ucsvreader_pb(filepath, encoding='utf-8', separator=',', quote='"',
    85 def count_lines(stream_or_filename):
       
    86     if isinstance(stream_or_filename, basestring):
       
    87         f = open(filename)
       
    88     else:
       
    89         f = stream_or_filename
       
    90         f.seek(0)
       
    91     for i, line in enumerate(f):
       
    92         pass
       
    93     f.seek(0)
       
    94     return i+1
       
    95 
       
    96 def ucsvreader_pb(stream_or_path, encoding='utf-8', separator=',', quote='"',
    81                   skipfirst=False, withpb=True):
    97                   skipfirst=False, withpb=True):
    82     """same as ucsvreader but a progress bar is displayed as we iter on rows"""
    98     """same as ucsvreader but a progress bar is displayed as we iter on rows"""
    83     if not osp.exists(filepath):
    99     if isinstance(stream_or_path, basestring):
    84         raise Exception("file doesn't exists: %s" % filepath)
   100         if not osp.exists(filepath):
    85     rowcount = int(shellutils.Execute('wc -l "%s"' % filepath).out.strip().split()[0])
   101             raise Exception("file doesn't exists: %s" % filepath)
       
   102         stream = open(stream_or_path)
       
   103     else:
       
   104         stream = stream_or_path
       
   105     rowcount = count_lines(stream)
    86     if skipfirst:
   106     if skipfirst:
    87         rowcount -= 1
   107         rowcount -= 1
    88     if withpb:
   108     if withpb:
    89         pb = shellutils.ProgressBar(rowcount, 50)
   109         pb = shellutils.ProgressBar(rowcount, 50)
    90     for urow in ucsvreader(file(filepath), encoding, separator, quote, skipfirst):
   110     for urow in ucsvreader(stream, encoding, separator, quote, skipfirst):
    91         yield urow
   111         yield urow
    92         if withpb:
   112         if withpb:
    93             pb.update()
   113             pb.update()
    94     print ' %s rows imported' % rowcount
   114     print ' %s rows imported' % rowcount
    95 
   115 
   114 
   134 
   115 def lazytable(reader):
   135 def lazytable(reader):
   116     """The first row is taken to be the header of the table and
   136     """The first row is taken to be the header of the table and
   117     used to output a dict for each row of data.
   137     used to output a dict for each row of data.
   118 
   138 
   119     >>> data = lazytable(utf8csvreader(open(filename)))
   139     >>> data = lazytable(ucsvreader(open(filename)))
   120     """
   140     """
   121     header = reader.next()
   141     header = reader.next()
   122     for row in reader:
   142     for row in reader:
   123         yield dict(zip(header, row))
   143         yield dict(zip(header, row))
   124 
   144 
   394     """ObjectStore that works with an actual RQL repository (production mode)"""
   414     """ObjectStore that works with an actual RQL repository (production mode)"""
   395     _rql = None # bw compat
   415     _rql = None # bw compat
   396 
   416 
   397     def __init__(self, session=None, commit=None):
   417     def __init__(self, session=None, commit=None):
   398         ObjectStore.__init__(self)
   418         ObjectStore.__init__(self)
   399         if session is not None:
   419         if session is None:
   400             if not hasattr(session, 'set_pool'):
   420             sys.exit('please provide a session of run this script with cubicweb-ctl shell and pass cnx as session')
   401                 # connection
   421             session = cnx
   402                 cnx = session
   422         if not hasattr(session, 'set_pool'):
   403                 session = session.request()
   423             # connection
   404                 session.set_pool = lambda : None
   424             cnx = session
   405                 commit = commit or cnx.commit
   425             session = session.request()
   406             else:
   426             session.set_pool = lambda : None
   407                 session.set_pool()
   427             commit = commit or cnx.commit
   408             self.session = session
   428         else:
   409             self._commit = commit or session.commit
   429             session.set_pool()
   410         elif commit is not None:
   430         self.session = session
   411             self._commit = commit
   431         self._commit = commit or session.commit
   412             # XXX .session
       
   413 
   432 
   414     @deprecated("[3.7] checkpoint() deprecated. use commit() instead")
   433     @deprecated("[3.7] checkpoint() deprecated. use commit() instead")
   415     def checkpoint(self):
   434     def checkpoint(self):
   416         self.commit()
   435         self.commit()
   417 
   436