devtools/fill.py
changeset 0 b97547f5f1fa
child 428 7d80331a91d7
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
       
     1 # -*- coding: iso-8859-1 -*-
       
     2 """This modules defines func / methods for creating test repositories
       
     3 
       
     4 :organization: Logilab
       
     5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     7 """
       
     8 __docformat__ = "restructuredtext en"
       
     9 
       
    10 from random import randint, choice
       
    11 from copy import deepcopy
       
    12 
       
    13 from mx.DateTime import DateTime, DateTimeDelta
       
    14 from decimal import Decimal
       
    15 from yams.constraints import (SizeConstraint, StaticVocabularyConstraint,
       
    16                               IntervalBoundConstraint)
       
    17 from rql.utils import decompose_b26 as base_decompose_b26
       
    18 
       
    19 from cubicweb import Binary
       
    20 from cubicweb.schema import RQLConstraint
       
    21 
       
    22 def decompose_b26(index, ascii=False):
       
    23     """return a letter (base-26) decomposition of index"""
       
    24     if ascii:
       
    25         return base_decompose_b26(index)
       
    26     return base_decompose_b26(index, u'éabcdefghijklmnopqrstuvwxyz')
       
    27 
       
    28 def get_choices(eschema, attrname):
       
    29     """returns possible choices for 'attrname'
       
    30     if attrname doesn't have ChoiceConstraint, return None
       
    31     """
       
    32     for cst in eschema.constraints(attrname):
       
    33         if isinstance(cst, StaticVocabularyConstraint):
       
    34             return cst.vocabulary()
       
    35     return None
       
    36     
       
    37 
       
    38 def get_max_length(eschema, attrname):
       
    39     """returns the maximum length allowed for 'attrname'"""
       
    40     for cst in eschema.constraints(attrname):
       
    41         if isinstance(cst, SizeConstraint) and cst.max:
       
    42             return cst.max
       
    43     return 300
       
    44     #raise AttributeError('No Size constraint on attribute "%s"' % attrname)
       
    45 
       
    46 def get_bounds(eschema, attrname):
       
    47     for cst in eschema.constraints(attrname):
       
    48         if isinstance(cst, IntervalBoundConstraint):
       
    49             return cst.minvalue, cst.maxvalue
       
    50     return None, None
       
    51 
       
    52 
       
    53 _GENERATED_VALUES = {}
       
    54 
       
    55 class _ValueGenerator(object):
       
    56     """generates integers / dates / strings / etc. to fill a DB table"""
       
    57 
       
    58     def __init__(self, eschema, choice_func=None):
       
    59         """<choice_func> is a function that returns a list of possible
       
    60         choices for a given entity type and an attribute name. It should
       
    61         looks like :
       
    62             def values_for(etype, attrname):
       
    63                 # some stuff ...
       
    64                 return alist_of_acceptable_values # or None
       
    65         """
       
    66         self.e_schema = eschema
       
    67         self.choice_func = choice_func
       
    68 
       
    69     def _generate_value(self, attrname, index, **kwargs):
       
    70         if not self.e_schema.has_unique_values(attrname):
       
    71             return self.__generate_value(attrname, index, **kwargs)
       
    72         value = self.__generate_value(attrname, index, **kwargs)
       
    73         while value in _GENERATED_VALUES.get((self.e_schema.type, attrname), ()):
       
    74             index += 1
       
    75             value = self.__generate_value(attrname, index, **kwargs)
       
    76         _GENERATED_VALUES.setdefault((self.e_schema.type, attrname), set()).add(value)
       
    77         return value
       
    78         
       
    79     def __generate_value(self, attrname, index, **kwargs):
       
    80         """generates a consistent value for 'attrname'"""
       
    81         attrtype = str(self.e_schema.destination(attrname)).lower()
       
    82         # Before calling generate_%s functions, try to find values domain
       
    83         etype = self.e_schema.type
       
    84         if self.choice_func is not None:
       
    85             values_domain = self.choice_func(etype, attrname)
       
    86             if values_domain is not None:
       
    87                 return choice(values_domain)
       
    88         gen_func = getattr(self, 'generate_%s_%s' % (self.e_schema.type, attrname), None)
       
    89         if gen_func is None:
       
    90             gen_func = getattr(self, 'generate_Any_%s' % attrname, None)
       
    91         if gen_func is not None:
       
    92             return gen_func(index, **kwargs)
       
    93         # If no specific values domain, then generate a dummy value
       
    94         gen_func = getattr(self, 'generate_%s' % (attrtype))
       
    95         return gen_func(attrname, index, **kwargs)
       
    96 
       
    97     def generate_choice(self, attrname, index):
       
    98         """generates a consistent value for 'attrname' if it's a choice"""
       
    99         choices = get_choices(self.e_schema, attrname)
       
   100         if choices is None:
       
   101             return None
       
   102         return unicode(choice(choices)) # FIXME
       
   103         
       
   104     def generate_string(self, attrname, index, format=None):
       
   105         """generates a consistent value for 'attrname' if it's a string"""
       
   106         # First try to get choices
       
   107         choosed = self.generate_choice(attrname, index)
       
   108         if choosed is not None:
       
   109             return choosed
       
   110         # All other case, generate a default string
       
   111         attrlength = get_max_length(self.e_schema, attrname)
       
   112         num_len = numlen(index)
       
   113         if num_len >= attrlength:
       
   114             ascii = self.e_schema.rproperty(attrname, 'internationalizable')
       
   115             return ('&'+decompose_b26(index, ascii))[:attrlength]
       
   116         # always use plain text when no format is specified
       
   117         attrprefix = attrname[:max(attrlength-num_len-1, 0)]
       
   118         if format == 'text/html':
       
   119             value = u'<span>é%s<b>%d</b></span>' % (attrprefix, index)
       
   120         elif format == 'text/rest':
       
   121             value = u"""
       
   122 title
       
   123 -----
       
   124 
       
   125 * %s
       
   126 * %d
       
   127 * é&
       
   128 """ % (attrprefix, index)
       
   129         else:
       
   130             value = u'é&%s%d' % (attrprefix, index)
       
   131         return value[:attrlength]
       
   132 
       
   133     def generate_password(self, attrname, index):
       
   134         """generates a consistent value for 'attrname' if it's a password"""
       
   135         return u'toto'
       
   136         
       
   137     def generate_integer(self, attrname, index):
       
   138         """generates a consistent value for 'attrname' if it's an integer"""
       
   139         minvalue, maxvalue = get_bounds(self.e_schema, attrname)
       
   140         if maxvalue is not None and maxvalue <= 0 and minvalue is None:
       
   141             minvalue = maxvalue - index # i.e. randint(-index, 0)
       
   142         else:
       
   143             maxvalue = maxvalue or index
       
   144         return randint(minvalue or 0, maxvalue)
       
   145     
       
   146     generate_int = generate_integer
       
   147     
       
   148     def generate_float(self, attrname, index):
       
   149         """generates a consistent value for 'attrname' if it's a float"""
       
   150         return float(randint(-index, index))
       
   151     
       
   152     def generate_decimal(self, attrname, index):
       
   153         """generates a consistent value for 'attrname' if it's a float"""
       
   154         return Decimal(str(self.generate_float(attrname, index)))
       
   155     
       
   156     def generate_date(self, attrname, index):
       
   157         """generates a random date (format is 'yyyy-mm-dd')"""
       
   158         return DateTime(randint(2000, 2004), randint(1, 12), randint(1, 28))
       
   159 
       
   160     def generate_time(self, attrname, index):
       
   161         """generates a random time (format is ' HH:MM')"""
       
   162         return DateTimeDelta(0, 11, index%60) #'11:%02d' % (index % 60)
       
   163     
       
   164     def generate_datetime(self, attrname, index):
       
   165         """generates a random date (format is 'yyyy-mm-dd HH:MM')"""
       
   166         return DateTime(randint(2000, 2004), randint(1, 12), randint(1, 28), 11, index%60)
       
   167         
       
   168 
       
   169     def generate_bytes(self, attrname, index, format=None):
       
   170         # modpython way
       
   171         fakefile = Binary("%s%s" % (attrname, index))
       
   172         fakefile.filename = "file_%s" % attrname
       
   173         fakefile.value = fakefile.getvalue()
       
   174         return fakefile
       
   175     
       
   176     def generate_boolean(self, attrname, index):
       
   177         """generates a consistent value for 'attrname' if it's a boolean"""
       
   178         return index % 2 == 0
       
   179 
       
   180     def generate_Any_data_format(self, index, **kwargs):
       
   181         # data_format attribute of Image/File has no vocabulary constraint, we
       
   182         # need this method else stupid values will be set which make mtconverter
       
   183         # raise exception
       
   184         return u'application/octet-stream'
       
   185     
       
   186     def generate_Any_content_format(self, index, **kwargs):
       
   187         # content_format attribute of EmailPart has no vocabulary constraint, we
       
   188         # need this method else stupid values will be set which make mtconverter
       
   189         # raise exception
       
   190         return u'text/plain'
       
   191 
       
   192     def generate_Image_data_format(self, index, **kwargs):
       
   193         # data_format attribute of Image/File has no vocabulary constraint, we
       
   194         # need this method else stupid values will be set which make mtconverter
       
   195         # raise exception
       
   196         return u'image/png'
       
   197 
       
   198 
       
   199 class autoextend(type):
       
   200     def __new__(mcs, name, bases, classdict):
       
   201         for attrname, attrvalue in classdict.items():
       
   202             if callable(attrvalue):
       
   203                 if attrname.startswith('generate_') and \
       
   204                        attrvalue.func_code.co_argcount < 2:
       
   205                     raise TypeError('generate_xxx must accept at least 1 argument')
       
   206                 setattr(_ValueGenerator, attrname, attrvalue)
       
   207         return type.__new__(mcs, name, bases, classdict)
       
   208 
       
   209 class ValueGenerator(_ValueGenerator):
       
   210     __metaclass__ = autoextend
       
   211 
       
   212 
       
   213 def _default_choice_func(etype, attrname):
       
   214     """default choice_func for insert_entity_queries"""
       
   215     return None
       
   216 
       
   217 def insert_entity_queries(etype, schema, vreg, entity_num,
       
   218                           choice_func=_default_choice_func):
       
   219     """returns a list of 'add entity' queries (couples query, args)
       
   220     :type etype: str
       
   221     :param etype: the entity's type
       
   222 
       
   223     :type schema: cubicweb.schema.Schema
       
   224     :param schema: the application schema
       
   225 
       
   226     :type entity_num: int
       
   227     :param entity_num: the number of entities to insert
       
   228 
       
   229     XXX FIXME: choice_func is here for *historical* reasons, it should
       
   230                probably replaced by a nicer way to specify choices
       
   231     :type choice_func: function
       
   232     :param choice_func: a function that takes an entity type, an attrname and
       
   233                         returns acceptable values for this attribute
       
   234     """
       
   235     # XXX HACK, remove or fix asap
       
   236     if etype in (('String', 'Int', 'Float', 'Boolean', 'Date', 'EGroup', 'EUser')):
       
   237         return []
       
   238     queries = []
       
   239     for index in xrange(entity_num):
       
   240         restrictions = []
       
   241         args = {}
       
   242         for attrname, value in make_entity(etype, schema, vreg, index, choice_func).items():
       
   243             restrictions.append('X %s %%(%s)s' % (attrname, attrname))
       
   244             args[attrname] = value
       
   245         if restrictions:
       
   246             queries.append(('INSERT %s X: %s' % (etype, ', '.join(restrictions)),
       
   247                             args))
       
   248             assert not 'eid' in args, args
       
   249         else:
       
   250             queries.append(('INSERT %s X' % etype, {}))        
       
   251     return queries
       
   252 
       
   253 
       
   254 def make_entity(etype, schema, vreg, index=0, choice_func=_default_choice_func,
       
   255                 form=False):
       
   256     """generates a random entity and returns it as a dict
       
   257 
       
   258     by default, generate an entity to be inserted in the repository
       
   259     elif form, generate an form dictionnary to be given to a web controller
       
   260     """
       
   261     eschema = schema.eschema(etype)
       
   262     valgen = ValueGenerator(eschema, choice_func)
       
   263     entity = {}
       
   264     # preprocessing to deal with _format fields
       
   265     attributes = []
       
   266     relatedfields = {}
       
   267     for rschema, attrschema in eschema.attribute_definitions():
       
   268         attrname = rschema.type
       
   269         if attrname == 'eid':
       
   270             # don't specify eids !
       
   271             continue
       
   272         if attrname.endswith('_format') and attrname[:-7] in eschema.subject_relations():
       
   273             relatedfields[attrname[:-7]] = attrschema
       
   274         else:
       
   275             attributes.append((attrname, attrschema))
       
   276     for attrname, attrschema in attributes:
       
   277         if attrname in relatedfields:
       
   278             # first generate a format and record it
       
   279             format = valgen._generate_value(attrname + '_format', index)
       
   280             entity[attrname + '_format'] = format
       
   281             # then a value coherent with this format
       
   282             value = valgen._generate_value(attrname, index, format=format)
       
   283         else:
       
   284             value = valgen._generate_value(attrname, index)
       
   285         if form: # need to encode values
       
   286             if attrschema.type == 'Bytes':
       
   287                 # twisted way
       
   288                 fakefile = value
       
   289                 filename = value.filename
       
   290                 value = (filename, u"text/plain", fakefile)
       
   291             elif attrschema.type == 'Date':
       
   292                 value = value.strftime(vreg.property_value('ui.date-format'))
       
   293             elif attrschema.type == 'Datetime':
       
   294                 value = value.strftime(vreg.property_value('ui.datetime-format'))
       
   295             elif attrschema.type == 'Time':
       
   296                 value = value.strftime(vreg.property_value('ui.time-format'))
       
   297             elif attrschema.type == 'Float':
       
   298                 fmt = vreg.property_value('ui.float-format')
       
   299                 value = fmt % value
       
   300             else:
       
   301                 value = unicode(value)
       
   302         entity[attrname] = value
       
   303     return entity
       
   304 
       
   305 
       
   306 
       
   307 def select(constraints, cursor, selectvar='O'):
       
   308     """returns list of eids matching <constraints>
       
   309 
       
   310     <selectvar> should be either 'O' or 'S' to match schema definitions
       
   311     """
       
   312     try:
       
   313         rset = cursor.execute('Any %s WHERE %s' % (selectvar, constraints))
       
   314     except:
       
   315         print "could restrict eid_list with given constraints (%r)" % constraints
       
   316         return []
       
   317     return set(eid for eid, in rset.rows)
       
   318 
       
   319 
       
   320 
       
   321 def make_relations_queries(schema, edict, cursor, ignored_relations=(),
       
   322                            existingrels=None):
       
   323     """returns a list of generated RQL queries for relations
       
   324     :param schema: The application schema
       
   325 
       
   326     :param e_dict: mapping between etypes and eids
       
   327 
       
   328     :param ignored_relations: list of relations to ignore (i.e. don't try
       
   329                               to generate insert queries for these relations)
       
   330     """
       
   331     gen = RelationsQueriesGenerator(schema, cursor, existingrels)
       
   332     return gen.compute_queries(edict, ignored_relations)
       
   333 
       
   334 
       
   335 class RelationsQueriesGenerator(object):
       
   336     rql_tmpl = 'SET S %s O WHERE S eid %%(subjeid)s, O eid %%(objeid)s'
       
   337     def __init__(self, schema, cursor, existing=None):
       
   338         self.schema = schema
       
   339         self.cursor = cursor
       
   340         self.existingrels = existing or {}
       
   341 
       
   342     def compute_queries(self, edict, ignored_relations):
       
   343         queries = []
       
   344         #   1/ skip final relations and explictly ignored relations
       
   345         rels = [rschema for rschema in self.schema.relations()
       
   346                 if not (rschema.is_final() or rschema in ignored_relations)]
       
   347         # for each relation
       
   348         #   2/ take each possible couple (subj, obj)
       
   349         #   3/ analyze cardinality of relation
       
   350         #      a/ if relation is mandatory, insert one relation
       
   351         #      b/ else insert N relations where N is the mininum
       
   352         #         of 20 and the number of existing targetable entities
       
   353         for rschema in rels:
       
   354             sym = set()
       
   355             sedict = deepcopy(edict)
       
   356             oedict = deepcopy(edict)
       
   357             delayed = []
       
   358             # for each couple (subjschema, objschema), insert relations
       
   359             for subj, obj in rschema.iter_rdefs():
       
   360                 sym.add( (subj, obj) )
       
   361                 if rschema.symetric and (obj, subj) in sym:
       
   362                     continue
       
   363                 subjcard, objcard = rschema.rproperty(subj, obj, 'cardinality')
       
   364                 # process mandatory relations first
       
   365                 if subjcard in '1+' or objcard in '1+': 
       
   366                     queries += self.make_relation_queries(sedict, oedict,
       
   367                                                           rschema, subj, obj)
       
   368                 else:
       
   369                     delayed.append( (subj, obj) )
       
   370             for subj, obj in delayed:
       
   371                 queries += self.make_relation_queries(sedict, oedict, rschema,
       
   372                                                       subj, obj)
       
   373         return queries
       
   374         
       
   375     def qargs(self, subjeids, objeids, subjcard, objcard, subjeid, objeid):
       
   376         if subjcard in '?1':
       
   377             subjeids.remove(subjeid)
       
   378         if objcard in '?1':
       
   379             objeids.remove(objeid)
       
   380         return {'subjeid' : subjeid, 'objeid' : objeid}
       
   381 
       
   382     def make_relation_queries(self, sedict, oedict, rschema, subj, obj):
       
   383         subjcard, objcard = rschema.rproperty(subj, obj, 'cardinality')
       
   384         subjeids = sedict.get(subj, frozenset())
       
   385         used = self.existingrels[rschema.type]
       
   386         preexisting_subjrels = set(subj for subj, obj in used)
       
   387         preexisting_objrels = set(obj for subj, obj in used)
       
   388         # if there are constraints, only select appropriate objeids
       
   389         q = self.rql_tmpl % rschema.type
       
   390         constraints = [c for c in rschema.rproperty(subj, obj, 'constraints')
       
   391                        if isinstance(c, RQLConstraint)]
       
   392         if constraints:
       
   393             restrictions = ', '.join(c.restriction for c in constraints)
       
   394             q += ', %s' % restrictions
       
   395             # restrict object eids if possible
       
   396             objeids = select(restrictions, self.cursor)
       
   397         else:
       
   398             objeids = oedict.get(obj, frozenset())
       
   399         if subjcard in '?1' or objcard in '?1':
       
   400             for subjeid, objeid in used:
       
   401                 if subjcard in '?1' and subjeid in subjeids:
       
   402                     subjeids.remove(subjeid)
       
   403                     if objeid in objeids:
       
   404                         objeids.remove(objeid)
       
   405                 if objcard in '?1' and objeid in objeids:
       
   406                     objeids.remove(objeid)
       
   407                     if subjeid in subjeids:
       
   408                         subjeids.remove(subjeid)
       
   409         if not subjeids:
       
   410             check_card_satisfied(objcard, objeids, subj, rschema, obj)
       
   411             return 
       
   412         if not objeids:
       
   413             check_card_satisfied(subjcard, subjeids, subj, rschema, obj)
       
   414             return
       
   415         if subjcard in '?1+':
       
   416             for subjeid in tuple(subjeids):
       
   417                 # do not insert relation if this entity already has a relation
       
   418                 if subjeid in preexisting_subjrels:
       
   419                     continue
       
   420                 objeid = choose_eid(objeids, subjeid)
       
   421                 if objeid is None or (subjeid, objeid) in used:
       
   422                     continue
       
   423                 yield q, self.qargs(subjeids, objeids, subjcard, objcard,
       
   424                                     subjeid, objeid)
       
   425                 used.add( (subjeid, objeid) )
       
   426                 if not objeids:
       
   427                     check_card_satisfied(subjcard, subjeids, subj, rschema, obj)
       
   428                     break
       
   429         elif objcard in '?1+':
       
   430             for objeid in tuple(objeids):
       
   431                 # do not insert relation if this entity already has a relation
       
   432                 if objeid in preexisting_objrels:
       
   433                     continue
       
   434                 subjeid = choose_eid(subjeids, objeid)
       
   435                 if subjeid is None or (subjeid, objeid) in used:
       
   436                     continue
       
   437                 yield q, self.qargs(subjeids, objeids, subjcard, objcard,
       
   438                                     subjeid, objeid)
       
   439                 used.add( (subjeid, objeid) )
       
   440                 if not subjeids:
       
   441                     check_card_satisfied(objcard, objeids, subj, rschema, obj)
       
   442                     break
       
   443         else:
       
   444             # FIXME: 20 should be read from config
       
   445             subjeidsiter = [choice(tuple(subjeids)) for i in xrange(min(len(subjeids), 20))]
       
   446             objeidsiter = [choice(tuple(objeids)) for i in xrange(min(len(objeids), 20))]
       
   447             for subjeid, objeid in zip(subjeidsiter, objeidsiter):
       
   448                 if subjeid != objeid and not (subjeid, objeid) in used:
       
   449                     used.add( (subjeid, objeid) )
       
   450                     yield q, self.qargs(subjeids, objeids, subjcard, objcard,
       
   451                                         subjeid, objeid)
       
   452                     
       
   453 def check_card_satisfied(card, remaining, subj, rschema, obj):
       
   454     if card in '1+' and remaining:
       
   455         raise Exception("can't satisfy cardinality %s for relation %s %s %s"
       
   456                         % (card, subj, rschema, obj))
       
   457 
       
   458 def choose_eid(values, avoid):
       
   459     values = tuple(values)
       
   460     if len(values) == 1 and values[0] == avoid:
       
   461         return None
       
   462     objeid = choice(values)
       
   463     while objeid == avoid: # avoid infinite recursion like in X comment X
       
   464         objeid = choice(values)
       
   465     return objeid
       
   466                     
       
   467                 
       
   468 
       
   469 # UTILITIES FUNCS ##############################################################
       
   470 def make_tel(num_tel):
       
   471     """takes an integer, converts is as a string and inserts
       
   472     white spaces each 2 chars (french notation)
       
   473     """
       
   474     num_list = list(str(num_tel))
       
   475     for index in (6, 4, 2):
       
   476         num_list.insert(index, ' ')
       
   477 
       
   478     return ''.join(num_list)
       
   479 
       
   480 
       
   481 def numlen(number):
       
   482     """returns the number's length"""
       
   483     return len(str(number))