[schema] load schema from modules names instead of directories
author Philippe Pepiot <philippe.pepiot@logilab.fr>
Thu, 19 Jan 2017 15:27:39 +0100
changeset 11899 bf6106b91633
parent 11898 c5d3382f14e9
child 11900 8496135b6dc1
[schema] load schema from modules names instead of directories Introspect cubicweb, cubes and apphome using pkgutil to generate the full list of module names for loading the schema. Keep the historical behavior: when a module is found through a Python bytecode file (.pyc or .pyo), check that the corresponding source .py file exists. Loading the schema from apphome requires apphome to be present in sys.path and requires that the "schema" module resolves to a file located in apphome. Update migraction tests to explicitly update sys.path when loading schema from different apps; use a context manager for this so it is more readable. Require updated logilab-common and yams.
cubicweb/cwconfig.py
cubicweb/schema.py
cubicweb/server/test/unittest_migractions.py
cubicweb/test/unittest_cwconfig.py
cubicweb/test/unittest_schema.py
requirements/dev.txt
--- a/cubicweb/cwconfig.py	Wed Jan 18 17:16:00 2017 +0100
+++ b/cubicweb/cwconfig.py	Thu Jan 19 15:27:39 2017 +0100
@@ -185,7 +185,7 @@
 import logging.config
 import os
 from os.path import (exists, join, expanduser, abspath, normpath,
-                     basename, isdir, dirname, splitext)
+                     basename, isdir, dirname, splitext, realpath)
 import pkgutil
 import pkg_resources
 import re
@@ -275,6 +275,40 @@
     return cube
 
 
+def _expand_modname(modname):
+    """expand modules names `modname` if exists by walking non package submodules
+    and yield (submodname, filepath) including `modname` itself
+
+    If the file ends with .pyc or .pyo (python bytecode) also check that the
+    corresponding source .py file exists before yielding.
+    """
+    try:
+        loader = pkgutil.find_loader(modname)
+    except ImportError:
+        return
+    if not loader:
+        return
+
+    def check_source_file(filepath):
+        if filepath[-4:] in ('.pyc', '.pyo'):
+            if not exists(filepath[:-1]):
+                return False
+        return True
+
+    filepath = loader.get_filename()
+    if not check_source_file(filepath):
+        return
+    yield modname, filepath
+    if loader.is_package(modname):
+        path = dirname(filepath)
+        for subloader, subname, ispkg in pkgutil.walk_packages([path]):
+            # ignore subpackages (historical behavior)
+            if not ispkg:
+                filepath = subloader.find_module(subname).get_filename()
+                if check_source_file(filepath):
+                    yield modname + '.' + subname, filepath
+
+
 # persistent options definition
 PERSISTENT_OPTIONS = (
     ('encoding',
@@ -775,6 +809,20 @@
         # configure simpleTal logger
         logging.getLogger('simpleTAL').setLevel(logging.ERROR)
 
+    def schema_modnames(self):
+        modnames = []
+        for name in ('bootstrap', 'base', 'workflow', 'Bookmark'):
+            modnames.append(('cubicweb', 'cubicweb.schemas.' + name))
+        for cube in reversed(self.cubes()):
+            for modname, filepath in _expand_modname('cubes.{0}.schema'.format(cube)):
+                modnames.append((cube, modname))
+        if self.apphome:
+            apphome = realpath(self.apphome)
+            for modname, filepath in _expand_modname('schema'):
+                if realpath(filepath).startswith(apphome):
+                    modnames.append(('data', modname))
+        return modnames
+
     def appobjects_path(self):
         """return a list of files or directories where the registry will look
         for application objects. By default return nothing in NoApp config.
--- a/cubicweb/schema.py	Wed Jan 18 17:16:00 2017 +0100
+++ b/cubicweb/schema.py	Thu Jan 19 15:27:39 2017 +0100
@@ -19,6 +19,7 @@
 
 from __future__ import print_function
 
+import pkgutil
 import re
 from os.path import join, basename
 from hashlib import md5
@@ -1369,19 +1370,12 @@
     """
     schemacls = CubicWebSchema
 
-    def load(self, config, path=(), **kwargs):
+    def load(self, config, modnames=(['cubicweb', 'cubicweb.schemas.bootstrap'],), **kwargs):
         """return a Schema instance from the schema definition read
         from <directory>
         """
         return super(BootstrapSchemaLoader, self).load(
-            path, config.appid, register_base_types=False, **kwargs)
-
-    def _load_definition_files(self, cubes=None):
-        # bootstraping, ignore cubes
-        filepath = join(cubicweb.CW_SOFTWARE_ROOT, 'schemas', 'bootstrap.py')
-        self.info('loading %s', filepath)
-        with tempattr(ybo, 'PACKAGE', 'cubicweb'):  # though we don't care here
-            self.handle_file(filepath)
+            modnames, name=config.appid, register_base_types=False, **kwargs)
 
     def unhandled_file(self, filepath):
         """called when a file without handler associated has been found"""
@@ -1402,30 +1396,12 @@
         from <directory>
         """
         self.info('loading %s schemas', ', '.join(config.cubes()))
-        self.extrapath = config.extrapath
-        if config.apphome:
-            path = tuple(reversed([config.apphome] + config.cubes_path()))
-        else:
-            path = tuple(reversed(config.cubes_path()))
         try:
-            return super(CubicWebSchemaLoader, self).load(config, path=path, **kwargs)
+            return super(CubicWebSchemaLoader, self).load(config, config.schema_modnames(), **kwargs)
         finally:
             # we've to cleanup modules imported from cubicweb.schemas as well
             cleanup_sys_modules([join(cubicweb.CW_SOFTWARE_ROOT, 'schemas')])
 
-    def _load_definition_files(self, cubes):
-        for filepath in (join(cubicweb.CW_SOFTWARE_ROOT, 'schemas', 'bootstrap.py'),
-                         join(cubicweb.CW_SOFTWARE_ROOT, 'schemas', 'base.py'),
-                         join(cubicweb.CW_SOFTWARE_ROOT, 'schemas', 'workflow.py'),
-                         join(cubicweb.CW_SOFTWARE_ROOT, 'schemas', 'Bookmark.py')):
-            self.info('loading %s', filepath)
-            with tempattr(ybo, 'PACKAGE', 'cubicweb'):
-                self.handle_file(filepath)
-        for cube in cubes:
-            for filepath in self.get_schema_files(cube):
-                with tempattr(ybo, 'PACKAGE', basename(cube)):
-                    self.handle_file(filepath)
-
     # these are overridden by set_log_methods below
     # only defining here to prevent pylint from complaining
     info = warning = error = critical = exception = debug = lambda msg, *a, **kw: None
--- a/cubicweb/server/test/unittest_migractions.py	Wed Jan 18 17:16:00 2017 +0100
+++ b/cubicweb/server/test/unittest_migractions.py	Thu Jan 19 15:27:39 2017 +0100
@@ -19,6 +19,7 @@
 
 import os
 import os.path as osp
+import sys
 from datetime import date
 from contextlib import contextmanager
 import tempfile
@@ -77,13 +78,25 @@
         # we have to read schema from the database to get eid for schema entities
         self.repo.set_schema(self.repo.deserialize_schema(), resetvreg=False)
         # hack to read the schema from data/migrschema
-        config = self.config
-        config.appid = osp.join(self.appid, 'migratedapp')
-        config._apphome = osp.join(HERE, config.appid)
-        global migrschema
-        migrschema = config.load_schema()
-        config.appid = self.appid
-        config._apphome = osp.join(HERE, self.appid)
+
+        @contextmanager
+        def temp_app(config, appid, apphome):
+            old = config.apphome, config.appid
+            sys.path.remove(old[0])
+            sys.path.insert(0, apphome)
+            config._apphome, config.appid = apphome, appid
+            try:
+                yield config
+            finally:
+                sys.path.remove(apphome)
+                sys.path.insert(0, old[0])
+                config._apphome, config.appid = old
+
+        appid = osp.join(self.appid, 'migratedapp')
+        apphome = osp.join(HERE, appid)
+        with temp_app(self.config, appid, apphome) as config:
+            global migrschema
+            migrschema = config.load_schema()
 
     def setUp(self):
         self.configcls.cls_adjust_sys_path()
--- a/cubicweb/test/unittest_cwconfig.py	Wed Jan 18 17:16:00 2017 +0100
+++ b/cubicweb/test/unittest_cwconfig.py	Thu Jan 19 15:27:39 2017 +0100
@@ -17,8 +17,12 @@
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
 """cubicweb.cwconfig unit tests"""
 
+import contextlib
+import compileall
+import functools
 import sys
 import os
+import pkgutil
 from os.path import dirname, join, abspath
 from pkg_resources import EntryPoint, Distribution
 import unittest
@@ -31,7 +35,8 @@
 
 from cubicweb.devtools import ApptestConfiguration
 from cubicweb.devtools.testlib import BaseTestCase, TemporaryDirectory
-from cubicweb.cwconfig import _find_prefix
+from cubicweb.cwconfig import (
+    CubicWebConfiguration, _find_prefix, _expand_modname)
 
 
 def unabsolutize(path):
@@ -44,6 +49,50 @@
     raise Exception('duh? %s' % path)
 
 
+def templibdir(func):
+    """create a temporary directory and insert it in sys.path"""
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        with TemporaryDirectory() as libdir:
+            sys.path.insert(0, libdir)
+            try:
+                args = args + (libdir,)
+                return func(*args, **kwargs)
+            finally:
+                sys.path.remove(libdir)
+    return wrapper
+
+
+def create_filepath(filepath):
+    filedir = dirname(filepath)
+    if not os.path.exists(filedir):
+        os.makedirs(filedir)
+    with open(filepath, 'a'):
+        pass
+
+
+@contextlib.contextmanager
+def temp_config(appid, instance_dir, cubes_dir, cubes):
+    """context manager that create a config object with specified appid,
+    instance_dir, cubes_dir and cubes"""
+    cls = CubicWebConfiguration
+    old = (cls._INSTANCES_DIR, cls.CUBES_DIR, cls.CUBES_PATH,
+           sys.path[:], sys.meta_path[:])
+    old_modules = set(sys.modules)
+    try:
+        cls._INSTANCES_DIR, cls.CUBES_DIR, cls.CUBES_PATH = (
+            instance_dir, cubes_dir, [])
+        config = cls(appid)
+        config._cubes = cubes
+        config.adjust_sys_path()
+        yield config
+    finally:
+        (cls._INSTANCES_DIR, cls.CUBES_DIR, cls.CUBES_PATH,
+         sys.path[:], sys.meta_path[:]) = old
+        for module in set(sys.modules) - old_modules:
+            del sys.modules[module]
+
+
 class CubicWebConfigurationTC(BaseTestCase):
 
     @classmethod
@@ -313,5 +362,109 @@
                 os.environ['VIRTUAL_ENV'] = venv
 
 
+class ModnamesTC(unittest.TestCase):
+
+    @templibdir
+    def test_expand_modnames(self, libdir):
+        tempdir = join(libdir, 'lib')
+        filepaths = [
+            join(tempdir, '__init__.py'),
+            join(tempdir, 'a.py'),
+            join(tempdir, 'b.py'),
+            join(tempdir, 'c.py'),
+            join(tempdir, 'b', '__init__.py'),
+            join(tempdir, 'b', 'a.py'),
+            join(tempdir, 'b', 'c.py'),
+            join(tempdir, 'b', 'd', '__init__.py'),
+            join(tempdir, 'e', 'e.py'),
+        ]
+        for filepath in filepaths:
+            create_filepath(filepath)
+        # not importable
+        self.assertEqual(list(_expand_modname('isnotimportable')), [])
+        # not a python package
+        self.assertEqual(list(_expand_modname('lib.e')), [])
+        self.assertEqual(list(_expand_modname('lib.a')), [
+            ('lib.a', join(tempdir, 'a.py')),
+        ])
+        # lib.b.d (subpackage) not to be imported
+        self.assertEqual(list(_expand_modname('lib.b')), [
+            ('lib.b', join(tempdir, 'b', '__init__.py')),
+            ('lib.b.a', join(tempdir, 'b', 'a.py')),
+            ('lib.b.c', join(tempdir, 'b', 'c.py')),
+        ])
+        self.assertEqual(list(_expand_modname('lib')), [
+            ('lib', join(tempdir, '__init__.py')),
+            ('lib.a', join(tempdir, 'a.py')),
+            ('lib.c', join(tempdir, 'c.py')),
+        ])
+        for source in (
+            join(tempdir, 'c.py'),
+            join(tempdir, 'b', 'c.py'),
+        ):
+            if not PY3:
+                # Ensure the .pyc file exists.
+                # This is not required on PY3, which writes bytecode to
+                # a __pycache__ directory and will not import a module
+                # whose source file does not exist.
+                compileall.compile_file(source, force=True)
+                self.assertTrue(os.path.exists(source + 'c'))
+            # remove source file
+            os.remove(source)
+        self.assertEqual(list(_expand_modname('lib.c')), [])
+        self.assertEqual(list(_expand_modname('lib.b')), [
+            ('lib.b', join(tempdir, 'b', '__init__.py')),
+            ('lib.b.a', join(tempdir, 'b', 'a.py')),
+        ])
+        self.assertEqual(list(_expand_modname('lib')), [
+            ('lib', join(tempdir, '__init__.py')),
+            ('lib.a', join(tempdir, 'a.py')),
+        ])
+
+    @templibdir
+    def test_schema_modnames(self, libdir):
+        for filepath in (
+            join(libdir, 'schema.py'),
+            join(libdir, 'cubicweb_foo', '__init__.py'),
+            join(libdir, 'cubicweb_foo', 'schema', '__init__.py'),
+            join(libdir, 'cubicweb_foo', 'schema', 'a.py'),
+            join(libdir, 'cubicweb_foo', 'schema', 'b.py'),
+            join(libdir, 'cubes', '__init__.py'),
+            join(libdir, 'cubes', 'bar', '__init__.py'),
+            join(libdir, 'cubes', 'bar', 'schema.py'),
+            join(libdir, '_instance_dir', 'data1', 'schema.py'),
+            join(libdir, '_instance_dir', 'data2', 'noschema.py'),
+        ):
+            create_filepath(filepath)
+        expected = [
+            ('cubicweb', 'cubicweb.schemas.bootstrap'),
+            ('cubicweb', 'cubicweb.schemas.base'),
+            ('cubicweb', 'cubicweb.schemas.workflow'),
+            ('cubicweb', 'cubicweb.schemas.Bookmark'),
+            ('bar', 'cubes.bar.schema'),
+            ('foo', 'cubes.foo.schema'),
+            ('foo', 'cubes.foo.schema.a'),
+            ('foo', 'cubes.foo.schema.b'),
+        ]
+        # app has schema file
+        instance_dir, cubes_dir = (
+            join(libdir, '_instance_dir'), join(libdir, 'cubes'))
+        with temp_config('data1', instance_dir, cubes_dir,
+                         ('foo', 'bar')) as config:
+            self.assertEqual(pkgutil.find_loader('schema').get_filename(),
+                             join(libdir, '_instance_dir',
+                                  'data1', 'schema.py'))
+            self.assertEqual(config.schema_modnames(),
+                             expected + [('data', 'schema')])
+        # app doesn't have schema file
+        with temp_config('data2', instance_dir, cubes_dir,
+                         ('foo', 'bar')) as config:
+            self.assertEqual(pkgutil.find_loader('schema').get_filename(),
+                             join(libdir, 'schema.py'))
+            self.assertEqual(config.schema_modnames(), expected)
+
+
+
+
 if __name__ == '__main__':
     unittest.main()
--- a/cubicweb/test/unittest_schema.py	Wed Jan 18 17:16:00 2017 +0100
+++ b/cubicweb/test/unittest_schema.py	Thu Jan 19 15:27:39 2017 +0100
@@ -402,7 +402,7 @@
         self.loader.post_build_callbacks = []
 
     def _test(self, schemafile, msg):
-        self.loader.handle_file(join(DATADIR, schemafile))
+        self.loader.handle_file(join(DATADIR, schemafile), None)
         sch = self.loader.schemacls('toto')
         with self.assertRaises(BadSchemaDefinition) as cm:
             fill_schema(sch, self.loader.defined, False)
--- a/requirements/dev.txt	Wed Jan 18 17:16:00 2017 +0100
+++ b/requirements/dev.txt	Thu Jan 19 15:27:39 2017 +0100
@@ -1,1 +1,3 @@
 pytest
+http://hg.logilab.org/master/logilab/common/archive/default.tar.bz2#egg=logilab-common
+http://hg.logilab.org/master/yams/archive/default.tar.bz2#egg=yams