web/views/staticcontrollers.py
author Pierre-Yves David <pierre-yves.david@logilab.fr>
Mon, 19 Mar 2012 14:37:43 +0100
changeset 8323 fe60a77ae4a7
parent 8298 2a4bc6f75e9c
child 8325 7f2337d7937f
permissions -rw-r--r--
static-file: properly set/use cache header for static file (closes #2255013) This changesets enables the standard http cache mechanism where the static controller may reply "304 Not modified" based on `last-modified` in HTTP response and `if-modified-since` in HTTP query. The last modified time is computed using the file-system information. The pre-existing logic using an `Expires` header to prevent client from sending request stay in place. The new logic just prevents sending the file again if not necessary.

# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
"""Set of static resources controllers for :

- /data/...
- /static/...
- /fckeditor/...

"""

import os
import os.path as osp
import hashlib
import mimetypes
from time import mktime
from datetime import datetime, timedelta
from logging import getLogger

from cubicweb.web import NotFound
from cubicweb.web.http_headers import generateDateTime
from cubicweb.web.controller import Controller
from cubicweb.web.views.urlrewrite import URLRewriter



class StaticFileController(Controller):
    """an abtract class to serve static file

    Make sure to add your subclass to the STATIC_CONTROLLERS list"""
    __abstract__ = True
    directory_listing_allowed = False

    def max_age(self, path):
        """max cache TTL"""
        return 60*60*24*7

    def static_file(self, path):
        """Return full content of a static file.

        XXX iterable content would be better
        """
        debugmode = self._cw.vreg.config.debugmode
        if osp.isdir(path):
            if self.directory_listing_allowed:
                return u''
            raise NotFound(path)
        if not osp.isfile(path):
            raise NotFound()
        if not debugmode:
            # XXX: Don't provide additional resource information to error responses
            #
            # the HTTP RFC recommands not going further than 1 year ahead
            expires = datetime.now() + timedelta(days=6*30)
            self._cw.set_header('Expires', generateDateTime(mktime(expires.timetuple())))

        # XXX system call to os.stats could be cached once and for all in
        # production mode (where static files are not expected to change)
        #
        # Note that: we do a osp.isdir + osp.isfile before and a potential
        # os.read after. Improving this specific call will not help
        #
        # Real production environment should use dedicated static file serving.
        self._cw.set_header('last-modified', generateDateTime(os.stat(path).st_mtime))
        self._cw.validate_cache()
        # XXX elif uri.startswith('/https/'): uri = uri[6:]
        mimetype, encoding = mimetypes.guess_type(path)
        self._cw.set_content_type(mimetype, osp.basename(path), encoding)
        return file(path).read()

    @property
    def relpath(self):
        """path of a requested file relative to the controller"""
        path = self._cw.form.get('static_relative_path')
        if path is None:
            path = self._cw.relative_path(includeparams=True)
        return path


class ConcatFilesHandler(object):
    """Emulating the behavior of modconcat

    this serve multiple file as a single one.
    """

    def __init__(self, config):
        self._resources = {}
        self.config = config
        self.logger = getLogger('cubicweb.web')

    def _resource(self, path):
        """get the resouce"""
        try:
            return self._resources[path]
        except KeyError:
            self._resources[path] = self.config.locate_resource(path)
            return self._resources[path]

    def _up_to_date(self, filepath, paths):
        """
        The concat-file is considered up-to-date if it exists.
        In debug mode, an additional check is performed to make sure that
        concat-file is more recent than all concatenated files
        """
        if not osp.isfile(filepath):
            return False
        if self.config.debugmode:
            concat_lastmod = os.stat(filepath).st_mtime
            for path in paths:
                dirpath, rid = self._resource(path)
                if rid is None:
                    raise NotFound(path)
                path = osp.join(dirpath, rid)
                if os.stat(path).st_mtime > concat_lastmod:
                    return False
        return True

    def build_filepath(self, paths):
        """return the filepath that will be used to cache concatenation of `paths`
        """
        _, ext = osp.splitext(paths[0])
        fname = 'cache_concat_' + hashlib.md5(';'.join(paths)).hexdigest() + ext
        return osp.join(self.config.appdatahome, 'uicache', fname)

    def concat_cached_filepath(self, paths):
        filepath = self.build_filepath(paths)
        if not self._up_to_date(filepath, paths):
            with open(filepath, 'wb') as f:
                for path in paths:
                    dirpath, rid = self._resource(path)
                    if rid is None:
                        # In production mode log an error, do not return a 404
                        # XXX the erroneous content is cached anyway
                        self.logger.error('concatenated data url error: %r file '
                                          'does not exist', path)
                        if self.config.debugmode:
                            raise NotFound(path)
                    else:
                        for line in open(osp.join(dirpath, rid)):
                            f.write(line)
                        f.write('\n')
        return filepath


class DataController(StaticFileController):
    """Controller in charge of serving static file in /data/

    Handle modeconcat like url.
    """

    __regid__ = 'data'

    def __init__(self, *args, **kwargs):
        super(DataController, self).__init__(*args, **kwargs)
        config = self._cw.vreg.config
        md5_version = config.instance_md5_version()
        self.base_datapath = config.data_relpath()
        self.data_modconcat_basepath = '%s??' % self.base_datapath
        self.concat_files_registry = ConcatFilesHandler(config)

    def publish(self, rset=None):
        config = self._cw.vreg.config
        # includeparams=True for modconcat-like urls
        relpath = self.relpath
        if relpath.startswith(self.data_modconcat_basepath):
            paths = relpath[len(self.data_modconcat_basepath):].split(',')
            filepath = self.concat_files_registry.concat_cached_filepath(paths)
            return self.static_file(filepath)
        else:
            relpath = relpath[len(self.base_datapath):] # skip leading '/data/'
        dirpath, rid = config.locate_resource(relpath)
        if dirpath is None:
            raise NotFound()
        return self.static_file(osp.join(dirpath, rid))


class FCKEditorController(StaticFileController):
    """Controller in charge of serving FCKEditor related file

    The motivational for a dedicated controller have been lost.
    """

    __regid__ = 'fckeditor'

    def publish(self, rset=None):
        config = self._cw.vreg.config
        if self._cw.https:
            uiprops = config.https_uiprops
        else:
            uiprops = config.uiprops
        relpath = self.relpath
        return self.static_file(osp.join(uiprops['FCKEDITOR_PATH'], relpath))


class StaticDirectoryController(StaticFileController):
    """Controller in charge of serving static file in /static/
    """
    __regid__ = 'static'

    def publish(self, rset=None):
        staticdir = self._cw.vreg.config.static_directory
        relpath = self.relpath
        return self.static_file(osp.join(staticdir, relpath))

STATIC_CONTROLLERS = [DataController, FCKEditorController,
                      StaticDirectoryController]

class StaticControlerRewriter(URLRewriter):
    """a quick and dirty rewritter in charge of server static file.

    This is a work around the flatness of url handling in cubicweb."""

    __regid__ = 'static'

    priority = 10

    def rewrite(self, req, uri):
        for ctrl in STATIC_CONTROLLERS:
            if uri.startswith('/%s/' % ctrl.__regid__):
                break
        else:
            self.debug("not a static file uri: %s", uri)
            raise KeyError(uri)
        relpath = self._cw.relative_path(includeparams=False)
        self._cw.form['static_relative_path'] = self._cw.relative_path(includeparams=True)
        return ctrl.__regid__, None