cubicweb/etwist/server.py
changeset 12530 9d88e1177c35
parent 12529 7276f1c89ddd
child 12531 2b9e815d20dc
--- a/cubicweb/etwist/server.py	Thu Mar 21 12:05:30 2019 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,297 +0,0 @@
-# copyright 2003-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
-# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
-#
-# This file is part of CubicWeb.
-#
-# CubicWeb is free software: you can redistribute it and/or modify it under the
-# terms of the GNU Lesser General Public License as published by the Free
-# Software Foundation, either version 2.1 of the License, or (at your option)
-# any later version.
-#
-# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Lesser General Public License along
-# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
-"""twisted server for CubicWeb web instances"""
-
-import sys
-import traceback
-import threading
-from cgi import FieldStorage, parse_header
-from functools import partial
-import warnings
-
-from cubicweb.statsd_logger import statsd_timeit
-
-from twisted.internet import reactor, task, threads
-from twisted.web import http, server
-from twisted.web import resource
-from twisted.web.server import NOT_DONE_YET
-
-
-from logilab.mtconverter import xml_escape
-from logilab.common.decorators import monkeypatch
-
-from cubicweb import ConfigurationError, CW_EVENT_MANAGER
-from cubicweb.utils import json_dumps
-from cubicweb.web import DirectResponse
-from cubicweb.web.application import CubicWebPublisher
-from cubicweb.etwist.request import CubicWebTwistedRequestAdapter
-from cubicweb.etwist.http import HTTPResponse
-
-
-def start_task(interval, func):
-    lc = task.LoopingCall(func)
-    # wait until the interval has expired before actually starting the task,
-    # otherwise we would have to wait for all tasks to finish before the
-    # server actually starts
-    lc.start(interval, now=False)
-
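-
-# A minimal sketch, for illustration only, of what ``now=False`` changes in
-# start_task() above: the first call fires only once ``interval`` seconds have
-# elapsed, so startup is not blocked by the task's first run.  The callable and
-# the 60 second interval below are arbitrary examples.
-def _looping_call_sketch():
-    lc = task.LoopingCall(lambda: None)
-    lc.start(60, now=False)  # with now=True the callable would fire immediately
-    return lc
-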
-
-class CubicWebRootResource(resource.Resource):
-    def __init__(self, config, repo):
-        resource.Resource.__init__(self)
-        self.config = config
-        # instantiate the publisher here rather than in init_publisher so that
-        # some checks are done before daemonization (e.g. version consistency)
-        self.appli = CubicWebPublisher(repo, config)
-        self.base_url = config['base-url']
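-        # MAX_POST_LENGTH is kept module-level so that the gotLength()
-        # monkeypatch below can reject oversized request bodies early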
-        global MAX_POST_LENGTH
-        MAX_POST_LENGTH = config['max-post-length']
-
-    def init_publisher(self):
-        config = self.config
-        # when we have an in-memory repository, clean unused sessions every XX
-        # seconds and properly shut down the server
-        if config['repository-uri'] == 'inmemory://':
-            if config.mode != 'test':
-                reactor.addSystemEventTrigger('before', 'shutdown',
-                                              self.shutdown_event)
-                warnings.warn(
-                    'twisted server does not start repository looping tasks anymore; '
-                    'use the standalone "scheduler" command if needed'
-                )
-        self.set_url_rewriter()
-        CW_EVENT_MANAGER.bind('after-registry-reload', self.set_url_rewriter)
-
-    def start_service(self):
-        start_task(self.appli.session_handler.clean_sessions_interval,
-                   self.appli.session_handler.clean_sessions)
-
-    def set_url_rewriter(self):
-        self.url_rewriter = self.appli.vreg['components'].select_or_none('urlrewriter')
-
-    def shutdown_event(self):
-        """callback fired when the server is shutting down to properly
-        clean opened sessions
-        """
-        self.appli.repo.shutdown()
-
-    def getChild(self, path, request):
-        """Indicate which resource to use to process down the URL's path"""
-        return self
-
-    def on_request_finished_ko(self, request, reason):
-        # annotate the twisted request so that we can later check for failure
-        # without having to dig into the request's internal attributes such as
-        # _disconnected
-        request.cw_failed = True
-        self.warning('request finished abnormally: %s', reason)
-
-    def render(self, request):
-        """Render a page from the root resource"""
-        finish_deferred = request.notifyFinish()
-        finish_deferred.addErrback(partial(self.on_request_finished_ko, request))
-        # reload modified files in debug mode
-        if self.config.debugmode:
-            self.config.uiprops.reload_if_needed()
-            self.appli.vreg.reload_if_needed()
-        if self.config['profile']: # the default profiler doesn't trace threads
-            return self.render_request(request)
-        else:
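-            # render in a worker thread so the reactor stays responsive; a
-            # standalone sketch of this pattern follows the class definition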
-            deferred = threads.deferToThread(self.render_request, request)
-            return NOT_DONE_YET
-
-    @statsd_timeit
-    def render_request(self, request):
-        try:
-            # processing HUGE files (hundreds of megabytes) in
-            # http.processReceived blocks the processing of other HTTP requests
-            # because of cgi.FieldStorage's clumsy and slow parsing algorithm,
-            # so that part is deferred to the cubicweb thread
-            request.process_multipart()
-            return self._render_request(request)
-        except Exception:
-            trace = traceback.format_exc()
-            return HTTPResponse(stream='<pre>%s</pre>' % xml_escape(trace),
-                                code=500, twisted_request=request)
-
-    def _render_request(self, request):
-        origpath = request.path
-        host = request.host
-        if self.url_rewriter is not None:
-            # XXX should occur before authentication?
-            path = self.url_rewriter.rewrite(host, origpath, request)
-            request.uri = request.uri.replace(origpath, path, 1)
-        req = CubicWebTwistedRequestAdapter(request, self.appli.vreg)
-        try:
-            ### Try to generate the actual request content
-            content = self.appli.handle_request(req)
-        except DirectResponse as ex:
-            return ex.response
-        # finally, build the twisted response object
-        return HTTPResponse(code=req.status_out,
-                            headers=req.headers_out,
-                            stream=content,
-                            twisted_request=req._twreq)
-
-    # these are overridden by set_log_methods below; only defined here to
-    # prevent pylint from complaining
-    @classmethod
-    def debug(cls, msg, *a, **kw):
-        pass
-    info = warning = error = critical = exception = debug
-
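-
-# A minimal sketch, for illustration only, of the deferToThread/NOT_DONE_YET
-# pattern used by CubicWebRootResource.render() above: the blocking work runs
-# in a thread pool thread, the response is written back once the Deferred
-# fires, and twisted is told the answer will come later.  The resource below
-# is a made-up example, not part of the CubicWeb API.
-class _AsyncRenderSketch(resource.Resource):
-    isLeaf = True
-
-    def render_GET(self, request):
-        def write_back(body):
-            request.write(body)
-            request.finish()
-        deferred = threads.deferToThread(lambda: b'hello from a worker thread')
-        deferred.addCallback(write_back)
-        return NOT_DONE_YET
-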
-
-JSON_PATHS = set(('json',))
-FRAME_POST_PATHS = set(('validateform',))
-
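-
-# A minimal sketch, for illustration only, of how logilab's @monkeypatch
-# decorator is used below: the decorated function is attached as a method of
-# the given class.  _Dummy and greet() are made-up names.
-def _monkeypatch_sketch():
-    class _Dummy(object):
-        pass
-
-    @monkeypatch(_Dummy)
-    def greet(self):
-        return 'hello'
-
-    return _Dummy().greet()  # -> 'hello'
-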
-orig_gotLength = http.Request.gotLength
-@monkeypatch(http.Request)
-def gotLength(self, length):
-    orig_gotLength(self, length)
-    if length > MAX_POST_LENGTH: # length is 0 on GET
-        path = self.channel._path.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
-        self.clientproto = 'HTTP/1.1' # not yet initialized
-        self.channel.persistent = 0   # force connection close on cleanup
-        self.setResponseCode(http.REQUEST_ENTITY_TOO_LARGE)
-        if path in JSON_PATHS: # XXX better json path detection
-            self.setHeader('content-type', 'application/json')
-            body = json_dumps({'reason': 'request max size exceeded'})
-        elif path in FRAME_POST_PATHS: # XXX better frame post path detection
-            self.setHeader('content-type', 'text/html')
-            body = ('<script type="text/javascript">'
-                    'window.parent.handleFormValidationResponse(null, null, null, %s, null);'
-                    '</script>' % json_dumps((False, 'request max size exceeded', None)))
-        else:
-            self.setHeader('content-type', 'text/html')
-            body = ("<html><head><title>Processing Failed</title></head><body>"
-                    "<b>request max size exceeded</b></body></html>")
-        self.setHeader('content-length', str(len(body)))
-        self.write(body)
-        # see request.finish(); done here because we would otherwise get an
-        # error due to the request not being fully initialized
-        self.finished = 1
-        if not self.queued:
-            self._cleanup()
-        for d in self.notifications:
-            d.callback(None)
-        self.notifications = []
-
-@monkeypatch(http.Request)
-def requestReceived(self, command, path, version):
-    """Called by channel when all data has been received.
-
-    This method is not intended for users.
-    """
-    self.content.seek(0, 0)
-    self.args = {}
-    self.files = {}
-    self.stack = []
-    self.method, self.uri = command, path
-    self.clientproto = version
-    x = self.uri.split('?', 1)
-    if len(x) == 1:
-        self.path = self.uri
-    else:
-        self.path, argstring = x
-        self.args = http.parse_qs(argstring, 1)
-    # cache the client and server information; we'll need it later when it is
-    # serialized and sent with the request so CGIs will work remotely
-    self.client = self.channel.transport.getPeer()
-    self.host = self.channel.transport.getHost()
-    # Argument processing
-    ctype = self.getHeader('content-type')
-    self._do_process_multipart = False
-    if self.method == "POST" and ctype:
-        key, pdict = parse_header(ctype)
-        if key == 'application/x-www-form-urlencoded':
-            self.args.update(http.parse_qs(self.content.read(), 1))
-            self.content.seek(0)
-        elif key == 'multipart/form-data':
-            # defer this as it can be extremely time consuming with big files
-            self._do_process_multipart = True
-    self.process()
-
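-
-# A minimal sketch, for illustration only, of the query string parsing done in
-# requestReceived() above; the second argument keeps blank values so that
-# parameters such as 'rql=' are not dropped.  The query string is a made-up
-# example (note that recent twisted versions expect bytes here).
-def _parse_qs_sketch():
-    return http.parse_qs(b'vid=index&rql=', 1)  # -> {b'vid': [b'index'], b'rql': [b'']}
-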
-@monkeypatch(http.Request)
-def process_multipart(self):
-    if not self._do_process_multipart:
-        return
-    form = FieldStorage(self.content, self.received_headers,
-                        environ={'REQUEST_METHOD': 'POST'},
-                        keep_blank_values=1,
-                        strict_parsing=1)
-    for key in form:
-        values = form[key]
-        if not isinstance(values, list):
-            values = [values]
-        for value in values:
-            if value.filename:
-                if value.done != -1: # done == -1 means the transfer was interrupted
-                    self.files.setdefault(key, []).append((value.filename, value.file))
-                else:
-                    self.files.setdefault(key, []).append((None, None))
-            else:
-                self.args.setdefault(key, []).append(value.value)
-
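-
-# A minimal sketch, for illustration only, of how cgi.FieldStorage parses a
-# multipart body, mirroring the call in process_multipart() above; the
-# boundary, field name and value are made up for the example.
-def _fieldstorage_sketch():
-    from io import BytesIO
-    body = (b'--frontier\r\n'
-            b'Content-Disposition: form-data; name="vid"\r\n'
-            b'\r\n'
-            b'index\r\n'
-            b'--frontier--\r\n')
-    headers = {'content-type': 'multipart/form-data; boundary=frontier',
-               'content-length': str(len(body))}
-    form = FieldStorage(BytesIO(body), headers,
-                        environ={'REQUEST_METHOD': 'POST'},
-                        keep_blank_values=1, strict_parsing=1)
-    return form['vid'].value  # -> 'index'
-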
-from logging import getLogger
-from cubicweb import set_log_methods
-LOGGER = getLogger('cubicweb.twisted')
-set_log_methods(CubicWebRootResource, LOGGER)
-
-def run(config, debug=None, repo=None):
-    # repo may be passed in during tests.
-    #
-    # The test has already created a repo object, so we should not create a new
-    # one. Explicitly passing the repo object avoids relying on the fragile
-    # config.repository() cache. We could imagine making repo a mandatory
-    # argument and receiving it from the start command directly.
-    if debug is not None:
-        config.debugmode = debug
-    config.check_writeable_uid_directory(config.appdatahome)
-    # create the site
-    if repo is None:
-        repo = config.repository()
-    root_resource = CubicWebRootResource(config, repo)
-    website = server.Site(root_resource)
-    # serve it via standard HTTP on the port set in the configuration
-    port = config['port'] or 8080
-    interface = config['interface']
-    reactor.suggestThreadPoolSize(config['webserver-threadpool-size'])
-    reactor.listenTCP(port, website, interface=interface)
-    if not config.debugmode:
-        if sys.platform == 'win32':
-            raise ConfigurationError("Under windows, you must use the service management "
-                                     "commands (e.g : 'net start my_instance)'")
-        from logilab.common.daemon import daemonize
-        LOGGER.info('instance started in the background on %s', root_resource.base_url)
-        whichproc = daemonize(config['pid-file'], umask=config['umask'])
-        if whichproc: # 1 = original process, 2 = first fork, None = second fork (i.e. the daemon process)
-            return whichproc # parent process
-    root_resource.init_publisher() # before changing uid
-    if config['uid'] is not None:
-        from logilab.common.daemon import setugid
-        setugid(config['uid'])
-    root_resource.start_service()
-    LOGGER.info('instance started on %s', root_resource.base_url)
-    # avoid an annoying warning if not running in the main thread
-    signals = threading.currentThread().getName() == 'MainThread'
-    if config['profile']:
-        import cProfile
-        cProfile.runctx('reactor.run(installSignalHandlers=%s)' % signals,
-                        globals(), locals(), config['profile'])
-    else:
-        reactor.run(installSignalHandlers=signals)