Switched from TwistedWeb2 to TwistedWeb
- added an HTTPResponse class in etwist/http.py (it could later be abstracted into cubicweb/web)
- added the twisted.web2 http_headers.py file to cubicweb/web to handle the conversion of HTTP headers between raw headers and Python objects
- removed caching for base views (except for startup views). A better solution would be to use weak entity tags, but they do not seem
to be implemented in twisted.web.
- added forbidden access message when browsing static local directories
- tested with TwistedWeb 8, 9 and 10
TODO:
=====
- Handle file uploading in forms.
twisted.web seems to keep very little information about uploaded files: only the file content is stored in twisted_request.args['input_field_name'], and the filenames do not seem to be tracked.
Possible solutions:
- use web2 code to parse raw request content still stored and available in twisted_request.content
- find a magic function in twisted.web API to get the filenames
- More tests.
"""twisted server for CubicWeb web instances
:organization: Logilab
:copyright: 2001-2010 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses
"""
__docformat__ = "restructuredtext en"
import sys
import os
import select
import errno
from time import mktime
from datetime import date, timedelta
from urlparse import urlsplit, urlunsplit
from cgi import FieldStorage, parse_header
from twisted.internet import reactor, task, threads
from twisted.internet.defer import maybeDeferred
from twisted.web import http, server
from twisted.web import static, resource
from twisted.web.server import NOT_DONE_YET
from logilab.common.decorators import monkeypatch
from cubicweb import ConfigurationError, CW_EVENT_MANAGER
from cubicweb.web import (AuthenticationError, NotFound, Redirect,
RemoteCallFailed, DirectResponse, StatusResponse,
ExplicitLogin)
from cubicweb.web.application import CubicWebPublisher
from cubicweb.etwist.request import CubicWebTwistedRequestAdapter
from cubicweb.etwist.http import HTTPResponse
def daemonize():
    """Detach the current process from its terminal (unix only).

    The process forks twice, calling ``os.setsid()`` in between, then moves
    to ``/``, sets a restrictive umask and redirects the three standard
    file descriptors to ``/dev/null``.

    :return: 1 in the original process and in the intermediate one (both are
      expected to stop there), ``None`` in the final detached process.
    :raise OSError: if forking, opening ``/dev/null`` or duplicating a
      descriptor fails for a reason other than ``EBADF``.
    """
    # XXX unix specific
    # XXX factorize w/ code in cw.server.server and cw.server.serverctl
    # (start-repository command)
    # See http://www.erlenstar.demon.co.uk/unix/faq_toc.html#TOC16
    if os.fork():   # launch child and...
        return 1
    os.setsid()
    if os.fork():   # launch child again.
        return 1
    # move to the root to avoid mount problems
    os.chdir('/')
    # set paranoid umask
    os.umask(0o77)
    null = os.open('/dev/null', os.O_RDWR)
    for fd in range(3):
        try:
            os.dup2(null, fd)
        except OSError as exc:
            if exc.errno != errno.EBADF:
                raise
    # if a standard descriptor was closed when we started, os.open() may have
    # handed back 0, 1 or 2: closing it would undo the redirection just made
    if null > 2:
        os.close(null)
    return None
def start_task(interval, func):
    """Run `func` repeatedly through a twisted ``LoopingCall``.

    :param interval: delay between two calls, handed to ``LoopingCall.start``.
    :param func: callable wrapped by the looping call.
    """
    # now=False: wait until interval has expired to actually start the task,
    # else we have to wait all task to be finished for the server to be
    # actually started
    task.LoopingCall(func).start(interval, now=False)
def host_prefixed_baseurl(baseurl, host):
    """Return `baseurl` with its network location replaced by `host` when
    `host` is a sub-domain of it.

    The comparison uses the last two dot-separated components of the base
    url's network location, prefixed by a dot; when `host` does not end
    with that suffix the network location is left untouched.

    :param baseurl: the configured base url.
    :param host: host name to compare against the base url's domain.
    :return: the url rebuilt with ``urlunsplit``.
    """
    parts = list(urlsplit(baseurl))
    domain_suffix = '.' + '.'.join(parts[1].split('.')[-2:])
    if host.endswith(domain_suffix):
        # index 1 is the network location
        parts[1] = host
    return urlunsplit(tuple(parts))
class ForbiddenDirectoryLister(resource.Resource):
    """Resource answering "Access forbidden" instead of a directory index."""

    def render(self, request):
        """Build an ``http.FORBIDDEN`` response bound to `request`."""
        response = HTTPResponse(code=http.FORBIDDEN,
                                stream='Access forbidden',
                                twisted_request=request)
        return response
class File(static.File):
    """Static file resource which refuses to list directories."""

    def directoryListing(self):
        """Return the resource used in place of a directory listing."""
        lister = ForbiddenDirectoryLister()
        return lister
class LongTimeExpiringFile(File):
    """overrides static.File and sets a far future ``Expires`` date
    on the resource.

    versions handling is done by serving static files by different
    URLs for each version. For instance::

      http://localhost:8080/data-2.48.2/cubicweb.css
      http://localhost:8080/data-2.49.0/cubicweb.css
      etc.
    """

    def render(self, request):
        """Serve the file with an ``Expires`` header six months ahead.

        twisted.web's ``static.File.render`` writes to `request` itself and
        returns a string or ``NOT_DONE_YET``; there is no response object to
        post-process, so the header has to be set on the request *before*
        delegating to the parent class.

        :param request: the twisted.web request being served.
        :return: whatever ``File.render`` returns.
        """
        # Don't advertise a far-future expiry for a path which does not exist
        # (the parent class will answer with an error for it).
        if self.exists():
            # the HTTP RFC recommends not going further than 1 year ahead
            expires = date.today() + timedelta(days=6*30)
            # Expires must be an HTTP date string, not a raw timestamp
            request.setHeader(
                'Expires', http.datetimeToString(mktime(expires.timetuple())))
        # explicit parent call rather than super(): twisted resource classes
        # may be old-style classes on the supported twisted versions
        return File.render(self, request)
class CubicWebRootResource(resource.Resource):
    """Root twisted.web resource of a CubicWeb instance.

    Static paths are delegated to `File` / `LongTimeExpiringFile` resources
    by `getChild`; every other request is handled by `render_request`,
    which authenticates the request and lets the `CubicWebPublisher` held in
    ``self.appli`` publish it.
    """

    def __init__(self, config, debug=None):
        """
        :param config: the instance configuration; read here for 'base-url',
          'https-url' and ``instance_md5_version()``.
        :param debug: debug mode flag, also handed to the publisher.
        """
        self.debugmode = debug
        self.config = config
        # instantiate publisher here and not in init_publisher to get some
        # checks done before daemonization (eg versions consistency)
        self.appli = CubicWebPublisher(config, debug=self.debugmode)
        self.base_url = config['base-url']
        self.https_url = config['https-url']
        self.versioned_datadir = 'data%s' % config.instance_md5_version()
        self.children = {}

    def init_publisher(self):
        """Register shutdown / pyro handling for an in-memory repository and
        install the url rewriter (refreshed after each registry reload).
        """
        config = self.config
        # when we have an in-memory repository, clean unused sessions every XX
        # seconds and properly shutdown the server
        if config.repo_method == 'inmemory':
            reactor.addSystemEventTrigger('before', 'shutdown',
                                          self.shutdown_event)
            if config.pyro_enabled():
                # if pyro is enabled, we have to register to the pyro name
                # server, create a pyro daemon, and create a task to handle pyro
                # requests
                self.pyro_daemon = self.appli.repo.pyro_register()
                self.pyro_listen_timeout = 0.02
                self.appli.repo.looping_task(1, self.pyro_loop_event)
            self.appli.repo.start_looping_tasks()
        self.set_url_rewriter()
        CW_EVENT_MANAGER.bind('after-registry-reload', self.set_url_rewriter)

    def start_service(self):
        """Start the periodic session cleanup task.

        The period is half of the smallest of 'cleanup-session-time'
        (120 when unset) and 'cleanup-anonymous-session-time' (720 when
        unset).
        """
        config = self.config
        interval = min(config['cleanup-session-time'] or 120,
                       config['cleanup-anonymous-session-time'] or 720) / 2.
        start_task(interval, self.appli.session_handler.clean_sessions)

    def set_url_rewriter(self):
        """(Re)select the 'urlrewriter' component, ``None`` if there is none."""
        self.url_rewriter = self.appli.vreg['components'].select_or_none('urlrewriter')

    def shutdown_event(self):
        """callback fired when the server is shutting down to properly
        clean opened sessions
        """
        self.appli.repo.shutdown()

    def pyro_loop_event(self):
        """listen for pyro events"""
        try:
            self.pyro_daemon.handleRequests(self.pyro_listen_timeout)
        except select.error:
            return

    def getChild(self, path, request):
        """Indicate which resource to use to process down the URL's path"""
        pre_path = request.prePathURL()
        # XXX testing pre_path[0] not enough?
        # NOTE(review): these are substring tests on whatever prePathURL()
        # returns, not tests on individual path segments.
        if any(s in pre_path
               for s in (self.versioned_datadir, 'data', 'static')):
            # Anything in data/, static/ is treated as static files
            if 'static' in pre_path:
                # instance static directory
                datadir = self.config.static_directory
            elif 'fckeditor' in pre_path:
                fckeditordir = self.config.ext_resources['FCKEDITOR_PATH']
                return File(fckeditordir)
            else:
                # cube static data file
                datadir = self.config.locate_resource(path)
                if datadir is None:
                    return self
            self.info('static file %s from %s', path, datadir)
            # NOTE(review): versioned_datadir starts with 'data', so versioned
            # urls take the plain File branch below and only 'static' urls
            # get LongTimeExpiringFile -- confirm this is intended.
            if 'data' in pre_path:
                return File(os.path.join(datadir, path))
            else:
                return LongTimeExpiringFile(datadir)
        elif path == 'fckeditor':
            fckeditordir = self.config.ext_resources['FCKEDITOR_PATH']
            return File(fckeditordir)
        # Otherwise we use this single resource
        return self

    def render(self, request):
        """Render a page from the root resource"""
        # reload modified files in debug mode
        if self.debugmode:
            self.appli.vreg.register_objects(self.config.vregistry_path())
        if self.config['profile']: # default profiler don't trace threads
            # NOTE(review): twisted.web expects render() to return a string or
            # NOT_DONE_YET; confirm returning render_request()'s result is fine.
            return self.render_request(request)
        deferred = threads.deferToThread(self.render_request, request)
        # without an errback, an exception raised in the worker thread would
        # leave the request unanswered
        deferred.addErrback(self._render_failed, request)
        return NOT_DONE_YET

    def _render_failed(self, failure, request):
        """Errback of threaded rendering: log `failure` and answer with an
        internal server error unless the request is already finished.
        """
        self.error('unexpected error while rendering %s:\n%s',
                   request.uri, failure.getTraceback())
        if not request.finished:
            request.setResponseCode(http.INTERNAL_SERVER_ERROR)
            request.finish()

    def render_request(self, request):
        """Authenticate and publish `request`.

        :param request: the twisted.web request.
        :return: an `HTTPResponse`, or the response carried by a
          `DirectResponse` exception.
        """
        origpath = request.path
        # request.host is the transport address stored by requestReceived,
        # not a host name: ask for the name sent by the client, which is what
        # host_prefixed_baseurl() needs.
        host = request.getRequestHostname()
        # dual http/https access handling: expect a rewrite rule to prepend
        # 'https' to the path to detect https access
        segments = origpath.split('/', 2)
        # a path without any '/' has a single segment: not an https access
        if len(segments) > 1 and segments[1] == 'https':
            origpath = origpath[6:]
            request.uri = request.uri[6:]
            https = True
            baseurl = self.https_url or self.base_url
        else:
            https = False
            baseurl = self.base_url
        if self.config['use-request-subdomain']:
            baseurl = host_prefixed_baseurl(baseurl, host)
            self.warning('used baseurl is %s for this request', baseurl)
        req = CubicWebTwistedRequestAdapter(request, self.appli.vreg, https, baseurl)
        if req.authmode == 'http':
            # activate realm-based auth
            realm = self.config['realm']
            req.set_header('WWW-Authenticate', [('Basic', {'realm' : realm })], raw=False)
        try:
            self.appli.connect(req)
        except AuthenticationError:
            return self.request_auth(request=req)
        except Redirect as ex:
            return self.redirect(request=req, location=ex.location)
        if https and req.cnx.anonymous_connection:
            # don't allow anonymous on https connection
            return self.request_auth(request=req)
        if self.url_rewriter is not None:
            # XXX should occur before authentication?
            try:
                path = self.url_rewriter.rewrite(host, origpath, req)
            except Redirect as ex:
                return self.redirect(req, ex.location)
            # NOTE(review): the result of replace() is discarded, so
            # request.uri is left unchanged (assuming it is a string) --
            # confirm what is intended before assigning it back.
            request.uri.replace(origpath, path, 1)
        else:
            path = origpath
        if not path or path == "/":
            path = 'view'
        try:
            result = self.appli.publish(path, req)
        except DirectResponse as ex:
            return ex.response
        except StatusResponse as ex:
            return HTTPResponse(stream=ex.content, code=ex.status,
                                twisted_request=req._twreq,
                                headers=req.headers_out)
        except RemoteCallFailed as ex:
            req.set_header('content-type', 'application/json')
            return HTTPResponse(twisted_request=req._twreq, code=http.INTERNAL_SERVER_ERROR,
                                stream=ex.dumps(), headers=req.headers_out)
        except NotFound:
            result = self.appli.notfound_content(req)
            return HTTPResponse(twisted_request=req._twreq, code=http.NOT_FOUND,
                                stream=result, headers=req.headers_out)
        except ExplicitLogin: # must be before AuthenticationError
            return self.request_auth(request=req)
        except AuthenticationError as ex:
            if self.config['auth-mode'] == 'cookie' and getattr(ex, 'url', None):
                return self.redirect(request=req, location=ex.url)
            # in http we have to request auth to flush current http auth
            # information
            return self.request_auth(request=req, loggedout=True)
        except Redirect as ex:
            return self.redirect(request=req, location=ex.location)
        # request may be referenced by "onetime callback", so clear its entity
        # cache to avoid memory usage
        req.drop_entity_cache()
        return HTTPResponse(twisted_request=req._twreq, code=http.OK,
                            stream=result, headers=req.headers_out)

    def redirect(self, request, location):
        """Answer `request` (a request adapter) with a 303 to `location`."""
        self.debug('redirecting to %s', str(location))
        request.headers_out.setHeader('location', str(location))
        # 303 See other
        return HTTPResponse(twisted_request=request._twreq, code=303,
                            headers=request.headers_out)

    def request_auth(self, request, loggedout=False):
        """Ask the client to authenticate.

        Redirects to the https login page when an https url is configured
        and the request's base url differs from it. Otherwise answers
        UNAUTHORIZED in 'http' auth mode and FORBIDDEN in other modes, with
        the "logged out" or "need login" page as content.

        :param request: the request adapter.
        :param loggedout: true when the user has just logged out.
        """
        if self.https_url and request.base_url() != self.https_url:
            return self.redirect(request, self.https_url + 'login')
        if self.config['auth-mode'] == 'http':
            code = http.UNAUTHORIZED
        else:
            code = http.FORBIDDEN
        if loggedout:
            if request.https:
                request._base_url = self.base_url
                request.https = False
            content = self.appli.loggedout_content(request)
        else:
            content = self.appli.need_login_content(request)
        return HTTPResponse(twisted_request=request._twreq,
                            stream=content, code=code,
                            headers=request.headers_out)
#TODO
# # XXX max upload size in the configuration
@monkeypatch(http.Request)
def requestReceived(self, command, path, version):
    """Called by channel when all data has been received.

    This method is not intended for users.

    Query string and POST body are parsed into ``self.args``; file fields
    of a multipart form go to ``self.files`` as ``(filename, file)`` tuples.
    ``self.process()`` is called once parsing is done.
    """
    self.content.seek(0, 0)
    self.args = {}
    self.files = {}
    self.stack = []

    self.method, self.uri = command, path
    self.clientproto = version
    # split the query string, if any, off the requested uri
    self.path, has_query, querystring = self.uri.partition('?')
    if has_query:
        self.args = http.parse_qs(querystring, 1)

    # cache the client and server information, we'll need this later to be
    # serialized and sent with the request so CGIs will work remotely
    transport = self.channel.transport
    self.client = transport.getPeer()
    self.host = transport.getHost()

    # Argument processing
    ctype = self.getHeader('content-type')
    if self.method == "POST" and ctype:
        mimetype, _params = parse_header(ctype)
        if mimetype == 'application/x-www-form-urlencoded':
            self.args.update(http.parse_qs(self.content.read(), 1))
        elif mimetype == 'multipart/form-data':
            self.content.seek(0, 0)
            form = FieldStorage(self.content, self.received_headers,
                                environ={'REQUEST_METHOD': 'POST'},
                                keep_blank_values=1,
                                strict_parsing=1)
            for name in form:
                field = form[name]
                if isinstance(field, list):
                    # several values for the same field name
                    self.args[name] = [item.value for item in field]
                elif not field.filename:
                    # regular (non file) field
                    self.args[name] = field.value
                elif field.done != -1:
                    self.files[name] = (field.filename, field.file)
                else:
                    # -1 is transfer has been interrupted
                    self.files[name] = (None, None)
    self.process()
from logging import getLogger
from cubicweb import set_log_methods
# module level logger of the twisted front-end
LOGGER = getLogger('cubicweb.twisted')
# presumably what provides the ``self.info`` / ``self.warning`` /
# ``self.debug`` methods called by CubicWebRootResource, backed by LOGGER
set_log_methods(CubicWebRootResource, LOGGER)
def run(config, debug):
    """Start the twisted web server of an instance and run the reactor.

    Unless `debug` is true the process is daemonized first and its pid is
    written to the configured 'pid-file', if any.

    :param config: the instance configuration; 'port', 'pid-file', 'uid'
      and 'profile' are read here.
    :param debug: when true, stay in the foreground.
    :raise ConfigurationError: when asked to daemonize on windows.
    """
    # create the site
    root_resource = CubicWebRootResource(config, debug)
    website = server.Site(root_resource)
    # serve it via standard HTTP on port set in the configuration
    port = config['port'] or 8080
    reactor.listenTCP(port, website)
    logger = getLogger('cubicweb.twisted')
    if not debug:
        if sys.platform == 'win32':
            raise ConfigurationError("Under windows, you must use the service management "
                                     "commands (e.g : 'net start my_instance)'")
        print('instance starting in the background')
        if daemonize():
            # daemonize() is true in the launching processes: only the
            # detached process goes on
            return
        pidfile = config['pid-file']
        if pidfile:
            # ensure the directory where the pid-file should be set exists (for
            # instance /var/run/cubicweb may be deleted on computer restart)
            piddir = os.path.dirname(pidfile)
            # piddir is empty for a bare file name: nothing to create then
            if piddir and not os.path.exists(piddir):
                os.makedirs(piddir)
            stream = open(pidfile, 'w')
            try:
                stream.write(str(os.getpid()))
            finally:
                stream.close()
    root_resource.init_publisher() # before changing uid
    if config['uid'] is not None:
        try:
            uid = int(config['uid'])
        except ValueError:
            # not a number: look the user name up
            from pwd import getpwnam
            uid = getpwnam(config['uid']).pw_uid
        os.setuid(uid)
    root_resource.start_service()
    logger.info('instance started on %s', root_resource.base_url)
    if config['profile']:
        import cProfile
        cProfile.runctx('reactor.run()', globals(), locals(), config['profile'])
    else:
        reactor.run()