etwist/server.py
changeset 0 b97547f5f1fa
child 151 343e7a18675d
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
       
     1 """twisted server for CubicWeb web applications
       
     2 
       
     3 :organization: Logilab
       
     4 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     5 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     6 """
       
     7 __docformat__ = "restructuredtext en"
       
     8 
       
     9 import sys
       
    10 import select
       
    11 
       
    12 from mx.DateTime import today, RelativeDate
       
    13 
       
    14 from twisted.application import service, strports
       
    15 from twisted.internet import reactor, task, threads
       
    16 from twisted.internet.defer import maybeDeferred
       
    17 from twisted.web2 import channel, http, server, iweb
       
    18 from twisted.web2 import static, resource, responsecode
       
    19 
       
    20 from cubicweb import ObjectNotFound
       
    21 from cubicweb.web import (AuthenticationError, NotFound, Redirect, 
       
    22                        RemoteCallFailed, DirectResponse, StatusResponse,
       
    23                        ExplicitLogin)
       
    24 from cubicweb.web.application import CubicWebPublisher
       
    25 
       
    26 from cubicweb.etwist.request import CubicWebTwistedRequestAdapter
       
    27 
       
    28 
       
    29 def start_task(interval, func):
       
    30     lc = task.LoopingCall(func)
       
    31     lc.start(interval)
       
    32 
       
    33 def start_looping_tasks(repo):
       
    34     for interval, func in repo._looping_tasks:
       
    35         repo.info('starting twisted task %s with interval %.2fs',
       
    36                   func.__name__, interval)
       
    37         def catch_error_func(repo=repo, func=func):
       
    38             try:
       
    39                 func()
       
    40             except:
       
    41                 repo.exception('error in looping task')
       
    42         start_task(interval, catch_error_func)
       
    43     # ensure no tasks will be further added
       
    44     repo._looping_tasks = ()
       
    45     
       
    46 
       
    47 class LongTimeExpiringFile(static.File):
       
    48     """overrides static.File and sets a far futre ``Expires`` date
       
    49     on the resouce.
       
    50 
       
    51     versions handling is done by serving static files by different
       
    52     URLs for each version. For instance::
       
    53 
       
    54       http://localhost:8080/data-2.48.2/cubicweb.css
       
    55       http://localhost:8080/data-2.49.0/cubicweb.css
       
    56       etc.
       
    57 
       
    58     """
       
    59     def renderHTTP(self, request):
       
    60         def setExpireHeader(response):
       
    61             response = iweb.IResponse(response)
       
    62             # Don't provide additional resource information to error responses
       
    63             if response.code < 400:
       
    64                 # the HTTP RFC recommands not going further than 1 year ahead
       
    65                 expires = today() + RelativeDate(months=6)
       
    66                 response.headers.setHeader('Expires', int(expires.ticks()))
       
    67             return response
       
    68         d = maybeDeferred(super(LongTimeExpiringFile, self).renderHTTP, request)
       
    69         return d.addCallback(setExpireHeader)
       
    70 
       
    71 
       
class CubicWebRootResource(resource.PostableResource):
    """Root twisted.web2 resource publishing a CubicWeb application.

    Static files below ``data``, the versioned data directory and
    ``fckeditor`` are delegated to file resources; every other URL is
    rendered by this resource through a `CubicWebPublisher`.

    The logging methods used below (`info`, `debug`, `error`) are not
    defined in this class; presumably they are attached by the
    `set_log_methods` call at the end of the module -- confirm.
    """
    addSlash = False

    def __init__(self, config, debug=None):
        """Create the publisher and start the periodic tasks.

        :param config: application configuration; read both as a mapping
          ('base-url', 'https-url', ...) and through methods
        :param debug: debug flag, forwarded to the publisher and kept as
          `self.debugmode`
        """
        self.appli = CubicWebPublisher(config, debug=debug)
        self.debugmode = debug
        self.config = config
        self.base_url = config['base-url'] or config.default_base_url()
        # first path segment of the version specific static files URL
        self.versioned_datadir = 'data%s' % config.instance_md5_version()
        # both urls are concatenated with relative paths, hence the required
        # trailing slash
        assert self.base_url[-1] == '/'
        self.https_url = config['https-url']
        assert not self.https_url or self.https_url[-1] == '/'
        # when we have an in-memory repository, clean unused sessions every XX
        # seconds and properly shutdown the server
        if config.repo_method == 'inmemory':
            reactor.addSystemEventTrigger('before', 'shutdown',
                                          self.shutdown_event)
            # monkey patch start_looping_tasks to get proper reactor
            # integration (replaced on the repository *class*)
            self.appli.repo.__class__.start_looping_tasks = start_looping_tasks
            if config.pyro_enabled():
                # if pyro is enabled, we have to register to the pyro name
                # server, create a pyro daemon, and create a task to handle pyro
                # requests
                self.pyro_daemon = self.appli.repo.pyro_register()
                self.pyro_listen_timeout = 0.02
                start_task(1, self.pyro_loop_event)
            self.appli.repo.start_looping_tasks()
        try:
            self.url_rewriter = self.appli.vreg.select_component('urlrewriter')
        except ObjectNotFound:
            # no url rewriter available: paths are published unchanged
            self.url_rewriter = None
        # run the session cleanup at half the smallest configured session
        # time, falling back to 120 / 720 when a value is unset or zero
        interval = min(config['cleanup-session-time'] or 120,
                       config['cleanup-anonymous-session-time'] or 720) / 2.
        start_task(interval, self.appli.session_handler.clean_sessions)

    def shutdown_event(self):
        """callback fired when the server is shutting down to properly
        clean opened sessions
        """
        self.appli.repo.shutdown()

    def pyro_loop_event(self):
        """listen for pyro events

        `select.error` raised while handling requests is silently ignored.
        """
        try:
            self.pyro_daemon.handleRequests(self.pyro_listen_timeout)
        except select.error:
            return

    def locateChild(self, request, segments):
        """Indicate which resource to use to process down the URL's path

        Return a `(resource, remaining segments)` pair: a file resource for
        static content, `(None, [])` when a data file can't be located, and
        `(self, ())` for everything else.
        """
        if segments:
            # a leading 'https' segment only marks https access (see
            # `render_request`), skip it for resource lookup
            if segments[0] == 'https':
                segments = segments[1:]
            if len(segments) >= 2:
                if segments[0] in (self.versioned_datadir, 'data'):
                    # Anything in data/ is treated as static files
                    datadir = self.config.locate_resource(segments[1])
                    if datadir is None:
                        return None, []
                    self.info('static file %s from %s', segments[-1], datadir)
                    if segments[0] == 'data':
                        # unversioned url: no far future expiration date
                        return static.File(str(datadir)), segments[1:]
                    else:
                        return LongTimeExpiringFile(datadir), segments[1:]
                elif segments[0] == 'fckeditor':
                    fckeditordir = self.config.ext_resources['FCKEDITOR_PATH']
                    return static.File(fckeditordir), segments[1:]
        # Otherwise we use this single resource
        return self, ()

    def render(self, request):
        """Render a page from the root resource"""
        # reload modified files (only in development or debug mode)
        if self.config.mode == 'dev' or self.debugmode:
            self.appli.vreg.register_objects(self.config.vregistry_path())
        if self.config['profile']: # default profiler don't trace threads
            return self.render_request(request)
        else:
            # regular case: process the request in a separate thread
            return threads.deferToThread(self.render_request, request)

    def render_request(self, request):
        """Authenticate and publish `request`, returning the response.

        Exceptions raised while connecting or publishing are turned into
        the matching response: redirection, authentication request,
        'not found' page, explicit status or direct response.
        """
        origpath = request.path
        host = request.host
        # dual http/https access handling: expect a rewrite rule to prepend
        # 'https' to the path to detect https access
        if origpath.split('/', 2)[1] == 'https':
            # strip the leading '/https' (6 characters)
            origpath = origpath[6:]
            request.uri = request.uri[6:]
            https = True
            baseurl = self.https_url or self.base_url
        else:
            https = False
            baseurl = self.base_url
        req = CubicWebTwistedRequestAdapter(request, self.appli.vreg, https, baseurl)
        if req.authmode == 'http':
            # activate realm-based auth
            realm = self.config['realm']
            req.set_header('WWW-Authenticate', [('Basic', {'realm' : realm })], raw=False)
        try:
            self.appli.connect(req)
        except AuthenticationError:
            return self.request_auth(req)
        except Redirect, ex:
            return self.redirect(req, ex.location)
        if https and req.cnx.anonymous_connection:
            # don't allow anonymous on https connection
            return self.request_auth(req)
        if self.url_rewriter is not None:
            # XXX should occurs before authentication?
            try:
                path = self.url_rewriter.rewrite(host, origpath)
            except Redirect, ex:
                return self.redirect(req, ex.location)
            # NOTE(review): the value returned by `replace` is discarded, so
            # if `request.uri` is a plain string this statement has no
            # effect -- confirm whether `request.uri` was meant to be
            # reassigned
            request.uri.replace(origpath, path, 1)
        else:
            path = origpath
        if not path or path == "/":
            # site root: publish the default 'view' path
            path = 'view'
        try:
            result = self.appli.publish(path, req)
        except DirectResponse, ex:
            return ex.response
        except StatusResponse, ex:
            return http.Response(stream=ex.content, code=ex.status,
                                 headers=req.headers_out or None)
        except RemoteCallFailed, ex:
            req.set_header('content-type', 'application/json')
            return http.Response(stream=ex.dumps(),
                                 code=responsecode.INTERNAL_SERVER_ERROR)
        except NotFound:
            result = self.appli.notfound_content(req)
            return http.Response(stream=result, code=responsecode.NOT_FOUND,
                                 headers=req.headers_out or None)
        except ExplicitLogin:  # must be before AuthenticationError
            return self.request_auth(req)
        except AuthenticationError:
            if self.config['auth-mode'] == 'cookie':
                # in cookie mode redirecting to the index view is enough :
                # either anonymous connection is allowed and the page will
                # be displayed or we'll be redirected to the login form
                msg = req._('you have been logged out')
                if req.https:
                    # build the redirection url on the plain http base url
                    req._base_url =  self.base_url
                    req.https = False
                url = req.build_url('view', vid='index', __message=msg)
                return self.redirect(req, url)
            else:
                # in http we have to request auth to flush current http auth
                # information
                return self.request_auth(req, loggedout=True)
        except Redirect, ex:
            return self.redirect(req, ex.location)
        if not result:
            # no result, something went wrong...
            self.error('no data (%s)', req)
            # send the client to the 'error' url (note `redirect` answers
            # with a 303, not with a 500 Internal server error)
            return self.redirect(req, req.build_url('error'))
        # request may be referenced by "onetime callback", so clear its entity
        # cache to avoid memory usage
        req.drop_entity_cache()
        return http.Response(stream=result, code=responsecode.OK,
                             headers=req.headers_out or None)

    def redirect(self, req, location):
        """Return a 303 response sending the client to `location`, using the
        outgoing headers of `req`.
        """
        req.headers_out.setHeader('location', str(location))
        self.debug('redirecting to %s', location)
        # 303 See other
        return http.Response(code=303, headers=req.headers_out)

    def request_auth(self, req, loggedout=False):
        """Return the response asking the client to authenticate.

        When an https url is configured and the request was not made
        through it, redirect to the https login page. Otherwise answer
        401 in 'http' auth mode and 403 in other modes, with the
        'logged out' page when `loggedout` is true and the 'need login'
        page otherwise.
        """
        if self.https_url and req.base_url() != self.https_url:
            req.headers_out.setHeader('location', self.https_url + 'login')
            return http.Response(code=303, headers=req.headers_out)
        if self.config['auth-mode'] == 'http':
            code = responsecode.UNAUTHORIZED
        else:
            code = responsecode.FORBIDDEN
        if loggedout:
            if req.https:
                # switch the request back to the plain http base url before
                # building the page content
                req._base_url =  self.base_url
                req.https = False
            content = self.appli.loggedout_content(req)
        else:
            content = self.appli.need_login_content(req)
        return http.Response(code, req.headers_out, content)
       
   257 
       
   258     
       
   259 # This part gets run when you run this file via: "twistd -noy demo.py"
       
   260 def main(appid, cfgname):
       
   261     """Starts an cubicweb  twisted server for an application
       
   262 
       
   263     appid: application's identifier
       
   264     cfgname: name of the configuration to use (twisted or all-in-one)
       
   265     """
       
   266     from cubicweb.cwconfig import CubicWebConfiguration
       
   267     from cubicweb.etwist import twconfig # trigger configuration registration
       
   268     config = CubicWebConfiguration.config_for(appid, cfgname)
       
   269     # XXX why calling init_available_cubes here ?
       
   270     config.init_available_cubes()
       
   271     # create the site and application objects
       
   272     if '-n' in sys.argv: # debug mode
       
   273         cubicweb = CubicWebRootResource(config, debug=True)
       
   274     else:
       
   275         cubicweb = CubicWebRootResource(config)
       
   276     #toplevel = vhost.VHostURIRewrite(base_url, cubicweb)
       
   277     toplevel = cubicweb
       
   278     website = server.Site(toplevel)
       
   279     application = service.Application("cubicweb")
       
   280     # serve it via standard HTTP on port set in the configuration
       
   281     s = strports.service('tcp:%04d' % (config['port'] or 8080),
       
   282                          channel.HTTPFactory(website))
       
   283     s.setServiceParent(application)
       
   284     return application
       
   285 
       
   286 
       
   287 from twisted.python import failure
       
   288 from twisted.internet import defer
       
   289 from twisted.web2 import fileupload
       
   290 
       
   291 # XXX set max file size to 100Mo: put max upload size in the configuration
       
   292 # line below for twisted >= 8.0, default param value for earlier version
       
   293 resource.PostableResource.maxSize = 100*1024*1024 
       
   294 def parsePOSTData(request, maxMem=100*1024, maxFields=1024,
       
   295                   maxSize=100*1024*1024):
       
   296     if request.stream.length == 0:
       
   297         return defer.succeed(None)
       
   298     
       
   299     ctype = request.headers.getHeader('content-type')
       
   300 
       
   301     if ctype is None:
       
   302         return defer.succeed(None)
       
   303 
       
   304     def updateArgs(data):
       
   305         args = data
       
   306         request.args.update(args)
       
   307 
       
   308     def updateArgsAndFiles(data):
       
   309         args, files = data
       
   310         request.args.update(args)
       
   311         request.files.update(files)
       
   312 
       
   313     def error(f):
       
   314         f.trap(fileupload.MimeFormatError)
       
   315         raise http.HTTPError(responsecode.BAD_REQUEST)
       
   316     
       
   317     if ctype.mediaType == 'application' and ctype.mediaSubtype == 'x-www-form-urlencoded':
       
   318         d = fileupload.parse_urlencoded(request.stream, keep_blank_values=True)
       
   319         d.addCallbacks(updateArgs, error)
       
   320         return d
       
   321     elif ctype.mediaType == 'multipart' and ctype.mediaSubtype == 'form-data':
       
   322         boundary = ctype.params.get('boundary')
       
   323         if boundary is None:
       
   324             return defer.fail(http.HTTPError(
       
   325                 http.StatusResponse(responsecode.BAD_REQUEST,
       
   326                                     "Boundary not specified in Content-Type.")))
       
   327         d = fileupload.parseMultipartFormData(request.stream, boundary,
       
   328                                               maxMem, maxFields, maxSize)
       
   329         d.addCallbacks(updateArgsAndFiles, error)
       
   330         return d
       
   331     else:
       
   332         raise http.HTTPError(responsecode.BAD_REQUEST)
       
   333 
       
   334 server.parsePOSTData = parsePOSTData
       
   335 
       
   336 
       
   337 from logging import getLogger
       
   338 from cubicweb import set_log_methods
       
# bind CubicWebRootResource to the 'cubicweb.twisted' logger; presumably this
# is what provides the info / debug / error methods the class calls on itself
set_log_methods(CubicWebRootResource, getLogger('cubicweb.twisted'))
       
   340 
       
   341 
       
   342 
       
   343 def _gc_debug():
       
   344     import gc
       
   345     from pprint import pprint
       
   346     from cubicweb.vregistry import VObject
       
   347     gc.collect()
       
   348     count = 0
       
   349     acount = 0
       
   350     ocount = {}
       
   351     for obj in gc.get_objects():
       
   352         if isinstance(obj, CubicWebTwistedRequestAdapter):
       
   353             count += 1
       
   354         elif isinstance(obj, VObject):
       
   355             acount += 1
       
   356         else:
       
   357             try:
       
   358                 ocount[obj.__class__]+= 1
       
   359             except KeyError:
       
   360                 ocount[obj.__class__] = 1
       
   361             except AttributeError:
       
   362                 pass
       
   363     print 'IN MEM REQUESTS', count
       
   364     print 'IN MEM APPOBJECTS', acount
       
   365     ocount = sorted(ocount.items(), key=lambda x: x[1], reverse=True)[:20]
       
   366     pprint(ocount)
       
   367     print 'UNREACHABLE', gc.garbage