I am rewriting wsgi.py in nevow, and I am facing some problems with the nevow url module. Firstly, only guard and rootaccessor use IRequest.getRootURL(). In Twisted the root URL is always of the form http://domain.com/ . In non-Twisted setups like WSGI, this is not so. And nevow is designed to work with CGI apps too (see nevow-deployment.txt). Consider this cgi/wsgi/nevow setup: http://example.com/cgi-bin/nevowcgi.py/a/b/c -- nevowcgi.py will call createWSGIApplication and then run_with_cgi. The wsgi.py module will then use the PATH_INFO environment variable for the request URI (IRequest.uri). Thus, in the above example, IRequest.uri is '/a/b/c'. In addition, wsgi.py sets appRootURL to 'http://example.com/cgi-bin/nevowcgi.py/', which is passed in from nevowcgi.py. A lot of code in nevow doesn't use appRootURL. For example, if PageC.addSlash is True, the above URL will be redirected to http://example.com/a/b/c/ and not to http://example.com/cgi-bin/nevowcgi.py/a/b/c/ as expected (note the leading slash). For this reason, I couldn't get wsgi.py to work fully. To fix some of the problems (excluding the redirect problem), WSGIRequest.prePathURL returns the full URL prefixed with appRootURL (which is not the case with twisted.web.server.Request.prePathURL). -- Sridhar Ratna - http://srid.bsdnerds.org
I am rewriting wsgi.py in nevow. I face some problems with the nevow url module. [snip]
Herewith I am attaching the current wsgi.py
---
# TODO:
# 1. make exception renderer work
import os, sys, socket, math, time
import cgi # for FieldStorage
import types
from urllib import unquote, quote
from nevow import context, flat, inevow, util
from nevow import __version__ as nevowversion
def log(msg):
    """Write ``msg`` to standard error as a single "WSGI: {...}" line."""
    line = "WSGI: {%s}" % str(msg)
    sys.stderr.write(line + "\n")
# Unique sentinel, always compared by identity (``is errorMarker``).  The
# traversal and rendering code in this module checks for it to detect that
# no normal result is available (presumably because an error page was
# already produced -- confirm against the renderers that return it).
errorMarker = object()
class NevowWSGISite(object):
    """Per-request site object used to locate the resource for a URL.

    Holds the request, the root resource and a fresh ``SiteContext``; the
    traversal itself is done synchronously by ``handleSegment``.
    """

    def __init__(self, request, resource):
        """Store ``request`` and the root ``resource``; create the site context."""
        self.request = request
        self.resource = resource
        self.context = context.SiteContext()

    def remember(self, obj, inter=None):
        """Remember ``obj`` on the site context, optionally under ``inter``."""
        self.context.remember(obj, inter)

    def getPageContextForRequestContext(self, ctx):
        """Retrieve a resource from this site for a particular request.

        The resource will be wrapped in a PageContext which keeps track
        of how the resource was located.
        """
        path = inevow.IRemainingSegments(ctx)
        res = inevow.IResource(self.resource)
        pageContext = context.PageContext(tag=res, parent=ctx)
        return self.handleSegment(
            res.locateChild(pageContext, path),
            ctx.tag, path, pageContext)

    def handleSegment(self, result, request, path, pageContext):
        """Consume one ``locateChild`` result and keep traversing.

        ``result`` is either ``errorMarker`` (returned unchanged) or a
        ``(resource, remainingPath)`` pair.  Returns the PageContext of the
        final resource, a PageContext wrapping ``FourOhFour`` when the child
        is ``None``, or ``errorMarker``.
        """
        if result is errorMarker:
            return errorMarker

        newres, newpath = result

        # If the child resource is None then display a 404 page
        if newres is None:
            from nevow.rend import FourOhFour
            return context.PageContext(tag=FourOhFour(), parent=pageContext)

        # NOTE: a Deferred returned by locateChild is not waited for here;
        # the callback-based handling was disabled in this synchronous port.

        newres = inevow.IResource(newres, persist=True)
        if newres is pageContext.tag:
            # Same resource returned again: make sure traversal still
            # makes progress, otherwise we would recurse forever.
            assert newpath is not path, (
                "URL traversal cycle detected when attempting to "
                "locateChild %r from resource %r." % (path, pageContext.tag))
            assert len(newpath) < len(path), "Infinite loop impending..."

        ## We found a Resource... update the request.prepath and postpath
        for _ in range(len(path) - len(newpath)):
            request.prepath.append(request.postpath.pop(0))

        ## Create a context object to represent this new resource
        ctx = context.PageContext(tag=newres, parent=pageContext)
        ctx.remember(tuple(request.prepath), inevow.ICurrentSegments)
        ctx.remember(tuple(request.postpath), inevow.IRemainingSegments)

        if not newpath:
            # Every segment has been consumed: this is the target resource.
            return ctx

        return self.handleSegment(
            newres.locateChild(ctx, newpath),
            request, newpath, ctx)
def createWSGIApplication(page, rootURL=None):
    """Given a Page instance, return a WSGI callable.

    `rootURL` - URL to be remembered as root

    Note that ``page.flattenFactory`` is replaced with ``flat.iterflatten``
    as a side effect.
    """
    page.flattenFactory = flat.iterflatten

    def application(environ, start_response):
        """WSGI entry point: process one request and yield the body."""
        request = WSGIRequest(environ, start_response)
        if rootURL:
            request.rememberRootURL(rootURL)
        request.site = NevowWSGISite(request, page)

        result = request.process()
        if not request.headersSent:
            request.write('')  # send headers now

        # process() yields no body when rendering ended in the error
        # marker; iterating such a result would raise TypeError.
        if result is None or result is errorMarker:
            return

        if isinstance(result, str):
            yield result
        elif isinstance(result, util.Deferred):
            ## So we can use the wsgi module if twisted is installed
            ## TODO use render synchronously instead maybe? I'm pretty
            ## sure after the application callable returns, the request
            ## is "closed". Investigate with the latest wsgi spec and
            ## some implementations.
            yield result.result
        else:
            for chunk in result:
                yield chunk

    return application
# TODO: convert interface comments
class WSGIRequest(object):
__implements__ = inevow.IRequest,
"""A HTTP request.
Subclasses should override the process() method to determine how
the request will be processed.
@ivar method: The HTTP method that was used.
@ivar uri: The full URI that was requested (includes arguments).
@ivar path: The path only (arguments not included).
@ivar args: All of the arguments, including URL and POST arguments.
@type args: A mapping of strings (the argument names) to lists of values.
i.e.,
?foo=bar&foo=baz&quux=spam results in
{'foo': ['bar', 'baz'], 'quux': ['spam']}.
@ivar received_headers: All received headers
"""
def __init__(self, environ, start_response):
    """Build the request from a WSGI ``environ`` and ``start_response``.

    Query-string arguments are parsed into ``self.args`` and the request
    headers are collected into ``self.received_headers`` keyed by their
    lower-case, hyphenated names (``HTTP_USER_AGENT`` -> ``user-agent``).
    """
    self.environ = environ
    self.start_response = start_response
    self.outgoingHeaders = []
    self.received_headers = {}
    self.lastModified = None
    self.etag = None
    self.method = environ.get('REQUEST_METHOD', 'GET')
    self.args = self._parseQuery(environ.get('QUERY_STRING', ''))

    # REMOTE_PORT (and, on some gateways, REMOTE_ADDR) may be absent;
    # do not fail request construction because of that.
    remotePort = environ.get('REMOTE_PORT')
    if remotePort:
        remotePort = int(remotePort)
    else:
        remotePort = None
    self.host = (environ.get('REMOTE_ADDR'), remotePort)

    for k, v in environ.items():
        if k.startswith('HTTP_'):
            # CGI-style names use underscores; headers are looked up with
            # hyphens (e.g. getHeader('if-modified-since')).
            self.received_headers[k[5:].lower().replace('_', '-')] = v
    # These two headers are passed without the HTTP_ prefix.
    for envKey, headerName in (('CONTENT_TYPE', 'content-type'),
                               ('CONTENT_LENGTH', 'content-length')):
        if environ.get(envKey):
            self.received_headers[headerName] = environ[envKey]

    self.setResponseCode("200")
    self.headersSent = False
    self.appRootURL = None
    self.deferred = util.Deferred()
def process(self):
    """Parse a POSTed form, set default headers, then locate and render the page.

    When a form is POSTed,
    we create a cgi.FieldStorage instance using the data posted,
    and set it as the request.fields attribute. This way, we can
    get at information about filenames and mime-types of
    files that were posted.

    Returns whatever ``gotPageContext`` returns for the located page.
    """
    if self.method == 'POST':
        # FieldStorage needs content-type/content-length to parse
        # multipart bodies and to know how much to read.  The gateway
        # passes them as CONTENT_TYPE/CONTENT_LENGTH, so add them to a
        # copy of the received headers when they are not already there.
        formHeaders = dict(self.received_headers)
        for envKey, headerName in (('CONTENT_TYPE', 'content-type'),
                                   ('CONTENT_LENGTH', 'content-length')):
            value = self.environ.get(envKey)
            if value and headerName not in formHeaders:
                formHeaders[headerName] = value
        self.fields = cgi.FieldStorage(
            self.environ['wsgi.input'],
            formHeaders,
            environ={'REQUEST_METHOD': 'POST'})

    # set various default headers
    self.setHeader('server', nevowversion)
    year, month, day, hh, mm, ss, wd, y, z = time.gmtime(time.time())
    # HTTP date string format
    s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekdayname[wd],
        day, monthname[month], year,
        hh, mm, ss)
    self.setHeader('date', s)
    self.setHeader('content-type', 'text/html; charset=UTF-8')

    # Resource Identification
    self.prepath = []
    # Must be a real list: traversal pops segments off the front.
    self.postpath = [unquote(segment)
                     for segment in self.path[1:].split('/')]
    self.sitepath = []

    requestContext = context.RequestContext(
        parent=self.site.context, tag=self)
    requestContext.remember((), inevow.ICurrentSegments)
    requestContext.remember(tuple(self.postpath), inevow.IRemainingSegments)

    pageContext = self.site.getPageContextForRequestContext(requestContext)
    return self.gotPageContext(pageContext)
def gotPageContext(self, pageContext):
    """Render the resource held by ``pageContext`` and return the output.

    Returns ``None`` when ``pageContext`` or the render result is
    ``errorMarker``, the rendered ``str`` when there is one, and otherwise
    either recurses into a result adaptable to ``IResource`` or returns the
    unrecognised result unchanged after reporting it on stderr.
    """
    if pageContext is errorMarker:
        return None

    html = pageContext.tag.renderHTTP(pageContext)

    if isinstance(html, util.Deferred):
        # This is a deferred object
        # Let us return it synchronously
        # (wsgi has nothing to do with sync, async)
        # XXX: Is this correct?  Assumes the Deferred has already fired.
        html = html.result

    # FIXME: I dunno what to do when a generator comes ..
    # Perhaps, it may generate non-str? I dunno
    if isinstance(html, types.GeneratorType):
        html = ''.join(html)

    if html is errorMarker:
        ## Error webpage has already been rendered and finish called
        return None
    if isinstance(html, str):
        return html

    res = inevow.IResource(html, None)
    if res is not None:
        # The page rendered to another resource: render that instead.
        return self.gotPageContext(
            context.PageContext(tag=res, parent=pageContext))

    message = "html is not a string: %s on %s" % (
        str(html), pageContext.tag)
    sys.stderr.write(message + "\n")
    return html
def _parseQuery(self, qs):
d = {}
items = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
for item in items:
try:
k, v = item.split("=", 1)
except ValueError:
# no strict parsing
continue
k = unquote(k.replace("+", " "))
v = unquote(v.replace("+", " "))
if k in d:
d[k].append(v)
else:
d[k] = [v]
return d
def _getPath(self):
pth = self.environ.get('PATH_INFO', '')
if not pth: pth = '/'
return pth
path = property(_getPath)
def _getURI(self):
query = self.environ.get('QUERY_STRING', '')
if query:
query = '?' + query
return self.path + query
uri = property(_getURI)
# Methods for received request
def getHeader(self, key):
"""Get a header that was sent from the network.
"""
return self.received_headers.get(key.lower())
def getCookie(self, key):
"""Get a cookie that was sent from the network.
"""
def getAllHeaders(self):
"""Return dictionary of all headers the request received."""
return self.received_headers
def getRequestHostname(self):
"""Get the hostname that the user passed in to the request.
This will either use the Host: header (if it is available) or the
host we are listening on if the header is unavailable.
"""
return (self.getHeader('host') or
socket.gethostbyaddr(self.getHost()[1])[0]
).split(':')[0]
def getHost(self):
"""Get my originally requesting transport's host.
Don't rely on the 'transport' attribute, since Request objects may be
copied remotely. For information on this method's return value, see
twisted.internet.tcp.Port.
"""
def getClientIP(self):
return self.environ.get('REMOTE_ADDR', None)
def getClient(self):
pass
def getUser(self):
pass
def getPassword(self):
pass
def isSecure(self):
return self.environ['wsgi.url_scheme'] == 'https'
def getSession(self, sessionInterface = None):
pass
def URLPath(self):
    """Return a ``nevow.url.URL`` for the full requested URI.

    The URL is the remembered application root joined with ``self.uri``.
    When no root has been remembered, a root is built from the request
    scheme and hostname instead of failing on ``None``.
    """
    from nevow import url
    root = self.appRootURL
    if root is None:
        root = 'http%s://%s' % (
            self.isSecure() and 's' or '',
            self.getRequestHostname())
    # The root normally ends with "/" and the uri starts with "/";
    # join them without producing a double slash.
    return url.URL.fromString(root.rstrip('/') + self.uri)
def prePathURL(self):
if self.isSecure():
default = 443
else:
default = 80
# TODO: use getHost().port after getHost is implemented
port = default
if port == default:
hostport = ''
else:
hostport = ':%d' % port
# FIXME: This hack, until url module is fixed to support RootURLs
# Or is this the right way to do?
if self.appRootURL:
return quote('%s%s' % (self.appRootURL,
'/'.join(self.prepath)),
'/:')
return quote('http%s://%s%s/%s' % (
self.isSecure() and 's' or '',
self.getRequestHostname(),
hostport,
'/'.join(self.prepath)), '/:')
def rememberRootURL(self, url=None):
# result = p.renderHTTP(pctx)
"""
Remember the currently-processed part of the URL for later
recalling.
"""
if url is None:
raise NotImplementedError
self.appRootURL = url
def getRootURL(self):
"""
Get a previously-remembered URL.
"""
return self.appRootURL
# Methods for outgoing request
def finish(self):
"""We are finished writing data."""
def write(self, data):
"""
Write some data as a result of an HTTP request. The first
time this is called, it writes out response data.
"""
if self.headersSent:
self._write(data)
return
headerkeys = [k for k,v in self.outgoingHeaders]
self._write = self.start_response(
self.responseCode, self.outgoingHeaders, None)
self.headersSent = True
if data:
self._write(data)
def addCookie(self, k, v, expires=None, domain=None, path=None,
max_age=None, comment=None, secure=None):
"""Set an outgoing HTTP cookie.
In general, you should consider using sessions instead of cookies, see
twisted.web.server.Request.getSession and the
twisted.web.server.Session class for details.
"""
def setResponseCode(self, code, message=None):
"""Set the HTTP response code.
"""
self.responseCode = '%s %s' % (code, RESPONSES[int(str(code))])
def setHeader(self, header, value):
"""Set an outgoing HTTP header.
"""
self.outgoingHeaders.append((header.lower(), value))
def redirect(self, url):
    """Log the target, set a 302 response code and the location header.

    The request should have finish() called after this.
    """
    target = str(url)
    log('REDIRECT to ' + target)
    status = str(302)
    self.setResponseCode(status)
    self.setHeader('location', url)
def setLastModified(self, when):
"""Set the X{Last-Modified} time for the response to this request.
If I am called more than once, I ignore attempts to set
Last-Modified earlier, only replacing the Last-Modified time
if it is to a later value.
If I am a conditional request, I may modify my response code
to L{NOT_MODIFIED} if appropriate for the time given.
@param when: The last time the resource being returned was
modified, in seconds since the epoch.
@type when: number
@return: If I am a X{If-Modified-Since} conditional request and
the time given is not newer than the condition, I return
L{http.CACHED<CACHED>} to indicate that you should write no
body. Otherwise, I return a false value.
"""
# time.time() may be a float, but the HTTP-date strings are
# only good for whole seconds.
when = long(math.ceil(when))
if (not self.lastModified) or (self.lastModified < when):
self.lastModified = when
modified_since = self.getHeader('if-modified-since')
if modified_since:
modified_since = stringToDatetime(modified_since)
if modified_since >= when:
self.setResponseCode(NOT_MODIFIED)
return '' # TODO: return http.CACHED (requires Twisted)
return None
def setETag(self, etag):
"""Set an X{entity tag} for the outgoing response.
That's \"entity tag\" as in the HTTP/1.1 X{ETag} header, \"used
for comparing two or more entities from the same requested
resource.\"
If I am a conditional request, I may modify my response code
to L{NOT_MODIFIED
participants (1)
-
Sridhar Ratna