fix nevow exception in args parsing
I noticed an exception in nevow, for example with divmod.com/?=x=x=x. This is the fix, please apply this (or an equivalent one) to SVN: Index: Nevow/nevow/url.py =================================================================== --- Nevow/nevow/url.py (revision 1748) +++ Nevow/nevow/url.py (working copy) @@ -21,7 +21,7 @@ def _uqf(query): for x in query.split('&'): if '=' in x: - yield tuple( [urllib.unquote_plus(s) for s in x.split('=')] ) + yield tuple( [urllib.unquote_plus(s) for s in x.split('=')[:2]] ) elif x: yield (urllib.unquote_plus(x), None) unquerify = lambda query: list(_uqf(query)) And here again my latest version of the usual caching patches based on dialtone's stuff, that prevents high traffic pages to hurt. Index: Nevow/nevow/rend.py =================================================================== --- Nevow/nevow/rend.py (revision 1748) +++ Nevow/nevow/rend.py (working copy) @@ -476,6 +476,8 @@ self.children[name] = child +_CACHE = {} + class Page(Fragment, ConfigurableFactory, ChildLookupMixin): """A page is the main Nevow resource and renders a document loaded via the document factory (docFactory). 
@@ -489,8 +491,46 @@ afterRender = None addSlash = None + cache = False + lifetime = 0 + flattenFactory = lambda self, *args: flat.flattenFactory(*args) + def hasCache(self, ctx): + if not self.cache: + return + + c = self.lookupCache(ctx) + + if c is None: + self.storeCache(ctx, [util.Deferred()]) + return + + if isinstance(c[0], util.Deferred): + d = util.Deferred() + c.append(d) + return d + + if now() > c[1] + self.lifetime and self.lifetime > 0: + self.storeCache(ctx, [util.Deferred()]) + return + + return c[0] + def cacheRendered(self, ctx, data): + if self.cache: + defer_list = self.lookupCache(ctx) + assert(isinstance(defer_list[0], util.Deferred)) + # overwrite the deferred with the data + self.storeCache(ctx, (data, now())) + for d in defer_list: + d.callback(data) + def cacheIDX(self, ctx): + return str(url.URL.fromContext(ctx)) + def storeCache(self, ctx, c): + _CACHE[self.cacheIDX(ctx)] = c + def lookupCache(self, ctx): + return _CACHE.get(self.cacheIDX(ctx)) + def renderHTTP(self, ctx): if self.beforeRender is not None: return util.maybeDeferred(self.beforeRender,ctx).addCallback( @@ -515,11 +555,18 @@ if self.afterRender is not None: return util.maybeDeferred(self.afterRender,ctx) - if self.buffered: + c = self.hasCache(ctx) + if c is not None: + finishRequest() + return c + + if self.buffered or self.cache: io = StringIO() writer = io.write def finisher(result): - request.write(io.getvalue()) + c = io.getvalue() + self.cacheRendered(ctx, c) + request.write(c) return util.maybeDeferred(finishRequest).addCallback(lambda r: result) else: writer = request.write And here the fix to avoid wget to write garbage in filenames under the nevow guard (I suggest to merge it). 
Index: Nevow/nevow/guard.py =================================================================== --- Nevow/nevow/guard.py (revision 1748) +++ Nevow/nevow/guard.py (working copy) @@ -297,7 +297,8 @@ if path.startswith(SESSION_KEY): key = path[len(SESSION_KEY):] if key not in self.sessions: - return urlToChild(request, *segments[1:], **{'__start_session__':1}), () + #return urlToChild(request, *segments[1:], **{'__start_session__':1}), () + return urlToChild(request, *segments[1:]), () self.sessions[key].setLifetime(self.sessionLifetime) if cookie == key: # /sessionized-url/${SESSION_KEY}aef9c34aecc3d9148/foo @@ -305,7 +306,8 @@ # we are this getChild # with a matching cookie self.sessions[key].sessionJustStarted = True - return urlToChild(request, *segments[1:], **{'__session_just_started__':1}), () + #return urlToChild(request, *segments[1:], **{'__session_just_started__':1}), () + return urlToChild(request, *segments[1:]), () else: # We attempted to negotiate the session but failed (the user # probably has cookies disabled): now we're going to return the Keep up the great work, thanks! PS. While sending this email I take the opportunity to point you to the nevow sourcecode of a little GPL nevow appserver I developed last weekend, any suggestion for improvements is welcome ;). http://klive.cpushare.com/downloads/
On Sat, 2005-09-03 at 03:37 +0200, Andrea Arcangeli wrote:
PS. While sending this email I take the opportunity to point you to the nevow sourcecode of a little GPL nevow appserver I developed last weekend, any suggestion for improvements is welcome ;).
Yes, here's a suggestion for a big improvement: DOCSTRINGS!!!!! :-) Code is so much more readable (and easily documented with epydoc or something like it) when people include plenty of docstrings to explain what's going on. Best regards, Ed
On Fri, Sep 02, 2005 at 06:58:37PM -0700, Ed Suominen wrote:
On Sat, 2005-09-03 at 03:37 +0200, Andrea Arcangeli wrote:
PS. While sending this email I take the opportunity to point you to the nevow sourcecode of a little GPL nevow appserver I developed last weekend, any suggestion for improvements is welcome ;).
Yes, here's a suggestion for a big improvement:
DOCSTRINGS!!!!! :-)
eheh ;)
Code is so much more readable (and easily documented with epydoc or something like it) when people include plenty of docstrings to explain what's going on.
Indeed, I'm pretty lazy about documenting code, especially for two-day projects like klive. Actually I may put comments in the code, but I have never used docstrings as .__doc__ yet (except for the nevow abuse of it ;). But I tried to write klive in a readable way nevertheless...
-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Andrea Arcangeli wrote:
I noticed an exception in nevow, for example with divmod.com/?=x=x=x.
This is the fix, please apply this (or an equivalent one) to SVN:
Index: Nevow/nevow/url.py =================================================================== --- Nevow/nevow/url.py (revision 1748) +++ Nevow/nevow/url.py (working copy) @@ -21,7 +21,7 @@ def _uqf(query): for x in query.split('&'): if '=' in x: - yield tuple( [urllib.unquote_plus(s) for s in x.split('=')] ) + yield tuple( [urllib.unquote_plus(s) for s in x.split('=')[:2]] ) elif x: yield (urllib.unquote_plus(x), None) unquerify = lambda query: list(_uqf(query))
I don't think that's correct - it throws away information. A better solution is probably: yield tuple( [urllib.unquote_plus(s) for s in x.split('=', 1)] ) Do you agree? If so, I'll apply this fix later. - - Matt - -- __ / \__ Matt Goodall, Pollenation Internet Ltd \__/ \ w: http://www.pollenation.net __/ \__/ e: matt@pollenation.net / \__/ \ t: +44 (0)113 2252500 \__/ \__/ / \ Any views expressed are my own and do not necessarily \__/ reflect the views of my employer. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.2.5 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org iD8DBQFDGWkqPpOMbs6rEZERAn7GAJ47QPgvkjdOS9SI5jLrZsdCnnwsXgCfbFwx ef8CSTRNYq2VGWo6hCoO0lI= =fUlK -----END PGP SIGNATURE-----
Matt Goodall wrote:
Andrea Arcangeli wrote:
I noticed an exception in nevow, for example with divmod.com/?=x=x=x.
This is the fix, please apply this (or an equivalent one) to SVN:
Index: Nevow/nevow/url.py =================================================================== --- Nevow/nevow/url.py (revision 1748) +++ Nevow/nevow/url.py (working copy) @@ -21,7 +21,7 @@ def _uqf(query): for x in query.split('&'): if '=' in x: - yield tuple( [urllib.unquote_plus(s) for s in x.split('=')] ) + yield tuple( [urllib.unquote_plus(s) for s in x.split('=')[:2]] ) elif x: yield (urllib.unquote_plus(x), None) unquerify = lambda query: list(_uqf(query))
I don't think that's correct - it throws away information. A better solution is probably:
yield tuple( [urllib.unquote_plus(s) for s in x.split('=', 1)] )
Do you agree? If so, I'll apply this fix later.
cgi.parse_qs agrees with me so I just committed my version of Andrea's fix (with a test). Thanks for highlighting the error Andrea. - Matt -- __ / \__ Matt Goodall, Pollenation Internet Ltd \__/ \ w: http://www.pollenation.net __/ \__/ e: matt@pollenation.net / \__/ \ t: +44 (0)113 2252500 \__/ \__/ / \ Any views expressed are my own and do not necessarily \__/ reflect the views of my employer.
On Sat, Sep 03, 2005 at 12:11:58PM +0100, Matt Goodall wrote:
Matt Goodall wrote:
Andrea Arcangeli wrote:
I noticed an exception in nevow, for example with divmod.com/?=x=x=x.
This is the fix, please apply this (or an equivalent one) to SVN:
Index: Nevow/nevow/url.py =================================================================== --- Nevow/nevow/url.py (revision 1748) +++ Nevow/nevow/url.py (working copy) @@ -21,7 +21,7 @@ def _uqf(query): for x in query.split('&'): if '=' in x: - yield tuple( [urllib.unquote_plus(s) for s in x.split('=')] ) + yield tuple( [urllib.unquote_plus(s) for s in x.split('=')[:2]] ) elif x: yield (urllib.unquote_plus(x), None) unquerify = lambda query: list(_uqf(query))
I don't think that's correct - it throws away information. A better solution is probably:
yield tuple( [urllib.unquote_plus(s) for s in x.split('=', 1)] )
Do you agree? If so, I'll apply this fix later.
cgi.parse_qs agrees with me so I just committed my version of Andrea's fix (with a test). Thanks for highlighting the error Andrea.
You're welcome! I agree this is more correct (and hopefully all code knows that "=" can be inside the argument ;).
On Sat, Sep 03, 2005 at 03:37:40AM +0200, Andrea Arcangeli wrote:
And here again is my latest version of the usual caching patches, based on dialtone's stuff, which prevents high-traffic pages from hurting performance.
I reworked the whole caching patch, because I have too many variations of the same page (with different args) and I can't keep all of them in memory at the same time. So now I free the cache once the timeout triggers (it's invalid cache anyway, so it isn't worth keeping in RAM) and I let the traffic choose and cache the high-traffic ones. I also added a per-page cache limit to be sure not to swap/oom-kill etc... Even when the cache size limit triggers, the cache code has the positive effect of preventing the same query from running many times for all waiting connections. I believe these features could be useful for more than just my little webapp. All you have to do to enable caching is to add: cache = True lifetime = 0 (infinite/notimeout) or >0 (timeout) default is 0 max_cache_size = bytes (0 means to send to all waiting clients but no cache), None is the default (None means cache size unlimited) to the declaration of your rend.Page-derived instances. With this applied I can, as usual, serve >200 req/sec, saturating the bandwidth of the server; without it I can serve only 2/3 req/sec. Despite being very new (not well tested) code, I tried to put it online; let's see what happens ;). 
Index: Nevow/nevow/util.py =================================================================== --- Nevow/nevow/util.py (revision 1765) +++ Nevow/nevow/util.py (working copy) @@ -133,6 +133,7 @@ from twisted.python.failure import Failure from twisted.trial.unittest import deferredError from twisted.python import log + from twisted.internet import reactor try: # work with twisted before retrial Index: Nevow/nevow/rend.py =================================================================== --- Nevow/nevow/rend.py (revision 1765) +++ Nevow/nevow/rend.py (working copy) @@ -481,6 +481,56 @@ self.children[name] = child +class PageCache(object): + def __init__(self): + self.__db = {} + def cacheIDX(self, ctx): + return str(url.URL.fromContext(ctx)) + def __storeCache(self, cacheIDX, c): + self.__db[cacheIDX] = c + def __deleteCache(self, cacheIDX): + del self.__db[cacheIDX] + def __deleteCacheData(self, cacheIDX, page): + size = self.__db[cacheIDX][1] + assert len(self.__db[cacheIDX][0]) == size + page.subCacheSize(size) + self.__deleteCache(cacheIDX) + def __lookupCache(self, cacheIDX): + return self.__db.get(cacheIDX) + def getCache(self, ctx): + cacheIDX = self.cacheIDX(ctx) + c = self.__lookupCache(cacheIDX) + + if c is None: + self.__storeCache(cacheIDX, [util.Deferred()]) + return + + if isinstance(c[0], util.Deferred): + d = util.Deferred() + c.append(d) + return d + + return c[0] + def cacheRendered(self, ctx, data, page): + cacheIDX = self.cacheIDX(ctx) + defer_list = self.__lookupCache(cacheIDX) + assert(isinstance(defer_list[0], util.Deferred)) + size = len(data) + if page.canCache(size): + # overwrite the deferred with the data + timer = None + if page.lifetime > 0: + timer = util.reactor.callLater(page.lifetime, + self.__deleteCacheData, cacheIDX, page) + page.addCacheSize(size) + self.__storeCache(cacheIDX, (data, size, timer, )) + else: + self.__deleteCache(cacheIDX) + for d in defer_list: + d.callback(data) + +_CACHE = PageCache() + class Page(Fragment, 
ConfigurableFactory, ChildLookupMixin): """A page is the main Nevow resource and renders a document loaded via the document factory (docFactory). @@ -494,8 +544,27 @@ afterRender = None addSlash = None + cache = False + lifetime = 0 + max_cache_size = None + __cache_size = 0 + flattenFactory = lambda self, *args: flat.flattenFactory(*args) + def hasCache(self, ctx): + if not self.cache: + return + return _CACHE.getCache(ctx) + def addCacheSize(self, size): + assert self.canCache(size) + self.__cache_size += size + def subCacheSize(self, size): + self.__cache_size -= size + assert self.__cache_size >= 0 + def canCache(self, size): + return self.max_cache_size is None or \ + self.__cache_size + size <= self.max_cache_size + def renderHTTP(self, ctx): if self.beforeRender is not None: return util.maybeDeferred(self.beforeRender,ctx).addCallback( @@ -520,11 +589,20 @@ if self.afterRender is not None: return util.maybeDeferred(self.afterRender,ctx) - if self.buffered: + c = self.hasCache(ctx) + if c is not None: + assert self.afterRender is None + finishRequest() + return c + + if self.buffered or self.cache: io = StringIO() writer = io.write def finisher(result): - request.write(io.getvalue()) + c = io.getvalue() + if self.cache: + _CACHE.cacheRendered(ctx, c, self) + request.write(c) return util.maybeDeferred(finishRequest).addCallback(lambda r: result) else: writer = request.write
participants (3)
-
Andrea Arcangeli
-
Ed Suominen
-
Matt Goodall