--- a/http.py Mon Feb 16 01:40:45 2009 +0200
+++ b/http.py Mon Feb 16 17:55:09 2009 +0200
@@ -11,6 +11,25 @@
# for path handling
import os.path
+def request_url (env) :
+    """
+        Best-effort reconstruction of the URL that the given request environment was invoked with.
+
+        Reads wsgi.url_scheme, HTTP_HOST and REQUEST_URI from env; any of these keys that is missing
+        shows up as the literal placeholder '[???]' in the returned string.
+    """
+
+    # substituted for every component the env does not provide
+    placeholder = '[???]'
+
+    # (scheme, host, path), looked up in the order the format string consumes them
+    parts = tuple(
+        env.get(key, placeholder)
+        for key in ('wsgi.url_scheme', 'HTTP_HOST', 'REQUEST_URI')
+    )
+
+    return "%s://%s%s" % parts
+
class Request (object) :
"""
HTTP Request with associated metadata
@@ -28,7 +47,7 @@
self.arg_str = env.get('QUERY_STRING', '')
# parse query args
- self.arg_dict = cgi.parse_qs(self.arg_str, True)
+ self.arg_dict = cgi.parse_qs(self.arg_str, keep_blank_values=True)
# load post data?
if self.is_post() :
@@ -46,8 +65,8 @@
input = self.env['wsgi.input']
# use cgi.FieldStorage to parse this
- self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=1)
-
+ self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=True)
+
else :
# no post data
self.post_data = None
@@ -72,28 +91,44 @@
return os.path.dirname(self.env['SCRIPT_NAME']).rstrip('/')
+    def _normalize_path (self, path) :
+        """
+        Normalize a URL path to remove back-references, keeping any trailing-slash indicator.
+        A missing (None) or empty path yields '' (this also avoids normpath turning '' into '.').
+        """
+
+        # callers pass env.get(...) results, which are None when the key is absent; guard before str methods
+        if not path :
+            return ''
+
+        normalized = os.path.normpath(path)
+
+        # re-append the trailing / that normpath strips, but do not double it when the result already ends in /
+        return normalized + ('/' if path.endswith('/') and not normalized.endswith('/') else '')
+
@property
def page_prefix (self) :
"""
- Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed
+ Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed. This will have a
+ preceding slash, but no trailing slash, unless it's empty:
/ ->
/foo.cgi -> /foo.cgi
/foo.cgi/index -> /foo.cgi
- /foo.cgi/quux/bar -> /foo.cgi
+ /foo.cgi/quux/bar/ -> /foo.cgi
/quux/foo.cgi/bar -> /quux/foo.cgi
/bar ->
"""
- # XXX: request uri path without the query string
- request_path = self.env.get('REQUEST_URI', '').split('?', 1)[0].rstrip('/')
+ # request uri path without the query string
+ request_path = self._normalize_path(self.env.get('REQUEST_URI', '').split('?', 1)[0])
# path info
page_name = self.get_page_name()
# special-case for empty page_name
if not page_name :
- return request_path
+ return request_path.rstrip('/')
# sanity-check
assert request_path.endswith(page_name)
@@ -103,27 +138,23 @@
def get_page_name (self) :
"""
- Returns the requested page path with no leading slash, i.e.
+ Returns the requested page path with no leading slash, but preserved trailing slash:
/foo.cgi ->
/foo.cgi/ ->
/foo.cgi/bar -> bar
/foo.cgi/quux/ -> quux/
+ /foo/../ -> foo/
"""
- # the raw PATH_INFO
- path_info = self.env.get('PATH_INFO')
-
- # avoid nasty '.' paths
- if path_info :
- return os.path.normpath(path_info).lstrip('/')
-
- else :
- return ''
+ # the normalized PATH_INFO; default to '' because the key may be absent from env (None has no str methods)
+ return self._normalize_path(self.env.get('PATH_INFO', '')).lstrip('/')
def get_arg (self, name, default=None) :
"""
- Get a single value for an argument with the given key, or the default if missing
+ Get a single value for an argument with the given key, or the default if missing.
+
+ This evaluates valueless query args (?foo&bar=) as empty strings.
"""
if name in self.arg_dict :
@@ -137,7 +168,7 @@
Iterate over all available (key, value) pairs from the query string
"""
- return cgi.parse_qsl(self.arg_str)
+ return cgi.parse_qsl(self.arg_str, keep_blank_values=True)
def is_post (self) :
"""
@@ -156,7 +187,7 @@
"""
# sanity-check
- assert self.post_data
+ assert self.post_data is not None
if name in self.post_data :
# return the FieldStorage value