--- a/http.py Mon Feb 16 01:40:45 2009 +0200
+++ b/http.py Mon Feb 16 17:55:09 2009 +0200
@@ -11,6 +11,25 @@
# for path handling
import os.path
+def request_url (env) :
+    """
+        Best-effort reconstruction of the URL that the given request environment was invoked with.
+
+        Works best with a WSGI-compliant env dict; any part the env lacks shows up as '[???]' in the result.
+    """
+
+    # stand-in for whatever the env doesn't tell us
+    unknown = '[???]'
+
+    # the env keys holding the scheme, the host and the path, in URL order
+    keys = ('wsgi.url_scheme', 'HTTP_HOST', 'REQUEST_URI')
+
+    # look each one up, falling back to the placeholder
+    scheme, host, path = [env.get(key, unknown) for key in keys]
+
+    # glue the pieces together as scheme://host + path
+    return "%s://%s%s" % (scheme, host, path)
+
class Request (object) :
"""
HTTP Request with associated metadata
@@ -28,7 +47,7 @@
self.arg_str = env.get('QUERY_STRING', '')
# parse query args
- self.arg_dict = cgi.parse_qs(self.arg_str, True)
+ self.arg_dict = cgi.parse_qs(self.arg_str, keep_blank_values=True)
# load post data?
if self.is_post() :
@@ -46,8 +65,8 @@
input = self.env['wsgi.input']
# use cgi.FieldStorage to parse this
- self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=1)
-
+ self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=True)
+
else :
# no post data
self.post_data = None
@@ -72,28 +91,44 @@
return os.path.dirname(self.env['SCRIPT_NAME']).rstrip('/')
+    def _normalize_path (self, path) :
+        """
+            Normalizes a URL path to remove back-references, keeping a (single) trailing slash if the path had one. Empty or missing paths give ''.
+        """
+
+        # nothing to normalize; this also keeps os.path.normpath from turning '' into '.'
+        if not path :
+            return ''
+
+        # collapse '.'/'..' segments; only re-add the trailing / if normpath dropped it, so that '/' does not become '//'
+        normalized = os.path.normpath(path)
+        keep_slash = path.endswith('/') and not normalized.endswith('/')
+
+        return normalized + ('/' if keep_slash else '')
+
@property
def page_prefix (self) :
"""
- Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed
+ Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed. This will have a
+ leading slash, but no trailing slash, unless it's empty:
/ ->
/foo.cgi -> /foo.cgi
/foo.cgi/index -> /foo.cgi
- /foo.cgi/quux/bar -> /foo.cgi
+ /foo.cgi/quux/bar/ -> /foo.cgi
/quux/foo.cgi/bar -> /quux/foo.cgi
/bar ->
"""
- # XXX: request uri path without the query string
- request_path = self.env.get('REQUEST_URI', '').split('?', 1)[0].rstrip('/')
+ # request uri path without the query string
+ request_path = self._normalize_path(self.env.get('REQUEST_URI', '').split('?', 1)[0])
# path info
page_name = self.get_page_name()
# special-case for empty page_name
if not page_name :
- return request_path
+ return request_path.rstrip('/')
# sanity-check
assert request_path.endswith(page_name)
@@ -103,27 +138,23 @@
def get_page_name (self) :
"""
- Returns the requested page path with no leading slash, i.e.
+ Returns the requested page path with no leading slash, but preserved trailing slash:
/foo.cgi ->
/foo.cgi/ ->
/foo.cgi/bar -> bar
/foo.cgi/quux/ -> quux/
+ /foo/../ -> foo/
"""
- # the raw PATH_INFO
- path_info = self.env.get('PATH_INFO')
-
- # avoid nasty '.' paths
- if path_info :
- return os.path.normpath(path_info).lstrip('/')
-
- else :
- return ''
+ # the normalized PATH_INFO
+ return self._normalize_path(self.env.get('PATH_INFO')).lstrip('/')
def get_arg (self, name, default=None) :
"""
- Get a single value for an argument with the given key, or the default if missing
+ Get a single value for an argument with the given key, or the default if missing.
+
+ This evaluates valueless query args (?foo&bar=) as empty strings.
"""
if name in self.arg_dict :
@@ -137,7 +168,7 @@
Iterate over all available (key, value) pairs from the query string
"""
- return cgi.parse_qsl(self.arg_str)
+ return cgi.parse_qsl(self.arg_str, keep_blank_values=True)
def is_post (self) :
"""
@@ -156,7 +187,7 @@
"""
# sanity-check
- assert self.post_data
+ assert self.post_data is not None
if name in self.post_data :
# return the FieldStorage value
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test.py Mon Feb 16 17:55:09 2009 +0200
@@ -0,0 +1,126 @@
+#!/usr/bin/env python2.5
+
+"""
+ General unit tests
+"""
+
+import unittest
+from cStringIO import StringIO
+
+import http
+
+class TestHttpUtil (unittest.TestCase) :
+    def test_request_url (self) :
+        # a fully populated env, and one where only the host is known
+        complete = {
+            'wsgi.url_scheme': "https",
+            'HTTP_HOST': "testhost",
+            'REQUEST_URI': "/test",
+        }
+        partial = {'HTTP_HOST': "-"}
+
+        # missing parts are expected to be rendered as the '[???]' placeholder
+        self.assertEqual(http.request_url(complete), "https://testhost/test")
+        self.assertEqual(http.request_url(partial), "[???]://-[???]")
+
+class TestHttpRequest (unittest.TestCase) :    # exercises http.Request against hand-built env dicts
+    def build_request (self, env_dict=None, **env_kw) :     # env_dict/env_kw override the default GET env below
+        env = {
+            'wsgi.url_scheme':  "http",
+            'HTTP_HOST':        "testhost",
+            'REQUEST_METHOD':   "GET",
+        }
+        env.update(env_dict or {})
+        env.update(env_kw)
+
+        return http.Request(env)
+
+    def test_site_host (self) :
+        self.assertEqual(self.build_request(HTTP_HOST='testhost').site_host, "testhost")
+        self.assertEqual(self.build_request(HTTP_HOST='testhost:13').site_host, "testhost:13")
+
+    def test_site_root (self) :
+        for script_name, site_root in (
+            ('',            ""          ),
+            ('/foo',        ""          ),
+            ('/foo/bar/',   "/foo/bar"  ),
+        ) :
+            self.assertEqual(self.build_request(SCRIPT_NAME=script_name).site_root, site_root)
+
+    def test_get_page_name (self) :
+        for path_info, page_name in (
+            ('',                ""),
+            ('/',               ""),
+            ('/foo1/bar',       "foo1/bar"),
+            ('/foo2/bar/',      "foo2/bar/"),
+            ('/foo3/bar/../',   "foo3/"),
+            ('/foo4/bar/..',    "foo4"),
+            ('//',              ""),
+        ) :
+            self.assertEqual(self.build_request(PATH_INFO=path_info).get_page_name(), page_name)
+
+    def _test_page_prefix (self, request_uri, path_info, page_prefix) :
+        self.assertEqual(self.build_request(REQUEST_URI=request_uri, PATH_INFO=path_info).page_prefix, page_prefix)
+
+    def test_page_prefix_empty (self) :
+        self._test_page_prefix('', '', "")
+
+    def test_page_prefix_root_dir (self) :
+        self._test_page_prefix('/foo/', '/foo/', "")
+
+    def test_page_prefix_cgi (self) :
+        self._test_page_prefix('/foo.cgi/quux', '/quux', "/foo.cgi")
+
+    def test_page_prefix_qargs (self) :
+        self._test_page_prefix('/foo.cgi/?foo', '/', "/foo.cgi")
+
+    def test_get_arg (self) :
+        for query_string, key, value in (
+            ('',            'foo',  None),
+            ('foo',         'foo',  ''),
+            ('foo=',        'foo',  ''),
+            ('foo=bar',     'foo',  'bar'),
+            ('foo=&bar=.',  'bar',  '.'),
+        ) :
+            self.assertEqual(self.build_request(QUERY_STRING=query_string).get_arg(key, None), value)
+
+    def test_get_args (self) :
+        for query_string, keyvals in (
+            ('',                []),
+            ('foo1=&bar',       [('foo1', ''), ('bar', '')]),
+            ('foo2=bar',        [('foo2', 'bar')]),
+            ('foo3=bar&foo3',   [('foo3', 'bar'), ('foo3', '')]),
+        ) :
+            self.assertEqual(self.build_request(QUERY_STRING=query_string).get_args(), keyvals)
+
+    def _build_post (self, method='POST', **vals) :
+        data = '&'.join('%s=%s' % kv for kv in vals.iteritems())
+        # NOTE: keys/values are joined as-is (no URL-encoding), so only use plain test values
+        return self.build_request({'wsgi.input': StringIO(data)}, REQUEST_METHOD=method, CONTENT_TYPE='application/x-www-form-urlencoded')
+
+    def test_is_post (self) :
+        for request_method, expected in (
+            ('GET',     False),
+            ('POST',    True),
+            ('post',    True),
+        ) :
+            self.assertEqual(self._build_post(method=request_method).is_post(), expected)
+
+    def _test_post (self, **vals) :
+        req = self._build_post(**vals)      # XXX: only builds the request; no assertions yet
+
+    def test_post_empty (self) :
+        req = self._build_post()
+        # nothing was posted, so looking up a field falls back to the given default
+        self.assertTrue(req.is_post())
+        self.assertEqual(req.get_post('foo', None), None)
+
+    def test_post_simple (self) :
+        req = self._build_post(foo='bar')
+        # a single urlencoded foo=bar field should come back as its plain value
+        self.assertTrue(req.is_post())
+        self.assertEqual(req.get_post('foo', None), 'bar')
+
+if __name__ == '__main__' :     # only when executed as a script, not when imported
+    unittest.main()             # hand control to unittest's command-line test runner
+