add a http.request_url utility function, start writing some unit tests (now for http.Request), fix some issues
author Tero Marttila <terom@fixme.fi>
Mon, 16 Feb 2009 17:55:09 +0200
changeset 73 1554d3d083b8
parent 72 5160b9e0edf1
child 74 406cf77d23f9
add a http.request_url utility function, start writing some unit tests (now for http.Request), fix some issues
http.py
test.py
--- a/http.py	Mon Feb 16 01:40:45 2009 +0200
+++ b/http.py	Mon Feb 16 17:55:09 2009 +0200
@@ -11,6 +11,25 @@
 # for path handling
 import os.path
 
+def request_url (env) :
+    """
+        Best-effort reconstruction of the full URL for a request environment.
+
+        Reads 'wsgi.url_scheme', 'HTTP_HOST' and 'REQUEST_URI' from env; any
+        key that is missing is rendered as the literal placeholder '[???]'.
+    """
+
+    # placeholder substituted for any component the env does not provide
+    unknown = '[???]'
+
+    # (scheme, host, path), in the order the URL is assembled
+    parts = tuple(
+        env.get(key, unknown)
+            for key in ('wsgi.url_scheme', 'HTTP_HOST', 'REQUEST_URI')
+    )
+
+    return "%s://%s%s" % parts
+
 class Request (object) :
     """
         HTTP Request with associated metadata
@@ -28,7 +47,7 @@
         self.arg_str = env.get('QUERY_STRING', '')
 
         # parse query args
-        self.arg_dict = cgi.parse_qs(self.arg_str, True)
+        self.arg_dict = cgi.parse_qs(self.arg_str, keep_blank_values=True)
 
         # load post data?
         if self.is_post() :
@@ -46,8 +65,8 @@
             input = self.env['wsgi.input']
 
             # use cgi.FieldStorage to parse this
-            self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=1)
-        
+            self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=True)
+
         else :
             # no post data
             self.post_data = None
@@ -72,28 +91,44 @@
 
         return os.path.dirname(self.env['SCRIPT_NAME']).rstrip('/')
     
+    def _normalize_path (self, path) :
+        """
+            Normalizes a URL path to remove back-references, but keep any trailing-slash indicator.
+            A missing (None) or empty path is returned as the empty string.
+        """
+
+        # nothing to normalize; also guards against env.get() handing us None
+        if not path :
+            return ''
+
+        # collapse '.'/'..' segments, re-adding the trailing / that os.path.normpath strips
+        path_postfix = '/' if path.endswith('/') else ''
+
+        return os.path.normpath(path) + path_postfix
+
     @property
     def page_prefix (self) :
         """
-            Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed
+            Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed. This will have a
+            leading slash, but no trailing slash, unless it's empty:
 
             /                   -> 
             /foo.cgi            -> /foo.cgi
             /foo.cgi/index      -> /foo.cgi
-            /foo.cgi/quux/bar   -> /foo.cgi
+            /foo.cgi/quux/bar/  -> /foo.cgi
             /quux/foo.cgi/bar   -> /quux/foo.cgi
             /bar                -> 
         """
         
-        # XXX: request uri path without the query string
-        request_path = self.env.get('REQUEST_URI', '').split('?', 1)[0].rstrip('/')
+        # request uri path without the query string
+        request_path = self._normalize_path(self.env.get('REQUEST_URI', '').split('?', 1)[0])
 
         # path info
         page_name = self.get_page_name()
 
         # special-case for empty page_name
         if not page_name :
-            return request_path
+            return request_path.rstrip('/')
         
         # sanity-check
         assert request_path.endswith(page_name)
@@ -103,27 +138,23 @@
     
     def get_page_name (self) :
         """
-            Returns the requested page path with no leading slash, i.e.
+            Returns the requested page path with no leading slash, but preserved trailing slash:
 
             /foo.cgi        -> 
             /foo.cgi/       -> 
             /foo.cgi/bar    -> bar
             /foo.cgi/quux/  -> quux/
+            /foo.cgi/quux/bar/../ -> quux/
         """
         
-        # the raw PATH_INFO
-        path_info = self.env.get('PATH_INFO')
-        
-        # avoid nasty '.' paths
-        if path_info :
-            return os.path.normpath(path_info).lstrip('/')
-
-        else :
-            return ''
+        # the normalized PATH_INFO
+        return self._normalize_path(self.env.get('PATH_INFO')).lstrip('/')
     
     def get_arg (self, name, default=None) :
         """
-            Get a single value for an argument with the given key, or the default if missing
+            Get a single value for an argument with the given key, or the default if missing.
+
+            This evaluates valueless query args (?foo&bar=) as empty strings.
         """
 
         if name in self.arg_dict :
@@ -137,7 +168,7 @@
             Iterate over all available (key, value) pairs from the query string
         """
 
-        return cgi.parse_qsl(self.arg_str)
+        return cgi.parse_qsl(self.arg_str, keep_blank_values=True)
     
     def is_post (self) :
         """
@@ -156,7 +187,7 @@
         """
         
         # sanity-check
-        assert self.post_data
+        assert self.post_data is not None
         
         if name in self.post_data :
             # return the FieldStorage value
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test.py	Mon Feb 16 17:55:09 2009 +0200
@@ -0,0 +1,126 @@
+#!/usr/bin/env python2.5
+
+"""
+    General unit tests
+"""
+
+import unittest
+from cStringIO import StringIO
+
+import http
+
+class TestHttpUtil (unittest.TestCase) :
+    """ Tests for the module-level helper functions of http """
+
+    def test_request_url (self) :
+        # every component present in the env
+        complete_env = {
+            'wsgi.url_scheme':  "https",
+            'HTTP_HOST':        "testhost",
+            'REQUEST_URI':      "/test",
+        }
+        self.assertEqual(http.request_url(complete_env), "https://testhost/test")
+        # scheme and path missing: each is rendered as the '[???]' placeholder
+        self.assertEqual(http.request_url({'HTTP_HOST': "-"}), "[???]://-[???]")
+
+class TestHttpRequest (unittest.TestCase) :
+    """ Unit tests for http.Request, using hand-built WSGI-style env dicts """
+
+    def build_request (self, env_dict=None, **env_kw) :
+        """ Build a http.Request from a minimal GET env, updated with env_dict and then env_kw """
+        env = {
+            'wsgi.url_scheme':      "http",
+            'HTTP_HOST':            "testhost",
+            'REQUEST_METHOD':       "GET",
+        }
+        env.update(env_dict or {})
+        env.update(env_kw)
+
+        return http.Request(env)
+    
+    def test_site_host (self) :
+        self.assertEqual(self.build_request(HTTP_HOST='testhost').site_host, "testhost")
+        self.assertEqual(self.build_request(HTTP_HOST='testhost:13').site_host, "testhost:13")
+    
+    def test_site_root (self) :
+        for script_name, site_root in (
+            ('',            ""         ),
+            ('/foo',        ""         ),
+            ('/foo/bar/',   "/foo/bar"  ),
+        ) :
+            self.assertEqual(self.build_request(SCRIPT_NAME=script_name).site_root, site_root)
+    
+    def test_get_page_name (self) :
+        for path_info, page_name in (
+            ('',                ""),
+            ('/',               ""),
+            ('/foo1/bar',       "foo1/bar"),
+            ('/foo2/bar/',      "foo2/bar/"),
+            ('/foo3/bar/../',   "foo3/"),
+            ('/foo4/bar/..',    "foo4"),
+            ('//',              ""),
+        ) :
+            self.assertEqual(self.build_request(PATH_INFO=path_info).get_page_name(), page_name)
+    
+    def _test_page_prefix (self, request_uri, path_info, page_prefix) :
+        self.assertEqual(self.build_request(REQUEST_URI=request_uri, PATH_INFO=path_info).page_prefix, page_prefix)
+
+    def test_page_prefix_empty (self) :
+        self._test_page_prefix('', '', "")
+
+    def test_page_prefix_root_dir (self) :
+        self._test_page_prefix('/foo/', '/foo/', "")
+    
+    def test_page_prefix_cgi (self) :
+        self._test_page_prefix('/foo.cgi/quux', '/quux', "/foo.cgi")
+
+    def test_page_prefix_qargs (self) :
+        self._test_page_prefix('/foo.cgi/?foo', '/', "/foo.cgi")
+    
+    def test_get_arg (self) :
+        for query_string, key, value in (
+            ('',                'foo',      None),
+            ('foo',             'foo',      ''),
+            ('foo=',            'foo',      ''),
+            ('foo=bar',         'foo',      'bar'),
+            ('foo=&bar=.',      'bar',      '.'),
+        ) :
+            self.assertEqual(self.build_request(QUERY_STRING=query_string).get_arg(key, None), value)
+    
+    def test_get_args (self) :
+        for query_string, keyvals in (
+            ('',                []),
+            ('foo1=&bar',       [('foo1', ''), ('bar', '')]),
+            ('foo2=bar',        [('foo2', 'bar')]),
+            ('foo3=bar&foo3',   [('foo3', 'bar'), ('foo3', '')]),
+        ) :
+            self.assertEqual(self.build_request(QUERY_STRING=query_string).get_args(), keyvals)
+    
+    def _build_post (self, method='POST', **vals) :
+        data = '&'.join('%s=%s' % kv for kv in vals.iteritems())
+        return self.build_request({'wsgi.input': StringIO(data)}, REQUEST_METHOD=method, CONTENT_TYPE='application/x-www-form-urlencoded')
+
+    def test_is_post (self) :
+        for request_method, expected in (
+            ('GET',             False),
+            ('POST',            True),
+            ('post',            True),
+        ) :
+            self.assertEqual(self._build_post(method=request_method).is_post(), expected)
+        
+    def _test_post (self, **vals) :
+        self._build_post(**vals)    # stub helper: only builds the request, asserts nothing yet
+    
+    def test_post_empty (self) :
+        req = self._build_post()
+        self.assertTrue(req.is_post())
+        self.assertEqual(req.get_post('foo', None), None)
+    
+    def test_post_simple (self) :
+        req = self._build_post(foo='bar')
+        self.assertTrue(req.is_post())
+        self.assertEqual(req.get_post('foo', None), 'bar')
+
+if __name__ == '__main__' :
+    unittest.main()
+