http.py
changeset 73 1554d3d083b8
parent 69 2e93ba90fb92
--- a/http.py	Mon Feb 16 01:40:45 2009 +0200
+++ b/http.py	Mon Feb 16 17:55:09 2009 +0200
@@ -11,6 +11,25 @@
 # for path handling
 import os.path
 
+def request_url (env) :
+    """
+        Attempt to reconstruct the URL of the given request environment, as "<scheme>://<host><path>".
+
+        Works best when env is a WSGI-compliant env dict; any key that is missing shows up as '[???]'.
+    """
+    
+    # HTTP/HTTPS scheme
+    scheme = env.get('wsgi.url_scheme', '[???]')
+
+    # the host
+    host = env.get('HTTP_HOST', '[???]')
+
+    # the path; NOTE(review): REQUEST_URI is not a standard WSGI key, presumably set by the web server - confirm
+    path = env.get('REQUEST_URI', '[???]')
+
+    # join the parts; nothing is inserted between host and path
+    return "%s://%s%s" % (scheme, host, path)
+
 class Request (object) :
     """
         HTTP Request with associated metadata
@@ -28,7 +47,7 @@
         self.arg_str = env.get('QUERY_STRING', '')
 
         # parse query args
-        self.arg_dict = cgi.parse_qs(self.arg_str, True)
+        self.arg_dict = cgi.parse_qs(self.arg_str, keep_blank_values=True)
 
         # load post data?
         if self.is_post() :
@@ -46,8 +65,8 @@
             input = self.env['wsgi.input']
 
             # use cgi.FieldStorage to parse this
-            self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=1)
-        
+            self.post_data = cgi.FieldStorage(fp=input, environ=self.env, keep_blank_values=True)
+
         else :
             # no post data
             self.post_data = None
@@ -72,28 +91,44 @@
 
         return os.path.dirname(self.env['SCRIPT_NAME']).rstrip('/')
     
+    def _normalize_path (self, path) :
+        """
+            Normalizes a URL path to remove back-references, but keep any trailing-slash indicator.
+            A missing path (None or '') gives '', and the root path stays '/' instead of becoming '//'.
+        """
+
+        # env.get() hands us None for an absent key, so test for emptiness before any str method
+        if not path :
+            return ''
+
+        # normpath() keeps the root's own slash, so only re-add the trailing / when it is not there already
+        normalized = os.path.normpath(path)
+        keep_slash = path.endswith('/') and not normalized.endswith('/')
+        return normalized + '/' if keep_slash else normalized
+
     @property
     def page_prefix (self) :
         """
-            Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed
+            Returns the URL path root for page URLs, based on REQUEST_URI with PATH_INFO removed. This will have
+            a leading slash, but no trailing slash, unless it's empty:
 
             /                   -> 
             /foo.cgi            -> /foo.cgi
             /foo.cgi/index      -> /foo.cgi
-            /foo.cgi/quux/bar   -> /foo.cgi
+            /foo.cgi/quux/bar/  -> /foo.cgi
             /quux/foo.cgi/bar   -> /quux/foo.cgi
             /bar                -> 
         """
         
-        # XXX: request uri path without the query string
-        request_path = self.env.get('REQUEST_URI', '').split('?', 1)[0].rstrip('/')
+        # request uri path without the query string
+        request_path = self._normalize_path(self.env.get('REQUEST_URI', '').split('?', 1)[0])
 
         # path info
         page_name = self.get_page_name()
 
         # special-case for empty page_name
         if not page_name :
-            return request_path
+            return request_path.rstrip('/')
         
         # sanity-check
         assert request_path.endswith(page_name)
@@ -103,27 +138,23 @@
     
     def get_page_name (self) :
         """
-            Returns the requested page path with no leading slash, i.e.
+            Returns the requested page path with no leading slash, but preserved trailing slash:
 
             /foo.cgi        -> 
             /foo.cgi/       -> 
             /foo.cgi/bar    -> bar
             /foo.cgi/quux/  -> quux/
+            /foo.cgi/quux/bar/../ -> quux/
         """
         
-        # the raw PATH_INFO
-        path_info = self.env.get('PATH_INFO')
-        
-        # avoid nasty '.' paths
-        if path_info :
-            return os.path.normpath(path_info).lstrip('/')
-
-        else :
-            return ''
+        # the normalized PATH_INFO
+        return self._normalize_path(self.env.get('PATH_INFO')).lstrip('/')
     
     def get_arg (self, name, default=None) :
         """
-            Get a single value for an argument with the given key, or the default if missing
+            Get a single value for an argument with the given key, or the default if missing.
+
+            This evaluates valueless query args (?foo&bar=) as empty strings.
         """
 
         if name in self.arg_dict :
@@ -137,7 +168,7 @@
             Iterate over all available (key, value) pairs from the query string
         """
 
-        return cgi.parse_qsl(self.arg_str)
+        return cgi.parse_qsl(self.arg_str, keep_blank_values=True)
     
     def is_post (self) :
         """
@@ -156,7 +187,7 @@
         """
         
         # sanity-check
-        assert self.post_data
+        assert self.post_data is not None
         
         if name in self.post_data :
             # return the FieldStorage value