"""
Tree-based URL mapping.
urltree is used to construct a tree-like structure of URL handlers, and to match a given URL against the tree,
returning the matching handler.
URLs are handled as a sequence of labels, separated by slashes (/). Thus, an URL like /foo/bar/baz would be handled
as a list of three labels:
['foo', 'bar', 'baz']
The URL handler rules are constructed as a sequence of Labels, which then match and evaluate URL segments.
StaticLabel just matches a pre-defined/constant segment, and is used to route URLs through the URL tree.
ValueLabel matches segments dynamically, capturing them as a typed value. It also supports default values.
urltree also handles building URL strings for a given handler, and a set of values.
urltree can also handle query-string parameters (i.e. GET params).
"""
import re
import os.path
import werkzeug
class URLError (Exception) :
"""
Error with an URL definition, parsing, or value handling.
"""
pass
class URLRuleError (URLError) :
"""
Error in URLTree structure definition.
"""
class URLMatchError (URLError) :
"""
Error looking up the URL against our URLTree.
"""
class URLValueError (URLError) :
"""
Error parsing an URL segment's value via URLType.
"""
pass
class URLBuildError (URLError) :
"""
Error re-constructing an outgoing URL.
"""
class Label (object) :
"""
Abstract base class for URL segment-matching rules.
A Label subclass is constructed by parse()ing a text-form URL segment spec (== label), and knows how to do a
yes/value/no match() against an URL segment, as well as re-build() the URL segment for the matched value.
XXX: rename to URLRuleLabel or something like that
"""
@staticmethod
def parse (spec, defaults, config) :
"""
Parse the given label-spec, and return an appropriate Label subclass instance.
spec - the rule specification as text
defaults - externally supplied { key: default } dict of default values
config - URLConfig instance to use for constructed Label
"""
# empty/root?
if not spec :
return EmptyLabel()
# construct subclass instance by matching against regexps
for label_type in (
ValueLabel,
StaticLabel,
) :
# test
match = label_type.PARSE_RE.match(spec)
if match :
return label_type.parse(match, defaults, config)
# invalid
raise URLError("Unrecognized label: %r" % (spec, ))
def match (self, value=None) :
"""
Match this label against the given URL segment, returning one of :
* True to match without a value
* False/None to not match, and continue looking
* a (key, value, label, is_default) capture-tuple to capture a named value from the URL segment
value - a segment from the URL as text, or None to force default value
"""
raise NotImplemented()
def build (self, values) :
"""
Re-construct this label, using values from the supplied { key: value } dict if needed.
"""
raise NotImplemented()
def build_default (self, values) :
"""
Return an (is_default, value) tuple
XXX: used internally by build()?
"""
raise NotImplemented()
class EmptyLabel (Label) :
"""
An empty label, which matches empty URL segments, i.e. '//foo' or '/'
"""
def __eq__ (self, other) :
"""
Just compares type
"""
return isinstance(other, EmptyLabel)
def match (self, value=None) :
"""
Matches empty segment.
"""
# no default
if value is None :
return False
# only empty segments
if value == '' :
return True
return False
def build (self, values) :
return ''
def build_default (self, values) :
return (False, '')
def __str__ (self) :
return ''
class StaticLabel (Label) :
"""
A simple literal Label, used to match fixed segments in the URL.
"""
PARSE_RE = re.compile(r'^(?P<name>[a-zA-Z0-9_.-]+)$')
@classmethod
def parse (cls, match, defaults, config) :
"""
Construct and return a new instance from the given PARSE_RE match
"""
# use name as required segment value
return cls(match.group('name'))
def __init__ (self, name) :
"""
The given name is the literal name of this label
"""
self.name = name
def __eq__ (self, other) :
"""
Compares names
"""
return isinstance(other, StaticLabel) and self.name == other.name
def match (self, value=None) :
"""
Match exactly -> True
"""
# no defaults
if value is None :
return False
# match name
elif value == self.name :
return True
# fail
else :
return False
def build (self, values) :
return self.name
def build_default (self, values) :
return (False, self.name)
def __str__ (self) :
return self.name
class ValueLabel (Label) :
"""
A label with a key and a typed value
"""
# / "{" <key> [ ":" <type> ] [ "=" [ <default> ] ] "}" /
PARSE_RE = re.compile(r'^\{(?P<key>[a-zA-Z_][a-zA-Z0-9_]*)(:(?P<type>[a-zA-Z_][a-zA-Z0-9_]*))?(=(?P<default>[^}]*))?\}$')
@classmethod
def parse (cls, match, defaults, config) :
"""
Construct and return a new instance from the given PARSE_RE match
"""
# value name
key = match.group('key')
# value type, or None
type_name = match.group("type")
# lookup URLType, None -> default type
type = config.get_type(type_name)
# get default value from dict, or None
default = defaults.get(key)
if not default :
# default vlaue from expr?
default = match.group('default')
if default :
# parse text-form default to actual value
# XXX: is this "" or None if match is for '{foo=}' ?
default = type.parse(default)
# build
return cls(key, type, default, type_name)
def __init__ (self, key, type, default, type_name) :
"""
The given key is the name of this label's value.
The given type_name is is None for the default type, otherwise the type's name. Type is a URLType.
key - the name of this label's value
type - the URLType used to parse/build the value
default - default for value, if no value is given in URL, or None for no default (value required)
type_name - parsed name for the type; used for debugging
"""
# store
self.key = key
self.default = default
# type
self.type_name = type_name
self.type = type
def __eq__ (self, other) :
"""
Compares keys
"""
return isinstance(other, ValueLabel) and self.key == other.key
def match (self, seg=None) :
"""
Match segment -> (key, value, type, is_default)
"""
if seg is None and self.default is not None :
# default value
return self.key, self.default, self.type, True
# we don't have a default value, so don't match partial URLs
elif not seg :
return False
# test against type's syntax
elif not self.type.test(seg) :
return False
else :
# convert with type
value = self.type.parse(seg)
# False == non-default value
return self.key, value, self.type, False
def build (self, values) :
"""
Return either the assigned value from values, our default value, or raise an error
"""
# just proxy to build_default
return self.build_default(values)[1]
def build_default (self, values) :
"""
Check if we have a value in values, and return based on that.
"""
# state
is_default = False
# value given?
if self.key not in values or values[self.key] is None :
if self.default is None :
# no value given for required label
raise URLBuildError("No value given for label %r" % (self.key, ))
# use default
else :
is_default = True
value = self.default
else :
# lookup the value to use
value = values[self.key]
# smart-match against default value
# XXX: would it be better not to omit it as a default value in this case?
is_default = bool(value == self.default)
# convert value to str
value = self.type.build(value)
# return
return (is_default, value)
def __str__ (self) :
"""
Pretty-format back into parsed form.
"""
return '{%s%s%s}' % (
self.key,
(':%s' % (self.type_name, ) if self.type_name is not None else ''),
'=%s' % (self.default, ) if self.default is not None else '',
)
class QueryItem (object) :
"""
Like a normal URL Label, except for dealing with query-string arguments.
"""
@classmethod
def parse (cls, spec, defaults, config) :
"""
Construct and return a new instance from the given spec
"""
# parse default value
if '=' in spec :
# default value, might be empty
spec, default_spec = spec.split('=')
else :
# no default, value required
default_spec = None
# parse type
if ':' in spec :
# use named type
spec, type_name = spec.split(':')
else :
# use default type
type_name = None
# name for value
key = spec
# URLType
type = config.get_type(type_name)
if key in defaults :
# default value from external defaults dict
default = defaults[key]
elif default_spec :
# given default value spec
default = type.parse(default_spec)
elif default_spec is not None :
# XXX: no default value, but not required either... silly
default = ''
else :
# no default value, required
default = None
# build
return cls(key, type, default)
def __init__ (self, key, type, default) :
"""
The given key is the name of this label's value.
The given type_name is is None for the default type, otherwise the type's name. Type is a URLType.
key - the name of this label's value
type - the URLType used to parse/build the value
default - default for value, if no value is given in URL, or None for no default (value required)
"""
# store
self.key = key
self.type = type
self.default = default
def match (self, arg=None) :
"""
Parse value from given query-string argument for this key
arg - the list of query-string values given, or None for missing value.
Returns value to use, or None to omit.
"""
if not arg :
if self.default is None :
# no default value!
raise URLValueError("No value given for query string param: ?%s=..." % (self.key, ))
elif self.default == '' :
# XXX: omit
return None
else :
# use default value
return self.default
# parse with our type
value = None
for value_spec in arg :
if value is None :
# parse first
value = self.type.parse(value_spec)
else :
# parse multiple
value = self.type.append(value, self.type.parse(value_spec))
# ok
return value
def build (self, values) :
"""
Build and return list of query-string values to pass for this argument, or None to omit.
"""
if self.key not in values :
if self.default is None :
# fail
raise URLBuildError("No value given for query string arugment %r" % (self.key, ))
else :
# omit, and use default value
return None
else :
# lookup our value
value = values[self.key]
if value is not None :
# map to values
return self.type.build_multi(value)
else :
# omit
return None
class URLType (object) :
"""
Handles the type-ness of values in the URL
"""
def test (self, value) :
"""
Tests if the given value is accepted for this type.
Defaults to calling parse(), and returning False on errors, True otherwise
"""
try :
self.parse(value)
# XXX: change to URLValueError to not swallow bugs
except Exception :
return False
else :
return True
def parse (self, seg) :
"""
Parse the given URL segment text, which was tested earlier with test(), and return the value object.
Raise an URLValueError to reject the value via the default test() implementation.
"""
raise NotImplementedError()
def append (self, old_value, value) :
"""
Handle multi-valued values, by combining the given previously parse()'d value and newly parse()'d value.
Defaults to raise an error.
"""
raise URLValueError("Multiple values given")
def build (self, value) :
"""
Reverse of parse(), return an URL segment built from the given object value
"""
raise NotImplementedError()
def build_multi (self, value) :
"""
Return a list of URL segments for the given object value (as from parse/append), for multi-value types.
Defaults to return [self.build(obj)]
"""
return [self.build(value)]
class URLStringType (URLType) :
"""
The default URLType, just plain strings.
XXX: handle unicode here, or assume URL is unicode?
"""
def parse (self, seg) :
"""
Identitiy
"""
return unicode(seg)
def build (self, value) :
"""
Return value's string representation for URL
"""
return unicode(value)
class URLIntegerType (URLType) :
"""
A URLType for simple int's, with some constraing checking
"""
def __init__ (self, allow_negative=True, allow_zero=True, max=None) :
"""
Pass in allow_negative=False to disallow negative numbers, allow_zero=False to disallow zero, or non-zero
max to specifiy maximum value
"""
self.allow_negative = allow_negative
self.allow_zero = allow_zero
self.max = max
def _validate (self, value) :
"""
Test to make sure value fits our criteria
"""
# negative?
if not self.allow_negative and value < 0 :
raise URLValueError("value is negative")
# zero?
if not self.allow_zero and value == 0 :
raise URLValueError("value is zero")
# max?
if self.max is not None and value > self.max :
raise URLValueError("value is too large: %d" % value)
return value
def parse (self, seg) :
"""
Convert str -> int
"""
try :
value = int(seg)
except ValueError, ex :
# reject
raise URLValueError(ex)
# validate
return self._validate(value)
def build (self, obj) :
"""
Convert int -> str
"""
return unicode(self._validate(obj))
class URLListType (URLStringType) :
"""
A list of strings
"""
def parse (self, seg) :
"""
URL segments are just parsed into single-item lists.
"""
return [super(URLListType, self).parse(seg)]
def append (self, old_value, value) :
"""
Accumulate values in list.
"""
# combine lists
return old_value + value
def build_multi (self, value) :
"""
Return as a list of build()'d values.
"""
return [super(URLListType, self).build(item) for item in value]
class URLConfig (object) :
"""
Shared configuration relevant to a set of constructed URLRules.
This can be used as a factory to accumulate constructed URLRules, and then create an URLTree out of them.
Simply __call__ the URLConfig() instance with the normal URLRule() *args (except, of course, config=), and finally
just pass the URLConfig() to URLTree() - it's iter()able.
"""
# built-in type codes
BUILTIN_TYPES = {
# default - string
None : URLStringType(),
# string
'str' : URLStringType(),
# integer
'int' : URLIntegerType(),
# list of strs
'list' : URLListType(),
}
def __init__ (self, type_dict=None, reject_extra_args=None) :
"""
Create an URLConfig for use with URLTree/URLs.
type_dict - (optional) { type_name: URLType } dict of additional URLTypes to make
available for use in URLRules. This will take care of ._init_name().
Use None as a key to change the default URLType.
reject_extra_args - instead of ignoring unrecognized query arguments in matched URLs, reject them
"""
# our type_dict
self.type_dict = self.BUILTIN_TYPES.copy()
if type_dict :
# merge
self.type_dict.update(type_dict)
# init
self.ignore_extra_args = not(reject_extra_args)
# accumulated URLRules
self.urls = []
def get_type (self, type_name=None) :
"""
Lookup an URLType by type_name, None for default type.
"""
# lookup + return
return self.type_dict[type_name]
def __call__ (self, *args, **kwargs) :
"""
Return new URLRule with this config and the given args, adding it to our list of urls
"""
# build
url = URLRule(self, *args, **kwargs)
# store
self.urls.append(url)
# return
return url
def __iter__ (self) :
"""
Returns all defined URLRules
"""
return iter(self.urls)
class URLRule (object) :
"""
A set of Labels defining a path to a handler in the URLTree, parsed from a string-form URL-spec.
XXX: also handles query_args, spec that properly...
"""
def __init__ (self, config, url_spec, handler, **defaults) :
"""
Create an URL using the given URLConfig, with the given url mask, handler, and default values.
config - the URLConfig() used for this rule
url_spec - a string-form sequence of Label rules to parse
Should be rooted, i.e. start with a '/'
handler - the target of this URLRule(), returned as the result of the lookup
**defaults - additional default values. Can be used to specify/override the default values in the
url_spec for complex values, or just extra args to pass through.
"""
# store
self.config = config
self.url_spec = url_spec
self.handler = handler
self.defaults = defaults
# query string spec
self.query_items = dict()
# remove prepending root /
url_spec = url_spec.lstrip('/')
# parse any query string
# XXX: conflicts with regexp syntax
if '/?' in url_spec :
url_spec, query_spec = url_spec.split('/?')
else :
query_spec = None
# parse the spec into a sequence of Labels
self.label_path = [Label.parse(label_spec, defaults, config) for label_spec in url_spec.split('/')]
# parse query args
if query_spec :
for item_spec in query_spec.split('&') :
# parse spec into QueryItem
query_item = QueryItem.parse(item_spec, defaults, config)
# store, case-insensitive
self.query_items[query_item.key.lower()] = query_item
def get_label_path (self) :
"""
Returns the Label-path for this URLRule.
"""
# copy self.label_path
return list(self.label_path)
def evaluate (self, url_values, query_args) :
"""
Process the set of argument matched against this URLRule, both those values captured from the URL by
Labels, and any incoming query string arguments:
url_values - a [ (key, value, type, is_default) ] sequence of capture-tuples values from URL segments/labels
query_args - a [ (key, [ value ] ] sequence of query string arguments. This will be mutated!
Returns the parsed set of arguments, and the remaining query string arguments:
return (
args - a { key: value } dict of label and query string values, suitable for passing to
handler function as keyword arguments
qargs - a [ (key, [ value ] ] sequence of remaining query-string arguments not included
in kwargs
"""
# convert url_values to dict for lookup
url_values = dict((key, (value, type, is_default)) for key, value, type, is_default in url_values)
# collect a list of (key, value, is_default) tuples to later apply
values = []
# start with the explicitly supplied defaults
# these may actually include additional arguments to be passed through to the handler
for key, value in self.defaults.iteritems() :
values.append((key, value, True))
# then add all the label values matched from the URL
for key, (value, type, is_default) in url_values.iteritems() :
values.append((key, value, is_default))
# then add in query args
for key, value_list in query_args :
# case-insensitive
key = key.lower()
if key in self.query_items :
# defined as a QueryItem
query_item = self.query_items[key]
# parse as list/value
value = query_item.match(value_list)
# override URL label values if they were parsed as implicit defaults
elif key in url_values :
old_value, type, is_default = url_values[key]
if not is_default :
# XXX: ignore extra value as per original implementation
continue
# parse into value
value = None
for value_spec in value_list :
if value is None :
# parse
value = type.parse(value_spec)
else :
# multi-value
# XXX: this will usually fail, we can't really mix these very well
value = type.append(value, type.parse(value_spec))
# unknown, ignore
else :
continue
# apply
values.append((key, value, False))
# outgoing args
args = {}
# args set to default value
default_args = set()
# apply values
for key, value, is_default in values :
# fresh value
if key not in args :
# store
args[key] = value
if is_default :
default_args.add(key)
# default value
elif key in args and key in default_args :
# replace default value with more specific value
args[key] = value
if not is_default :
default_args.remove(key)
# existing value
else :
raise URLValueError("Multiple values given for param: %s" % (key, ))
# then check for missing query_items
for key, query_item in self.query_items.iteritems() :
# skip those already parsed
if key in args :
continue
# apply default
value = query_item.match(None)
if value is not None :
# set
args[key] = value
# remaining qargs not handled
qargs = [(key, values) for key, values in query_args if key not in args]
# ok
return args, qargs
def build (self, **values) :
"""
Build an absolute URL from our URLTree's root pointing to this target, with the given values embedded.
Default values are left off if they are at the end of the URL.
Values given as None are ignored.
Returns (
* the absolute URL from our URLTree's root to this URLRule as a str
* query args for URL as a [ (key, [value]) ] list
)
# XXX: reject unrecognized values
"""
# root
segments = [(False, '')]
# collect our Label-path's with values as a list of (is_default, segment) tuples
segments += [label.build_default(values) for label in self.label_path]
# optimize by trimming None/default items off the end
for is_default, segment in segments[::-1] :
if segment is None or is_default :
segments.pop(-1)
else :
break
# should have root left at least
assert segments
# join
url = '/'.join(segment for is_default, segment in segments)
# build query args as { key -> [value] }
qargs = [
(
key, query_item.build(values)
) for key, query_item in self.query_items.iteritems()
]
# filter
qargs = [(key, values) for key, values in qargs if values is not None]
return url, qargs
def __str__ (self) :
"""
Format pretty representation
"""
return '/'.join(str(label) for label in self.label_path)
def __repr__ (self) :
"""
Format debug representation
"""
return "URL(%r, %r)" % (str(self), self.handler)
class URLNode (object) :
"""
A Node in the URLTree structure, with the Label that it corresponds to.
Stores the set of URLNodes that exist below this, and optionally the URLRule this node maps to.
Used by URLTree to resolve an incoming URL.
"""
def __init__ (self, parent, label, url_rule=None) :
"""
Initialize with the given parent and label, empty children dict
parent - the URLNode above this node, or None for root. Used to build() URL string
label - the Label matched by this URLNode
url_rule - the URLRule this Node maps to; set later using add_url() with an empty label_path
"""
# store
self.parent = parent
self.label = label
self.url = None
# list of child URLNodes
self.children = []
def _build_child (self, label) :
"""
Build, insert and return a new child Node that maps from this Node via the given Label.
"""
# build new child
child = URLNode(self, label)
# add to children
self.children.append(child)
# return
return child
def set_url (self, url) :
"""
Set the given URLRule as our mapped target (i.e. corresponds directly to this Node).
Fails if we already have a mapped URLRule, since we can't have more than one.
"""
if self.url :
raise URLRuleError(url, "node already has an URLRule mapped")
# set
self.url = url
def add_url (self, url, label_path) :
"""
Insert an URLRule into the tree, using recursion to process the given label path, creating new URLNodes as needed.
url - the URLRule we are storing in the tree
label_path - the path of Labels to the URLRule from this Node down
If label_path is empty (len zero, or [EmptyLabel]), then the given url is mapped from this Node using set_url().
"""
# resolves to this node?
# XXX: earlier, this was just anywhere, but that would mess up '/foo/bar//quux' def...
if not label_path or (len(label_path) == 1 and isinstance(label_path[0], EmptyLabel)) :
self.set_url(url)
else :
# pop child's label from label_path
child_label = label_path.pop(0)
# look for the child to recurse into
child = None
# look for an existing child with that label
for child in self.children :
if child.label == child_label :
# found, use this
break
else :
# build and append a new child
child = self._build_child(child_label)
# recurse to handle the rest of the label_path
child.add_url(url, label_path)
def match (self, url_segments) :
"""
Locate and return the URLRule object corresponding to the given sequence of URL segments value under this node.
Return a (URLRule, [ LabelValue ] ) tuple, containing the matched URLRule, and all captured values from the URL.
"""
# determine value to use
value = None
# label_path ends at this node
# XXX: earlier this was just startswith, and would have matched '/foo/quux//bar/asdf' against '/foo/quux' - not sure if desired
# XXX: always accept trailing / ?
if not url_segments or (len(url_segments) == 1 and not url_segments[0]) :
# the search ends at this node
if self.url :
# this URL is the shortest match
return (self.url, [])
elif not self.children :
# incomplete URL
raise URLMatchError("No URLRule found for URL")
else :
# apply default value, i.e. Label.match(None), and continue recursing
segment = None
else :
# pop the next URL segment from the label path as the segments to match against our next
segment = url_segments.pop(0)
# return one match...
match = value = None
# recurse through our children, DFS
for child in self.children :
# match URL segment against next-down Label rule
match_value = child.label.match(segment)
if match_value is True :
# capture without value
pass
elif not match_value :
# skip those that don't match at all
continue
# match() gave a capture-tuple
else :
# match_value is (key, value, type, is_default)
value = match_value
# already found a match? :/
if match :
# matches against multiple URLRules
# XXX: just return first defined rule instead?
# XXX: include troublesome nodes in error to aid debugging
raise URLMatchError("Ambiguous URL: %s <-> %s" % (match, child))
else :
# ok, but continue looking to make sure there's no ambiguous URLs
match = child
# found something?
if not match :
# 404, kind of
raise URLMatchError("URL not found: %s -> %s + %s" % (self.get_url(), segment, '/'.join(str(seg) for seg in url_segments)))
# recurse into the match using the remaining segments to get the rest of the values
url, url_values = match.match(url_segments)
# captured a value?
if value is not None :
# add it to the set of values captured below us
url_values.append(value)
# return the match
return url, url_values
def get_url (self) :
"""
Returns the URLRule spec leading to this node, by iterating over our parents.
"""
# URL segments in reverse order, ending with a /
segments = ['']
# start with ourself
node = self
# iterate up to root
while node :
# URLRule spec
spec = str(node.label)
segments.append(spec)
node = node.parent
# reverse
segments.reverse()
# return
return '/'.join(segments)
def dump (self, indent=0) :
"""
Returns a multi-line nested-node string representation of this Node
"""
return '\n'.join([
"%-45s%s" % (
' '*indent + str(self.label) + ('/' if self.children else ''),
(' -> %r' % self.url) if self.url else ''
)
] + [
child.dump(indent + 4) for child in self.children
])
def __str__ (self) :
"""
Return a short representation of this Node's match sequence, and our children.
"""
# XXX: recurse for unit-test purposes...
return "%s/[%s]" % (self.label, ','.join(str(child) for child in self.children))
class URLTree (object) :
"""
Map incoming URLs to URLRules and handlers, using a defined tree of URLNodes.
"""
def __init__ (self, url_rules) :
"""
Initialize the tree using the given sequence of URLRules
"""
# root node, matching '/'
self.root = URLNode(None, EmptyLabel())
# insert each URL into the tree
for url in url_rules :
self.add_url(url)
def add_url (self, url_rule) :
"""
Adds the given URLRule to the tree. The URLRule must begin with a root slash.
"""
# get url's Label path
path = url_rule.get_label_path()
# insert into tree by path
self.root.add_url(url_rule, path)
def match (self, url) :
"""
Find the URL object corresponding to the given incoming url, matching any ValueLabels.
Returns an (URLRule, [(value, label, is_default)]) tuple.
XXX: handle unicode on URLs?
"""
# remove leading root, since that's where we always start
url = url.lstrip('/')
# split it into segments
segments = url.split('/')
# recurse into the tree
return self.root.match(segments)
def evaluate (self, url, qargs=None) :
"""
Look up the URLRule's handler for the given incoming url:
url - the text-form URL
qargs - a [ (key, [ value ] ) ] list of query string arguments
and return:
* the `handler` configured for the URLRule
* a { key: value } dict of URL values suitable for use as **kwargs
* a [ (key, [ value ] ) ] list of remaining query string arguments not included in the returned keyword
args
"""
# lookup
rule, url_values = self.match(url)
# evaluate
args, qargs = rule.evaluate(url_values, qargs)
return rule.handler, args, qargs