# sites/irclogs.qmsk.net/urltree.py
# author: Tero Marttila <terom@fixme.fi>
# date: Sun, 08 Feb 2009 00:29:36 +0200
# branch: sites
# changeset: 41 9585441a4bfb
# parent: 40 71ab68f31a1c
# child: 42 5a72c00c4ae4
# permissions: -rw-r--r--
# description: working basic logs stuff
"""
    Tree-based URL mapping
"""

import re
import os.path

# for Mapper
from lib import map

class URLError (Exception) :
    """
        Raised for a problem with an URL definition, and for URLs that cannot be resolved to a handler
    """

class LabelValue (object) :
    """
        Pairs a label with the value that was produced for it
    """

    def __init__ (self, label, value) :
        """
            Remember the label that produced the value, and the value itself
        """

        self.label, self.value = label, value

    def __repr__ (self) :
        # the str() form, wrapped in angle brackets
        return "<%s>" % (str(self), )

    def __str__ (self) :
        # rendered as <label key>=<repr of value>
        return "%s=%r" % (self.label.key, self.value)

class Label (object) :
    """
        Base class for URL labels (i.e. the segments of the URL between /s)
    """

    @staticmethod
    def parse (mask, defaults, types) :
        """
            Parse the given label-segment, and return a *Label instance.

                mask        - the text of a single segment of an URL mask
                defaults    - dict of key -> default value; a (truthy) value given here takes precedence over
                              a default written in the mask itself
                types       - dict of type name -> callable, where the None key is the type used for masks
                              that do not name a type

            Raises URLError if the segment is not a valid label, or if it names a type that is not in types.
        """

        # empty?
        if not mask :
            return EmptyLabel()

        # simple value?
        match = SimpleValueLabel.EXPR.match(mask)

        if match :
            key = match.group('key')

            # None if the mask does not name a type
            type_name = match.group('type')

            # lookup type, report unknown names as a broken URL definition instead of a bare KeyError
            try :
                value_type = types[type_name]

            except KeyError :
                raise URLError("Unknown type %r in label: %r" % (type_name, mask))

            # defaults?
            # NOTE: a falsy value in defaults (0, '') is treated the same as a missing one here
            default = defaults.get(key)

            if not default :
                default = match.group('default')

                if default :
                    # the mask's default is text, so apply the type to it
                    default = value_type(default)

            # build
            return SimpleValueLabel(key, value_type, default)

        # static?
        match = StaticLabel.EXPR.match(mask)

        if match :
            return StaticLabel(match.group('name'))

        # invalid
        raise URLError("Invalid label: %r" % (mask, ))

    def match (self, value=None) :
        """
            Match this label against the given value, returning either True to match without a value, a LabelValue
            object, or boolean false to not match.

            If value is None, this means that only a default value should be returned.

            This base implementation always raises NotImplementedError, subclasses must override it.
        """

        raise NotImplementedError("%s.match" % (type(self).__name__, ))

class EmptyLabel (Label) :
    """
        The label for an empty URL segment, i.e. nothing between two slashes
    """

    def __str__ (self) :
        return ''

    def __eq__ (self, other) :
        """
            Equal to any other EmptyLabel instance, there is nothing else to compare
        """

        return isinstance(other, EmptyLabel)

    def match (self, value=None) :
        """
            Returns True for the empty string, False when asked for a default (value is None), and None for
            any other value.
        """

        if value is None :
            # an empty label has no default value to offer
            return False

        # anything but the empty segment is not a match
        return True if value == '' else None

class StaticLabel (Label) :
    """
        A fixed, literal URL segment, which only matches its own name
    """

    # what a mask segment must look like to be a static label
    EXPR = re.compile(r'^(?P<name>[a-zA-Z_.-]+)$')

    def __init__ (self, name) :
        """
            name is the literal text of the segment
        """

        self.name = name

    def __str__ (self) :
        return self.name

    def __eq__ (self, other) :
        """
            Static labels are equal when they have the same name
        """

        if not isinstance(other, StaticLabel) :
            return False

        return self.name == other.name

    def match (self, value=None) :
        """
            Returns True if value is exactly our name, False when asked for a default (value is None), and
            None for any other value.
        """

        if value is None :
            # a static label has no default value to offer
            return False

        return True if value == self.name else None

class ValueLabel (Label) :
    """
        Base for labels that carry a value under a named key

        XXX: do we even need this?
    """

    def __init__ (self, key, default) :
        """
            Store the key and the default value, where a default of None means that there is no default
        """

        self.key, self.default = key, default

    def __eq__ (self, other) :
        """
            Value labels are equal when they have the same key, the default is not compared
        """

        if isinstance(other, ValueLabel) :
            return self.key == other.key

        return False

class SimpleValueLabel (ValueLabel) :
    """
        A label that has a name and a simple value, which is converted from the URL segment using the label's type.

        In an URL mask, this is written as {key}, {key:type}, {key=default} or {key:type=default}.
    """

    EXPR = re.compile(r'^\{(?P<key>[a-zA-Z_][a-zA-Z0-9_]*)(:(?P<type>[a-zA-Z_][a-zA-Z0-9_]*))?(=(?P<default>[^}]+))?\}$')

    def __init__ (self, key, type=str, default=None) :
        """
            The given key is the name of this label's value.

                type        - callable used to convert the text of an URL segment into the value
                default     - value to use when there is no URL segment for this label, None for no default
        """

        # key + default
        super(SimpleValueLabel, self).__init__(key, default)

        # type
        self.type = type

    def match (self, value=None) :
        """
            Match -> LabelValue

            With value=None, returns a LabelValue holding the default, or None if this label has no default.
            With a non-empty value, returns a LabelValue holding the value converted with our type.
            An empty value does not match, and returns None.

            Raises URLError if the type fails to convert the value.
        """

        if value is None :
            # compare against None, so that falsy defaults like 0 are used as well
            if self.default is not None :
                return LabelValue(self, self.default)

            return None

        # only non-empty values!
        if not value :
            return None

        # convert with type
        try :
            converted = self.type(value)

        except Exception as e :
            raise URLError("Bad value %r for type %s: %s: %s" % (value, self.type.__name__, type(e).__name__, e))

        return LabelValue(self, converted)

    def __str__ (self) :
        # leave out the type if it is the default str, and the default if there is none
        type_part = ':%s' % (self.type.__name__, ) if self.type != str else ''
        default_part = '=%s' % (self.default, ) if self.default is not None else ''

        return '{%s%s%s}' % (self.key, type_part, default_part)

class URLConfig (object) :
    """
        Settings shared by all URLs, which is the table of value types
    """

    # type codes that are always available, the None key is the type for labels that do not name one
    BUILTIN_TYPES = {
        # integer
        'int'   : int,

        # default
        None    : str,
    }

    def __init__ (self, type_dict=None) :
        """
            Create an URLConfig for use with URL

            type_dict may be a mapping of type names -> callables. Its entries are added to a copy of the
            built-in types, replacing any built-in of the same name.
        """

        # work on a copy, so that the class-level table stays untouched
        merged = dict(self.BUILTIN_TYPES)

        if type_dict :
            merged.update(type_dict)

        self.type_dict = merged

class URL (object) :
    """
        A single URL mask, bound to the handler that serves it
    """

    def __init__ (self, config, url_mask, handler, type_dict=None, **defaults) :
        """
            Parse url_mask into labels using the types of the given URLConfig, and remember the handler and the
            default values given as keyword arguments.

            The type_dict argument is accepted, but it is not used here.
        """

        self.config = config
        self.url_mask = url_mask
        self.handler = handler
        self.defaults = defaults

        # one label per /-separated segment of the mask
        types = config.type_dict
        labels = []

        for segment in url_mask.split('/') :
            labels.append(Label.parse(segment, defaults, types))

        self.label_path = labels

    def get_label_path (self) :
        """
            Returns a new list of this url's labels, which the caller is free to modify
        """

        return self.label_path[:]

    def execute (self, request, label_values) :
        """
            Call the handler with the request, passing our defaults overlaid with the given label values as
            keyword arguments, and return whatever the handler returns.
        """

        # copy, so that the stored defaults stay as they are
        kwargs = dict(self.defaults)

        # matched values override the defaults
        kwargs.update((item.label.key, item.value) for item in label_values)

        return self.handler(request, **kwargs)

    def __str__ (self) :
        return '/'.join(map(str, self.label_path))

    def __repr__ (self) :
        return "URL(%r, %r)" % (str(self), self.handler)

class URLNode (object) :
    """
        Represents a node in the URLTree
    """

    def __init__ (self, parent, label) :
        """
            Initialize with the given parent and label, and an empty list of children
        """

        # the parent URLNode, None for the root
        self.parent = parent

        # this node's Label
        self.label = label

        # list of child URLNodes
        self.children = []

        # this node's URL, set by add_url for an empty label_path
        self.url = None

    def _build_child (self, label) :
        """
            Build, insert and return a new child Node
        """

        child = URLNode(self, label)

        self.children.append(child)

        return child

    def add_url (self, url, label_path) :
        """
            Add a URL object to this node under the given path. Uses recursion to process the path.

            The label_path argument is a (partial) label path as returned by URL.get_label_path. Labels are
            popped off the front of it as they are processed, so the caller's list is modified.

            If label_path is empty (len zero, or begins with EmptyLabel), then the given url is assigned to this node.

            Raises URLError if this node already has an url assigned.
        """

        # matches this node?
        if not label_path or isinstance(label_path[0], EmptyLabel) :
            if self.url :
                raise URLError(url, "node already defined")

            self.url = url

            return

        # pop child label from label_path
        child_label = label_path.pop(0)

        # look for an existing child with that label
        for child in self.children :
            if child.label == child_label :
                # found, use this
                break

        else :
            # none of them had it, so build a new child
            child = self._build_child(child_label)

        # recurse to handle the rest of the label_path
        child.add_url(url, label_path)

    def match (self, label_path) :
        """
            Locate the URL object corresponding to the given label_path value under this node.

            label_path is a list of URL segment strings. Segments are popped off the front of it as they are
            matched, so the caller's list is modified.

            Returns a (url, label_values) tuple, where label_values is a list of the LabelValues produced along
            the way. Values are added after recursing, so those of the deepest labels come first.

            Raises URLError if nothing matches, or if more than one child matches the same segment.
        """

        # empty label_path?
        if not label_path or label_path[0] == '' :
            # the search ends at this node
            if self.url :
                # this URL is the best match
                return (self.url, [])

            elif not self.children :
                # incomplete URL
                raise URLError("no URL handler defined for this Node")

            else :
                # use default value, i.e. Label.match(None)
                # an empty segment is left at the front of label_path, so the nodes below do the same
                label = None

        else :
            # pop the next label from the label path
            label = label_path.pop(0)

        # the one child that matches, and the value that its label produced
        match = match_value = None

        # try every child, to make sure that there are no ambiguous URLs
        for child in self.children :
            value = child.label.match(label)

            # skip those that don't match at all
            if not value :
                continue

            # already found a match? :/
            if match is not None :
                raise URLError("Ambiguous URL")

            match = child
            match_value = value

        # found something?
        if match is None :
            raise URLError("No child found for label: %s + %s + %s" % (self.get_url(), label, '/'.join(str(l) for l in label_path)))

        # ok, recurse into the match
        url, label_values = match.match(label_path)

        # add the value of the child that matched, not of whichever child happened to be tried last
        if isinstance(match_value, LabelValue) :
            label_values.append(match_value)

        # return the match
        return url, label_values

    def get_url (self) :
        """
            Returns the URL for this node, built from the labels of this node and all of its parents.

            The result always ends with a slash.
        """

        # URL segments in reverse order, the leading '' becomes the trailing slash
        segments = ['']

        # start with ourself
        node = self

        # iterate up to root
        while node :
            segments.append(str(node.label))

            node = node.parent

        # reverse
        segments.reverse()

        return '/'.join(segments)

    def dump (self, indent=0) :
        """
            Returns a multi-line string representation of this Node, with one line per node and each level of
            children indented by four more columns.
        """

        return '\n'.join([
            "%-45s%s" % (
                ' '*indent + str(self.label) + ('/' if self.children else ''),
                (' -> %r' % self.url) if self.url else ''
            )
        ] + [
            child.dump(indent + 4) for child in self.children
        ])

    def __str__ (self) :
        return "%s/[%s]" % (self.label, ','.join(str(child) for child in self.children))

class URLTree (map.Mapper) :
    """
        A Mapper that finds the handler for a request by walking a tree of URLNodes
    """

    def __init__ (self, url_list) :
        """
            Build the tree from the given list of URLs
        """

        # the root node stands for the leading slash of the URL masks
        self.root = URLNode(None, EmptyLabel())

        for url in url_list :
            self.add_url(url)

    def add_url (self, url) :
        """
            Adds the given URL to the tree. The URL must begin with a root slash.
        """

        # a copy of the url's labels, which we are free to consume
        labels = url.get_label_path()

        # the first label must be the root's
        # NOTE(review): asserts are skipped when Python runs with -O, so this check is then not made
        first_label = labels.pop(0)
        assert first_label == self.root.label, "URL must begin with root"

        # the rest goes under the root
        self.root.add_url(url, labels)

    def match (self, url) :
        """
            Find the URL object best corresponding to the given url, matching any ValueLabels.

            The given url is split on slashes, and a non-empty url must not begin with a slash.

            Returns an (URL, [LabelValue]) tuple.
        """

        segments = url.split('/')

        # an empty url is just the root itself
        if url :
            # the root label matches an empty first segment, which is what a leading / gives
            # NOTE(review): asserts are skipped when Python runs with -O, so this check is then not made
            assert not self.root.label.match(segments[0]), "URL must not begin with root"

        # just match starting at root
        return self.root.match(segments)

    def handle_request (self, request) :
        """
            Looks up the request's page name in the tree, and returns the result of executing the URL found
        """

        # find the URL+values to use for the requested page
        url, label_values = self.match(request.get_page_name())

        # let the URL handle it
        return url.execute(request, label_values)