# qmsk.irclogs: lib/page_tree.py@b538e1f7011c

"""
    Implements the tree containing pages and their metadata
"""

import tree_parse

# Path to the file containing the page metadata tree; load() hands it to PageTree._load.
# NOTE(review): this is a relative path, so it is presumably resolved against the process's
# working directory - confirm against how the application is started.
PAGE_TREE_FILE = "pages/list"

class PageTreeError (Exception) :
    """
        Raised when the page tree cannot be parsed or loaded
    """

class PageInfo (object) :
    """
        Contains metainformation about a single page: its name, title, parent and children.

        A page whose parent is None is treated as the root, and is left out of ancestries and URLs.
    """

    def __init__ (self, parent, name, title, children=None) :
        """
            Initialize the page.

                parent      - the parent PageInfo, or None if there is none (yet)
                name        - the page's name, used as its segment in the URL
                title       - the page's title
                children    - optional list of child PageInfo objects, defaults to a new empty list
        """

        # store
        self.parent = parent
        self.name = name
        self.title = title

        # only substitute a fresh list when nothing was given, so that a list supplied by the
        # caller is stored as-is even when it is (still) empty
        self.children = [] if children is None else children

        # lazily built URL, None until it has been computed
        self._url = None
    
    def set_parent (self, parent) :
        """
            Set a parent where none was set before.

            Asserts that the page does not have a parent yet.
        """

        assert self.parent is None, "page %r already has a parent" % (self.name, )

        self.parent = parent

        # the URL is built from the ancestry, so a previously computed one no longer applies
        # (URLs already cached by this page's descendants are not touched)
        self._url = None

    def add_child (self, child) :
        """
            Add a PageInfo child
        """

        self.children.append(child)

        # having children adds a trailing slash to the URL, so drop any cached value
        self._url = None
    
    def get_child (self, name) :
        """
            Look up a child by name, returning None if not found.

            If several children share the same name, the one latest in the list is returned.
        """

        # scan backwards so that the last child with a matching name wins
        for child in reversed(self.children) :
            if child.name == name :
                return child

        return None

    def get_ancestry (self) :
        """
            Returns a list of this page's parents and the page itself, but not root.

            The list is ordered from the topmost parent down to this page.
        """
        
        # collect in reverse order, starting from self
        ancestry = []
        item = self
        
        # walk upwards; the root is the item without a parent, and is left out
        while item.parent is not None :
            ancestry.append(item)

            item = item.parent

        # turn into top-down order
        ancestry.reverse()
        
        return ancestry

    @property
    def url (self) :
        """
            Build this page's URL: the names of its ancestry joined by '/', with a trailing '/' if
            the page has children. The result is cached until the page's parent or children change
            through set_parent/add_child.
        """

        # compare against None instead of truthiness, as the root's URL is the empty string,
        # which is a perfectly valid cached value
        if self._url is None :
            segments = [item.name for item in self.get_ancestry()]
            
            # add empty segment if dir, which yields the trailing slash
            if self.children :
                segments.append('')
            
            self._url = '/'.join(segments)
        
        return self._url

class PageTree (object) :
    """
        The tree of pages, rooted at .root.

        Use the module-level load() to initialize the global page_tree instance, and then use that
    """

    def __init__ (self) :
        """
            Empty PageTree, _load must be called once to initialize it before use
        """

        # root PageInfo of the tree, set by _load
        self.root = None

    def _load (self, path) :
        """
            Parses the file at the given path and builds the PageInfo tree from it, storing it in .root.

            Each line of the file must have the form "name : title".

            Raises PageTreeError if no root node was found, or if a line is not of that form.
        """
        
        # parse tree
        # NOTE(review): the result is used as a nested (line_number, line, children) tuple, and ':' is
        # presumably the field separator handed to the parser - confirm against tree_parse
        tree = tree_parse.parse(path, ':')

        if not tree :
            raise PageTreeError("No root node found")

        def _create_node (parent, item) :
            """
                Creates and returns a PageInfo from the given parent node and (line_number, line, children) tuple item
            """

            # unpack
            line_number, line, children = item
            
            # parse line, which must contain exactly one ':'
            try :
                name, title = line.split(':')

            except ValueError :
                # only the failed unpacking is translated, anything else (e.g. KeyboardInterrupt) propagates
                raise PageTreeError("Invalid line: %s:%d: %r" % (path, line_number, line))

            # create PageInfo, without the surrounding whitespace
            node = PageInfo(parent, name.strip(), title.strip())
            
            # build the subtree recursively, with this node as the parent
            node.children = [
                _create_node(node, child_item) for child_item in children
            ]

            return node
        
        # translate
        self.root = _create_node(None, tree)
            
        # the root is deliberately listed as the first of its own children: that way it shows up among
        # the top-level pages returned by get_siblings, and can be looked up by its own name.
        # This makes the structure cyclic, so recursive walks (see dump) must skip the self-reference.
        self.root.children.insert(0, self.root)
        
    def get_page (self, url) :
        """
            Lookup the given page URL, and return the matching PageInfo object, or None, if not found.

            Empty segments (leading, trailing or doubled slashes) are ignored, so an empty URL yields the root.
        """
        
        # start from root
        node = self.root
        
        # traverse the object tree, one named segment at a time
        for segment in url.split('/') :
            if not segment :
                continue

            node = node.get_child(segment)

            if node is None :
                return None
        
        return node
    
    def get_siblings (self, url) :
        """
            Get the list of siblings for the given url, including the given page itself.

            For the root, or an URL that does not match any page, the root's children are returned.
        """
        
        # look up the page itself
        page = self.get_page(url)
        
        # specialcase root/unknown node
        if page is None or page.parent is None :
            return self.root.children

        return page.parent.children
    
    def dump (self) :
        """
            Returns a string representation of the tree, one "name : title" line per page, with
            each level indented by four more spaces than its parent
        """
        
        lines = []

        def _collect (indent, node) :
            """
                Appends the line for the given node, followed by those of its subtree
            """

            lines.append("%s%-15s : %s" % (' ' * indent, node.name, node.title))

            for child in node.children :
                # the root is listed among its own children, skip it to avoid recursing forever
                if child is not node :
                    _collect(indent + 4, child)

        _collect(0, self.root)

        return '\n'.join(lines)

# global singleton PageTree instance; it holds no pages until load() has been called
page_tree = PageTree()

def load (path=None) :
    """
        Load the global singleton PageTree instance from the page tree file.

            path    - optional path to the page tree file, defaults to PAGE_TREE_FILE

        Raises PageTreeError if the file does not yield a valid tree.
    """
    
    # resolve the default at call time, so that a changed PAGE_TREE_FILE is honoured
    if path is None :
        path = PAGE_TREE_FILE

    page_tree._load(path)
# author       Tero Marttila <terom@fixme.fi>
#              Sat, 07 Feb 2009 02:51:36 +0200
# changeset 17 b538e1f7011c
# parent 16    4a40718c7b4b
# permissions  -rw-r--r--