lib/tree_parse.py
author Tero Marttila <terom@fixme.fi>
Sat, 07 Feb 2009 02:51:36 +0200
changeset 17 b538e1f7011c
parent 16 4a40718c7b4b
child 21 b05979822dee
permissions -rw-r--r--
add some better error checking to the page_tree stuff

"""
    Parsing trees of node stored using a python-like syntax.

    A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent
"""

class TreeParseError (Exception) :
    """
        Error parsing a tree file
    """

    pass

def _read_lines (path, stop_tokens='') :
    """
        Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where 
        line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
        data with whitespace stripped.

        Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
        indentation is taken as zero.
    """

    for line_number, line in enumerate(open(path, 'rb')) :
        indent = 0

        # count indent
        for char in line :
            # tabs break things
            assert char != '\t'
            
            # increment up to first non-space char
            if char == ' ' :
                indent += 1
            
            elif char in stop_tokens :
                # consider line as not having any indentation at all
                indent = 0
                break

            else :
                break
        
        # strip whitespace
        line = line.strip()

        # ignore empty lines
        if not line :
            continue

        # yield
        yield line_number + 1, indent, line

def parse (path, stop_tokens='') :
    """
        Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
    """

    # stack of (indent, PageInfo) items
    stack = []

    # the root item
    root = None

    # the previous item processed, None for first one
    prev = None
    
    # read lines
    for line_number, indent, line in _read_lines(path, stop_tokens) :
        # create item
        item = (line_number, line, [])

        # are we the first item?
        if not prev :
            # root node does not have a parent
            parent = None
            
            # set root
            root = item

            # initialize stack
            stack.append((0, root))
            
        else :
            # peek stack
            stack_indent, stack_parent = stack[-1]

            # new indent level?
            if indent > stack_indent :
                # set parent to previous item, and push new indent level + parent to stack
                parent = prev

                # push new indent level + its parent
                stack.append((indent, parent))

            # same indent level as previous
            elif indent == stack_indent :
                # parent is the one of the current stack level, stack doesn't change
                parent = stack_parent
            
            # unravel stack
            elif indent < stack_indent :
                while True :
                    # remove current stack level
                    stack.pop(-1)

                    # peek next level
                    stack_indent, stack_parent = stack[-1]
                    
                    # found the level to return to?
                    if stack_indent == indent :
                        # restore prev
                        parent = stack_parent

                        break

                    elif stack_indent < indent :
                        raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (path, line_number, stack_indent, indent))
        
        # add to parent?
        if parent :
            parent[2].append(item)

        # update prev
        prev = item
    
    # return the root
    return root