--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/tree_parse.py Sat Feb 07 02:46:58 2009 +0200
@@ -0,0 +1,123 @@
+
+"""
+ Parsing trees of node stored using a python-like syntax.
+
+ A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent
+"""
+
+def _read_lines (path, stop_tokens='') :
+ """
+ Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where
+ line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
+ data with whitespace stripped.
+
+ Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
+ indentation is taken as zero.
+ """
+
+ for line_number, line in enumerate(open(path, 'rb')) :
+ indent = 0
+
+ # count indent
+ for char in line :
+ # tabs break things
+ assert char != '\t'
+
+ # increment up to first non-space char
+ if char == ' ' :
+ indent += 1
+
+ elif char in stop_tokens :
+ # consider line as not having any indentation at all
+ indent = 0
+ break
+
+ else :
+ break
+
+ # strip whitespace
+ line = line.strip()
+
+ # ignore empty lines
+ if not line :
+ continue
+
+ # yield
+ yield line_number + 1, indent, line
+
+def parse (path, stop_tokens='') :
+ """
+ Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
+ """
+
+ # stack of (indent, PageInfo) items
+ stack = []
+
+ # the root item
+ root = None
+
+ # the previous item processed, None for first one
+ prev = None
+
+ # read lines
+ for line_number, indent, line in _read_lines(path, stop_tokens) :
+ # create item
+ item = (line_number, line, [])
+
+ # are we the first item?
+ if not prev :
+ # root node does not have a parent
+ parent = None
+
+ # set root
+ root = item
+
+ # initialize stack
+ stack.append((0, root))
+
+ else :
+ # peek stack
+ stack_indent, stack_parent = stack[-1]
+
+ # new indent level?
+ if indent > stack_indent :
+ # set parent to previous item, and push new indent level + parent to stack
+ parent = prev
+
+ # push new indent level + its parent
+ stack.append((indent, parent))
+
+ # same indent level as previous
+ elif indent == stack_indent :
+ # parent is the one of the current stack level, stack doesn't change
+ parent = stack_parent
+
+ # unravel stack
+ elif indent < stack_indent :
+ while True :
+ # remove current stack level
+ stack.pop(-1)
+
+ # peek next level
+ stack_indent, stack_parent = stack[-1]
+
+ # found the level to return to?
+ if stack_indent == indent :
+ # restore prev
+ parent = stack_parent
+
+ break
+
+ elif stack_indent < indent :
+ assert False, "Bad un-indent"
+
+ # add to parent?
+ if parent :
+ parent[2].append(item)
+
+ # update prev
+ prev = item
+
+ # return the root
+ return root
+