diff -r e2fe2baa7910 -r 4a40718c7b4b lib/tree_parse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/tree_parse.py Sat Feb 07 02:46:58 2009 +0200 @@ -0,0 +1,123 @@ + +""" + Parsing trees of node stored using a python-like syntax. + + A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent +""" + +def _read_lines (path, stop_tokens='') : + """ + Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where + line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line + data with whitespace stripped. + + Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its + indentation is taken as zero. + """ + + for line_number, line in enumerate(open(path, 'rb')) : + indent = 0 + + # count indent + for char in line : + # tabs break things + assert char != '\t' + + # increment up to first non-space char + if char == ' ' : + indent += 1 + + elif char in stop_tokens : + # consider line as not having any indentation at all + indent = 0 + break + + else : + break + + # strip whitespace + line = line.strip() + + # ignore empty lines + if not line : + continue + + # yield + yield line_number + 1, indent, line + +def parse (path, stop_tokens='') : + """ + Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples. + """ + + # stack of (indent, PageInfo) items + stack = [] + + # the root item + root = None + + # the previous item processed, None for first one + prev = None + + # read lines + for line_number, indent, line in _read_lines(path, stop_tokens) : + # create item + item = (line_number, line, []) + + # are we the first item? + if not prev : + # root node does not have a parent + parent = None + + # set root + root = item + + # initialize stack + stack.append((0, root)) + + else : + # peek stack + stack_indent, stack_parent = stack[-1] + + # new indent level? + if indent > stack_indent : + # set parent to previous item, and push new indent level + parent to stack + parent = prev + + # push new indent level + its parent + stack.append((indent, parent)) + + # same indent level as previous + elif indent == stack_indent : + # parent is the one of the current stack level, stack doesn't change + parent = stack_parent + + # unravel stack + elif indent < stack_indent : + while True : + # remove current stack level + stack.pop(-1) + + # peek next level + stack_indent, stack_parent = stack[-1] + + # found the level to return to? + if stack_indent == indent : + # restore prev + parent = stack_parent + + break + + elif stack_indent < indent : + assert False, "Bad un-indent" + + # add to parent? + if parent : + parent[2].append(item) + + # update prev + prev = item + + # return the root + return root +