lib/tree_parse.py
changeset 16 4a40718c7b4b
child 17 b538e1f7011c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/tree_parse.py	Sat Feb 07 02:46:58 2009 +0200
@@ -0,0 +1,123 @@
+
+"""
+    Parsing trees of node stored using a python-like syntax.
+
+    A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent
+"""
+
+def _read_lines (path, stop_tokens='') :
+    """
+        Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where 
+        line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
+        data with whitespace stripped.
+
+        Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
+        indentation is taken as zero.
+    """
+
+    for line_number, line in enumerate(open(path, 'rb')) :
+        indent = 0
+
+        # count indent
+        for char in line :
+            # tabs break things
+            assert char != '\t'
+            
+            # increment up to first non-space char
+            if char == ' ' :
+                indent += 1
+            
+            elif char in stop_tokens :
+                # consider line as not having any indentation at all
+                indent = 0
+                break
+
+            else :
+                break
+        
+        # strip whitespace
+        line = line.strip()
+
+        # ignore empty lines
+        if not line :
+            continue
+
+        # yield
+        yield line_number + 1, indent, line
+
+def parse (path, stop_tokens='') :
+    """
+        Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
+    """
+
+    # stack of (indent, PageInfo) items
+    stack = []
+
+    # the root item
+    root = None
+
+    # the previous item processed, None for first one
+    prev = None
+    
+    # read lines
+    for line_number, indent, line in _read_lines(path, stop_tokens) :
+        # create item
+        item = (line_number, line, [])
+
+        # are we the first item?
+        if not prev :
+            # root node does not have a parent
+            parent = None
+            
+            # set root
+            root = item
+
+            # initialize stack
+            stack.append((0, root))
+            
+        else :
+            # peek stack
+            stack_indent, stack_parent = stack[-1]
+
+            # new indent level?
+            if indent > stack_indent :
+                # set parent to previous item, and push new indent level + parent to stack
+                parent = prev
+
+                # push new indent level + its parent
+                stack.append((indent, parent))
+
+            # same indent level as previous
+            elif indent == stack_indent :
+                # parent is the one of the current stack level, stack doesn't change
+                parent = stack_parent
+            
+            # unravel stack
+            elif indent < stack_indent :
+                while True :
+                    # remove current stack level
+                    stack.pop(-1)
+
+                    # peek next level
+                    stack_indent, stack_parent = stack[-1]
+                    
+                    # found the level to return to?
+                    if stack_indent == indent :
+                        # restore prev
+                        parent = stack_parent
+
+                        break
+
+                    elif stack_indent < indent :
+                        assert False, "Bad un-indent"
+        
+        # add to parent?
+        if parent :
+            parent[2].append(item)
+
+        # update prev
+        prev = item
+    
+    # return the root
+    return root
+