lib/tree_parse.py
changeset 16 4a40718c7b4b
child 17 b538e1f7011c
equal deleted inserted replaced
15:e2fe2baa7910 16:4a40718c7b4b
       
     1 
       
     2 """
       
     3     Parsing trees of nodes stored using a python-like syntax.
       
     4 
       
     5     A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent, determined by its indentation; the first line is the root and has no parent.
       
     6 """
       
     7 
       
     8 def _read_lines (path, stop_tokens='') :
       
     9     """
       
    10         Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where 
       
    11         line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
       
    12         data with whitespace stripped.
       
    13 
       
    14         Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
       
    15         indentation is taken as zero.
       
    16     """
       
    17 
       
    18     for line_number, line in enumerate(open(path, 'rb')) :
       
    19         indent = 0
       
    20 
       
    21         # count indent
       
    22         for char in line :
       
    23             # tabs break things
       
    24             assert char != '\t'
       
    25             
       
    26             # increment up to first non-space char
       
    27             if char == ' ' :
       
    28                 indent += 1
       
    29             
       
    30             elif char in stop_tokens :
       
    31                 # consider line as not having any indentation at all
       
    32                 indent = 0
       
    33                 break
       
    34 
       
    35             else :
       
    36                 break
       
    37         
       
    38         # strip whitespace
       
    39         line = line.strip()
       
    40 
       
    41         # ignore empty lines
       
    42         if not line :
       
    43             continue
       
    44 
       
    45         # yield
       
    46         yield line_number + 1, indent, line
       
    47 
       
    48 def parse (path, stop_tokens='') :
       
    49     """
       
    50         Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
       
    51     """
       
    52 
       
    53     # stack of (indent, PageInfo) items
       
    54     stack = []
       
    55 
       
    56     # the root item
       
    57     root = None
       
    58 
       
    59     # the previous item processed, None for first one
       
    60     prev = None
       
    61     
       
    62     # read lines
       
    63     for line_number, indent, line in _read_lines(path, stop_tokens) :
       
    64         # create item
       
    65         item = (line_number, line, [])
       
    66 
       
    67         # are we the first item?
       
    68         if not prev :
       
    69             # root node does not have a parent
       
    70             parent = None
       
    71             
       
    72             # set root
       
    73             root = item
       
    74 
       
    75             # initialize stack
       
    76             stack.append((0, root))
       
    77             
       
    78         else :
       
    79             # peek stack
       
    80             stack_indent, stack_parent = stack[-1]
       
    81 
       
    82             # new indent level?
       
    83             if indent > stack_indent :
       
    84                 # set parent to previous item, and push new indent level + parent to stack
       
    85                 parent = prev
       
    86 
       
    87                 # push new indent level + its parent
       
    88                 stack.append((indent, parent))
       
    89 
       
    90             # same indent level as previous
       
    91             elif indent == stack_indent :
       
    92                 # parent is the one of the current stack level, stack doesn't change
       
    93                 parent = stack_parent
       
    94             
       
    95             # unravel stack
       
    96             elif indent < stack_indent :
       
    97                 while True :
       
    98                     # remove current stack level
       
    99                     stack.pop(-1)
       
   100 
       
   101                     # peek next level
       
   102                     stack_indent, stack_parent = stack[-1]
       
   103                     
       
   104                     # found the level to return to?
       
   105                     if stack_indent == indent :
       
   106                         # restore prev
       
   107                         parent = stack_parent
       
   108 
       
   109                         break
       
   110 
       
   111                     elif stack_indent < indent :
       
   112                         assert False, "Bad un-indent"
       
   113         
       
   114         # add to parent?
       
   115         if parent :
       
   116             parent[2].append(item)
       
   117 
       
   118         # update prev
       
   119         prev = item
       
   120     
       
   121     # return the root
       
   122     return root
       
   123