lib/tree_parse.py
changeset 46 73aef9705d6c
parent 45 e94ab812c0c8
child 47 2cceeb731950
equal deleted inserted replaced
45:e94ab812c0c8 46:73aef9705d6c
     1 
       
     2 """
       
     3     Parsing trees of nodes stored using a python-like syntax.
       
     4 
       
     5     A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent line, which is determined by its indentation.
       
     6 """
       
     7 
       
     8 class TreeParseError (Exception) :
       
     9     """
       
    10         Error parsing a tree file
       
    11     """
       
    12 
       
    13     pass
       
    14 
       
    15 def _read_lines (path, stop_tokens, charset) :
       
    16     """
       
    17         Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where 
       
    18         line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
       
    19         data with whitespace stripped.
       
    20 
       
    21         Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
       
    22         indentation is taken as zero.
       
    23     """
       
    24 
       
    25     for line_number, line in enumerate(open(path, 'rb')) :
       
    26         # decode to unicode
       
    27         line = line.decode(charset)
       
    28 
       
    29         indent = 0
       
    30 
       
    31         # count indent
       
    32         for char in line :
       
    33             # tabs break things
       
    34             assert char != '\t'
       
    35             
       
    36             # increment up to first non-space char
       
    37             if char == ' ' :
       
    38                 indent += 1
       
    39             
       
    40             elif char in stop_tokens :
       
    41                 # consider line as not having any indentation at all
       
    42                 indent = 0
       
    43                 break
       
    44 
       
    45             else :
       
    46                 break
       
    47         
       
    48         # strip whitespace
       
    49         line = line.strip()
       
    50 
       
    51         # ignore empty lines
       
    52         if not line :
       
    53             continue
       
    54 
       
    55         # yield
       
    56         yield line_number + 1, indent, line
       
    57 
       
    58 def parse (path, stop_tokens='', charset='utf8') :
       
    59     """
       
    60         Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
       
    61     """
       
    62 
       
    63     # stack of (indent, PageInfo) items
       
    64     stack = []
       
    65 
       
    66     # the root item
       
    67     root = None
       
    68 
       
    69     # the previous item processed, None for first one
       
    70     prev = None
       
    71     
       
    72     # read lines
       
    73     for line_number, indent, line in _read_lines(path, stop_tokens, charset) :
       
    74         # create item
       
    75         item = (line_number, line, [])
       
    76 
       
    77         # are we the first item?
       
    78         if not prev :
       
    79             # root node does not have a parent
       
    80             parent = None
       
    81             
       
    82             # set root
       
    83             root = item
       
    84 
       
    85             # initialize stack
       
    86             stack.append((0, root))
       
    87             
       
    88         else :
       
    89             # peek stack
       
    90             stack_indent, stack_parent = stack[-1]
       
    91 
       
    92             # new indent level?
       
    93             if indent > stack_indent :
       
    94                 # set parent to previous item, and push new indent level + parent to stack
       
    95                 parent = prev
       
    96 
       
    97                 # push new indent level + its parent
       
    98                 stack.append((indent, parent))
       
    99 
       
   100             # same indent level as previous
       
   101             elif indent == stack_indent :
       
   102                 # parent is the one of the current stack level, stack doesn't change
       
   103                 parent = stack_parent
       
   104             
       
   105             # unravel stack
       
   106             elif indent < stack_indent :
       
   107                 while True :
       
   108                     # remove current stack level
       
   109                     stack.pop(-1)
       
   110 
       
   111                     # peek next level
       
   112                     stack_indent, stack_parent = stack[-1]
       
   113                     
       
   114                     # found the level to return to?
       
   115                     if stack_indent == indent :
       
   116                         # restore prev
       
   117                         parent = stack_parent
       
   118 
       
   119                         break
       
   120 
       
   121                     elif stack_indent < indent :
       
   122                         raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (path, line_number, stack_indent, indent))
       
   123         
       
   124         # add to parent?
       
   125         if parent :
       
   126             parent[2].append(item)
       
   127 
       
   128         # update prev
       
   129         prev = item
       
   130     
       
   131     # return the root
       
   132     return root
       
   133