"""
Parsing trees of node stored using a python-like syntax.
A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent
"""
class TreeParseError (Exception) :
"""
Error parsing a tree file
"""
pass
def _read_lines (file, stop_tokens=None, charset='utf8') :
"""
Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where
line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
data with whitespace stripped.
File is either a str with the filesystem path, or a something that supports iterating over lines.
Charset is what to decode the lines with, or pass None to not decode.
Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
indentation is taken as zero. Ignored if empty.
"""
# open path?
if isinstance(file, str) :
file = open(file, 'rb')
# iterate over lines
for line_number, line in enumerate(file) :
if charset :
# decode to unicode
line = line.decode(charset)
indent = 0
# count indent
for char in line :
# tabs break things
assert char != '\t'
# increment up to first non-space char
if char == ' ' :
indent += 1
elif stop_tokens and char in stop_tokens :
# consider line as not having any indentation at all
indent = 0
break
else :
break
# strip whitespace
line = line.strip()
# ignore empty lines
if not line :
continue
# yield
yield line_number + 1, indent, line
def parse (file, stop_tokens='', charset='utf8') :
"""
Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
"""
# stack of (indent, PageInfo) items
stack = []
# the root item
root = None
# the previous item processed, None for first one
prev = None
# read lines
for line_number, indent, line in _read_lines(file, stop_tokens, charset) :
# create item
item = (line_number, line, [])
# are we the first item?
if not prev :
# root node does not have a parent
parent = None
# set root
root = item
# initialize stack
stack.append((0, root))
else :
# peek stack
stack_indent, stack_parent = stack[-1]
# new indent level?
if indent > stack_indent :
# set parent to previous item, and push new indent level + parent to stack
parent = prev
# push new indent level + its parent
stack.append((indent, parent))
# same indent level as previous
elif indent == stack_indent :
# parent is the one of the current stack level, stack doesn't change
parent = stack_parent
# unravel stack
elif indent < stack_indent :
while True :
# remove current stack level
stack.pop(-1)
# peek next level
stack_indent, stack_parent = stack[-1]
# found the level to return to?
if stack_indent == indent :
# restore prev
parent = stack_parent
break
elif stack_indent < indent :
raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (file, line_number, stack_indent, indent))
# add to parent?
if parent :
parent[2].append(item)
# update prev
prev = item
# return the root
return root