|
1 |
|
2 """ |
|
3 Parsing trees of nodes stored using a Python-like syntax. |
|
4 |
|
5 A file consists of a number of lines, and each line consists of indenting whitespace and data. Each line has a parent |
|
6 """ |
|
7 |
|
def _read_lines(path, stop_tokens=''):
    """
    Yield a (line_number, indent, line) tuple for each non-blank line of the file at `path`.

    line_number is 1-based and counts every physical line, including the blank ones that are skipped.
    indent is the number of leading space characters on the line.
    line is the line's text with surrounding whitespace stripped.

    stop_tokens is a collection of characters (a string or a list of one-char strings). If the first
    non-space character of a line is one of them, the line's indent is reported as zero, no matter how
    many spaces precede it.

    Raises AssertionError if the first non-space character of a line is a tab, since tabs would make the
    indent count ambiguous.
    """

    # Open in text mode so every character is a one-char string; in binary mode Python 3 would hand out
    # integers, which never compare equal to ' ' or '\t'. The with-block closes the file once the
    # generator is exhausted or discarded.
    with open(path) as source:
        for line_number, raw in enumerate(source, 1):
            # everything after the leading run of spaces
            remainder = raw.lstrip(' ')
            indent = len(raw) - len(remainder)

            # the first non-space character, or '' if the line holds nothing but spaces
            first = remainder[:1]

            # tabs break things; raise explicitly so the check survives `python -O`
            if first == '\t':
                raise AssertionError("tab in indentation on line %d of %s" % (line_number, path))

            # a stop token means the line is treated as not indented at all
            # (the `first and` guard matters because '' is "in" every string)
            if first and first in stop_tokens:
                indent = 0

            # ignore lines that are empty once whitespace is stripped
            text = raw.strip()
            if text:
                yield line_number, indent, text
|
47 |
|
def parse(path, stop_tokens=''):
    """
    Read the file at `path` and build a tree from the indentation of its lines.

    Every non-blank line becomes a (line_number, line, children) tuple, where children is a list of the
    same kind of tuple. The first non-blank line is the root; its own indentation is ignored. Every later
    line is attached either to the line it is indented under, or, when it sits at indent zero, directly
    to the root.

    stop_tokens is handed through to _read_lines unchanged.

    Returns the root tuple, or None if the file contains no non-blank lines.

    Raises AssertionError("Bad un-indent") if a line un-indents to a level that was never opened.
    """

    # stack of (indent, parent) pairs: `parent` is the item that lines at `indent` get attached to
    stack = []

    # the root item, None until the first line has been read
    root = None

    # the previous item processed, None for the first one
    prev = None

    for line_number, indent, line in _read_lines(path, stop_tokens):
        item = (line_number, line, [])

        if prev is None:
            # the first item is the root and has no parent; later lines at indent zero become its children
            root = item
            stack.append((0, root))
            prev = item
            continue

        # peek the current level
        stack_indent, parent = stack[-1]

        if indent > stack_indent:
            # deeper than the current level: the previous item opens a new level and is its parent
            parent = prev
            stack.append((indent, parent))

        elif indent < stack_indent:
            # shallower: drop levels until one that is not deeper than this line. The bottom entry has
            # indent zero and indents are never negative, so the stack cannot run empty here.
            while stack_indent > indent:
                stack.pop()
                stack_indent, parent = stack[-1]

            # the line must land exactly on a level that is still open; raised explicitly rather than
            # asserted so the check is not stripped by `python -O`
            if stack_indent != indent:
                raise AssertionError("Bad un-indent")

        # (indent == stack_indent needs nothing: the parent is the one of the current level)

        parent[2].append(item)
        prev = item

    return root
|
123 |