--- a/tests/__init__.py Mon Feb 16 19:02:39 2009 +0200
+++ b/tests/__init__.py Mon Feb 16 19:02:59 2009 +0200
@@ -3,7 +3,7 @@
"""
import unittest
-import test_http
+import test_http, test_treeparse
def all_tests () :
"""
@@ -12,5 +12,6 @@
return unittest.TestSuite(unittest.defaultTestLoader.loadTestsFromModule(module) for module in (
test_http,
+ test_treeparse,
))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_treeparse.py Mon Feb 16 19:02:59 2009 +0200
@@ -0,0 +1,120 @@
+# :set encoding=utf8
+
+"""
+    Unit tests for qmsk.web.tree_parse
+"""
+
+import unittest
+import tree_parse
+
+class TestTreeParse (unittest.TestCase) :
+    """
+        Tests for tree_parse._read_lines (line scanning) and tree_parse.parse (tree building).
+    """
+
+    # Sample tree. Nesting is carried purely by the number of leading spaces on each line
+    # (0/2/3/2/8/9/2), and the expected tuples in the tests below rely on these exact counts.
+    VALID = """\
+foo
+  bar
+   quux
+
+  asdf
+        further
+         still
+
+  and back
+""".split('\n')
+
+    def test_read_lines_valid (self) :
+        """
+            Blank lines yield nothing, but still count towards the reported line numbers.
+        """
+
+        self.assertEqual(list(tree_parse._read_lines(self.VALID)), [
+            (1, 0, "foo"),
+            (2, 2, "bar"),
+            (3, 3, "quux"),
+            (5, 2, "asdf"),
+            (6, 8, "further"),
+            (7, 9, "still"),
+            (9, 2, "and back"),
+        ])
+
+    def _parse (self, *lines, **args) :
+        """
+            Run _read_lines over the given lines and collect the yielded (line_number, indent, line) tuples.
+        """
+
+        return list(tree_parse._read_lines(lines, **args))
+
+    def test_read_lines_decode (self) :
+        """
+            Lines are decoded with the given charset, or passed through untouched when charset is None.
+        """
+
+        data_unicode = u'föö'
+        data_raw = 'föö'
+
+        self.assertEqual(self._parse(data_unicode.encode('utf8'), charset='utf8'), [(1, 0, data_unicode)])
+        self.assertEqual(self._parse(data_unicode, charset=None), [(1, 0, data_unicode)])
+        self.assertEqual(self._parse(data_raw, charset=None), [(1, 0, data_raw)])
+        self.assertRaises(UnicodeDecodeError, self._parse, data_raw, charset='ascii')
+
+    def test_read_lines_stop (self) :
+        """
+            A line that starts with a stop token is reported with zero indentation.
+        """
+
+        self.assertEqual(self._parse(" : foo"), [(1, 1, ": foo")])
+        self.assertEqual(self._parse(" : foo", stop_tokens=':'), [(1, 0, ": foo")])
+
+    def test_read_lines_empty (self) :
+        """
+            Empty lines are skipped; "bar" carries two leading spaces to match the expected indent of 2.
+        """
+
+        self.assertEqual(self._parse(*("foo\n\n  bar".split('\n'))), [(1, 0, "foo"), (3, 2, "bar")])
+
+    def test_read_lines_strip (self) :
+        """
+            Surrounding whitespace is stripped from the yielded line.
+        """
+
+        self.assertEqual(self._parse(" foo "), [(1, 1, "foo")])
+
+    def _do (self, *lines, **args) :
+        """
+            Run tree_parse.parse over the given lines.
+        """
+
+        return tree_parse.parse(lines, **args)
+
+    def test_parse_valid (self) :
+        """
+            The sample input parses into nested (line_number, line, children) tuples.
+        """
+
+        self.assertEqual(self._do(*self.VALID),
+            (1, "foo", [
+                (2, "bar", [
+                    (3, "quux", []),
+                ]),
+                (5, "asdf", [
+                    (6, "further", [
+                        (7, "still", []),
+                    ]),
+                ]),
+                (9, "and back", [])
+            ])
+        )
+
+    def test_parse_invalid (self) :
+        """
+            "quux" unindents to a level (2) that no enclosing line uses (0 and 4), which must be rejected.
+        """
+
+        self.assertRaises(tree_parse.TreeParseError, self._do, "foo", "    bar", "  quux")
+
+    # XXX: don't test root, as that's known-broken still
+
--- a/tree_parse.py Mon Feb 16 19:02:39 2009 +0200
+++ b/tree_parse.py Mon Feb 16 19:02:59 2009 +0200
@@ -12,19 +12,29 @@
pass
-def _read_lines (path, stop_tokens, charset) :
+def _read_lines (file, stop_tokens=None, charset='utf8') :
"""
Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where
line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
data with whitespace stripped.
+    File is either a str with the filesystem path, or something that supports iterating over lines.
+
+ Charset is what to decode the lines with, or pass None to not decode.
+
Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
- indentation is taken as zero.
+ indentation is taken as zero. Ignored if empty.
"""
-
- for line_number, line in enumerate(open(path, 'rb')) :
- # decode to unicode
- line = line.decode(charset)
+
+ # open path?
+ if isinstance(file, str) :
+ file = open(file, 'rb')
+
+ # iterate over lines
+ for line_number, line in enumerate(file) :
+ if charset :
+ # decode to unicode
+ line = line.decode(charset)
indent = 0
@@ -37,7 +47,7 @@
if char == ' ' :
indent += 1
- elif char in stop_tokens :
+ elif stop_tokens and char in stop_tokens :
# consider line as not having any indentation at all
indent = 0
break
@@ -55,7 +65,7 @@
# yield
yield line_number + 1, indent, line
-def parse (path, stop_tokens='', charset='utf8') :
+def parse (file, stop_tokens='', charset='utf8') :
"""
Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
"""
@@ -70,7 +80,7 @@
prev = None
# read lines
- for line_number, indent, line in _read_lines(path, stop_tokens, charset) :
+ for line_number, indent, line in _read_lines(file, stop_tokens, charset) :
# create item
item = (line_number, line, [])
@@ -119,7 +129,7 @@
break
elif stack_indent < indent :
- raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (path, line_number, stack_indent, indent))
+ raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (file, line_number, stack_indent, indent))
# add to parent?
if parent :