# HG changeset patch # User Tero Marttila # Date 1234803779 -7200 # Node ID bef7196f7682c4fd78cf9637f375f6eb0e8fe889 # Parent 2f659ff51c757bb6874873f6173377f708fdbf64 add tree_parse test and fix treeparse to handle other than filesystem paths diff -r 2f659ff51c75 -r bef7196f7682 tests/__init__.py --- a/tests/__init__.py Mon Feb 16 19:02:39 2009 +0200 +++ b/tests/__init__.py Mon Feb 16 19:02:59 2009 +0200 @@ -3,7 +3,7 @@ """ import unittest -import test_http +import test_http, test_treeparse def all_tests () : """ @@ -12,5 +12,6 @@ return unittest.TestSuite(unittest.defaultTestLoader.loadTestsFromModule(module) for module in ( test_http, + test_treeparse, )) diff -r 2f659ff51c75 -r bef7196f7682 tests/test_treeparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_treeparse.py Mon Feb 16 19:02:59 2009 +0200 @@ -0,0 +1,78 @@ +# :set encoding=utf8 + +""" + Unit tests for qmsk.web.tree_parse +""" + +import unittest +import tree_parse + +class TestTreeParse (unittest.TestCase) : + VALID = """\ +foo + bar + quux + + asdf + further + still + + and back +""".split('\n') + + def test_read_lines_valid (self) : + self.assertEquals(list(tree_parse._read_lines(self.VALID)), [ + (1, 0, "foo"), + (2, 2, "bar"), + (3, 3, "quux"), + (5, 2, "asdf"), + (6, 8, "further"), + (7, 9, "still"), + (9, 2, "and back"), + ]) + + def _parse (self, *lines, **args) : + return list(tree_parse._read_lines(lines, **args)) + + def test_read_lines_decode (self) : + data_unicode = u'föö' + data_raw = 'föö' + + self.assertEquals(self._parse(data_unicode.encode('utf8'), charset='utf8'), [(1, 0, data_unicode)]) + self.assertEquals(self._parse(data_unicode, charset=None), [(1, 0, data_unicode)]) + self.assertEquals(self._parse(data_raw, charset=None), [(1, 0, data_raw)]) + self.assertRaises(UnicodeDecodeError, self._parse, data_raw, charset='ascii') + + def test_read_lines_stop (self) : + self.assertEquals(self._parse(" : foo"), [(1, 1, ": foo")]) + self.assertEquals(self._parse(" : foo", 
stop_tokens=':'), [(1, 0, ": foo")]) + + def test_read_lines_empty (self) : + self.assertEquals(self._parse(*("foo\n\n bar".split('\n'))), [(1, 0, "foo"), (3, 2, "bar")]) + + def test_read_lines_strip (self) : + self.assertEquals(self._parse(" foo "), [(1, 1, "foo")]) + + def _do (self, *lines, **args) : + return tree_parse.parse(lines, **args) + + def test_parse_valid (self) : + self.assertEquals(self._do(*self.VALID), + (1, "foo", [ + (2, "bar", [ + (3, "quux", []), + ]), + (5, "asdf", [ + (6, "further", [ + (7, "still", []), + ]), + ]), + (9, "and back", []) + ]) + ) + + def test_parse_invalid (self) : + self.assertRaises(tree_parse.TreeParseError, self._do, "foo", " bar", " quux") + + # XXX: don't test root, as that's known-broken still + diff -r 2f659ff51c75 -r bef7196f7682 tree_parse.py --- a/tree_parse.py Mon Feb 16 19:02:39 2009 +0200 +++ b/tree_parse.py Mon Feb 16 19:02:59 2009 +0200 @@ -12,19 +12,29 @@ pass -def _read_lines (path, stop_tokens, charset) : +def _read_lines (file, stop_tokens=None, charset='utf8') : """ Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line data with whitespace stripped. + File is either a str with the filesystem path, or something that supports iterating over lines. + + Charset is what to decode the lines with, or pass None to not decode. + Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its - indentation is taken as zero. + indentation is taken as zero. Ignored if empty. """ - - for line_number, line in enumerate(open(path, 'rb')) : - # decode to unicode - line = line.decode(charset) + + # open path? 
+ if isinstance(file, str) : + file = open(file, 'rb') + + # iterate over lines + for line_number, line in enumerate(file) : + if charset : + # decode to unicode + line = line.decode(charset) indent = 0 @@ -37,7 +47,7 @@ if char == ' ' : indent += 1 - elif char in stop_tokens : + elif stop_tokens and char in stop_tokens : # consider line as not having any indentation at all indent = 0 break @@ -55,7 +65,7 @@ # yield yield line_number + 1, indent, line -def parse (path, stop_tokens='', charset='utf8') : +def parse (file, stop_tokens='', charset='utf8') : """ Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples. """ @@ -70,7 +80,7 @@ prev = None # read lines - for line_number, indent, line in _read_lines(path, stop_tokens, charset) : + for line_number, indent, line in _read_lines(file, stop_tokens, charset) : # create item item = (line_number, line, []) @@ -119,7 +129,7 @@ break elif stack_indent < indent : - raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (path, line_number, stack_indent, indent)) + raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (file, line_number, stack_indent, indent)) # add to parent? if parent :