add tree_parse test and fix tree_parse to handle inputs other than filesystem paths
author Tero Marttila <terom@fixme.fi>
Mon, 16 Feb 2009 19:02:59 +0200
changeset 77 bef7196f7682
parent 76 2f659ff51c75
child 78 a46d2fc07951
add tree_parse test and fix tree_parse to handle inputs other than filesystem paths
tests/__init__.py
tests/test_treeparse.py
tree_parse.py
--- a/tests/__init__.py	Mon Feb 16 19:02:39 2009 +0200
+++ b/tests/__init__.py	Mon Feb 16 19:02:59 2009 +0200
@@ -3,7 +3,7 @@
 """
 
 import unittest
-import test_http
+import test_http, test_treeparse
 
 def all_tests () :
     """
@@ -12,5 +12,6 @@
     
     return unittest.TestSuite(unittest.defaultTestLoader.loadTestsFromModule(module) for module in (
         test_http,
+        test_treeparse,
     ))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_treeparse.py	Mon Feb 16 19:02:59 2009 +0200
@@ -0,0 +1,78 @@
+# :set encoding=utf8
+
+"""
+    Unit tests for qmsk.web.tree_parse
+"""
+
+import unittest
+import tree_parse
+
+class TestTreeParse (unittest.TestCase) :
+    VALID = """\
+foo
+  bar
+   quux
+
+  asdf
+        further
+         still
+
+  and back
+""".split('\n')
+
+    def test_read_lines_valid (self) :
+        self.assertEquals(list(tree_parse._read_lines(self.VALID)), [
+            (1, 0,  "foo"),
+            (2, 2,  "bar"),
+            (3, 3,  "quux"),
+            (5, 2,  "asdf"),
+            (6, 8,  "further"),
+            (7, 9,  "still"),
+            (9, 2,  "and back"),
+        ])
+
+    def _parse (self, *lines, **args) :
+        return list(tree_parse._read_lines(lines, **args))
+    
+    def test_read_lines_decode (self) :
+        data_unicode = u'föö'
+        data_raw = 'föö'
+
+        self.assertEquals(self._parse(data_unicode.encode('utf8'), charset='utf8'), [(1, 0, data_unicode)])
+        self.assertEquals(self._parse(data_unicode, charset=None), [(1, 0, data_unicode)])
+        self.assertEquals(self._parse(data_raw, charset=None), [(1, 0, data_raw)])
+        self.assertRaises(UnicodeDecodeError, self._parse, data_raw, charset='ascii')
+    
+    def test_read_lines_stop (self) :
+        self.assertEquals(self._parse(" : foo"), [(1, 1, ": foo")])
+        self.assertEquals(self._parse("    : foo", stop_tokens=':'), [(1, 0, ": foo")])
+    
+    def test_read_lines_empty (self) :
+        self.assertEquals(self._parse(*("foo\n\n  bar".split('\n'))), [(1, 0, "foo"), (3, 2, "bar")])
+
+    def test_read_lines_strip (self) :
+        self.assertEquals(self._parse(" foo "), [(1, 1, "foo")])
+    
+    def _do (self, *lines, **args) :
+        return tree_parse.parse(lines, **args)
+
+    def test_parse_valid (self) :
+        self.assertEquals(self._do(*self.VALID), 
+            (1, "foo", [
+                (2, "bar", [
+                    (3, "quux", []),
+                ]),
+                (5, "asdf", [
+                    (6, "further", [
+                        (7, "still", []),
+                    ]),
+                ]),
+                (9, "and back", [])
+            ])
+        )
+    
+    def test_parse_invalid (self) :
+        self.assertRaises(tree_parse.TreeParseError, self._do, "foo", "  bar", " quux")
+    
+    # XXX: root is not tested here, as it is still known to be broken
+
--- a/tree_parse.py	Mon Feb 16 19:02:39 2009 +0200
+++ b/tree_parse.py	Mon Feb 16 19:02:59 2009 +0200
@@ -12,19 +12,29 @@
 
     pass
 
-def _read_lines (path, stop_tokens, charset) :
+def _read_lines (file, stop_tokens=None, charset='utf8') :
     """
         Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where 
         line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
         data with whitespace stripped.
 
+        File is either a str giving a filesystem path to open, or any object that supports iterating over lines.
+        
+        Charset is the encoding used to decode each line; pass None to skip decoding.
+
         Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
-        indentation is taken as zero.
+        indentation is taken as zero. Ignored if empty.
     """
-
-    for line_number, line in enumerate(open(path, 'rb')) :
-        # decode to unicode
-        line = line.decode(charset)
+    
+    # open path?
+    if isinstance(file, str) :
+        file = open(file, 'rb')
+    
+    # iterate over lines
+    for line_number, line in enumerate(file) :
+        if charset :
+            # decode to unicode
+            line = line.decode(charset)
 
         indent = 0
 
@@ -37,7 +47,7 @@
             if char == ' ' :
                 indent += 1
             
-            elif char in stop_tokens :
+            elif stop_tokens and char in stop_tokens :
                 # consider line as not having any indentation at all
                 indent = 0
                 break
@@ -55,7 +65,7 @@
         # yield
         yield line_number + 1, indent, line
 
-def parse (path, stop_tokens='', charset='utf8') :
+def parse (file, stop_tokens='', charset='utf8') :
     """
         Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
     """
@@ -70,7 +80,7 @@
     prev = None
     
     # read lines
-    for line_number, indent, line in _read_lines(path, stop_tokens, charset) :
+    for line_number, indent, line in _read_lines(file, stop_tokens, charset) :
         # create item
         item = (line_number, line, [])
 
@@ -119,7 +129,7 @@
                         break
 
                     elif stack_indent < indent :
-                        raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (path, line_number, stack_indent, indent))
+                        raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (file, line_number, stack_indent, indent))
         
         # add to parent?
         if parent :