tree_parse.py
changeset 77 bef7196f7682
parent 46 54c5f5f340de
--- a/tree_parse.py	Mon Feb 16 19:02:39 2009 +0200
+++ b/tree_parse.py	Mon Feb 16 19:02:59 2009 +0200
@@ -12,19 +12,29 @@
 
     pass
 
-def _read_lines (path, stop_tokens, charset) :
+def _read_lines (file, stop_tokens=None, charset='utf8') :
     """
         Reads lines from the given path, ignoring empty lines, and yielding (line_number, indent, line) tuples, where 
         line_number is the line number, indent counts the amount of leading whitespace, and line is the actual line
         data with whitespace stripped.
 
+        File is either a str with the filesystem path, or something that supports iterating over lines.
+        
+        Charset is the encoding used to decode each line; pass None to skip decoding.
+
         Stop tokens is a list of chars to stop counting indentation on - if such a line begins with such a char, its
-        indentation is taken as zero.
+        indentation is taken as zero. Ignored if None or empty.
     """
-
-    for line_number, line in enumerate(open(path, 'rb')) :
-        # decode to unicode
-        line = line.decode(charset)
+    
+    # open path?
+    if isinstance(file, str) :
+        file = open(file, 'rb')
+    
+    # iterate over lines
+    for line_number, line in enumerate(file) :
+        if charset :
+            # decode to unicode
+            line = line.decode(charset)
 
         indent = 0
 
@@ -37,7 +47,7 @@
             if char == ' ' :
                 indent += 1
             
-            elif char in stop_tokens :
+            elif stop_tokens and char in stop_tokens :
                 # consider line as not having any indentation at all
                 indent = 0
                 break
@@ -55,7 +65,7 @@
         # yield
         yield line_number + 1, indent, line
 
-def parse (path, stop_tokens='', charset='utf8') :
+def parse (file, stop_tokens='', charset='utf8') :
     """
         Reads and parses the file at the given path, returning a list of (line_number, line, children) tuples.
     """
@@ -70,7 +80,7 @@
     prev = None
     
     # read lines
-    for line_number, indent, line in _read_lines(path, stop_tokens, charset) :
+    for line_number, indent, line in _read_lines(file, stop_tokens, charset) :
         # create item
         item = (line_number, line, [])
 
@@ -119,7 +129,7 @@
                         break
 
                     elif stack_indent < indent :
-                        raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (path, line_number, stack_indent, indent))
+                        raise TreeParseError("Bad unindent on %s:%d, %d < %d" % (file, line_number, stack_indent, indent))
         
         # add to parent?
         if parent :