# HG changeset patch
# User Tero Marttila <terom@paivola.fi>
# Date 1378895663 -10800
# Node ID cce9cf4933ca05fab937f05e99e3ba7266b4f6cb
# Parent  08a63738f2d1fa7a499eb031371194e7044338c1
pvl.dns.zone: more multi-line support in the parser..

diff -r 08a63738f2d1 -r cce9cf4933ca pvl/dns/zone.py
--- a/pvl/dns/zone.py	Tue Sep 10 17:17:57 2013 +0300
+++ b/pvl/dns/zone.py	Wed Sep 11 13:34:23 2013 +0300
@@ -31,7 +31,6 @@
 
     # data
     indent = None # was the line indented?
-    data = None
     parts = None # split line fields
 
     # optional
@@ -40,70 +39,17 @@
 
     PARSE_DATETIME_FORMAT = '%Y-%m-%d'
 
-    @classmethod
-    def parse (cls, file, lineno, line, line_timestamp_prefix=False) :
-        """
-            Parse out given line and build.
-        """
-
-        log.debug("parse: %s:%d: %s", file, lineno, line)
-
-        ts = None
-
-        if line_timestamp_prefix :
-            if ': ' not in line :
-                raise ZoneError("%s:%d: Missing timestamp prefix: %s" % (file, lineno, line))
-
-            # split prefix
-            prefix, line = line.split(': ', 1)
-
-            # parse it out
-            ts = datetime.strptime(prefix, cls.PARSE_DATETIME_FORMAT)
-
-            log.debug("  ts=%r", ts)
-
-        # was line indented?
-        indent = line.startswith(' ') or line.startswith('\t')
-        
-        # strip
-        line = line.strip()
-        
-        log.debug("  indent=%r, line=%r", indent, line)
-
-        # parse comment out?
-        if ';' in line :
-            line, comment = line.split(';', 1)
-
-            line = line.strip()
-            comment = comment.strip()
-
-        else :
-            line = line.strip()
-            comment = None
-        
-        log.debug("  line=%r, comment=%r", line, comment)
-
-        # parse fields
-        if '"' in line :
-            pre, data, post = line.split('"', 2)
-            parts = pre.split() + [data] + post.split()
-           
-        else :
-            parts = line.split()
-
-        log.debug("  parts=%r", parts)
-
-        # build
-        return cls(file, lineno, indent, line, parts, timestamp=ts, comment=comment)
-
-    def __init__ (self, file, lineno, indent, data, parts, timestamp=None, comment=None) :
+    def __init__ (self, file, lineno, line, indent, parts, comment=None, timestamp=None) :
+        # source
         self.file = file
         self.lineno = lineno
-
+        self.line = line
+        
+        # parse data
         self.indent = indent
-        self.data = data
         self.parts = parts
-
+        
+        # metadata
         self.timestamp = timestamp
         self.comment = comment
 
@@ -387,52 +333,100 @@
         # parse
         yield ZoneRecord.parse(line, parts=parts, origin=origin)
 
+def parse_zone_lines (file, line_timestamp_prefix=None) :
+    """
+        Yield a ZoneLine per line of file, joining each '(' ... ')' multi-line statement into a single ZoneLine.
+    """
+    # state of the multi-line statement being collected; multiline_start is None outside of one
+    multiline_start = None
+    multiline_parts = None
+    
+    for lineno, raw_line in enumerate(file) :
+        # possible mtime prefix for line
+        timestamp = None
+
+        if line_timestamp_prefix :
+            if ': ' not in raw_line :
+                raise ZoneError("%s:%d: Missing timestamp prefix: %s" % (file.name, lineno, raw_line))
+
+            # split prefix
+            prefix, raw_line = raw_line.split(': ', 1)
+
+            # parse it out; the format constant lives on ZoneLine, there is no cls in a module-level function
+            timestamp = datetime.strptime(prefix, ZoneLine.PARSE_DATETIME_FORMAT)
+
+            log.debug("%s:%d: ts=%r", file.name, lineno, timestamp)
+        
+        log.debug("%s:%d: %s", file.name, lineno, raw_line)
+        
+        # capture indent from raw line
+        indent = raw_line.startswith(' ') or raw_line.startswith('\t')
+        line = raw_line.strip()
+
+        # parse comment
+        if ';' in line:
+            line, comment = line.split(';', 1)
+
+            line = line.strip()
+            comment = comment.strip()
+        else :
+            comment = None
+       
+        log.debug("%s:%d: indent=%r, line=%r, comment=%r", file.name, lineno, indent, line, comment)
+
+        # parse fields
+        if '"' in line :
+            pre, data, post = line.split('"', 2)
+            parts = pre.split() + [data] + post.split()
+           
+        else :
+            parts = line.split()
+
+        # handle multi-line statements...
+        if '(' in parts :
+            assert not multiline_start
+
+            log.warn("%s:%d: Start of multi-line statement: %s", file.name, lineno, line)
+
+            multiline_start = (lineno, timestamp, indent, comment)
+            multiline_line = ''
+            multiline_parts = []
+
+        if multiline_start:
+            log.warn("%s:%d: Multi-line statement: %s", file.name, lineno, line)
+            # every line of the statement, including the first, is accumulated exactly once here
+            multiline_parts.extend([part for part in parts if part not in set('()')])
+            multiline_line += raw_line
+
+        if ')' in parts :
+            assert multiline_start
+
+            log.warn("%s:%d: End of multi-line statement: %s", file.name, lineno, line)
+            # report the joined statement using the metadata captured on its first line
+            lineno, timestamp, indent, comment = multiline_start
+            raw_line = multiline_line
+            parts = multiline_parts
+
+            multiline_start = multiline_line = multiline_parts = None
+    
+        # yield, unless we are still inside an unterminated multi-line statement
+        if multiline_start:
+            pass
+        else:
+            yield ZoneLine(file.name, lineno, raw_line, indent, parts, comment, timestamp=timestamp)
+
 def parse_zone_records (file, origin=None, **opts) :
     """
         Parse ZoneRecord items from the given zonefile, ignoring non-record lines.
     """
 
     ttl = None
-
-    skip_multiline = False
     
-    for lineno, raw_line in enumerate(file) :
-        # parse comment
-        if ';' in raw_line :
-            line, comment = raw_line.split(';', 1)
-        else :
-            line = raw_line
-            comment = None
-
-        # XXX: handle multi-line statements...
-        # start
-        if '(' in line :
-            skip_multiline = True
-            
-            log.warn("%s:%d: Start of multi-line statement: %s", file.name, lineno, raw_line)
+    for line in parse_zone_lines(file, **opts):
+        if not line.parts :
+            log.debug("%s: skip empty line", line)
 
-        # end?
-        if ')' in line :
-            skip_multiline = False
-            
-            log.warn("%s:%d: End of multi-line statement: %s", file.name, lineno, raw_line)
-            
-            continue
-
-        elif skip_multiline :
-            log.warn("%s:%d: Multi-line statement: %s", file.name, lineno, raw_line)
-
-            continue
-        
-        # parse
-        line = ZoneLine.parse(file.name, lineno, raw_line, **opts)
-
-        if not line.data :
-            log.debug("%s: skip empty line: %s", line, raw_line)
-
-            continue
-
-        elif line.data.startswith('$') :
+        elif line.line.startswith('$') :
             # control record
             type = line.parts[0]
 
@@ -450,20 +444,18 @@
                     yield record
 
             else :
-                log.warning("%s: skip control record: %s", line, line.data)
+                log.warning("%s: skip control record: %s", line, line.line)
             
-            # XXX: passthrough!
-            continue
+        else :
+            # normal record?
+            record = ZoneRecord.parse(line, origin=origin)
 
-        # normal record?
-        record = ZoneRecord.parse(line, origin=origin)
+            if record :
+                yield record
 
-        if record :
-            yield record
-
-        else :
-            # unknown
-            log.warning("%s: skip unknown line: %s", line, line.data)
+            else :
+                # unknown
+                log.warning("%s: skip unknown line: %s", line, line.line)
     
 def reverse_ipv4 (ip) :
     """