process-zone: refactor to use ZoneLine/ZoneRecord classes for handling zone line data; implement --input-line-date for parsing `hg blame` output
author Tero Marttila <terom@paivola.fi>
Mon, 19 Mar 2012 10:50:34 +0200
changeset 546 91c073d5615a
parent 545 5d2a8510a28f
child 547 4ccc31fdc047
process-zone: refactor to use ZoneLine/ZoneRecord classes for handling zone line data; implement --input-line-date for parsing `hg blame` output
bin/process-zone
--- a/bin/process-zone	Sat Mar 17 21:43:03 2012 +0200
+++ b/bin/process-zone	Mon Mar 19 10:50:34 2012 +0200
@@ -8,6 +8,7 @@
 
 import optparse
 import codecs
+from datetime import datetime
 import logging
 
 log = logging.getLogger('main')
@@ -50,6 +51,10 @@
     parser.add_option('--output-charset',       metavar='CHARSET',  default='utf-8', 
             help="Encoding used for output files")
 
+    # parse a per-line timestamp prefix from the input?
+    parser.add_option('--input-line-date',      action='store_true',
+            help="Parse timestamp prefix from each input line (e.g. `hg blame | ...`)")
+
     # check stage
     parser.add_option('--check-hosts',          action='store_true',
             help="Check that host/IPs are unique. Use --quiet to silence warnings, and test exit status")
@@ -92,94 +97,219 @@
 
     return options, args
 
-def parse_record (line) :
+class ZoneLine (object) :
+    """
+        A line in a zonefile.
+    """
+
+    file = None
+    lineno = None
+
+    # data
+    indent = None # was the line indented?
+    data = None
+    parts = None # split line fields
+
+    # optional
+    timestamp = None
+    comment = None
+
+    PARSE_DATETIME_FORMAT = '%Y-%m-%d'
+
+    @classmethod
+    def parse (cls, file, lineno, line, line_timestamp_prefix=False) :
+        """
+            Parse the given line and build a ZoneLine from it.
+        """
+
+        log.debug("parse: %s:%d: %s", file, lineno, line)
+
+        ts = None
+
+        if line_timestamp_prefix :
+            if ': ' not in line :
+                raise Exception("Missing timestamp prefix on line: %s:%d: %s" % (file, lineno, line))
+
+            # split prefix
+            prefix, line = line.split(': ', 1)
+
+            # parse it out
+            ts = datetime.strptime(prefix, cls.PARSE_DATETIME_FORMAT)
+
+            log.debug("  ts=%r", ts)
+
+        # was line indented?
+        indent = line.startswith(' ') or line.startswith('\t')
+        
+        # strip
+        line = line.strip()
+        
+        log.debug("  indent=%r, line=%r", indent, line)
+
+        # parse comment out?
+        if ';' in line :
+            line, comment = line.split(';', 1)
+
+            line = line.strip()
+            comment = comment.strip()
+
+        else :
+            line = line.strip()
+            comment = None
+        
+        log.debug("  line=%r, comment=%r", line, comment)
+
+        # parse fields
+        if '"' in line :
+            pre, data, post = line.split('"', 2)
+            parts = pre.split() + [data] + post.split()
+           
+        else :
+            parts = line.split()
+
+        log.debug("  parts=%r", parts)
+
+        # build
+        return cls(file, lineno, indent, line, parts, timestamp=ts, comment=comment)
+
+    def __init__ (self, file, lineno, indent, data, parts, timestamp=None, comment=None) :
+        self.file = file
+        self.lineno = lineno
+
+        self.indent = indent
+        self.data = data
+        self.parts = parts
+
+        self.timestamp = timestamp
+        self.comment = comment
+
+    def __str__ (self) :
+        return "{file}:{lineno}".format(file=self.file, lineno=self.lineno)
+
+class ZoneRecord (object) :
+    """
+        A record from a zonefile.
+    """
+
+    # the underlying line
+    line = None
+
+    # record fields
+    name = None
+    type = None
+
+    # list of data fields
+    data = None
+
+    # optional
+    ttl = None
+    cls = None
+
+    @classmethod
+    def parse (cls, line) :
+        """
+            Parse from ZoneLine. Returns None if there is no record on the line.
+        """
+
+        if not line.parts :
+            # skip
+            return
+        
+        # consume parts
+        parts = list(line.parts)
+
+        # indented lines don't have name
+        if line.indent :
+            name = None
+
+        else :
+            name = parts.pop(0)
+        
+        log.debug("  name=%r", name)
+
+        # parse ttl/cls/type
+        ttl = _cls = None
+
+        if parts and parts[0][0].isdigit() :
+            ttl = parts.pop(0)
+
+        if parts and parts[0].upper() in ('IN', 'CH') :
+            _cls = parts.pop(0)
+
+        # always have type
+        type = parts.pop(0)
+
+        # remaining parts are data
+        data = parts
+
+        log.debug("  ttl=%r, cls=%r, type=%r, data=%r", ttl, _cls, type, data)
+
+        return cls(name, type, data,
+            ttl     = ttl,
+            cls     = _cls,
+            line    = line,
+        )
+
+    def __init__ (self, name, type, data, ttl=None, cls=None, line=None, comment=None) :
+        self.name = name
+        self.type = type
+        self.data = data
+        
+        self.ttl = ttl
+        self.cls = cls
+        
+        self.line = line
+
+        # XXX: within line
+        self._comment = comment
+
+    def build_line (self) :
+        """
+            Construct a zonefile-format line.
+        """
+
+        # XXX: comment?
+        if self._comment :
+            comment = '\t; ' + self._comment
+        else :
+            comment = ''
+            
+        return u"{name:25} {ttl:4} {cls:2} {type:5} {data}{comment}".format(
+                name    = self.name or '',
+                ttl     = self.ttl or '',
+                cls     = self.cls or '',
+                type    = self.type,
+                data    = ' '.join(unicode(data) for data in self.data),
+                comment = comment,
+        )
+
+    def __str__ (self) :
+        return ' '.join((self.name, self.type, ' '.join(self.data)))
+
+def parse_record (path, lineno, line, **opts) :
     """
         Parse (name, ttl, type, data, comment) from bind zonefile.
 
         Returns None for empty/comment lines.
     """
-    
-    # was line indented?
-    indent = line.startswith(' ') or line.startswith('\t')
-    
-    # strip
-    line = line.strip()
-
-    if not line or line.startswith(';') :
-        # skip
-        return
-    
-    #log.debug("line=%r", line)
-
-    # parse comment out
-    parts = line.split(';', 1)
-
-    if ';' in line :
-        data, comment = line.split(';', 1)
-
-        line = data.rstrip()
-        comment = comment.strip()
-
-    else :
-        line = line.rstrip()
-        comment = None
-    
-    #log.debug("line=%r, comment=%r", line, comment)
-
-    # parse data out?
-    if '"' in line :
-        line, data, end = line.split('"')
-        parts = line.split()
-       
-    else :
-        parts = line.split()
-        data = parts.pop(-1)
 
-    #log.debug("parts=%r, data=%r", parts, data)
-
-    # indented lines don't have name
-    if indent :
-        name = None
-
-    else :
-        name = parts.pop(0)
-    
-    #log.debug("name=%r", name)
-
-    # parse ttl/cls/type
-    ttl = cls = None
-
-    if parts and parts[0][0].isdigit() :
-        ttl = parts.pop(0)
+    # line
+    line = ZoneLine.parse(path, lineno, line, **opts)
+    record = ZoneRecord.parse(line)
 
-    if parts and parts[0].upper() in ('IN', 'CH') :
-        cls = parts.pop(0)
-
-    type = parts.pop(0)
-
-    #log.debug("ttl=%r, cls=%r, parts=%r", ttl, cls, parts)
-
-    if parts :
-        log.debug("extra data: %r + %r", parts, data)
+    if record :
+        return record
 
-        # extra data
-        data = ' '.join(parts + [data])
-
-
-    return name, ttl, type, data, comment
-
-def parse_zone (file) :
+def parse_zone_records (file, **opts) :
     """
-        Parse
-            (name, ttl, type, data, comment) 
-        data from zonefile.
+        Parse ZoneRecord items from the given zonefile, ignoring non-record lines.
     """
     
     for lineno, line in enumerate(file) :
-        data = parse_record(line)
+        record = parse_record(file.name, lineno, line, **opts)
 
-        if data :
-            yield data
+        if record :
+            yield record
 
 def check_zone_hosts (zone, whitelist=None) :
     """
@@ -193,32 +323,31 @@
 
     fail = None
 
-    for item in zone :
-        text = ' '.join(pp for pp in item if pp)
-        name, ttl, type, data, comment = item
+    for r in zone :
+        name = r.name
 
         # name
         if name not in by_name :
-            by_name[name] = text
+            by_name[name] = r
 
-        elif name in whitelist :
-            log.debug("Duplicate whitelist entry: %r", item)
+        elif r.name in whitelist :
+            log.debug("Duplicate whitelist entry: %s", r)
 
         else :
             # fail!
-            log.warn("Duplicate name: %s <-> %s", text, by_name[name])
+            log.warn("%s: Duplicate name: %s <-> %s", r.line, r, by_name[name])
             fail = True
 
         # ip
-        if type == 'A' :
-            ip = data
+        if r.type == 'A' :
+            ip, = r.data
 
             if ip not in by_ip :
-                by_ip[ip] = text
+                by_ip[ip] = r
 
             else :
                 # fail!
-                log.warn("Duplicate IP: %s <-> %s", text, by_ip[ip])
+                log.warn("%s: Duplicate IP: %s <-> %s", r.line, r, by_ip[ip])
                 fail = True
 
     return fail
@@ -228,19 +357,18 @@
         Process zone data -> forward zone data.
     """
 
-    for name, ttl, type, data, comment in zone :
-        yield name, ttl, type, data
+    for r in zone :
+        yield r
 
-        if type == 'A' :
-            if txt and comment :
-                # name
-                yield None, ttl, 'TXT', u'"{0}"'.format(comment)
+        if r.type == 'A' :
+            if txt and r.line.comment :
+                yield ZoneRecord(None, 'TXT', [u'"{0}"'.format(r.line.comment)], ttl=r.ttl)
             
             # XXX: RP, do we need it?
 
             if mx :
                 # XXX: is this a good idea?
-                yield None, ttl, 'MX', '10 {mx}'.format(mx=mx)
+                yield ZoneRecord(None, 'MX', [10, mx], ttl=r.ttl)
 
 def reverse_addr (ip) :
     """
@@ -264,11 +392,11 @@
         Process zone data -> reverse zone data.
     """
 
-    for name, ttl, type, data, comment in zone :
-        if type != 'A' :
+    for r in zone :
+        if r.type != 'A' :
             continue
 
-        ip = data
+        ip, = r.data
 
         # generate reverse-addr
         reverse = reverse_addr(ip)
@@ -283,45 +411,13 @@
 
         # domain to use
         host_domain = domain
-        host_fqdn = fqdn(name, domain)
-
-        yield reverse, 'PTR', host_fqdn
-
-def build_zone (zone) :
-    for item in zone :
-        ttl = cls = comment = None
-
-        if len(item) == 3 :
-            name, type, data = item
-
-        elif len(item) == 4 :
-            name, ttl, type, data = item
-
-        elif len(item) == 5 :
-            name, ttl, type, data, comment = item
+        host_fqdn = fqdn(r.name, domain)
 
-        else :
-            raise Exception("Weird zone entry: {0}".format(item))
-
-        if not name :
-            name = ''
+        yield ZoneRecord(reverse, 'PTR', [host_fqdn])
 
-        if not ttl :
-            ttl = ''
-        
-        if not cls :
-            cls = ''
-
-        if comment :
-            comment = '\t;' + comment
-        else :
-            comment = ''
-        
-        yield u"{name:25} {ttl:4} {cls:2} {type:5} {data}{comment}".format(name=name, ttl=ttl, cls=cls, type=type, data=data, comment=comment)
-
-def write_zone (file, zone) :
-    for line in build_zone(zone) :
-        file.write(line + u'\n')
+def write_zone_records (file, zone) :
+    for r in zone :
+        file.write(r.build_line() + u'\n')
 
 def open_file (path, mode, charset) :
     """
@@ -361,7 +457,9 @@
     for file in input_files :
         log.info("Reading zone: %s", file)
 
-        zone += list(parse_zone(file))
+        zone += list(parse_zone_records(file, 
+            line_timestamp_prefix   = options.input_line_date,
+        ))
 
     # check?
     if options.check_hosts :
@@ -402,7 +500,7 @@
         log.warn("Nothing to do")
         return 1
 
-    write_zone(output, zone)
+    write_zone_records(output, zone)
 
     return 0