bin/process-zone
author Tero Marttila <terom@paivola.fi>
Mon, 19 Mar 2012 10:50:34 +0200
changeset 23 91c073d5615a
parent 12 bddc9a060a73
child 24 4ccc31fdc047
permissions -rwxr-xr-x
process-zone: refactor to use ZoneLine/ZoneRecord classes for handling zone line data; implement --input-line-date for parsing `hg blame` output
#!/usr/bin/env python

"""
    Process zonefiles.
"""

__version__ = '0.0.1-dev'

import optparse
import codecs
from datetime import datetime
import logging

# module-level logger (named 'main') used by the functions and classes in this file
log = logging.getLogger('main')

# command-line options, global state; None until main() assigns the parsed options
options = None

def parse_options (argv) :
    """
        Parse command-line arguments.

        argv is the full argument vector: argv[0] is used as the program name (for usage output and as
        the log message prefix), the remaining items are parsed as options and positional arguments.

        As a side effect, configures the logging module with the selected log level.

        Returns the (options, args) pair produced by optparse.
    """

    prog = argv[0]

    parser = optparse.OptionParser(
            prog        = prog,
            usage       = '%prog: [options]',
            version     = __version__,

            # module docstring
            description = __doc__,
    )

    # logging; all three flags store into the same 'loglevel' destination, so the last one given wins
    general = optparse.OptionGroup(parser, "General Options")

    general.add_option('-q', '--quiet',     dest='loglevel', action='store_const', const=logging.ERROR, help="Less output")
    general.add_option('-v', '--verbose',   dest='loglevel', action='store_const', const=logging.INFO,  help="More output")
    general.add_option('-D', '--debug',     dest='loglevel', action='store_const', const=logging.DEBUG, help="Even more output")

    parser.add_option_group(general)

    # input/output
    parser.add_option('-c', '--input-charset',  metavar='CHARSET',  default='utf-8',
            help="Encoding used for input files")

    parser.add_option('-o', '--output',         metavar='FILE',     default='-',
            help="Write to output file; default stdout")

    parser.add_option('--output-charset',       metavar='CHARSET',  default='utf-8',
            help="Encoding used for output files")

    # read line mtimes?
    parser.add_option('--input-line-date',      action='store_true',
            help="Parse timestamp prefix from each input line (e.g. `hg blame | ...`)")

    # check stage
    parser.add_option('--check-hosts',          action='store_true',
            help="Check that host/IPs are unique. Use --quiet to silence warnings, and test exit status")

    parser.add_option('--check-exempt',         action='append',
            help="Allow given names to have multiple records")

    # forward stage
    parser.add_option('--forward-zone',         action='store_true',
            help="Generate forward zone")

    parser.add_option('--forward-txt',          action='store_true',
            help="Generate TXT records for forward zone")

    parser.add_option('--forward-mx',           metavar='MX',
            help="Generate MX records for forward zone")

    # reverse stage
    parser.add_option('--reverse-domain',       metavar='DOMAIN',
            help="Domain to use for hosts in reverse zone")

    # fixed: the help text used to say "forward zone" for this reverse-zone option
    parser.add_option('--reverse-zone',         metavar='NET',
            help="Generate reverse zone for given subnet (x.z.y)")

    # defaults
    parser.set_defaults(
        loglevel            = logging.WARN,

        check_exempt        = [],
    )

    # parse
    options, args = parser.parse_args(argv[1:])

    # configure
    logging.basicConfig(
        format  = prog + ': %(name)s: %(levelname)s %(funcName)s : %(message)s',
        level   = options.loglevel,
    )

    return options, args

class ZoneLine (object) :
    """
        A line in a zonefile.

        Stores where the line came from (file, lineno), whether it was indented, the line text with
        any comment removed, the split fields, and the optional timestamp prefix / trailing comment.
    """

    file = None
    lineno = None

    # data
    indent = None # was the line indented?
    data = None
    parts = None # split line fields

    # optional
    timestamp = None
    comment = None

    # strptime format of the timestamp prefix
    PARSE_DATETIME_FORMAT = '%Y-%m-%d'

    @staticmethod
    def _split_comment (line) :
        """
            Split the line at the first ';' that is not inside a double-quoted string.

            Returns (data, comment), both stripped; comment is None if the line has no comment.
            Backslash-escaped quotes are not treated specially.
        """

        quoted = False

        for index, char in enumerate(line) :
            if char == '"' :
                quoted = not quoted

            elif char == ';' and not quoted :
                return line[:index].strip(), line[index + 1:].strip()

        return line.strip(), None

    @classmethod
    def parse (cls, file, lineno, line, line_timestamp_prefix=False) :
        """
            Parse out given line and build.

                file, lineno            - source location, stored on the returned object
                line                    - raw line text
                line_timestamp_prefix   - expect a "<timestamp>: " prefix in PARSE_DATETIME_FORMAT on the line

            Raises Exception if the timestamp prefix is expected but missing, and ValueError if the
            timestamp does not match the format, or if the line contains an unbalanced double quote.
        """

        log.debug("parse: %s:%d: %s", file, lineno, line)

        ts = None

        if line_timestamp_prefix :
            if ': ' not in line :
                raise Exception("Missing timestamp prefix on line: %s:%d: %s" % (file, lineno, line))

            # split prefix
            prefix, line = line.split(': ', 1)

            # parse it out
            ts = datetime.strptime(prefix, cls.PARSE_DATETIME_FORMAT)

            log.debug("  ts=%r", ts)

        # was line indented? must be checked before stripping
        indent = line.startswith(' ') or line.startswith('\t')

        # strip
        line = line.strip()

        log.debug("  indent=%r, line=%r", indent, line)

        # parse comment out; a ';' inside a quoted string is data, not a comment
        line, comment = cls._split_comment(line)

        log.debug("  line=%r, comment=%r", line, comment)

        # parse fields
        if '"' in line :
            if line.count('"') < 2 :
                raise ValueError("Unbalanced quote on line: %s:%d: %s" % (file, lineno, line))

            # the first quoted string becomes a single field (without its quotes);
            # everything before and after it is split on whitespace
            pre, data, post = line.split('"', 2)
            parts = pre.split() + [data] + post.split()

        else :
            parts = line.split()

        log.debug("  parts=%r", parts)

        # build
        return cls(file, lineno, indent, line, parts, timestamp=ts, comment=comment)

    def __init__ (self, file, lineno, indent, data, parts, timestamp=None, comment=None) :
        """
            Store the given fields as-is; see parse() for building from a raw line.
        """

        self.file = file
        self.lineno = lineno

        self.indent = indent
        self.data = data
        self.parts = parts

        self.timestamp = timestamp
        self.comment = comment

    def __str__ (self) :
        """
            Source location as "file:lineno".
        """

        return "{file}:{lineno}".format(file=self.file, lineno=self.lineno)

class ZoneRecord (object) :
    """
        A record from a zonefile.
    """

    # the underlying line
    line = None

    # record fields
    name = None
    type = None

    # list of data fields
    data = None

    # optional
    ttl = None
    cls = None

    @classmethod
    def parse (cls, line) :
        """
            Parse from ZoneLine. Returns None if there is no record on the line.

            The fields in line.parts are consumed in order: name (only for non-indented lines), optional
            ttl (a field starting with a digit), optional class (IN/CH), type, and the remaining fields
            as data.

            Raises ValueError if the line has fields, but none is left for the record type.
        """

        if not line.parts :
            # skip
            return None

        # consume parts from a copy, leaving line.parts intact
        parts = list(line.parts)

        # indented lines don't have name
        if line.indent :
            name = None

        else :
            name = parts.pop(0)

        log.debug("  name=%r", name)

        # parse ttl/cls/type
        ttl = _cls = None

        # [:1] instead of [0], as a quoted field may be the empty string
        if parts and parts[0][:1].isdigit() :
            ttl = parts.pop(0)

        if parts and parts[0].upper() in ('IN', 'CH') :
            _cls = parts.pop(0)

        # always have type; report where it is missing instead of failing with a bare IndexError
        if not parts :
            raise ValueError("Missing record type on line: %s" % (line, ))

        type = parts.pop(0)

        # remaining parts are data
        data = parts

        log.debug("  ttl=%r, cls=%r, type=%r, data=%r", ttl, _cls, type, data)

        return cls(name, type, data,
            ttl     = ttl,
            cls     = _cls,
            line    = line,
        )

    def __init__ (self, name, type, data, ttl=None, cls=None, line=None, comment=None) :
        """
                name    - record name, or None for a record without its own name
                type    - record type
                data    - list of data fields
                ttl     - optional TTL
                cls     - optional record class
                line    - optional underlying ZoneLine
                comment - optional comment appended by build_line()
        """

        self.name = name
        self.type = type
        self.data = data

        self.ttl = ttl
        self.cls = cls

        self.line = line

        # XXX: within line
        self._comment = comment

    def build_line (self) :
        """
            Construct a zonefile-format line from the record fields, as an unicode string without a
            trailing newline. Missing name/ttl/cls fields are left blank.
        """

        # XXX: comment?
        if self._comment :
            comment = '\t; ' + self._comment
        else :
            comment = ''

        # u'{0}'.format() converts each data field to text without relying on the
        # python2-only unicode() builtin
        data = u' '.join(u'{0}'.format(item) for item in self.data)

        return u"{name:25} {ttl:4} {cls:2} {type:5} {data}{comment}".format(
                name    = self.name or '',
                ttl     = self.ttl or '',
                cls     = self.cls or '',
                type    = self.type,
                data    = data,
                comment = comment,
        )

    def __str__ (self) :
        """
            Short "name type data..." form, used in log messages.
        """

        # name may be None and data may contain non-string fields (e.g. the MX preference),
        # so convert explicitly rather than joining the raw values
        data = u' '.join(u'{0}'.format(item) for item in self.data)

        return u' '.join((self.name or u'', self.type, data))

def parse_record (path, lineno, line, **opts) :
    """
        Parse one line of a bind zonefile into a ZoneRecord.

        The raw line is first parsed using ZoneLine.parse (which receives path, lineno and any extra
        **opts), and the resulting ZoneLine is then given to ZoneRecord.parse.

        Returns the parsed record, or None if no record was parsed from the line.
    """

    zone_line = ZoneLine.parse(path, lineno, line, **opts)
    parsed = ZoneRecord.parse(zone_line)

    # normalize any false result to None
    return parsed if parsed else None

def parse_zone_records (file, **opts) :
    """
        Parse ZoneRecord items from the given zonefile, ignoring non-record lines.

        file is iterated line by line, and its .name is used as the path in diagnostics.
        Any **opts are passed through to parse_record().

        Yields each parsed record in file order.
    """

    # count lines from 1, so that the file:lineno shown in diagnostics matches what an editor shows
    for lineno, line in enumerate(file, 1) :
        record = parse_record(file.name, lineno, line, **opts)

        if record :
            yield record

def check_zone_hosts (zone, whitelist=None) :
    """
        Parse host/IP pairs from the zone, and verify that they are unique.

        As an exception, names listed in the given whitelist may have multiple records.
        Each duplicate is logged as a warning.

        An 'A' record is expected to have exactly one data field; anything else raises ValueError
        from the unpacking.

        Returns True if any duplicate name or IP was found, otherwise None.
    """

    # the default whitelist=None used to fail with a TypeError on the first duplicate name
    if whitelist is None :
        whitelist = ()

    # first record seen for each name / IP
    by_name = {}
    by_ip = {}

    fail = None

    for r in zone :
        name = r.name

        # name
        if name not in by_name :
            by_name[name] = r

        elif name in whitelist :
            log.debug("Duplicate whitelist entry: %s", r)

        else :
            # fail!
            log.warning("%s: Duplicate name: %s <-> %s", r.line, r, by_name[name])
            fail = True

        # ip
        if r.type == 'A' :
            ip, = r.data

            if ip not in by_ip :
                by_ip[ip] = r

            else :
                # fail!
                log.warning("%s: Duplicate IP: %s <-> %s", r.line, r, by_ip[ip])
                fail = True

    return fail

def process_zone_forwards (zone, txt=False, mx=False) :
    """
        Generate forward zone records from the given zone records.

        Every input record is yielded unchanged. Each 'A' record is additionally followed by:

            * with txt: a TXT record quoting the comment of the record's line, if the line has one
            * with mx:  a MX record with preference 10 and the given mx value as the target

        The generated records have no name, and use the TTL of the 'A' record.
    """

    for record in zone :
        yield record

        # only address records get extras
        if record.type != 'A' :
            continue

        if txt :
            text = record.line.comment

            if text :
                yield ZoneRecord(None, 'TXT', [u'"{0}"'.format(text)], ttl=record.ttl)

        # XXX: RP, do we need it?

        # XXX: is this a good idea?
        if mx :
            yield ZoneRecord(None, 'MX', [10, mx], ttl=record.ttl)

def reverse_addr (ip) :
    """
        Return in-addr.arpa reverse for given IPv4 IP.

        Any number of dot-separated octets is accepted, so a network prefix such as "10.1" gives
        "1.10.in-addr.arpa".

        Raises ValueError if an octet is not an integer, or is outside of 0..255.
    """

    # parse
    octets = [int(part) for part in ip.split('.')]

    # validate with a real exception; an assert would be skipped when running with python -O
    for octet in octets :
        if not 0 <= octet <= 255 :
            raise ValueError("Invalid IPv4 octet in %r: %d" % (ip, octet))

    return '.'.join([str(octet) for octet in reversed(octets)] + ['in-addr', 'arpa'])

def fqdn (*parts) :
    """
        Join the given name parts with dots, and terminate the result with a trailing dot.
    """

    dotted = '.'.join(parts)

    return dotted + '.'


def process_zone_reverse (zone, origin, domain) :
    """
        Process zone data -> reverse zone data.

            zone    - iterable of records; only 'A' records are used
            origin  - in-addr.arpa name of the reverse zone, as returned by reverse_addr()
            domain  - domain appended to each record name to form the PTR target

        Yields a PTR record for each 'A' record whose address lies within origin, named relative to
        origin. Addresses outside of origin are skipped with a warning.

        An 'A' record is expected to have exactly one data field.
    """

    # match on a full label boundary: a plain endswith(origin) would also accept e.g.
    # 5.11.10.in-addr.arpa for the origin 1.10.in-addr.arpa
    suffix = '.' + origin

    for r in zone :
        if r.type != 'A' :
            continue

        ip, = r.data

        # generate reverse-addr
        reverse = reverse_addr(ip)

        # verify
        if not reverse.endswith(suffix) :
            log.warning("Reverse does not match zone origin, skipping: (%s) -> %s <-> %s", ip, reverse, origin)
            continue

        # name relative to origin
        reverse = reverse[:-len(suffix)]

        # domain to use
        host_fqdn = fqdn(r.name, domain)

        yield ZoneRecord(reverse, 'PTR', [host_fqdn])

def write_zone_records (file, zone) :
    """
        Write every record in zone to file, one build_line() result per line.
    """

    newline = u'\n'

    for record in zone :
        text = record.build_line()

        file.write(text + newline)

def open_file (path, mode, charset) :
    """
        Open unicode-enabled file from path, with - using stdio.

            path    - filesystem path, or '-' for stdin/stdout
            mode    - open mode; for '-', the first character must be 'r' (stdin) or 'w' (stdout)
            charset - name of the encoding used for decoding/encoding

        Returns a codecs reader/writer wrapping the stream.

        Raises KeyError for '-' with any other mode.
    """

    # imported here, as the module itself only imports sys when run as a script;
    # without this, calling open_file() from an importing module fails with a NameError
    import sys

    if path == '-' :
        # use stdin/out based on mode
        stream, func = {
            'r':    (sys.stdin, codecs.getreader),
            'w':    (sys.stdout, codecs.getwriter),
        }[mode[0]]

        # wrap
        return func(charset)(stream)

    else :
        # open
        return codecs.open(path, mode, charset)

def main (argv) :
    """
        Run the program with the given argument vector (including the program name).

        Reads zone records from the input files given as arguments (or stdin), optionally checks them
        for duplicate hosts, and writes the generated forward or reverse zone to the output.

        Returns the process exit status:
            0   - success
            1   - nothing to do, or invalid options
            2   - hosts check failed
    """

    global options

    options, args = parse_options(argv)

    if args :
        # open files
        input_files = [open_file(path, 'r', options.input_charset) for path in args]

    else :
        # default to stdin
        input_files = [open_file('-', 'r', options.input_charset)]

    # process zone data
    zone = []

    for file in input_files :
        log.info("Reading zone: %s", file)

        zone += list(parse_zone_records(file,
            line_timestamp_prefix   = options.input_line_date,
        ))

    # check?
    if options.check_hosts :
        whitelist = set(options.check_exempt)

        log.debug("checking hosts; whitelist=%r", whitelist)

        if check_zone_hosts(zone, whitelist=whitelist) :
            log.warning("Hosts check failed")
            return 2

        else :
            log.info("Hosts check OK")

    if options.forward_zone :
        log.info("Write forward zone: %s", options.output)

        zone = list(process_zone_forwards(zone, txt=options.forward_txt, mx=options.forward_mx))

    elif options.reverse_zone :
        origin = reverse_addr(options.reverse_zone)
        domain = options.reverse_domain

        if not domain :
            log.error("--reverse-zone requires --reverse-domain")
            return 1

        zone = list(process_zone_reverse(zone, origin=origin, domain=domain))

    elif options.check_hosts :
        # we only did that, done
        return 0

    else :
        log.warning("Nothing to do")
        return 1

    # output file; only opened once there is something to write, so that an existing file is not
    # truncated by a check-only run or by an option error
    output = open_file(options.output, 'w', options.output_charset)

    try :
        write_zone_records(output, zone)

    finally :
        if options.output == '-' :
            # leave stdout itself open
            output.flush()

        else :
            output.close()

    return 0

if __name__ == '__main__':
    # run as a script: use the return value of main() as the process exit status
    import sys

    status = main(sys.argv)

    sys.exit(status)