bin/pvl.hosts-graph
author Tero Marttila <terom@paivola.fi>
Mon, 09 Mar 2015 23:31:13 +0200
changeset 738 3104fdf7ea26
parent 421 585eadaed270
permissions -rwxr-xr-x
pvl.hosts.hosts: drop support for instanced ip.* in favor of improved interface:ip.* =
#!/usr/bin/env python

"""
    Requirements:
        pydot
"""

import pvl.args
import pvl.hosts
from pvl.invoke import merge

import collections
import logging; log = logging.getLogger('pvl.hosts-graph')
import optparse

class ParseError (Exception) :
    """
        Error in an input file, formatted as '<file>:<line>: <msg>'.

            file    name (or other description) of the input being parsed
            line    position of the offending line within the input
            msg     description of the problem
    """

    def __init__ (self, file, line, msg) :
        # pass the fields on to Exception, so that .args and repr() are populated
        super(ParseError, self).__init__(file, line, msg)

        self.file = file
        self.line = line
        self.msg = msg

    def __str__ (self) :
        return "{self.file}:{self.line}: {self.msg}".format(self=self)

def _parse_snmp_part (part) :
    if part.isdigit() :
        return int(part)
    else :
        return part

def _parse_snmp_attr (line) :
    """
        Yield each whitespace-separated token of the line, parsed with _parse_snmp_part().
    """

    tokens = line.split()

    for token in tokens :
        parsed = _parse_snmp_part(token)

        yield parsed

def _parse_snmp_value (line) :
    """
        Parse a value line into a single-item container.

        A line containing a tab is split at the first tab into a { key: value } dict;
        any other line becomes a one-element set.
    """

    if '\t' not in line :
        # plain value
        return set([_parse_snmp_part(line)])

    # key <tab> value
    name, _, rest = line.partition('\t')

    return { _parse_snmp_part(name): _parse_snmp_part(rest) }
    
def _load_snmp_data (options, file) :
    """
        Load a data dict generated by pvl.hosts-snmp from a file.

        Yields (host, attr, value)
    """

    host = None
    attr = None
    value = None
    
    for idx, line in enumerate(file, 1) :
        indent = 0

        while line.startswith('\t') :
            indent += 1
            line = line[1:]

        line = line.lstrip('\t').rstrip('\n')

        if indent == 0 :
            host = line
            attr = None
            value = None

        elif indent == 1 :
            attr = tuple(_parse_snmp_attr(line))
            value = None

            yield host, attr, None

        elif indent == 2 :
            if not attr :
                raise ParseError(file, line, "[%s] %s: value outside of attr" % (host, attr))
            
            value = _parse_snmp_value(line)

            yield host, attr, value

def load_snmp_data (options, file, hosts) :
    """
        Load snmp data as dict, from given file path, or stdin.

            options     passed through to _load_snmp_data()
            file        path of the file to read, or a false value to read stdin
            hosts       hosts to map the '<host>@<domain>' names in the data onto

        Returns { host: { attr: ... } }, nested by the parts of each attribute path.
        Plain values for the same attribute are collected into a set, and key/value pairs into a dict.

        Data for names that do not match any of the given hosts is skipped with a warning.
    """

    # not imported at the top of the file
    import sys

    root = { }

    hosts_by_namedomain = dict(
        (
            '{host}@{domain}'.format(host=host, domain=host.domain), host
        ) for host in hosts
    )

    # names already warned about
    unknown = set()

    if file :
        stream = open(file)
    else :
        stream = sys.stdin

    try :
        for host_domain, attr, value in _load_snmp_data(options, stream) :
            host = hosts_by_namedomain.get(host_domain)

            if host is None :
                # do not lump unknown hosts together under a None key
                if host_domain not in unknown :
                    unknown.add(host_domain)

                    log.warning("%s: unknown host, skipping snmp data", host_domain)

                continue

            if not attr :
                # blank attribute line, there is no path to store anything at
                continue

            if value :
                log.debug("[%s] %s: %s", host, ' '.join(str(a) for a in attr), value)
            else :
                log.debug("[%s] %s", host, ' '.join(str(a) for a in attr),)

            item = root.setdefault(host, { })

            # descend along the attribute path, up to the final part
            for a in attr[:-1] :
                item = item.setdefault(a, {})

            a = attr[-1]

            if value is None :
                pass

            elif isinstance(value, set) :
                item.setdefault(a, set()).update(value)

            elif isinstance(value, dict) :
                item.setdefault(a, dict()).update(value)

            else :
                item[a] = value

    finally :
        # only close what was opened here, never stdin
        if file :
            stream.close()

    return root

def host_vlans (host, host_vlans) :
    """
        Invert per-vlan port membership into per-port vlan membership:

            { vlan: { 'tagged': [port], 'untagged': [port] } } -> (port, (untag, tagged))

        host is only used for debug logging.

        Yields one item per port seen, where untag is the vlan the port is untagged in (or None),
        and tagged is the set of vlans the port is tagged in (or an empty tuple).
    """

    untag_by_port = { }
    tagged_by_port = collections.defaultdict(set)
    seen = set()

    for vlan_id, attrs in host_vlans.iteritems() :
        for tagged_port in attrs.get('tagged', ()) :
            seen.add(tagged_port)
            tagged_by_port[tagged_port].add(vlan_id)

        for untagged_port in attrs.get('untagged', ()) :
            seen.add(untagged_port)
            untag_by_port[untagged_port] = vlan_id

    for port in seen :
        membership = (untag_by_port.get(port), tagged_by_port.get(port, ()))

        log.debug("%s: %s: untag=%s tag=%s", host, port, membership[0], membership[1])

        yield port, membership

def build_graph (options, snmp, hosts) :
    """
        Combine given snmp data and hosts into a graph of nodes and links.

            options     reads graph_lldp_unknown, graph_bridge and graph_bridge_unknown
            snmp        { host: { 'lldp': ..., 'vlan': ..., 'bridge': ... } }, as built by load_snmp_data()
            hosts       hosts used to resolve lldp chassis ids, ethernet addresses and link locations

        Returns (nodes, links):
            { node: label }
            { (local, local_port, remote_port, remote): (local_untag, tagged, remote_untag) }

        A node is either a host, or the raw chassis id/ethernet address of an unknown remote.
        The vlans of a link are None if nothing is known about them.

        The graph is built in three passes: lldp neighbours, manually configured links (the 'link' host
        extension), and optionally (options.graph_bridge) bridge forwarding tables.
    """

    nodes = { } # node: label
    links = { } # (local, local_port, remote_port, remote): (local_untag, tagged, remote_untag)

    hosts_by_lldp = { } # chassis: host
    hosts_by_ethernet = { } # ethernet: host
    hosts_by_location = { } # (domain, location): host

    nodes_port = { } # (local, port): {remote}
    nodes_out = { } # local: {remote}
    nodes_in = { } # remote: {local}
    links_out = { } # (local, remote): local_port
    links_in = { } # (remote, local): remote_port

    # first scan: lldp hosts
    for host, host_attrs in snmp.iteritems() :
        nodes[host] = host.location or str(host)

        if 'lldp' in host_attrs :
            lldp_local = host_attrs['lldp']['local']

            hosts_by_lldp[lldp_local['chassis']] = host

    # second scan: nodes by ethernet
    for host in hosts :
        for ethernet in host.ethernet.itervalues() :
            hosts_by_ethernet[ethernet] = host

        if host.location and host.location_domain:
            hosts_by_location[(host.location_domain, host.location)] = host
        elif host.location:
            hosts_by_location[(host.domain, host.location)] = host

    # first graph: lldp remotes
    for host, host_attrs in snmp.iteritems() :
        local_node = host

        if 'vlan' in host_attrs :
            vlans = dict(host_vlans(host, host_attrs['vlan']))
        else :
            vlans = None

        if 'lldp' in host_attrs :
            lldp = host_attrs['lldp']

            for port, port_attrs in lldp.get('port', { }).iteritems() :
                local_port = port_attrs['local']['port']

                for remote_lldp, remote_attrs in port_attrs['remote'].iteritems() :
                    # determine remote node
                    remote_label = remote_attrs['sys_name']

                    if remote_lldp in hosts_by_lldp :
                        remote_node = remote_host = hosts_by_lldp[remote_lldp]

                    elif remote_lldp in hosts_by_ethernet :
                        remote_node = remote_host = hosts_by_ethernet[remote_lldp]

                        if remote_host.location :
                            remote_label = remote_host.location

                        log.info("%s:%s: guessing lldp host %s -> %s (%s)", host, port, remote_lldp, remote_host, remote_label)

                    elif options.graph_lldp_unknown :
                        log.warning("%s:%s: unknown lldp remote %s (%s)", host, port, remote_lldp, remote_label)

                        # by chassis id
                        remote_node = remote_lldp
                        remote_host = None

                    else :
                        log.info("%s:%s: unknown lldp remote %s (%s)", host, port, remote_lldp, remote_label)

                        remote_node = remote_host = None

                    if not remote_node :
                        continue

                    # ensure remote node
                    if remote_node not in nodes :
                        log.debug("%s:%s: lazy-add remote %s (%s)", host, port, remote_node, remote_label)

                        nodes[remote_node] = remote_label

                    # local vlans
                    if vlans :
                        port_vlans = vlans.get(port)
                    else :
                        port_vlans = None

                    if port_vlans :
                        local_untag, local_tagged = port_vlans
                    else :
                        # unknown; reset explicitly, so that the vlans of a previous port are never reused
                        local_untag, local_tagged = None, ()

                    # directional mapping
                    links_out[(local_node, remote_node)] = local_port
                    nodes_port.setdefault((local_node, port), set()).add(remote_node)
                    nodes_out.setdefault(local_node, set()).add(remote_node)
                    nodes_in.setdefault(remote_node, set()).add(local_node)

                    # bidirectional mappings
                    remote_port = remote_attrs['port']

                    links_in[(remote_node, local_node)] = remote_port

                    forward = (local_node, local_port, remote_port, remote_node)
                    reverse = (remote_node, remote_port, local_port, local_node)

                    if reverse not in links :
                        links[forward] = (local_untag, local_tagged, None)
                    else :
                        remote_untag, remote_tagged, _ = links[reverse]

                        # merge; only compare against local vlans that are actually known
                        if port_vlans and remote_untag != local_untag :
                            log.warning("%s:%s untag %s <=> %s untag %s:%s",
                                    host, local_port, local_untag,
                                    remote_untag, remote_node, remote_port
                            )

                        if port_vlans and remote_tagged != local_tagged :
                            log.warning("%s:%s tagged %s <-> %s tagged %s:%s",
                                    host, local_port, ':'.join(str(x) for x in sorted(local_tagged)),
                                    ':'.join(str(x) for x in sorted(remote_tagged)), remote_node, remote_port
                            )

                        links[reverse] = (remote_untag, remote_tagged, local_untag)

    # second graph: manual ports
    for host in hosts :
        local_node = host
        host_links = host.extensions.get('link')

        # XXX: copy-pasta
        if host in snmp and 'vlan' in snmp[host] :
            vlans = dict(host_vlans(host, snmp[host]['vlan']))
        else :
            vlans = None

        if host_links :
            if local_node not in nodes :
                # XXX: copypasta
                nodes[local_node] = host.location or str(host)

            for link_port, remote in host_links.iteritems() :
                if link_port.isdigit() :
                    port = int(link_port)
                else :
                    port = str(link_port)

                # map remote -> remote_host
                if '@' in remote :
                    remote_location, remote_domain = remote.split('@', 1)
                else :
                    remote_location = remote
                    remote_domain = host.domain

                remote_node = remote_host = hosts_by_location.get((remote_domain, remote_location))

                if not remote_host :
                    log.warning("%s:%s: unknown remote location: %s@%s", host, port, remote_location, remote_domain)
                    continue

                remote_label = remote_host.location or str(remote_host)

                log.info("%s:%s: link -> %s@%s (%s)", host, port, remote_host, remote_host.domain, remote_label)

                if remote_node not in nodes :
                    # XXX: copypasta
                    nodes[remote_node] = remote_label

                # local vlans
                if vlans and port in vlans :
                    local_untag, local_tagged = vlans[port]

                    log.info("%s:%s link vlans (%s) <%s>", host, port, local_untag, ':'.join(str(tag) for tag in local_tagged))

                    link_vlans = (local_untag, local_tagged, None)
                else :
                    # unknown
                    link_vlans = None

                    log.warning("%s:%s unknown vlans", host, port)

                # directional links
                local_port = links_out.get((local_node, remote_node))

                if not local_port :
                    log.info("%s:%s: unconfirmed -> %s", host, port, remote_host)

                elif local_port != port :
                    log.warning("%s:%s: port mismatch %s -> %s", host, port, local_port, remote_host)

                else :
                    log.debug("%s:%s: confirm -> %s", host, port, remote_host)

                links_out[(local_node, remote_node)] = port
                nodes_port.setdefault((local_node, port), set()).add(remote_node)
                nodes_out.setdefault(local_node, set()).add(remote_node)
                nodes_in.setdefault(remote_node, set()).add(local_node)

                # update directional or missing links
                remote_port = links_out.get((remote_node, local_node))
                reverse_port = links_in.get((local_node, remote_node))

                if reverse_port and reverse_port != port :
                    # XXX: this can be caused by str vs int >_>
                    log.warning("%s:%s: reverse port mismatch %s <- %s", host, port, reverse_port, remote_node)

                if local_port and remote_port :
                    log.debug("%s:%s link <-> %s:%s", local_node, local_port, remote_port, remote_node)

                elif local_port :
                    # we have the forward mapping already, so this doesn't add any new info
                    log.debug("%s:%s link -> %s:%s", local_node, local_port, remote_port, remote_node)

                elif remote_port and reverse_port :
                    # we have the bidirectional mapping already, so this doesn't add any new info
                    log.debug("%s:%s link <-> %s:%s", local_node, local_port, remote_port, remote_node)

                elif remote_port :
                    # we had the reverse mapping already, make it bidirectional
                    local_port = port
                    log.info("%s:%s link <- %s:%s", local_node, local_port, remote_port, remote_node)

                    # TODO: update vlan info
                    # the one-directional link may be missing under this exact key, e.g. if the remote
                    # lists several ports for this host; fall back to unknown vlans instead of failing
                    links[(remote_node, remote_port, local_port, local_node)] = links.pop((remote_node, remote_port, None, local_node), None)

                else :
                    local_port = port

                    # mapping was completely missing
                    log.info("%s:%s link -> %s", local_node, local_port, remote_node)

                    links[(local_node, local_port, None, remote_node)] = link_vlans

    # verify non-p2p links
    for (node, port), remotes in nodes_port.iteritems() :
        if len(remotes) > 1 :
            log.warning("%s:%s: multiple remotes: %s", node, port, ' '.join(str(remote) for remote in remotes))

    if options.graph_bridge :
        # scan hosts with bridges
        bridge_hosts = set()
        bridge_ports = { }

        for host, host_attrs in snmp.iteritems() :
            if 'bridge' in host_attrs or any('bridge' in vlan_attrs for vlan_attrs in host_attrs.get('vlan', { }).itervalues()) :
                bridge_hosts.add(host)

        # third graph: bridge
        for host, host_attrs in snmp.iteritems() :
            local_out = nodes_out.get(host)

            if not local_out :
                log.warning("%s: no outgoing links, skipping bridge", host)
                continue

            # scan vlan/port bridge ethers
            bridge = { } # (port, vlan): {ethernet}

            for port, ethernets in host_attrs.get('bridge', { }).iteritems() :
                bridge[(port, None)] = ethernets

            for vlan, vlan_attrs in host_attrs.get('vlan', { }).iteritems() :
                for port, ethernets in vlan_attrs.get('bridge', { }).iteritems()  :
                    bridge[(port, vlan)] = ethernets

            for (port, vlan), ethernets in bridge.iteritems() :
                local_node = host
                local_port = port

                remote_nodes = nodes_port.get((local_node, local_port))

                if not remote_nodes :
                    remote_node = None
                elif len(remote_nodes) == 1 :
                    remote_node, = remote_nodes
                else :
                    # report the remotes of this port, not those left over from the verify loop above
                    log.warning("%s:%s: ignore port with multiple remotes: %s", host, port, ' '.join(str(remote) for remote in remote_nodes))
                    continue

                if remote_node :
                    remote_in = nodes_in.get(remote_node, set())
                else :
                    remote_in = set()

                # the remote is a leaf if this host is the only node linking to it
                remote_leaf = (remote_in == set((host, )))

                # TODO: add ether node and link if remote node also has this ether on this link
                #       also do this if all remote_in's agree that the ether is on the remote_node
                if not remote_node :
                    log.debug("%s:%s: no remote node", host, port)

                elif remote_leaf and (remote_node not in bridge_hosts) and len(ethernets) > 1 :
                    # map onto non-bridge leaf node
                    log.info("%s: <== %s:%s ", remote_node, host, port)

                    # map links out of the assumed remote bridge
                    local_node = remote_node
                    local_port = None

                else :
                    log.debug("%s:%s/%s bridge skip -> %s", host, port, vlan, remote_node)
                    continue

                for ethernet in ethernets :
                    # remote host
                    if ethernet in hosts_by_ethernet :
                        remote_node = remote_host = hosts_by_ethernet[ethernet]

                        remote_label = remote_host.location or str(remote_host)

                        log.debug("%s:%s/%s bridge %s = %s (%s)", host, port, vlan, ethernet, remote_host, remote_label)

                    elif options.graph_bridge_unknown :
                        log.warning("%s:%s/%s bridge unknown host %s", host, port, vlan, ethernet)

                        remote_label = remote_node = ethernet

                        nodes[remote_node] = remote_label

                        remote_host = None

                    else :
                        log.info("%s:%s/%s bridge unknown host %s", host, port, vlan, ethernet)

                        continue

                    # TODO: also handled multiple IP/ethers for the same host
                    if remote_host == host and local_node != host :
                        log.debug("%s:%s: skip remote-mapped self", host, port)
                        continue

                    if remote_node not in nodes :
                        log.debug("%s:%s: lazy-add remote %s (%s)", host, port, remote_node, remote_label)

                        nodes[remote_node] = remote_label

                    # unknown vlans
                    if vlan :
                        link_vlans = (vlan, (), None)
                    else :
                        link_vlans = None

                    # directional link
                    links_out[(local_node, remote_node)] = local_port

                    if local_port :
                        bridge_ports.setdefault((local_node, local_port), set()).add(remote_node)

                    # bidirectional link
                    forward = (local_node, local_port, None, remote_node)

                    # scan for reverse
                    remote_port = links_out.get((remote_node, local_node))

                    if remote_port :
                        reverse = (remote_node, remote_port, None, local_node)

                        log.info("%s:%s bridge <-> %s:%s", local_node, local_port, remote_port, remote_node)

                        # fill in remote_port for bidirectional link
                        # the reverse link may be stored under a different key (e.g. found via lldp),
                        # so drop it only if it is there
                        links.pop(reverse, None)
                        reverse = local_node, local_port, remote_port, remote_node
                        links[reverse] = link_vlans

                    else :
                        log.info("%s:%s bridge -> %s", local_node, local_port, remote_node)

                        links[forward] = link_vlans

        # verify unmanaged bridges
        for (node, port), remotes in bridge_ports.iteritems() :
            if len(remotes) > 1 :
                log.warning("%s:%s: multiple bridge remotes: %s", node, port, ' '.join(str(remote) for remote in remotes))

    return nodes, links

class GraphVlans (object) :
    """
        Maintain vlan -> dot style/color mappings.

        Colors for new vlans are allocated on first use from the SERIES color scheme, as
        '/<series>/<index>' with a 1-based index. Once all SERIES_SIZE colors are in use, allocation
        wraps around and colors are reused, rather than running past the end of the scheme.
    """

    SERIES = 'paired12'
    SERIES_SIZE = 12    # number of colors available in SERIES
    NONE = 'black'

    def __init__ (self, vlans=None) :
        """
            vlans   - optional initial { vlan: color } mappings, copied
        """

        if vlans :
            self.vlans = dict(vlans)
        else :
            self.vlans = { }

    def color (self, vlan) :
        """
            Return the color for the given vlan, allocating the next color of the series if the vlan is new.
        """

        if vlan in self.vlans :
            return self.vlans[vlan]

        # alloc; scheme indexes are 1..SERIES_SIZE
        index = len(self.vlans) % self.SERIES_SIZE + 1
        color = '/{series}/{index}'.format(series=self.SERIES, index=index)

        self.vlans[vlan] = color

        return color

def dot_quote (value) :
    """
        Quote a dot value.

        Returns the formatted value wrapped in double quotes, with any embedded double quotes
        backslash-escaped so that they do not terminate the quoted string early.
    """

    text = '{value}'.format(value=value)

    return '"{value}"'.format(value=text.replace('"', '\\"'))

def dot (*line, **attrs) :
    """
        Build dot-syntax:
            *line {
                *line [**attrs];
            }

        With both line and attrs, returns a tab-indented statement with an attribute list.
        With only line, returns the opening of a block; with neither, the closing brace.

        Attributes with a None value are omitted. Attribute values are double-quoted with any embedded
        double quotes escaped, and attributes are listed sorted by name so that output is stable.
        The line parts are used as-is; quote them with dot_quote() where needed.
    """

    if line and attrs :
        attr_list = ', '.join(
            '{name}="{value}"'.format(name=name, value='{0}'.format(value).replace('"', '\\"'))
            for name, value in sorted(attrs.items())
            if value is not None
        )

        return ''.join(('\t', ' '.join(str(x) for x in line), ' [', attr_list, ']', ';'))
    elif line :
        return ' '.join(str(x) for x in line) + ' {'
    else :
        return '}'

def build_dot (options, nodes, links, type='digraph', vlans=None) :
    """
        Construct a dot description of the given node/links graph.

            options     unused here
            nodes       { node: label }
            links       { (local, local_port, remote_port, remote): (local_untag, tagged, remote_untag) or None }
            type        dot graph keyword used for the opening line
            vlans       GraphVlans used to pick per-vlan colors; with a false value, link ends with a
                        known untagged vlan are drawn in GraphVlans.NONE and tagged vlans are not drawn

        Yields lines of dot source, without trailing newlines.
    """

    # NOTE(review): an empty dict is false, so vlans=True ends up on the uncolored path below;
    # confirm whether a fresh GraphVlans() was intended here
    if vlans is True :
        vlans = { }

    yield dot(type, 'verkko')

    # defaults
    yield dot('graph',
            # XXX: breaks multi-edges?
            #splines     = 'true',

            sep             = '+25,25',
            overlap         = 'scalexy',

            # only applies to loops
            nodesep     = 0.5,
    )
    yield dot('edge',
        labeldistance   = 3.0,
        penwidth        = 2.0,
    )
    yield dot('node',
        fontsize        = 18,
    )

    # nodes
    for node, node_label in nodes.iteritems() :
        yield dot(dot_quote(node), label=node_label)

    # links
    for (local, local_port, remote_port, remote), link_vlans in links.iteritems() :
        if link_vlans :
            local_untag, tagged, remote_untag = link_vlans

            if vlans :
                head_color = vlans.color(local_untag) if local_untag else None
                tail_color = vlans.color(remote_untag) if remote_untag else None
                line_colors = [vlans.color(tag) for tag in sorted(tagged)]
            else :
                head_color = GraphVlans.NONE if local_untag else None
                tail_color = GraphVlans.NONE if remote_untag else None
                line_colors = []
        else :
            # unknown
            head_color = tail_color = None
            line_colors = []

        if head_color and tail_color :
            direction = 'both'
            colors = [head_color, tail_color] + line_colors
        elif head_color :
            direction = 'forward'
            colors = [head_color] + line_colors
        elif tail_color :
            direction = 'back'
            # use the class constant: vlans may be None here
            colors = [GraphVlans.NONE, tail_color] + line_colors
        else :
            direction = 'none'
            colors = line_colors

        yield dot(dot_quote(local), '->', dot_quote(remote),
            taillabel   = local_port,
            headlabel   = remote_port,
            dir         = direction,

            fillcolor   = 'black',
            color       = ':'.join(colors) if colors else None,
        )

    yield dot()

def apply_dot (options, file, dot) :
    """
        Write each of the given dot lines to the file, followed by a newline.

        options is not used.
    """

    emit = file.write

    for entry in dot :
        emit(entry + '\n')

def main (argv) :
    """
        Graph network
    """

    # the docstring doubles as the usage message
    parser = optparse.OptionParser(main.__doc__)
    parser.add_option_group(pvl.args.parser(parser))
    parser.add_option_group(pvl.hosts.optparser(parser))

    parser.add_option('--snmp-data', metavar='FILE', default=None,
            help="Load snmp data from FILE")


    parser.add_option('--graph-lldp-unknown', action='store_true',
            help="Graph unknown LLDP nodes")


    # both options share dest=graph_vlans, which stays None if neither is given
    parser.add_option('--graph-vlans', action='store_true', dest='graph_vlans', 
            help="Graph links with VLAN information")

    parser.add_option('--no-vlans', action='store_false', dest='graph_vlans',
            help="Do not color VLANs")


    parser.add_option('--graph-bridge', action='store_true',
            help="Graph bridge forwarding database links")

    parser.add_option('--graph-bridge-unknown', action='store_true',
            help="Graph unknown bridge forwarding databse hosts")


    parser.add_option('--graph-dot', metavar='FILE',
            help="Output .dot graph data to file")

    # input
    options, args = parser.parse_args(argv[1:])
    pvl.args.apply(options)
    
    # load hosts
    hosts = list(pvl.hosts.apply(options, args))

    # load raw snmp data
    snmp = load_snmp_data(options, options.snmp_data, hosts)

    # process data into graph
    nodes, links = build_graph(options, snmp, hosts)
    
    # process graph into dot; vlans are colored unless explicitly disabled with --no-vlans
    if options.graph_vlans is False :
        graph_vlans = None
    else :
        graph_vlans = GraphVlans()

    if options.graph_dot :
        # process to dot
        dot_lines = build_dot(options, nodes, links, vlans=graph_vlans)
        
        # write out, making sure the output is flushed and closed
        with open(options.graph_dot, 'w') as dot_file :
            apply_dot(options, dot_file, dot_lines)

    return 0

# When executed as a script, hand main() over to pvl.args.main to run.
if __name__ == '__main__':
    pvl.args.main(main)