merge 666e638059b2
authorTero Marttila <terom@fixme.fi>
Fri, 05 Feb 2010 20:38:44 +0200
changeset 52 13fc80450862
parent 51 342850300d6a (diff)
parent 49 666e638059b2 (current diff)
child 53 21ab25ffa1e8
merge 666e638059b2
--- a/etc/fixbot-logwatch.py	Fri Feb 05 02:55:42 2010 +0200
+++ b/etc/fixbot-logwatch.py	Fri Feb 05 20:38:44 2010 +0200
@@ -18,4 +18,7 @@
         filters.su_nobody_killer,
         filters.all,
     )),
+    UnixDatagramSocket("test", os.path.join(logwatch_dir, "test.sock"), (
+        filters.all,
+    )),
 )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fixbot/logwatch/message.py	Fri Feb 05 20:38:44 2010 +0200
@@ -0,0 +1,173 @@
+"""
+    Log messages (e.g. syslog)
+"""
+
+# stdlib syslog, for facilities and levels
+import syslog
+import time, datetime
+import re
+
+PRIORITY_NAMES = {
+    syslog.LOG_EMERG:       'emerg',
+    syslog.LOG_ALERT:       'alert', 
+    syslog.LOG_CRIT:        'crit', 
+    syslog.LOG_ERR:         'err',
+    syslog.LOG_WARNING:     'warning',
+    syslog.LOG_NOTICE:      'notice',
+    syslog.LOG_INFO:        'info', 
+    syslog.LOG_DEBUG:       'debug',
+}
+
+FACILITY_NAMES = {
+    syslog.LOG_KERN:	'kern',     # 0
+    syslog.LOG_USER:	'user',     # 1
+    syslog.LOG_MAIL:	'mail',     # 2
+    syslog.LOG_DAEMON:	'daemon',   # 3
+    syslog.LOG_AUTH:	'auth',     # 4
+    5:                  'syslog',   # 5
+    syslog.LOG_LPR:	    'lpr',      # 6
+    syslog.LOG_NEWS:	'news',     # 7
+    syslog.LOG_UUCP:	'uucp',     # 8
+    syslog.LOG_CRON:	'cron',     # 9
+    10:                 'authpriv', # 10
+    11:                 'ftp',      # 11
+    12:                 'ntp',      # 12
+    13:                 'audit',    # 13
+    14:                 'alert',    # 14
+    15:                 'clock',    # 15
+    syslog.LOG_LOCAL0:	'local0',   # 16
+    syslog.LOG_LOCAL1:	'local1',   # 17
+    syslog.LOG_LOCAL2:	'local2',   # 18
+    syslog.LOG_LOCAL3:	'local3',   # 19
+    syslog.LOG_LOCAL4:	'local4',   # 20
+    syslog.LOG_LOCAL5:	'local5',   # 21
+    syslog.LOG_LOCAL6:	'local6',   # 22
+    syslog.LOG_LOCAL7:	'local7',   # 23
+}
+
+class SyslogMessage (object) :
+    """
+        A message from syslog with the following fields:
+
+            pri             - (optional) raw integer priority field
+            priority        - (optional) message priority as text
+            facility        - (optional) message facility as text
+            timestamp       - message timestamp as a datetime
+            hostname        - originating hostname
+            tag             - (optional) message tag including process name and id
+            program         - (optional) process name part of message tag
+            pid             - (optional) process ID part of message tag
+            text            - the log message following the tag
+            message         - the full log message including the tag
+            raw             - the full syslog-format message
+    """
+
+    # the regular expression used to parse the lines
+    LINE_RE = re.compile(
+            # the priority field as in raw syslog messages
+            r"(?:<(?P<pri>\d+)>)?"
+
+            # the timestamp+hostname header
+        +   r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)"
+
+            # the message, including possible tag/pid
+        +   r" (?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.+))\n?"
+    )
+
+    # strptime format of timestamp
+    TIMESTAMP_FMT = "%b %d %H:%M:%S"
+
+    def _parse_pri (self, match) :
+        """
+            Parse the priority/facility from the given match object
+        """
+        
+        # raw integer
+        self.pri = int(match.group('pri')) if match.group('pri') else None
+        
+        if self.pri :
+            # unpack
+            priority = self.pri % 8
+            facility = self.pri // 8
+            
+            # translate to names
+            self.priority = PRIORITY_NAMES.get(priority, str(priority))
+            self.facility = FACILITY_NAMES.get(facility, str(facility))
+        
+        else :
+            self.priority = self.facility = None
+    
+    def _parse_timestamp (self, match) :
+        """
+            Parse the timestamp field into a datetime.datetime from the given match object
+        """
+    
+        timestamp = match.group('timestamp')
+
+        try :
+            ts = time.strptime(timestamp, self.TIMESTAMP_FMT)
+
+        except Exception, ex :
+            raise ValueError("Invalid timestamp: %s: %s" % (timestamp, ex))
+        
+        # build timestamp
+        self.timestamp = datetime.datetime(
+            # fix year - strptime default is 1900
+            ts.tm_year if ts.tm_year != 1900 else time.localtime().tm_year,
+
+            # month, day, hour, minute, second
+            *ts[1:6]
+        )
+    
+    def _parse_hostname (self, match) :
+        """
+            Parse the hostname from the given match object
+        """
+        
+        # nothing much needed..
+        self.hostname = match.group('hostname')
+    
+    def _parse_message (self, match) :
+        """
+            Parse the message with tag from the given match object
+        """
+
+        self.message = match.group('message')
+        self.tag = match.group('tag')
+        self.program = match.group('program')
+        self.pid = int(match.group('pid')) if match.group('pid') else None
+        self.text = match.group('text')
+        
+    def __init__ (self, line) :
+        """
+            Construct this message from the given line
+        """
+        
+        # apply regexp
+        match = self.LINE_RE.match(line)
+
+        if not match :
+            # fail
+            raise ValueError("Invalid syslog data format")
+        
+        # unpack the various portions
+        self._parse_pri(match)
+        self._parse_timestamp(match)
+        self._parse_hostname(match)
+        self._parse_message(match)
+        
+        # the raw line as matched
+        self.raw = match.group(0)
+
+    def __str__ (self) :
+        """
+            Format to default format
+        """
+
+        return "%s %s %s" % (self.timestamp.strftime(self.TIMESTAMP_FMT), self.hostname, self.message)
+    
+    def __repr__ (self) :
+        return "pri=%s:%s, timestamp=%s, hostname=%s, tag=%s[%s], text=%r" % (
+            self.facility, self.priority, self.timestamp.isoformat(), self.hostname, self.program, self.pid, self.text
+        )
+
--- a/fixbot/logwatch/sources.py	Fri Feb 05 02:55:42 2010 +0200
+++ b/fixbot/logwatch/sources.py	Fri Feb 05 20:38:44 2010 +0200
@@ -2,10 +2,11 @@
     Implementations of the various sources of log data
 """
 
-from twisted.internet import protocol
+from twisted.internet import protocol, reactor
 from twisted.python import log
 
 from fixbot import fifo
+from fixbot.logwatch import message
 
 class LogSource (object) :
     """
@@ -39,20 +40,44 @@
 
     def handleData (self, data) :
         """
-            Buffer the given chunk of data, passing any full lines to handleLine
+            Feed binary data into the buffer, processing all lines via handleLine()
         """
 
         data = self.buf + data
         
         while "\n" in data :
             line, data = data.split("\n", 1)
-
+            
+            # full line
             self.handleLine(line)
 
         self.buf = data
 
     def handleLine (self, line) :
-        log.msg("Matching line `%s'..." % line)
+        """
+            Parse given line into a SyslogMessage, and pass it to handleMessage
+        """
+
+        # parse
+        try :
+            msg = message.SyslogMessage(line)
+    
+        except Exception, e :
+            # log and ignore
+            log.err(e, "Invalid syslog message from source %s: %s" % (self.name, line))
+    
+        else :
+            # handle the structured message
+            self.handleMessage(msg)
+
+    def handleMessage (self, msg) :
+        """
+            Process the given SyslogMessage
+        """
+
+        # XXX: filters should accept messages...
+        line = str(msg)
+        log.msg(repr(msg))
 
         for filter in self.filters :
             # let the filter process the line
@@ -129,4 +154,28 @@
         super(Fifo, self).connectionLost(reason)
         self.handleError("lost fifo for %s: %s" % (self.name, reason.getErrorMessage()))
 
+class UnixDatagramSocket (LogSource, protocol.DatagramProtocol) :
+    """
+        Stream messages from a UNIX datagram socket
+    """
+    
+    # maximum size of the recieved messages
+    # native syslog is 1024, but syslog-ng does better... so 4k
+    MAX_PACKET_SIZE = 4096
 
+    def __init__ (self, name, path, filters) :
+        LogSource.__init__(self, name, filters)
+
+        log.msg("opening unix socket for %s at: %s" % (name, path))
+        
+        # open UNIX socket
+        reactor.listenUNIXDatagram(path, self, self.MAX_PACKET_SIZE)
+
+    def datagramReceived (self, data, addr) :
+        """
+            Got message from syslog-ng
+        """
+        
+        # handle it as a line of data
+        self.handleLine(data)
+