log_parser.py
changeset 140 6db2527b67cf
parent 139 9c7769850195
child 141 65c98c9e1716
--- a/log_parser.py	Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-"""
-    Parse log data into log_events
-"""
-
-import re
-import datetime
-
-from log_line import LogTypes, LogLine
-
-class LogParseError (Exception) :
-    """
-        Raised when a log line cannot be parsed.
-
-        The exception message has the form "<repr of line>@<offset>: <message>".
-    """
-
-    def __init__ (self, line, offset, message) :
-        # build the full description first, then hand it over to Exception
-        description = "%r@%s: %s" % (line, offset, message)
-
-        super(LogParseError, self).__init__(description)
-
-class LogParser (object) :
-    """
-        Abstract interface for log parsers; subclasses must implement parse_lines.
-    """
-
-    def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
-        """
-            Setup the parser to use the given format for line timestamps, which are of the given timezone.
-
-            timestamp_fmt is a strptime() format string.
-        """
-
-        self.tz = tz
-        self.timestamp_fmt = timestamp_fmt
-
-    def parse_lines (self, channel, lines, date=None, starting_offset=None) :
-        """
-            Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.
-
-            Channel is the LogChannel that these lines belong to.
-
-            Offset is the starting offset, and may be None to not use it.
-
-            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
-            information, event timestamps will have a date component of 1900/1/1.
-
-            Raises NotImplementedError here; this method only defines the interface.
-        """
-
-        # an explicit error instead of the NameError that a bare `abstract` name used to trigger
-        raise NotImplementedError("%s does not implement parse_lines" % type(self).__name__)
-
-class IrssiParser (LogParser) :
-    """
-        A parser for irssi logfiles.
-
-        Each line is matched against TYPE_REGEXES in order; the first expression that matches decides the line
-        type, and its named groups supply the timestamp, source, target and data of the resulting LogLine.
-    """
-
-    # timestamp prefix, with trailing space
-    _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'
-
-    # subexpression parts
-    _NICK = r'(?P<nickname>.+?)'
-    _NICK2 = r'(?P<nickname2>.+?)'
-    _TARGET = r'(?P<target>.+?)'
-    _CHAN = r'(?P<channel>.+?)'
-    _CHAN2 = r'(?P<channel2>.+?)'
-    _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
-    _MSG = r'(?P<message>.*)'
-    _SRV1 = r'(?P<server1>.+?)'
-    _SRV2 = r'(?P<server2>.+?)'
-
-    # regular expressions for matching lines, by type
-    # (every fragment is a raw string, so that the regex backslash escapes are not read as string escapes)
-    TYPE_EXPRS = (
-        (   LogTypes.LOG_OPEN,      r'--- Log opened (?P<datetime>.+)'                              ),
-        (   LogTypes.LOG_CLOSE,     r'--- Log closed (?P<datetime>.+)'                              ),
-        (   LogTypes.MSG,           _TS + r'<(?P<flags>.)' + _NICK + r'> ' + _MSG                   ),
-        (   LogTypes.NOTICE,        _TS + r'-' + _NICK + r':' + _CHAN + r'- ' + _MSG                ),
-        (   LogTypes.ACTION,        _TS + r'\* ' + _NICK + r' ' + _MSG                              ),
-        (   LogTypes.JOIN,          _TS + r'-!- ' + _NICK + r' \[' + _USERHOST + r'\] has joined ' + _CHAN                              ),
-        (   LogTypes.PART,          _TS + r'-!- ' + _NICK + r' \[' + _USERHOST + r'\] has left ' + _CHAN + r' \[(?P<message>.*?)\]'     ),
-        (   LogTypes.KICK,          _TS + r'-!- ' + _TARGET + r' was kicked from ' + _CHAN + r' by ' + _NICK + r' \[(?P<message>.*?)\]' ),
-        # XXX: use hostname instead of nickname for ServerMode
-        (   LogTypes.MODE,          _TS + r'-!- (mode|ServerMode)/' + _CHAN + r' \[(?P<mode>.+?)\] by (?P<nickname>\S+)'                ),
-        (   LogTypes.NICK,          _TS + r'-!- ' + _NICK + r' is now known as (?P<target>\S+)'                                         ),
-        (   LogTypes.QUIT,          _TS + r'-!- ' + _NICK + r' \[' + _USERHOST + r'\] has quit \[(?P<message>.*?)\]'                    ),
-        (   LogTypes.TOPIC,         _TS + r'-!- (' + _NICK + r' changed the topic of ' + _CHAN + r' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + r' on ' + _CHAN2 + r')'    ),
-
-        (   LogTypes.SELF_NOTICE,   _TS + r'\[notice\(' + _CHAN + r'\)\] ' + _MSG                   ),
-        (   LogTypes.SELF_NICK,     _TS + r'-!- You\'re now known as (?P<target>\S+)'              ),
-
-        (   LogTypes.NETSPLIT_START,    _TS + r'-!- Netsplit ' + _SRV1 + r' <-> ' + _SRV2 + r' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
-        (   LogTypes.NETSPLIT_END,      _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?'              ),
-
-        # parser-internal type: only updates the current date, never turned into a LogLine
-        (   'DAY_CHANGED',          r'--- Day changed (?P<date>.+)'                                 ),
-    )
-
-    # precompile
-    TYPE_REGEXES = [(line_type, re.compile(expr)) for line_type, expr in TYPE_EXPRS]
-
-    def _parse_line_info (self, channel, line, date, offset=None) :
-        """
-            Parse a single line, and return None for an empty line, or a (date, LogLine) tuple otherwise.
-
-            For a "--- Day changed" line the LogLine is None and the returned date is the new date taken from the
-            line; for every other line the date is returned as given.
-
-            Raises LogParseError if the line matches none of self.TYPE_REGEXES, or if the matched expression has no
-            timestamp group.
-        """
-
-        # empty line
-        if not line :
-            return None
-
-        # test each type in order, first match wins
-        for line_type, regex in self.TYPE_REGEXES :
-            match = regex.match(line)
-
-            if match :
-                break
-
-        else :
-            # no match found
-            raise LogParseError(line, offset, "Line did not match any type")
-
-        # every named group of the matched expression is a key here, with None for groups that did not take part
-        groups = match.groupdict(None)
-
-        # parse timestamp
-        if 'datetime' in groups :
-            # parse datetime using default asctime() format
-            dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')
-
-        elif 'timestamp' in groups :
-            # parse timestamp into naive datetime
-            dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)
-
-            # override date?
-            if date :
-                dt = dt.replace(year=date.year, month=date.month, day=date.day)
-
-        elif 'date' in groups :
-            # parse date-only datetime
-            dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y')
-
-        else :
-            # no timestamp !?
-            raise LogParseError(line, offset, "No timestamp")
-
-        # now localize with timezone
-        dtz = self.tz.localize(dt)
-
-        # day change: hand the new date back to the caller, there is no LogLine to build
-        if line_type == 'DAY_CHANGED' :
-            return dtz, None
-
-        # NOTE: the channel/channel2 groups are matched but not used; the channel argument is used instead
-
-        # source
-        if 'server1' in groups :
-            source = (None, None, groups.get('server1'), None)
-
-        else :
-            source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))
-
-        # target
-        if 'server2' in groups :
-            target = groups.get('server2')
-
-        else :
-            target = groups.get('target')
-
-        # data
-        if 'message' in groups :
-            data = groups['message']
-
-        elif 'mode' in groups :
-            data = groups['mode']
-
-        elif 'topic' in groups :
-            data = groups['topic']
-
-        elif 'nick_list' in groups :
-            # split into components
-            nicks = groups['nick_list'].split(', ')
-
-            # additional count?
-            if groups.get('count') :
-                nicks.append('+%d' % int(groups['count']))
-
-            # join
-            data = ' '.join(nicks)
-
-        else :
-            data = None
-
-        # build+return (date, LogLine)
-        return date, LogLine(channel, offset, line_type, dtz, source, target, data)
-
-    def parse_line (self, channel, line, date, offset=None) :
-        """
-            Parse a single line, and return the resulting (date, LogLine) tuple, or None, to ignore the line.
-
-            Empty lines and "--- Day changed" lines are ignored.
-
-            Uses self.TYPE_REGEXES to do the matching, and raises LogParseError if nothing matches.
-        """
-
-        line_info = self._parse_line_info(channel, line, date, offset)
-
-        # empty line, or a day change that carries no LogLine
-        if line_info is None or line_info[1] is None :
-            return None
-
-        return line_info
-
-    def parse_lines (self, channel, lines, date=None, starting_offset=None) :
-        """
-            Parse the given lines, yielding a LogLine for every line that is not ignored.
-
-            If starting_offset is not None, each LogLine gets starting_offset plus the index of its line as offset,
-            otherwise the offset is None.
-
-            A "--- Day changed" line replaces the date used for the lines that follow it.
-
-            Any error other than LogParseError is re-raised as a LogParseError.
-        """
-
-        for index, line in enumerate(lines) :
-            # offset? zero is a valid starting offset, only None disables it
-            if starting_offset is not None :
-                offset = starting_offset + index
-
-            else :
-                offset = None
-
-            # try and parse
-            try :
-                # get None or (date, LogLine-or-None)
-                line_info = self._parse_line_info(channel, line, date, offset)
-
-            # passthrough LogParseError's
-            except LogParseError :
-                raise
-
-            # wrap other errors as LogParseError
-            except Exception as e :
-                raise LogParseError(line, offset, "Parsing line failed: %s" % e)
-
-            # nothing?
-            if line_info is None :
-                continue
-
-            # unpack, update date
-            date, log_line = line_info
-
-            # day changes only update the date
-            if log_line is not None :
-                yield log_line
-
-