log_parser.py
changeset 50 f13cf27a360b
child 64 cdb6403c2498
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/log_parser.py	Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,67 @@
+"""
+    Parse log data into log_events
+"""
+
+import datetime
+
+import log_line
+from log_line import LogTypes
+
+class LogParser (object) :
+    """
+        Abstract interface for log parsers; concrete parsers subclass this and override parse_lines
+    """
+
+    def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
+        """
+            Setup the parser to use the given strptime format for line timestamps, which are of the given timezone
+        """
+
+        self.tz = tz
+        self.timestamp_fmt = timestamp_fmt
+
+    def parse_lines (self, lines, date=None) :
+        """
+            Parse the given (iterable) lines of unicode text (no trailing newline) into log events.
+            Must be overridden by subclasses; this base implementation always raises NotImplementedError.
+            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
+            information, event timestamps will have a date component of 1900/1/1.
+        """
+
+        raise NotImplementedError("%s does not implement parse_lines" % type(self).__name__)
+
+
+class IrssiParser (LogParser) :
+    """
+        A parser for irssi logfiles
+    """
+
+    def parse_lines (self, lines, date=None) :
+        """
+            Parse the given lines, yielding one RAW LogLine (tz.localize()'d timestamp + remaining text) per normal line.
+            Lines starting with '---' are skipped; a line whose timestamp does not match raises ValueError (strptime).
+        """
+
+        for line in lines :
+            # status lines
+            if line.startswith('---') :
+                # XXX: handle these
+                continue
+
+            # normal lines (XXX: only parse timestamps for now)
+            # partition() rather than split(): a timestamp-only line gives empty data instead of an unpack error
+            timestamp, _, data = line.partition(' ')
+
+            # parse timestamp into naive datetime (date component is 1900/1/1 unless the format carries a date)
+            dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)
+
+            # override date? test against None explicitly rather than relying on the truthiness of date
+            if date is not None :
+                dt = dt.replace(year=date.year, month=date.month, day=date.day)
+
+            # now localize with timezone
+            dtz = self.tz.localize(dt)
+
+            # yield raw events
+            yield log_line.LogLine(LogTypes.RAW, dtz, None, data)
+