add a LogSourceDecoder to fallback from utf-8 to latin-1, and improve scripts/search-index.py
"""
Parse log data into log_events
"""
import datetime
import log_line
from log_line import LogTypes
class LogParser(object):
    """
    Abstract interface for log parsers.

    Holds the timezone and the line-timestamp format; concrete parsers
    implement parse_lines().
    """

    def __init__(self, tz, timestamp_fmt="%H:%M:%S"):
        """
        Setup the parser to use the given format for line timestamps, which are of the given timezone.

        tz            - timezone of the line timestamps, stored as self.tz
        timestamp_fmt - format string for line timestamps, stored as self.timestamp_fmt
        """
        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines(self, lines, date=None, starting_offset=None):
        """
        Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.

        Offset is the starting offset, and may be None to not use it.

        Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
        information, event timestamps will have a date component of 1900/1/1.

        This base implementation always raises NotImplementedError; subclasses must override it.
        """
        # Explicit "abstract method" signal, instead of relying on the NameError
        # produced by evaluating an undefined placeholder name.
        raise NotImplementedError(
            "%s must implement parse_lines()" % type(self).__name__
        )
class IrssiParser(LogParser):
    """
    A parser for irssi logfiles
    """

    def parse_lines(self, lines, date=None, starting_offset=None):
        """
        Parse the given lines, yielding one log_line.LogLine per non-status line.

        lines           - iterable of text lines; lines starting with '---' are skipped
        date            - if given, its year/month/day replace the date component of each parsed timestamp
        starting_offset - if not None, each event's offset is starting_offset plus the line's index
                          within `lines` (skipped status lines still count); if None, offsets are None

        Each line is split at its first space into a timestamp, parsed with self.timestamp_fmt, and the
        remaining data. Lines whose timestamp does not match the format raise ValueError (from strptime).
        """
        for index, line in enumerate(lines):
            # status lines
            if line.startswith('---'):
                # XXX: handle these
                continue

            # normal lines
            # XXX: only parse timestamps for now
            # partition() rather than split(): a line holding only a timestamp
            # yields empty data instead of failing to unpack.
            timestamp, _, data = line.partition(' ')

            # parse timestamp into naive datetime
            dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)

            # override date?
            if date is not None:
                dt = dt.replace(year=date.year, month=date.month, day=date.day)

            # now localize with timezone
            # NOTE(review): requires self.tz to provide localize() - presumably a pytz timezone; confirm
            dtz = self.tz.localize(dt)

            # offset? Compare against None so that a starting offset of 0 is honoured.
            if starting_offset is not None:
                offset = starting_offset + index
            else:
                offset = None

            # yield raw events
            yield log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)