make IrssiParser a bit more resilient as to whitespace in/after timestamps
authorTero Marttila <terom@fixme.fi>
Wed, 11 Feb 2009 22:01:53 +0200
changeset 110 37e67ec434f3
parent 109 ca82d0fee336
child 111 95c0c49d76aa
make IrssiParser a bit more resilient as to whitespace in/after timestamps
log_parser.py
--- a/log_parser.py	Wed Feb 11 21:55:10 2009 +0200
+++ b/log_parser.py	Wed Feb 11 22:01:53 2009 +0200
@@ -46,9 +46,11 @@
     """
         A parser for irssi logfiles
     """
+    
+    # timestamp prefix, with trailing space
+    _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'
 
     # subexpression parts
-    _TS = r'(?P<timestamp>\S+)'
     _NICK = r'(?P<nickname>.+?)'
     _NICK2 = r'(?P<nickname2>.+?)'
     _TARGET = r'(?P<target>.+?)'
@@ -63,23 +65,23 @@
     TYPE_EXPRS = (
         (   LogTypes.LOG_OPEN,      r'--- Log opened (?P<datetime>.+)'                              ),
         (   LogTypes.LOG_CLOSE,     r'--- Log closed (?P<datetime>.+)'                              ),
-        (   LogTypes.MSG,           _TS + r' <(?P<flags>.)' + _NICK + '> ' + _MSG                   ),
-        (   LogTypes.NOTICE,        _TS + r' -' + _NICK + ':' + _CHAN + '- ' + _MSG                 ),
-        (   LogTypes.ACTION,        _TS + r'  \* ' + _NICK + ' ' + _MSG                             ),
-        (   LogTypes.JOIN,          _TS + r' -!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN                               ), 
-        (   LogTypes.PART,          _TS + r' -!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P<message>.*?)\]'       ),
-        (   LogTypes.KICK,          _TS + r' -!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P<message>.*?)\]'   ),
+        (   LogTypes.MSG,           _TS + r'<(?P<flags>.)' + _NICK + '> ' + _MSG                   ),
+        (   LogTypes.NOTICE,        _TS + r'-' + _NICK + ':' + _CHAN + '- ' + _MSG                 ),
+        (   LogTypes.ACTION,        _TS + r'\* ' + _NICK + ' ' + _MSG                             ),
+        (   LogTypes.JOIN,          _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN                               ), 
+        (   LogTypes.PART,          _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P<message>.*?)\]'       ),
+        (   LogTypes.KICK,          _TS + r'-!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P<message>.*?)\]'   ),
         # XXX: use hostname instead of nickname for ServerMode
-        (   LogTypes.MODE,          _TS + r' -!- (mode|ServerMode)/' + _CHAN + ' \[(?P<mode>.+?)\] by (?P<nickname>\S+)'                ),
-        (   LogTypes.NICK,          _TS + r' -!- ' + _NICK + ' is now known as (?P<target>\S+)'                                         ),
-        (   LogTypes.QUIT,          _TS + r' -!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P<message>.*?)\]'                     ),
-        (   LogTypes.TOPIC,         _TS + r' -!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')'    ),
+        (   LogTypes.MODE,          _TS + r'-!- (mode|ServerMode)/' + _CHAN + ' \[(?P<mode>.+?)\] by (?P<nickname>\S+)'                ),
+        (   LogTypes.NICK,          _TS + r'-!- ' + _NICK + ' is now known as (?P<target>\S+)'                                         ),
+        (   LogTypes.QUIT,          _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P<message>.*?)\]'                     ),
+        (   LogTypes.TOPIC,         _TS + r'-!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')'    ),
 
-        (   LogTypes.SELF_NOTICE,   _TS + r' \[notice\(' + _CHAN + '\)\] ' + _MSG                   ),
-        (   LogTypes.SELF_NICK,     _TS + r' -!- You\'re now known as (?P<target>\S+)'              ),
+        (   LogTypes.SELF_NOTICE,   _TS + r'\[notice\(' + _CHAN + '\)\] ' + _MSG                   ),
+        (   LogTypes.SELF_NICK,     _TS + r'-!- You\'re now known as (?P<target>\S+)'              ),
 
-        (   LogTypes.NETSPLIT_START,    _TS + r' -!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
-        (   LogTypes.NETSPLIT_END,      _TS + r' -!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?'              ),
+        (   LogTypes.NETSPLIT_START,    _TS + r'-!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
+        (   LogTypes.NETSPLIT_END,      _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?'              ),
 
         (   'DAY_CHANGED',          r'--- Day changed (?P<date>.+)'                                 ),
     )