improve search further
author Tero Marttila <terom@fixme.fi>
Mon, 09 Feb 2009 11:46:17 +0200
changeset 65 8b50694f841e
parent 64 cdb6403c2498
child 66 090ed78ec8fa
improve search further
handlers.py
log_channel.py
log_formatter.py
log_parser.py
log_search.py
log_source.py
templates/channel.tmpl
templates/channel_date.tmpl
templates/channel_search.tmpl
tools/search.py
--- a/handlers.py	Mon Feb 09 11:05:53 2009 +0200
+++ b/handlers.py	Mon Feb 09 11:46:17 2009 +0200
@@ -17,6 +17,11 @@
     channel_list    = channels.channel_list,
 )
 
+# our LogSearch thing
+# XXX: move elsewhere
+import log_search
+search_index = log_search.LogSearchIndex("logs/index", 'r')
+
 def index (request) :
     """
         The topmost index page, display a list of available channels, perhaps some general stats
@@ -158,10 +163,10 @@
     # got a search query?
     if q :
         # do search
-        lines = channel.source.get_search(q)
+        lines = search_index.search_simple(channel, q)
 
         # format
-        lines = formatter.format_html(lines)
+        lines = formatter.format_html(lines, full_timestamps=True)
 
     else :
         lines = None
@@ -171,7 +176,7 @@
         req             = request,
         prefs           = request.prefs,
         channel         = channel,
-        query           = q,
+        search_query    = q,
         lines           = lines,
     )
 
--- a/log_channel.py	Mon Feb 09 11:05:53 2009 +0200
+++ b/log_channel.py	Mon Feb 09 11:46:17 2009 +0200
@@ -2,6 +2,8 @@
     A channel represents a series of log events, stored in some log source
 """
 
+import log_search
+
 class LogChannel (object) :
     """
         A single IRC channel, logged to some specific place
@@ -24,4 +26,11 @@
         """
 
         return "%s - %s" % (self.network, self.name)
+    
+    def search (self, query) :
+        """
+            Perform a search on this channel, returning a sequence of LogLines. NOTE(review): relies on a module-level log_search.index, which the visible log_search.py hunks do not define - confirm
+        """
 
+        return log_search.index.search_simple(self, query)
+
--- a/log_formatter.py	Mon Feb 09 11:05:53 2009 +0200
+++ b/log_formatter.py	Mon Feb 09 11:46:17 2009 +0200
@@ -9,7 +9,7 @@
 
 class LogFormatter (object) :
     """
-        Provides a method to format series of LogLines into various output formats, with varying themes
+        Provides a method to format series of LogLines into various output formats, with varying themes.
     """
 
     # the formatter's code name
@@ -23,16 +23,27 @@
         self.tz = tz
         self.timestamp_fmt = timestamp_fmt
     
-    def _format_line_text (self, line, template_dict) :
+    def _format_line_text (self, line, template_dict, full_timestamp=False) :
         """
             Format the given line as text, using the given { type: string template } dict
         """
-        
+            
         # look up the template
         template = template_dict[line.type]
+        
+        # convert timestamp into display timezone
+        dtz = line.timestamp.astimezone(self.tz)
+        
+        # full timestamps?
+        if full_timestamp :
+            # XXX: ugly
+            timestamp_fmt = '%Y-%m-%d ' + self.timestamp_fmt
+
+        else :
+            timestamp_fmt = self.timestamp_fmt
 
         # build timestamp
-        timestamp = line.timestamp.astimezone(self.tz).strftime(self.timestamp_fmt)
+        timestamp = dtz.strftime(timestamp_fmt)
         
         # format with dict
         return template % dict(
@@ -41,16 +52,22 @@
             data            = line.data,
         )
     
-    def format_txt (self, lines) :
+    def format_txt (self, lines, full_timestamps=False) :
         """
-            Format as plaintext
+            Format given lines as plaintext.
+
+            If full_timestamps is given, the output will contain full timestamps with both date and time.
+
+            No trailing newlines.
         """
 
         abstract
 
-    def format_html (self, lines) :
+    def format_html (self, lines, full_timestamps=False) :
         """
-            Format as HTML
+            Format as HTML.
+            
+            See format_txt for information about arguments
         """
 
         abstract
@@ -65,11 +82,11 @@
         LogTypes.RAW        : "%(timestamp)s %(data)s",
     }
 
-    def format_txt (self, lines) :
+    def format_txt (self, lines, full_timestamps=False) :
         # ...handle each line
         for line in lines :
             # using __TYPES
-            yield self._format_line_text(line, self.__FMT)
+            yield self._format_line_text(line, self.__FMT, full_timestamps)
 
 class IrssiFormatter (IrssiTextFormatter) :
     """
@@ -79,7 +96,7 @@
     name = 'irssi'
     title = "Irssi (plain)"
 
-    def format_html (self, lines) :
+    def format_html (self, lines, full_timestamps=False) :
         """
             Just uses format_txt, but wraps in <pre></pre>
         """
@@ -88,7 +105,7 @@
         yield "<pre>"
         
         # format using IrssiTextFormatter
-        for line in self.format_txt(lines) :
+        for line in self.format_txt(lines, full_timestamps) :
             # escape HTML
             yield cgi.escape(line)
 
--- a/log_parser.py	Mon Feb 09 11:05:53 2009 +0200
+++ b/log_parser.py	Mon Feb 09 11:46:17 2009 +0200
@@ -42,7 +42,7 @@
         """
             Parse the given lines, yielding LogEvents. 
         """
-        
+
         for offset, line in enumerate(lines) :
             # status lines
             if line.startswith('---') :
@@ -65,7 +65,7 @@
                 dtz = self.tz.localize(dt)
 
                 # offset?
-                if offset :
+                if starting_offset :
                     offset = starting_offset + offset
 
                 else :
--- a/log_search.py	Mon Feb 09 11:05:53 2009 +0200
+++ b/log_search.py	Mon Feb 09 11:46:17 2009 +0200
@@ -37,7 +37,7 @@
         mode_to_flag = {
             'r':    hype.Database.DBREADER,
             'w':    hype.Database.DBREADER | hype.Database.DBWRITER | hype.Database.DBCREAT,
-            'a':    hype.Database.DBREADER | hype.Database.DBWRITER,
+            'a':    hype.Database.DBREADER | hype.Database.DBWRITER | hype.Database.DBCREAT,
             '*':    hype.Database.DBREADER | hype.Database.DBWRITER | hype.Database.DBCREAT | hype.Database.DBTRUNC,
         }
 
@@ -49,7 +49,7 @@
         
         # open
         if not self.db.open(path, flags) :
-            raise Exception("Index open failed: %s" % (path, ))
+            raise Exception("Index open failed: %s, mode=%s, flags=%#06x: %s" % (path, mode, flags, self.db.err_msg(self.db.error())))
 
     def insert (self, channel, lines) :
         """
@@ -120,7 +120,7 @@
             type        = int(doc.attr('@type'))
             timestamp   = datetime.datetime.fromtimestamp(int(doc.attr('@timestamp')), pytz.utc)
             source      = doc.attr('@source')
-            data        = doc.cat_texts()
+            data        = doc.cat_texts().decode('utf8')
 
             # build+yield to (channel_id, LogLine) tuple
             yield (channel_id, log_line.LogLine(None, type, timestamp, source, data))
@@ -151,76 +151,3 @@
 
             yield line
 
-def cmd_load (options, channel_name, date) :
-    """
-        Loads the logs for a specific channel/date into the index
-    """
-
-    import channels
-    
-    # open the LogSearchIndex
-    index = LogSearchIndex(options.index_path, '*' if options.create_index else 'a')
-
-    # open the channel
-    channel = channels.channel_list.lookup(channel_name)
-
-    # parse date
-    date = datetime.datetime.strptime(date, '%Y-%m-%d').replace(tzinfo=channel.source.tz)
-
-    # load lines for date
-    lines = channel.source.get_date(date)
-
-    # insert
-    index.insert(channel, lines)
-
-def cmd_search (options, channel_name, query) :
-    """
-        Search the index for events on a specific channel with the given query
-    """
-
-    import channels
-    
-    # open the LogSearchIndex
-    index = LogSearchIndex(options.index_path, '*' if options.create_index else 'a')
-
-    # open the channel
-    channel = channels.channel_list.lookup(channel_name)
-    
-    # search
-    lines = index.search_simple(channel, query)
-    
-    # display as plaintext
-    for line in options.formatter.format_txt(lines) :
-        print line
-
-if __name__ == '__main__' :
-    from optparse import OptionParser
-    import log_formatter
-    
-    # define parser
-    parser = OptionParser(
-        usage           = "%prog [options] <command> [ ... ]",
-        add_help_option = True,
-    )
-
-    # define command-line arguments
-    parser.add_option("-I", "--index", dest="index_path", help="Index database path", metavar="PATH", default="logs/index")
-    parser.add_option("--create", dest="create_index", help="Create index database", default=False)
-    parser.add_option("-f", "--formatter", dest="formatter_name", help="LogFormatter to use", default="irssi")
-    parser.add_option("-z", "--timezone", dest="tz_name", help="Timezone for output", metavar="TZ", default="UTC")
-
-    # parse
-    options, args = parser.parse_args()
-
-    # postprocess stuff
-    options.tz = pytz.timezone(options.tz_name)
-    options.formatter = log_formatter.by_name(options.formatter_name)(options.tz)
-    
-    # pop command
-    command = args.pop(0)
-
-    # inspect
-    func = globals()['cmd_%s' % command]
-    
-    # call
-    func(options, *args)
--- a/log_source.py	Mon Feb 09 11:05:53 2009 +0200
+++ b/log_source.py	Mon Feb 09 11:46:17 2009 +0200
@@ -32,13 +32,6 @@
 
         abstract
     
-    def get_search (self, query) :
-        """
-            Search the logs for the given query
-        """
-
-        abstract
-
 class LogFile (object) :
     """
         A file containing LogEvents
@@ -72,7 +65,7 @@
         self.file.seek(0)
 
         # iterate over lines, decoding them as well
-        return (line.decode(self.charset) for line in self.file)
+        return (line.decode(self.charset).rstrip(self.sep) for line in self.file)
     
     def read_full (self) :
         """
@@ -413,18 +406,3 @@
         # return set
         return days
 
-    def get_search (self, query) :
-        """
-            Just inspect the latest logfile
-        """
-
-        # one logfile
-        logfile = self._iter_logfile_reverse().next()
-
-        # inspect each line
-        for line in logfile.read_full() :
-            # XXX: use proper LogQuery stuff
-            if query in line.data :
-                yield line
-        
-
--- a/templates/channel.tmpl	Mon Feb 09 11:05:53 2009 +0200
+++ b/templates/channel.tmpl	Mon Feb 09 11:46:17 2009 +0200
@@ -38,7 +38,7 @@
         <a href="${h.build_url(urls.channel_search, channel=channel)}">Search:</a>
     </li><li class="join-left">
         <form action="${urls.channel_search.build(req, channel=channel)}" method="GET">
-            <input name="q"></input>
+            <input name="q" value="${search_query if search_query else ''}"></input>
             <input type="submit" value="Go &raquo;" />
         </form>
     </li>
--- a/templates/channel_date.tmpl	Mon Feb 09 11:05:53 2009 +0200
+++ b/templates/channel_date.tmpl	Mon Feb 09 11:46:17 2009 +0200
@@ -3,7 +3,7 @@
 <div id="title">${channel.title} :: Logs for ${h.fmt_date(date)}</div>
 
 % for line in lines :
-${line}\
+${line}
 % endfor
 
 
--- a/templates/channel_search.tmpl	Mon Feb 09 11:05:53 2009 +0200
+++ b/templates/channel_search.tmpl	Mon Feb 09 11:46:17 2009 +0200
@@ -1,6 +1,6 @@
 <%inherit file="channel.tmpl" />
 
-% if not query :
+% if not search_query :
 <div id="title">${channel.title} :: Search</div>
 
 <form action="${h.build_url(urls.channel_search, channel=channel)}" method="GET">
@@ -13,9 +13,9 @@
 <p>Search for something.</p>
 
 % else :
-<div id="title">${channel.title} :: Search '${query}'</div>
+<div id="title">${channel.title} :: Search '${search_query}'</div>
 
 % for line in lines:
-${line}\
+${line}
 % endfor
 % endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/search.py	Mon Feb 09 11:46:17 2009 +0200
@@ -0,0 +1,90 @@
+"""
+    Tool for accessing the search index
+"""
+
+import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
+
+import datetime, pytz
+
+import log_search
+
+def cmd_load (options, channel_name, *dates) :
+    """
+        Loads the logs for a specific channel for the given dates (each given as YYYY-MM-DD) into the index
+    """
+
+    import channels
+    
+    # open the LogSearchIndex for writing: '*' (--create) creates/truncates it, 'a' keeps the existing contents
+    index = log_search.LogSearchIndex(options.index_path, '*' if options.create_index else 'a')
+
+    # open the channel
+    channel = channels.channel_list.lookup(channel_name)
+    
+    for date_name in dates :
+        # parse date; assumes source.tz is a pytz timezone (as in log_parser), which needs localize() - replace(tzinfo=...) would use its LMT offset
+        date = channel.source.tz.localize(datetime.datetime.strptime(date_name, '%Y-%m-%d'))
+
+        print "%s..." % (date, )
+
+        # load lines for date
+        lines = channel.source.get_date(date)
+
+        # insert
+        index.insert(channel, lines)
+
+def cmd_search (options, channel_name, query) :
+    """
+        Search the index for events on a specific channel with the given query, printing the matches as plaintext
+    """
+
+    import channels
+
+    assert not options.create_index
+    
+    # open the LogSearchIndex read-only
+    index = log_search.LogSearchIndex(options.index_path, 'r')
+
+    # open the channel
+    channel = channels.channel_list.lookup(channel_name)
+    
+    # search
+    lines = index.search_simple(channel, query)
+    
+    # display as plaintext, with full date+time timestamps, as the matches are not limited to a single day
+    for line in options.formatter.format_txt(lines, full_timestamps=True) :
+        print line
+
+if __name__ == '__main__' :
+    from optparse import OptionParser
+    import log_formatter
+    
+    # define parser
+    parser = OptionParser(
+        usage           = "%prog [options] <command> [ ... ]",
+        add_help_option = True,
+    )
+
+    # define command-line arguments; --create is a plain flag, hence store_true (the default action would consume a value)
+    parser.add_option("-I", "--index", dest="index_path", help="Index database path", metavar="PATH", default="logs/index")
+    parser.add_option("--create", dest="create_index", action="store_true", help="Create index database", default=False)
+    parser.add_option("-f", "--formatter", dest="formatter_name", help="LogFormatter to use", default="irssi")
+    parser.add_option("-z", "--timezone", dest="tz_name", help="Timezone for output", metavar="TZ", default="UTC")
+
+    # parse
+    options, args = parser.parse_args()
+
+    # postprocess stuff
+    options.tz = pytz.timezone(options.tz_name)
+    options.formatter = log_formatter.by_name(options.formatter_name)(options.tz)
+    
+    # a command name is required; report a usage error instead of failing with an IndexError
+    if not args :
+        parser.error("no command given")
+
+    # look up the cmd_<command> function and call it with the remaining arguments
+    func = globals()['cmd_%s' % args.pop(0)]
+    
+    func(options, *args)
+
+