log_search.py
changeset 89 2dc6de43f317
parent 87 39915772f090
child 93 48fca00689e3
equal deleted inserted replaced
88:0b8e2ba5f76f 89:2dc6de43f317
     5 import datetime, calendar, pytz
     5 import datetime, calendar, pytz
     6 import os.path
     6 import os.path
     7 
     7 
     8 import HyperEstraier as hype
     8 import HyperEstraier as hype
     9 
     9 
    10 import log_line
    10 import log_line, utils
    11 
    11 
    12 class LogSearchError (Exception) :
    12 class LogSearchError (Exception) :
    13     """
    13     """
    14         General search error
    14         General search error
    15     """
    15     """
    28         An index on the logs for a group of channels.
    28         An index on the logs for a group of channels.
    29 
    29 
    30         This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
    30         This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
    31 
    31 
    32         These log documents have the following attributes:
    32         These log documents have the following attributes:
    33             @uri            - channel/date/line
    33             @uri                - channel/date/line
    34             channel         - channel code
    34             channel             - channel code
    35             type            - the LogType id
    35             type                - the LogType id
    36             timestamp       - UTC timestamp
    36             timestamp           - UTC timestamp
    37             source_nickname - source nickname
    37             source_nickname     - source nickname
    38 
    38             source_username     - source username
    39         Each document then has a single line of data, which is the log message itself
    39             source_hostname     - source hostname
       
    40             source_chanflags    - source channel flags
       
    41             target_nickname     - target nickname
       
    42 
       
    43         Each document then has a single line of data, which is the log data message
    40     """
    44     """
    41 
    45 
    42     def __init__ (self, channels, path, mode='r') :
    46     def __init__ (self, channels, path, mode='r') :
    43         """
    47         """
    44             Open the database at the given path, with the given mode:
    48             Open the database at the given path, with the given mode:
    99             doc = hype.Document()
   103             doc = hype.Document()
   100 
   104 
   101             # line date
   105             # line date
   102             date = line.timestamp.date()
   106             date = line.timestamp.date()
   103 
   107 
   104             # convert to UTC timestamp
       
   105             utc_timestamp = calendar.timegm(line.timestamp.utctimetuple())
       
   106 
       
   107             # ensure that it's not 1900
   108             # ensure that it's not 1900
   108             assert date.year != 1900
   109             assert date.year != 1900
   109 
   110 
   110             # add URI
   111             # add URI
   111             doc.add_attr('@uri',        "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
   112             doc.add_attr('@uri',        "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
   115 
   116 
   116             # add type
   117             # add type
   117             doc.add_attr('type',        str(line.type))
   118             doc.add_attr('type',        str(line.type))
   118 
   119 
   119             # add UTC timestamp
   120             # add UTC timestamp
   120             doc.add_attr('timestamp',   str(utc_timestamp))
   121             doc.add_attr('timestamp',   str(utils.to_utc_timestamp(line.timestamp)))
   121 
   122 
   122             # add source attribute?
   123             # add source attribute?
   123             if line.source :
   124             if line.source :
   124                 source_nickname, source_username, source_hostname, source_chanflags = line.source
   125                 source_nickname, source_username, source_hostname, source_chanflags = line.source
   125 
   126 
   126                 # XXX: handle source_nickname is None
   127                 if source_nickname :
   127                 if not source_nickname is None :
   128                     doc.add_attr('source_nickname', source_nickname.encode('utf8'))
   128                     source_nickname = str(source_nickname)
   129                 
   129 
   130                 if source_username :
   130                 doc.add_attr('source_nickname', source_nickname)
   131                     doc.add_attr('source_username', source_username.encode('utf8'))
   131             
   132 
       
   133                 if source_hostname :
       
   134                     doc.add_attr('source_hostname', source_hostname.encode('utf8'))
       
   135 
       
   136                 if source_chanflags :
       
   137                     doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))
       
   138             
       
   139             # add target attributes?
       
   140             if line.target :
       
   141                 target_nickname = line.target
       
   142 
       
   143                 if target_nickname :
       
   144                     doc.add_attr('target_nickname', target_nickname.encode('utf8'))
       
   145 
   132             # add data
   146             # add data
   133             if line.data :
   147             if line.data :
   134                 doc.add_text(line.data.encode('utf8'))
   148                 doc.add_text(line.data.encode('utf8'))
   135 
   149 
   136             # put, "clean up dispensable regions of the overwritten document"
   150             # put, "clean up dispensable regions of the overwritten document"
   162             doc = self.db.get_doc(doc_id, 0)
   176             doc = self.db.get_doc(doc_id, 0)
   163 
   177 
   164             # load the attributes/text
   178             # load the attributes/text
   165             channel         = self.channels.lookup(doc.attr('channel'))
   179             channel         = self.channels.lookup(doc.attr('channel'))
   166             type            = int(doc.attr('type'))
   180             type            = int(doc.attr('type'))
   167             timestamp       = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc)
   181             timestamp       = utils.from_utc_timestamp(int(doc.attr('timestamp')))
   168             source_nickname = doc.attr('source_nickname')
   182 
       
   183             # source
       
   184             source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))
       
   185 
       
   186             # target
       
   187             target = doc.attr('target_nickname')
       
   188             
       
   189             # message text
   169             message         = doc.cat_texts().decode('utf8')
   190             message         = doc.cat_texts().decode('utf8')
   170 
   191 
   171             # build+yield to as LogLine
   192             # build+yield to as LogLine
   172             yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message)
   193             yield log_line.LogLine(channel, None, type, timestamp, source, target, message)
   173     
   194     
   174     def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) :
   195     def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
   175         """
   196         """
   176             Search with flexible parameters
   197             Search with flexible parameters
   177 
   198 
   178                 options     - bitmask of hype.Condition.*
   199                 options     - bitmask of hype.Condition.*
   179                 channel     - LogChannel object
   200                 channel     - LogChannel object
       
   201                 attrs       - raw attribute expressions
   180                 phrase      - the search query phrase
   202                 phrase      - the search query phrase
   181                 order       - order attribute expression
   203                 order       - order attribute expression
   182                 max         - number of results to return
   204                 max         - number of results to return
   183                 skip        - number of results to skip
   205                 skip        - number of results to skip
   184         """
   206         """
   190             # set options
   212             # set options
   191             cond.set_options(options)
   213             cond.set_options(options)
   192         
   214         
   193         if channel :
   215         if channel :
   194             # add channel attribute
   216             # add channel attribute
   195             cond.add_attr("@channel STREQ %s" % (channel.id, ))
   217             cond.add_attr("channel STREQ %s" % (channel.id, ))
   196         
   218         
       
   219         if attrs :
       
   220             # add attributes
       
   221             for attr in attrs :
       
   222                 cond.add_attr(attr)
       
   223 
   197         if phrase :
   224         if phrase :
   198             # add phrase
   225             # add phrase
   199             cond.set_phrase(phrase)
   226             cond.set_phrase(phrase)
   200         
   227         
   201         if order :
   228         if order :
   227             channel     = channel,
   254             channel     = channel,
   228 
   255 
   229             # given phrase
   256             # given phrase
   230             phrase      = query,
   257             phrase      = query,
   231 
   258 
   232             # order by timestamp
   259             # order by timestamp, descending (backwards)
   233             order       = "@timestamp NUMD",
   260             order       = "timestamp NUMD",
   234 
   261 
   235             # count/offset
   262             # count/offset
   236             max         = count,
   263             max         = count,
   237             skip        = offset,
   264             skip        = offset,
   238         ))
   265         ))
   239         
   266         
   240         # reverse
   267         # reverse
   241         return reversed(results)
   268         return reversed(results)
   242 
   269 
       
   270     def list (self, channel, date, count=None, skip=None) :
       
   271         """
       
   272             List all indexed log items for the given UTC date
       
   273         """
       
   274 
       
   275         # start/end dates
       
   276         dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
       
   277         dt_end   = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999)
       
   278         
       
   279         # search
       
   280         return self.search(
       
   281             # specific channel
       
   282             channel     = channel,
       
   283 
       
   284             # specific date range
       
   285             attrs       = [
       
   286                 "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
       
   287             ],
       
   288 
       
   289             # order correctly
       
   290             order       = "timestamp NUMA",
       
   291 
       
   292             # max count/offset
       
   293             max         = count,
       
   294             skip        = skip
       
   295         )