src/lib/lex.c
changeset 16 74fb62022fb3
parent 15 a8d183e79ed9
child 18 b12e78767248
--- a/src/lib/lex.c	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/lex.c	Thu Oct 09 00:33:37 2008 +0300
@@ -1,7 +1,215 @@
+
+#include <stdlib.h>
 
 #include "lex.h"
+#include "error.h"
+#include "log.h"
+
+#define INITIAL_BUF_SIZE 4096
 
+/*
+ * Run the state machine described by `lex` over the NUL-terminated `input`.
+ *
+ * For every input char, the transition list of the current state is searched
+ * for the next state, and lex->char_fn (if set) is called with the char and
+ * both states. Whenever the next state differs from the current one, the chars
+ * collected so far are NUL-terminated and handed to lex->token_fn (if set);
+ * the char that caused the change becomes the first char of the next token.
+ * At the end of the input the next state is LEX_EOF, and the current state
+ * must carry LEX_STATE_END. Finally, lex->end_fn (if set) is called.
+ *
+ * `arg` is passed through to the callbacks as-is.
+ *
+ * Returns zero on success, the non-zero return value of a failing callback,
+ * or -1 on any other error (allocation failure, invalid or missing transition,
+ * invalid end state).
+ */
 int lexer (const struct lex *lex, const char *input, void *arg) {
-    // XXX: implement
+    // return value, and the result of the most recent callback. cb_err starts
+    // out as zero because it is checked on every exit path, including those
+    // that never call a callback
+    int err = -1, cb_err = 0;
+
+    // set when malloc/realloc fails, for the error report below
+    int alloc_failed = 0;
+
+    // token buffer
+    char *buf = NULL, *buf_ptr = NULL;
+    size_t buf_size = INITIAL_BUF_SIZE;
+
+    // state
+    int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL;
+
+    // input chars
+    const char *c = input;
+
+    // lookups
+    const struct lex_transition *trans = NULL;
+
+    // allocate the buffer
+    if ((buf = malloc(buf_size)) == NULL) {
+        alloc_failed = 1;
+        goto error;
+    }
+
+    // set buf_ptr initial position
+    buf_ptr = buf;
+
+    // clear input
+    DEBUG("*cough*");
+    DEBUGN("%s", "");
+
+    // process input; the terminating NUL is processed as well, as EOF
+    do {
+        if (*c) {
+            // look up the next state; the search stops at the first entry
+            // whose next_state is not positive
+            for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) {
+                // accept defaults
+                if (trans->flags & LEX_TRANS_DEFAULT)
+                    break;
+
+                // disregard non-matches
+                if (trans->left > *c || *c > trans->right)
+                    continue;
+
+                // abort on invalids
+                if (trans->flags & LEX_TRANS_INVALID)
+                    goto error;
+
+                // accept it
+                break;
+            }
+
+            // did we find a transition with a valid next state?
+            if (!(next_state = trans->next_state))
+                goto error;
+
+            // call the char handler
+            if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg)))
+                goto error;
+
+        } else {
+            // EOF!
+            next_state = LEX_EOF;
+
+            // is cur_state a valid end state?
+            if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
+                goto error;
+
+            // note: we don't pass the NUL byte to the char handler
+        }
+
+        // if this char is part of the next token...
+        if (next_state != cur_state) {
+            // terminate the buffer and reset buf_ptr
+            *buf_ptr = 0;
+            buf_ptr = buf;
+
+            // dump state transitions
+            DEBUGF("\n\t%25s -> %25s -> %25s",
+                LEX_STATE_NAME(lex, prev_state),
+                LEX_STATE_NAME(lex, cur_state),
+                LEX_STATE_NAME(lex, next_state)
+            );
+
+            // pass in the complete token to the handler
+            if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg)))
+                goto error;
+
+            // update states
+            prev_state = cur_state;
+            cur_state = next_state;
+            next_state = LEX_INITIAL;
+        }
+
+        // dump chars
+        if (next_state == LEX_INITIAL) {
+            DEBUGN("%c", *c);
+        } else {
+            DEBUGNF("%c", *c);
+        }
+
+        // store this char in the buffer
+        *(buf_ptr++) = *c;
+
+        // grow the buffer once it is full, so that there is always room for
+        // the NUL that terminates the token
+        if ((size_t) (buf_ptr - buf) >= buf_size) {
+            // remember the offset, as buf_ptr gets invalidated if buf is moved
+            size_t buf_offset = (size_t) (buf_ptr - buf);
+            size_t new_size = buf_size * 2;
+
+            // grow/move via a temporary, so that the old buffer is still
+            // free'd below if realloc fails
+            char *new_buf = realloc(buf, new_size);
+
+            if (new_buf == NULL) {
+                alloc_failed = 1;
+                goto error;
+            }
+
+            buf = new_buf;
+            buf_size = new_size;
+
+            // fix buf_ptr
+            buf_ptr = buf + buf_offset;
+        }
+    } while (*(c++));
+
+    // call the end handler
+    if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg)))
+        goto error;
+
+    // successfully parsed!
+    err = 0;
+
+error:
+    DEBUGNF("\n");
+
+    // a failing callback decides the return value
+    if (cb_err)
+        err = cb_err;
+
+    // dump debug info on error
+    if (err) {
+        const char *cc;
+
+        // figure out the error
+        if (alloc_failed) {
+            WARNING("malloc/realloc");
+        } else if (cb_err) {
+            WARNING("callback returned an error");
+        } else if (trans && trans->flags & LEX_TRANS_INVALID) {
+            WARNING("hit invalid transition match");
+        } else if (!next_state) {
+            WARNING("no valid transition found");
+        } else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) {
+            WARNING("invalid end state");
+        } else {
+            WARNING("unknown error condition (!?)");
+        }
+
+        // dump the input, then a '^' padded out to the current input position
+        DEBUG("%s", input);
+        DEBUGN("%s", "");
+
+        for (cc = input; cc < c; cc++) {
+            DEBUGNF(" ");
+        }
+
+        DEBUGF("^\t%s -> %s -> %s",
+            LEX_STATE_NAME(lex, prev_state),
+            LEX_STATE_NAME(lex, cur_state),
+            LEX_STATE_NAME(lex, next_state)
+        );
+    }
+
+    // free stuff
+    free(buf);
+
+    // return
+    return err;
 }
 
+