diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/lex.c --- a/src/lib/lex.c Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/lex.c Thu Oct 09 00:33:37 2008 +0300 @@ -1,7 +1,181 @@ + +#include #include "lex.h" +#include "error.h" +#include "log.h" + +#define INITIAL_BUF_SIZE 4096 int lexer (const struct lex *lex, const char *input, void *arg) { - // XXX: implement + // handling error returns + int err = -1, cb_err; + + // token buffer + char *buf = NULL, *buf_ptr; + size_t buf_size = INITIAL_BUF_SIZE; + + // state + int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL; + + // input chars + const char *c = input; + + // lookups + const struct lex_transition *trans = NULL; + + // allocate the buffer + if ((buf = malloc(sizeof(char) * buf_size)) == NULL) + goto error; + + // set buf_ptr initial position + buf_ptr = buf; + + // clear input + DEBUG("*cough*"); + DEBUGN("%s", ""); + + // process input + do { + if (*c) { + // look up the next state + for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) { + // accept defaults + if (trans->flags & LEX_TRANS_DEFAULT) + break; + + // disregard non-matches + if (trans->left > *c || *c > trans->right) + continue; + + // abort on invalids + if (trans->flags & LEX_TRANS_INVALID) + goto error; + + else { + // accept it + break; + } + } + + // did we find a transition with a valid next state? + if (!(next_state = trans->next_state)) + goto error; + + // call the char handler + if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg))) + goto error; + + } else { + // EOF! + next_state = LEX_EOF; + + // is cur_state a valid end state? + if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) + goto error; + + // note: we don't pass the NUL byte to the char handler + } + + // if this char is part of the next token... + if (next_state != cur_state) { + // terminate the buffer and reset buf_ptr + *buf_ptr = 0; buf_ptr = buf; + + // dump state transitions + DEBUGF("\n\t%25s -> %25s -> %25s", + LEX_STATE_NAME(lex, prev_state), + LEX_STATE_NAME(lex, cur_state), + LEX_STATE_NAME(lex, next_state) + ); + + // pass in the complete token to the handler + if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg))) + goto error; + + // update states + prev_state = cur_state; + cur_state = next_state; + next_state = LEX_INITIAL; + } + + // dump chars + if (next_state == LEX_INITIAL) + DEBUGN("%c", *c); + else + DEBUGNF("%c", *c); + + // store this char in the buffer + *(buf_ptr++) = *c; + + // grow the buffer if needed + if (buf_ptr - buf >= buf_size) { + // remember the offset, as buf_ptr might get invalidated if buf is moved + size_t buf_offset = buf_ptr - buf; + + // calc new size + buf_size *= 2; + + // grow/move + if ((buf = realloc(buf, buf_size)) == NULL) + goto error; + + // fix buf_ptr + buf_ptr = buf + buf_offset; + } + } while (*(c++)); + + // call the end handler + if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg))) + goto error; + + // successfully parsed! + err = 0; + +error: + DEBUGNF("\n"); + + if (cb_err) + err = cb_err; + + // dump debug info on error + if (err) { + const char *cc; + + // figure out the error + if (!buf) + WARNING("malloc/realloc"); + + else if (trans && trans->flags & LEX_TRANS_INVALID) + WARNING("hit invalid transition match"); + + else if (!next_state) + WARNING("no valid transition found"); + + else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) + WARNING("invalid end state"); + + else + WARNING("unknown error condition (!?)"); + + DEBUG("%s", input); + DEBUGN("%s", ""); + + for (cc = input; cc < c; cc++) + DEBUGNF(" "); + + DEBUGF("^\t%s -> %s -> %s", + LEX_STATE_NAME(lex, prev_state), + LEX_STATE_NAME(lex, cur_state), + LEX_STATE_NAME(lex, next_state) + ); + } + + // free stuff + free(buf); + + // return + return err; } +