terom@16: terom@16: #include terom@15: terom@15: #include "lex.h" terom@16: #include "error.h" terom@16: #include "log.h" terom@16: terom@16: #define INITIAL_BUF_SIZE 4096 terom@15: terom@15: int lexer (const struct lex *lex, const char *input, void *arg) { terom@16: // handling error returns terom@16: int err = -1, cb_err; terom@16: terom@16: // token buffer terom@16: char *buf = NULL, *buf_ptr; terom@16: size_t buf_size = INITIAL_BUF_SIZE; terom@16: terom@16: // state terom@16: int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL; terom@16: terom@16: // input chars terom@16: const char *c = input; terom@16: terom@16: // lookups terom@16: const struct lex_transition *trans = NULL; terom@16: terom@16: // allocate the buffer terom@16: if ((buf = malloc(sizeof(char) * buf_size)) == NULL) terom@16: goto error; terom@16: terom@16: // set buf_ptr initial position terom@16: buf_ptr = buf; terom@16: terom@16: // clear input terom@16: DEBUG("*cough*"); terom@16: DEBUGN("%s", ""); terom@16: terom@16: // process input terom@16: do { terom@16: if (*c) { terom@16: // look up the next state terom@16: for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) { terom@16: // accept defaults terom@16: if (trans->flags & LEX_TRANS_DEFAULT) terom@16: break; terom@16: terom@16: // disregard non-matches terom@16: if (trans->left > *c || *c > trans->right) terom@16: continue; terom@16: terom@16: // abort on invalids terom@16: if (trans->flags & LEX_TRANS_INVALID) terom@16: goto error; terom@16: terom@16: else { terom@16: // accept it terom@16: break; terom@16: } terom@16: } terom@16: terom@16: // did we find a transition with a valid next state? terom@16: if (!(next_state = trans->next_state)) terom@16: goto error; terom@16: terom@16: // call the char handler terom@16: if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg))) terom@16: goto error; terom@16: terom@16: } else { terom@16: // EOF! terom@16: next_state = LEX_EOF; terom@16: terom@16: // is cur_state a valid end state? terom@16: if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) terom@16: goto error; terom@16: terom@16: // note: we don't pass the NUL byte to the char handler terom@16: } terom@16: terom@16: // if this char is part of the next token... terom@16: if (next_state != cur_state) { terom@16: // terminate the buffer and reset buf_ptr terom@16: *buf_ptr = 0; buf_ptr = buf; terom@16: terom@16: // dump state transitions terom@16: DEBUGF("\n\t%25s -> %25s -> %25s", terom@16: LEX_STATE_NAME(lex, prev_state), terom@16: LEX_STATE_NAME(lex, cur_state), terom@16: LEX_STATE_NAME(lex, next_state) terom@16: ); terom@16: terom@16: // pass in the complete token to the handler terom@16: if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg))) terom@16: goto error; terom@16: terom@16: // update states terom@16: prev_state = cur_state; terom@16: cur_state = next_state; terom@16: next_state = LEX_INITIAL; terom@16: } terom@16: terom@16: // dump chars terom@16: if (next_state == LEX_INITIAL) terom@16: DEBUGN("%c", *c); terom@16: else terom@16: DEBUGNF("%c", *c); terom@16: terom@16: // store this char in the buffer terom@16: *(buf_ptr++) = *c; terom@16: terom@16: // grow the buffer if needed terom@16: if (buf_ptr - buf >= buf_size) { terom@16: // remember the offset, as buf_ptr might get invalidated if buf is moved terom@16: size_t buf_offset = buf_ptr - buf; terom@16: terom@16: // calc new size terom@16: buf_size *= 2; terom@16: terom@16: // grow/move terom@16: if ((buf = realloc(buf, buf_size)) == NULL) terom@16: goto error; terom@16: terom@16: // fix buf_ptr terom@16: buf_ptr = buf + buf_offset; terom@16: } terom@16: } while (*(c++)); terom@16: terom@16: // call the end handler terom@16: if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg))) terom@16: goto error; terom@16: terom@16: // successfully parsed! terom@16: err = 0; terom@16: terom@16: error: terom@16: DEBUGNF("\n"); terom@16: terom@16: if (cb_err) terom@16: err = cb_err; terom@16: terom@16: // dump debug info on error terom@16: if (err) { terom@16: const char *cc; terom@16: terom@16: // figure out the error terom@16: if (!buf) terom@16: WARNING("malloc/realloc"); terom@16: terom@16: else if (trans && trans->flags & LEX_TRANS_INVALID) terom@16: WARNING("hit invalid transition match"); terom@16: terom@16: else if (!next_state) terom@16: WARNING("no valid transition found"); terom@16: terom@16: else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) terom@16: WARNING("invalid end state"); terom@16: terom@16: else terom@16: WARNING("unknown error condition (!?)"); terom@16: terom@16: DEBUG("%s", input); terom@16: DEBUGN("%s", ""); terom@16: terom@16: for (cc = input; cc < c; cc++) terom@16: DEBUGNF(" "); terom@16: terom@16: DEBUGF("^\t%s -> %s -> %s", terom@16: LEX_STATE_NAME(lex, prev_state), terom@16: LEX_STATE_NAME(lex, cur_state), terom@16: LEX_STATE_NAME(lex, next_state) terom@16: ); terom@16: } terom@16: terom@16: // free stuff terom@16: free(buf); terom@16: terom@16: // return terom@16: return err; terom@15: } terom@15: terom@16: