src/lib/lex.c
changeset 16 74fb62022fb3
parent 15 a8d183e79ed9
child 18 b12e78767248
equal deleted inserted replaced
15:a8d183e79ed9 16:74fb62022fb3
       
     1 
       
     2 #include <stdlib.h>
     1 
     3 
     2 #include "lex.h"
     4 #include "lex.h"
       
     5 #include "error.h"
       
     6 #include "log.h"
       
     7 
       
     8 #define INITIAL_BUF_SIZE 4096
     3 
     9 
     4 int lexer (const struct lex *lex, const char *input, void *arg) {
    10 int lexer (const struct lex *lex, const char *input, void *arg) {
     5     // XXX: implement
    11     // handling error returns
       
    12     int err = -1, cb_err;
       
    13     
       
    14     // token buffer
       
    15     char *buf = NULL, *buf_ptr;
       
    16     size_t buf_size = INITIAL_BUF_SIZE;
       
    17     
       
    18     // state
       
    19     int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL;
       
    20     
       
    21     // input chars
       
    22     const char *c = input;
       
    23 
       
    24     // lookups
       
    25     const struct lex_transition *trans = NULL;
       
    26 
       
    27     // allocate the buffer
       
    28     if ((buf = malloc(sizeof(char) * buf_size)) == NULL)
       
    29         goto error;
       
    30 
       
    31     // set buf_ptr initial position
       
    32     buf_ptr = buf;
       
    33     
       
    34     // clear input
       
    35     DEBUG("*cough*");
       
    36     DEBUGN("%s", "");
       
    37 
       
    38     // process input
       
    39     do {
       
    40         if (*c) {
       
    41             // look up the next state
       
    42             for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) {
       
    43                 // accept defaults
       
    44                 if (trans->flags & LEX_TRANS_DEFAULT)
       
    45                     break;
       
    46                 
       
    47                 // disregard non-matches
       
    48                 if (trans->left > *c || *c > trans->right)
       
    49                     continue;
       
    50                 
       
    51                 // abort on invalids
       
    52                 if (trans->flags & LEX_TRANS_INVALID)
       
    53                     goto error;
       
    54                 
       
    55                 else {
       
    56                     // accept it
       
    57                     break;
       
    58                 }
       
    59             }
       
    60             
       
    61             // did we find a transition with a valid next state?
       
    62             if (!(next_state = trans->next_state))
       
    63                 goto error;
       
    64 
       
    65             // call the char handler
       
    66             if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg)))
       
    67                 goto error;
       
    68 
       
    69         } else {
       
    70             // EOF!
       
    71             next_state = LEX_EOF;
       
    72             
       
    73             // is cur_state a valid end state?
       
    74             if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
       
    75                 goto error;
       
    76             
       
    77             // note: we don't pass the NUL byte to the char handler
       
    78         }
       
    79 
       
    80         // if this char is part of the next token...
       
    81         if (next_state != cur_state) {
       
    82             // terminate the buffer and reset buf_ptr
       
    83             *buf_ptr = 0; buf_ptr = buf;
       
    84             
       
    85             // dump state transitions
       
    86             DEBUGF("\n\t%25s -> %25s -> %25s",
       
    87                 LEX_STATE_NAME(lex, prev_state),
       
    88                 LEX_STATE_NAME(lex, cur_state),
       
    89                 LEX_STATE_NAME(lex, next_state)
       
    90             );
       
    91 
       
    92             // pass in the complete token to the handler
       
    93             if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg)))
       
    94                 goto error;
       
    95 
       
    96             // update states
       
    97             prev_state = cur_state;
       
    98             cur_state = next_state;
       
    99             next_state = LEX_INITIAL;
       
   100         }
       
   101         
       
   102         // dump chars
       
   103         if (next_state == LEX_INITIAL)
       
   104             DEBUGN("%c", *c);
       
   105         else
       
   106             DEBUGNF("%c", *c);
       
   107         
       
   108         // store this char in the buffer
       
   109         *(buf_ptr++) = *c;
       
   110 
       
   111         // grow the buffer if needed
       
   112         if (buf_ptr - buf >= buf_size) {
       
   113             // remember the offset, as buf_ptr might get invalidated if buf is moved
       
   114             size_t buf_offset = buf_ptr - buf;
       
   115 
       
   116             // calc new size
       
   117             buf_size *= 2;
       
   118             
       
   119             // grow/move
       
   120             if ((buf = realloc(buf, buf_size)) == NULL)
       
   121                 goto error;
       
   122             
       
   123             // fix buf_ptr
       
   124             buf_ptr = buf + buf_offset;
       
   125         }
       
   126     } while (*(c++));
       
   127 
       
   128     // call the end handler
       
   129     if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg)))
       
   130         goto error;
       
   131 
       
   132     // successfully parsed!
       
   133     err = 0;
       
   134 
       
   135 error:
       
   136     DEBUGNF("\n");
       
   137     
       
   138     if (cb_err)
       
   139         err = cb_err;
       
   140 
       
   141     // dump debug info on error
       
   142     if (err) {
       
   143         const char *cc;
       
   144         
       
   145         // figure out the error
       
   146         if (!buf)
       
   147             WARNING("malloc/realloc");
       
   148 
       
   149         else if (trans && trans->flags & LEX_TRANS_INVALID)
       
   150             WARNING("hit invalid transition match");
       
   151 
       
   152         else if (!next_state)
       
   153             WARNING("no valid transition found");
       
   154             
       
   155         else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
       
   156             WARNING("invalid end state");
       
   157         
       
   158         else
       
   159             WARNING("unknown error condition (!?)");
       
   160 
       
   161         DEBUG("%s", input);
       
   162         DEBUGN("%s", "");
       
   163 
       
   164         for (cc = input; cc < c; cc++)
       
   165             DEBUGNF(" ");
       
   166 
       
   167         DEBUGF("^\t%s -> %s -> %s",
       
   168             LEX_STATE_NAME(lex, prev_state),
       
   169             LEX_STATE_NAME(lex, cur_state),
       
   170             LEX_STATE_NAME(lex, next_state)
       
   171         );
       
   172     }
       
   173 
       
   174     // free stuff
       
   175     free(buf);
       
   176 
       
   177     // return
       
   178     return err;
     6 }
   179 }
     7 
   180 
       
   181