|
1 |
|
2 #include <stdlib.h> |
1 |
3 |
2 #include "lex.h" |
4 #include "lex.h" |
|
5 #include "error.h" |
|
6 #include "log.h" |
|
7 |
|
8 #define INITIAL_BUF_SIZE 4096 |
3 |
9 |
4 int lexer (const struct lex *lex, const char *input, void *arg) { |
10 int lexer (const struct lex *lex, const char *input, void *arg) { |
5 // XXX: implement |
11 // handling error returns |
|
12 int err = -1, cb_err; |
|
13 |
|
14 // token buffer |
|
15 char *buf = NULL, *buf_ptr; |
|
16 size_t buf_size = INITIAL_BUF_SIZE; |
|
17 |
|
18 // state |
|
19 int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL; |
|
20 |
|
21 // input chars |
|
22 const char *c = input; |
|
23 |
|
24 // lookups |
|
25 const struct lex_transition *trans = NULL; |
|
26 |
|
27 // allocate the buffer |
|
28 if ((buf = malloc(sizeof(char) * buf_size)) == NULL) |
|
29 goto error; |
|
30 |
|
31 // set buf_ptr initial position |
|
32 buf_ptr = buf; |
|
33 |
|
34 // clear input |
|
35 DEBUG("*cough*"); |
|
36 DEBUGN("%s", ""); |
|
37 |
|
38 // process input |
|
39 do { |
|
40 if (*c) { |
|
41 // look up the next state |
|
42 for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) { |
|
43 // accept defaults |
|
44 if (trans->flags & LEX_TRANS_DEFAULT) |
|
45 break; |
|
46 |
|
47 // disregard non-matches |
|
48 if (trans->left > *c || *c > trans->right) |
|
49 continue; |
|
50 |
|
51 // abort on invalids |
|
52 if (trans->flags & LEX_TRANS_INVALID) |
|
53 goto error; |
|
54 |
|
55 else { |
|
56 // accept it |
|
57 break; |
|
58 } |
|
59 } |
|
60 |
|
61 // did we find a transition with a valid next state? |
|
62 if (!(next_state = trans->next_state)) |
|
63 goto error; |
|
64 |
|
65 // call the char handler |
|
66 if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg))) |
|
67 goto error; |
|
68 |
|
69 } else { |
|
70 // EOF! |
|
71 next_state = LEX_EOF; |
|
72 |
|
73 // is cur_state a valid end state? |
|
74 if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) |
|
75 goto error; |
|
76 |
|
77 // note: we don't pass the NUL byte to the char handler |
|
78 } |
|
79 |
|
80 // if this char is part of the next token... |
|
81 if (next_state != cur_state) { |
|
82 // terminate the buffer and reset buf_ptr |
|
83 *buf_ptr = 0; buf_ptr = buf; |
|
84 |
|
85 // dump state transitions |
|
86 DEBUGF("\n\t%25s -> %25s -> %25s", |
|
87 LEX_STATE_NAME(lex, prev_state), |
|
88 LEX_STATE_NAME(lex, cur_state), |
|
89 LEX_STATE_NAME(lex, next_state) |
|
90 ); |
|
91 |
|
92 // pass in the complete token to the handler |
|
93 if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg))) |
|
94 goto error; |
|
95 |
|
96 // update states |
|
97 prev_state = cur_state; |
|
98 cur_state = next_state; |
|
99 next_state = LEX_INITIAL; |
|
100 } |
|
101 |
|
102 // dump chars |
|
103 if (next_state == LEX_INITIAL) |
|
104 DEBUGN("%c", *c); |
|
105 else |
|
106 DEBUGNF("%c", *c); |
|
107 |
|
108 // store this char in the buffer |
|
109 *(buf_ptr++) = *c; |
|
110 |
|
111 // grow the buffer if needed |
|
112 if (buf_ptr - buf >= buf_size) { |
|
113 // remember the offset, as buf_ptr might get invalidated if buf is moved |
|
114 size_t buf_offset = buf_ptr - buf; |
|
115 |
|
116 // calc new size |
|
117 buf_size *= 2; |
|
118 |
|
119 // grow/move |
|
120 if ((buf = realloc(buf, buf_size)) == NULL) |
|
121 goto error; |
|
122 |
|
123 // fix buf_ptr |
|
124 buf_ptr = buf + buf_offset; |
|
125 } |
|
126 } while (*(c++)); |
|
127 |
|
128 // call the end handler |
|
129 if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg))) |
|
130 goto error; |
|
131 |
|
132 // successfully parsed! |
|
133 err = 0; |
|
134 |
|
135 error: |
|
136 DEBUGNF("\n"); |
|
137 |
|
138 if (cb_err) |
|
139 err = cb_err; |
|
140 |
|
141 // dump debug info on error |
|
142 if (err) { |
|
143 const char *cc; |
|
144 |
|
145 // figure out the error |
|
146 if (!buf) |
|
147 WARNING("malloc/realloc"); |
|
148 |
|
149 else if (trans && trans->flags & LEX_TRANS_INVALID) |
|
150 WARNING("hit invalid transition match"); |
|
151 |
|
152 else if (!next_state) |
|
153 WARNING("no valid transition found"); |
|
154 |
|
155 else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) |
|
156 WARNING("invalid end state"); |
|
157 |
|
158 else |
|
159 WARNING("unknown error condition (!?)"); |
|
160 |
|
161 DEBUG("%s", input); |
|
162 DEBUGN("%s", ""); |
|
163 |
|
164 for (cc = input; cc < c; cc++) |
|
165 DEBUGNF(" "); |
|
166 |
|
167 DEBUGF("^\t%s -> %s -> %s", |
|
168 LEX_STATE_NAME(lex, prev_state), |
|
169 LEX_STATE_NAME(lex, cur_state), |
|
170 LEX_STATE_NAME(lex, next_state) |
|
171 ); |
|
172 } |
|
173 |
|
174 // free stuff |
|
175 free(buf); |
|
176 |
|
177 // return |
|
178 return err; |
6 } |
179 } |
7 |
180 |
|
181 |