| author | Tero Marttila <terom@fixme.fi> |
| Sat, 11 Oct 2008 21:35:48 +0300 | |
| changeset 20 | f0ef6d8880b4 |
| parent 18 | b12e78767248 |
| permissions | -rw-r--r-- |
| 16 | 1 |
|
2 |
#include <stdlib.h> |
|
| 15 | 3 |
|
4 |
#include "lex.h" |
|
| 16 | 5 |
#include "error.h" |
6 |
#include "log.h" |
|
7 |
||
8 |
#define INITIAL_BUF_SIZE 4096 |
|
| 15 | 9 |
|
10 |
int lexer (const struct lex *lex, const char *input, void *arg) {
|
|
| 16 | 11 |
// handling error returns |
12 |
int err = -1, cb_err; |
|
13 |
||
14 |
// token buffer |
|
15 |
char *buf = NULL, *buf_ptr; |
|
16 |
size_t buf_size = INITIAL_BUF_SIZE; |
|
17 |
||
18 |
// state |
|
19 |
int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL; |
|
20 |
||
21 |
// input chars |
|
22 |
const char *c = input; |
|
23 |
||
24 |
// lookups |
|
25 |
const struct lex_transition *trans = NULL; |
|
26 |
||
27 |
// allocate the buffer |
|
28 |
if ((buf = malloc(sizeof(char) * buf_size)) == NULL) |
|
29 |
goto error; |
|
30 |
||
31 |
// set buf_ptr initial position |
|
32 |
buf_ptr = buf; |
|
33 |
||
34 |
// clear input |
|
35 |
DEBUG("*cough*");
|
|
36 |
DEBUGN("%s", "");
|
|
37 |
||
38 |
// process input |
|
39 |
do {
|
|
40 |
if (*c) {
|
|
41 |
// look up the next state |
|
|
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
16
diff
changeset
|
42 |
for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0 || trans->flags; trans++) {
|
| 16 | 43 |
// accept defaults |
44 |
if (trans->flags & LEX_TRANS_DEFAULT) |
|
45 |
break; |
|
46 |
||
47 |
// disregard non-matches |
|
48 |
if (trans->left > *c || *c > trans->right) |
|
49 |
continue; |
|
50 |
||
51 |
// abort on invalids |
|
|
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
16
diff
changeset
|
52 |
if (trans->flags & LEX_TRANS_INVALID) {
|
| 16 | 53 |
goto error; |
54 |
||
|
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
16
diff
changeset
|
55 |
} else {
|
| 16 | 56 |
// accept it |
57 |
break; |
|
58 |
} |
|
59 |
} |
|
60 |
||
61 |
// did we find a transition with a valid next state? |
|
62 |
if (!(next_state = trans->next_state)) |
|
63 |
goto error; |
|
64 |
||
65 |
// call the char handler |
|
66 |
if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg))) |
|
67 |
goto error; |
|
68 |
||
69 |
} else {
|
|
70 |
// EOF! |
|
71 |
next_state = LEX_EOF; |
|
72 |
||
73 |
// is cur_state a valid end state? |
|
74 |
if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) |
|
75 |
goto error; |
|
76 |
||
77 |
// note: we don't pass the NUL byte to the char handler |
|
78 |
} |
|
79 |
||
80 |
// if this char is part of the next token... |
|
81 |
if (next_state != cur_state) {
|
|
82 |
// terminate the buffer and reset buf_ptr |
|
83 |
*buf_ptr = 0; buf_ptr = buf; |
|
84 |
||
85 |
// dump state transitions |
|
86 |
DEBUGF("\n\t%25s -> %25s -> %25s",
|
|
87 |
LEX_STATE_NAME(lex, prev_state), |
|
88 |
LEX_STATE_NAME(lex, cur_state), |
|
89 |
LEX_STATE_NAME(lex, next_state) |
|
90 |
); |
|
91 |
||
92 |
// pass in the complete token to the handler |
|
93 |
if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg))) |
|
94 |
goto error; |
|
95 |
||
96 |
// update states |
|
97 |
prev_state = cur_state; |
|
98 |
cur_state = next_state; |
|
99 |
next_state = LEX_INITIAL; |
|
100 |
} |
|
101 |
||
102 |
// dump chars |
|
103 |
if (next_state == LEX_INITIAL) |
|
104 |
DEBUGN("%c", *c);
|
|
105 |
else |
|
106 |
DEBUGNF("%c", *c);
|
|
107 |
||
108 |
// store this char in the buffer |
|
109 |
*(buf_ptr++) = *c; |
|
110 |
||
111 |
// grow the buffer if needed |
|
112 |
if (buf_ptr - buf >= buf_size) {
|
|
113 |
// remember the offset, as buf_ptr might get invalidated if buf is moved |
|
114 |
size_t buf_offset = buf_ptr - buf; |
|
115 |
||
116 |
// calc new size |
|
117 |
buf_size *= 2; |
|
118 |
||
119 |
// grow/move |
|
120 |
if ((buf = realloc(buf, buf_size)) == NULL) |
|
121 |
goto error; |
|
122 |
||
123 |
// fix buf_ptr |
|
124 |
buf_ptr = buf + buf_offset; |
|
125 |
} |
|
126 |
} while (*(c++)); |
|
127 |
||
128 |
// call the end handler |
|
129 |
if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg))) |
|
130 |
goto error; |
|
131 |
||
132 |
// successfully parsed! |
|
133 |
err = 0; |
|
134 |
||
135 |
error: |
|
136 |
DEBUGNF("\n");
|
|
137 |
||
138 |
if (cb_err) |
|
139 |
err = cb_err; |
|
140 |
||
141 |
// dump debug info on error |
|
142 |
if (err) {
|
|
143 |
const char *cc; |
|
144 |
||
145 |
// figure out the error |
|
146 |
if (!buf) |
|
147 |
WARNING("malloc/realloc");
|
|
148 |
||
149 |
else if (trans && trans->flags & LEX_TRANS_INVALID) |
|
150 |
WARNING("hit invalid transition match");
|
|
151 |
||
152 |
else if (!next_state) |
|
153 |
WARNING("no valid transition found");
|
|
154 |
||
155 |
else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) |
|
156 |
WARNING("invalid end state");
|
|
157 |
||
158 |
else |
|
159 |
WARNING("unknown error condition (!?)");
|
|
160 |
||
161 |
DEBUG("%s", input);
|
|
162 |
DEBUGN("%s", "");
|
|
163 |
||
164 |
for (cc = input; cc < c; cc++) |
|
165 |
DEBUGNF(" ");
|
|
166 |
||
167 |
DEBUGF("^\t%s -> %s -> %s",
|
|
168 |
LEX_STATE_NAME(lex, prev_state), |
|
169 |
LEX_STATE_NAME(lex, cur_state), |
|
170 |
LEX_STATE_NAME(lex, next_state) |
|
171 |
); |
|
172 |
} |
|
173 |
||
174 |
// free stuff |
|
175 |
free(buf); |
|
176 |
||
177 |
// return |
|
178 |
return err; |
|
| 15 | 179 |
} |
180 |
||
| 16 | 181 |