16
|
1 |
|
|
2 |
#include <stdlib.h>
|
15
|
3 |
|
|
4 |
#include "lex.h"
|
16
|
5 |
#include "error.h"
|
|
6 |
#include "log.h"
|
|
7 |
|
|
8 |
// initial size, in bytes, of the token buffer that lexer() allocates;
// lexer() doubles the buffer whenever a token fills it
#define INITIAL_BUF_SIZE 4096
|
15
|
9 |
|
|
10 |
int lexer (const struct lex *lex, const char *input, void *arg) {
|
16
|
11 |
// handling error returns
|
|
12 |
int err = -1, cb_err;
|
|
13 |
|
|
14 |
// token buffer
|
|
15 |
char *buf = NULL, *buf_ptr;
|
|
16 |
size_t buf_size = INITIAL_BUF_SIZE;
|
|
17 |
|
|
18 |
// state
|
|
19 |
int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL;
|
|
20 |
|
|
21 |
// input chars
|
|
22 |
const char *c = input;
|
|
23 |
|
|
24 |
// lookups
|
|
25 |
const struct lex_transition *trans = NULL;
|
|
26 |
|
|
27 |
// allocate the buffer
|
|
28 |
if ((buf = malloc(sizeof(char) * buf_size)) == NULL)
|
|
29 |
goto error;
|
|
30 |
|
|
31 |
// set buf_ptr initial position
|
|
32 |
buf_ptr = buf;
|
|
33 |
|
|
34 |
// clear input
|
|
35 |
DEBUG("*cough*");
|
|
36 |
DEBUGN("%s", "");
|
|
37 |
|
|
38 |
// process input
|
|
39 |
do {
|
|
40 |
if (*c) {
|
|
41 |
// look up the next state
|
|
42 |
for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) {
|
|
43 |
// accept defaults
|
|
44 |
if (trans->flags & LEX_TRANS_DEFAULT)
|
|
45 |
break;
|
|
46 |
|
|
47 |
// disregard non-matches
|
|
48 |
if (trans->left > *c || *c > trans->right)
|
|
49 |
continue;
|
|
50 |
|
|
51 |
// abort on invalids
|
|
52 |
if (trans->flags & LEX_TRANS_INVALID)
|
|
53 |
goto error;
|
|
54 |
|
|
55 |
else {
|
|
56 |
// accept it
|
|
57 |
break;
|
|
58 |
}
|
|
59 |
}
|
|
60 |
|
|
61 |
// did we find a transition with a valid next state?
|
|
62 |
if (!(next_state = trans->next_state))
|
|
63 |
goto error;
|
|
64 |
|
|
65 |
// call the char handler
|
|
66 |
if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg)))
|
|
67 |
goto error;
|
|
68 |
|
|
69 |
} else {
|
|
70 |
// EOF!
|
|
71 |
next_state = LEX_EOF;
|
|
72 |
|
|
73 |
// is cur_state a valid end state?
|
|
74 |
if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
|
|
75 |
goto error;
|
|
76 |
|
|
77 |
// note: we don't pass the NUL byte to the char handler
|
|
78 |
}
|
|
79 |
|
|
80 |
// if this char is part of the next token...
|
|
81 |
if (next_state != cur_state) {
|
|
82 |
// terminate the buffer and reset buf_ptr
|
|
83 |
*buf_ptr = 0; buf_ptr = buf;
|
|
84 |
|
|
85 |
// dump state transitions
|
|
86 |
DEBUGF("\n\t%25s -> %25s -> %25s",
|
|
87 |
LEX_STATE_NAME(lex, prev_state),
|
|
88 |
LEX_STATE_NAME(lex, cur_state),
|
|
89 |
LEX_STATE_NAME(lex, next_state)
|
|
90 |
);
|
|
91 |
|
|
92 |
// pass in the complete token to the handler
|
|
93 |
if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg)))
|
|
94 |
goto error;
|
|
95 |
|
|
96 |
// update states
|
|
97 |
prev_state = cur_state;
|
|
98 |
cur_state = next_state;
|
|
99 |
next_state = LEX_INITIAL;
|
|
100 |
}
|
|
101 |
|
|
102 |
// dump chars
|
|
103 |
if (next_state == LEX_INITIAL)
|
|
104 |
DEBUGN("%c", *c);
|
|
105 |
else
|
|
106 |
DEBUGNF("%c", *c);
|
|
107 |
|
|
108 |
// store this char in the buffer
|
|
109 |
*(buf_ptr++) = *c;
|
|
110 |
|
|
111 |
// grow the buffer if needed
|
|
112 |
if (buf_ptr - buf >= buf_size) {
|
|
113 |
// remember the offset, as buf_ptr might get invalidated if buf is moved
|
|
114 |
size_t buf_offset = buf_ptr - buf;
|
|
115 |
|
|
116 |
// calc new size
|
|
117 |
buf_size *= 2;
|
|
118 |
|
|
119 |
// grow/move
|
|
120 |
if ((buf = realloc(buf, buf_size)) == NULL)
|
|
121 |
goto error;
|
|
122 |
|
|
123 |
// fix buf_ptr
|
|
124 |
buf_ptr = buf + buf_offset;
|
|
125 |
}
|
|
126 |
} while (*(c++));
|
|
127 |
|
|
128 |
// call the end handler
|
|
129 |
if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg)))
|
|
130 |
goto error;
|
|
131 |
|
|
132 |
// successfully parsed!
|
|
133 |
err = 0;
|
|
134 |
|
|
135 |
error:
|
|
136 |
DEBUGNF("\n");
|
|
137 |
|
|
138 |
if (cb_err)
|
|
139 |
err = cb_err;
|
|
140 |
|
|
141 |
// dump debug info on error
|
|
142 |
if (err) {
|
|
143 |
const char *cc;
|
|
144 |
|
|
145 |
// figure out the error
|
|
146 |
if (!buf)
|
|
147 |
WARNING("malloc/realloc");
|
|
148 |
|
|
149 |
else if (trans && trans->flags & LEX_TRANS_INVALID)
|
|
150 |
WARNING("hit invalid transition match");
|
|
151 |
|
|
152 |
else if (!next_state)
|
|
153 |
WARNING("no valid transition found");
|
|
154 |
|
|
155 |
else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
|
|
156 |
WARNING("invalid end state");
|
|
157 |
|
|
158 |
else
|
|
159 |
WARNING("unknown error condition (!?)");
|
|
160 |
|
|
161 |
DEBUG("%s", input);
|
|
162 |
DEBUGN("%s", "");
|
|
163 |
|
|
164 |
for (cc = input; cc < c; cc++)
|
|
165 |
DEBUGNF(" ");
|
|
166 |
|
|
167 |
DEBUGF("^\t%s -> %s -> %s",
|
|
168 |
LEX_STATE_NAME(lex, prev_state),
|
|
169 |
LEX_STATE_NAME(lex, cur_state),
|
|
170 |
LEX_STATE_NAME(lex, next_state)
|
|
171 |
);
|
|
172 |
}
|
|
173 |
|
|
174 |
// free stuff
|
|
175 |
free(buf);
|
|
176 |
|
|
177 |
// return
|
|
178 |
return err;
|
15
|
179 |
}
|
|
180 |
|
16
|
181 |
|