author | Tero Marttila <terom@fixme.fi> |
Sat, 11 Oct 2008 20:45:28 +0300 | |
changeset 18 | b12e78767248 |
parent 17 | 0a024b29b16d |
permissions | -rw-r--r-- |
15 | 1 |
#define _GNU_SOURCE |
2 |
#include <stdlib.h> |
|
3 |
#include <string.h> |
|
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
4 |
|
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
5 |
#include "url.h" |
15 | 6 |
#include "lex.h" |
7 |
#include "error.h" |
|
16 | 8 |
#include "log.h" |
15 | 9 |
#include "misc.h" |
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
10 |
|
14 | 11 |
/*
 * Lexer states for the URL grammar; each state doubles as the type of the token collected while in it.
 *
 * Values are assigned implicitly, so the order of the enumerators matters. URL_MAX is used as the number of
 * states in the lexer definition.
 */
enum url_token {
    URL_INVALID,

    /* start of input */
    URL_BEGIN,

    /*
     * Lookahead kludge for the beginning of the URL, where the first word may turn out to be a
     * URL_SCHEME, a URL_USERNAME (+URL_PASSWORD) or a URL_HOSTNAME (+URL_SERVICE).
     */
    URL_BEGIN_ALNUM,
    URL_BEGIN_COLON,

    /* scheme: words joined by '+', terminated by "://" */
    URL_SCHEME,
    URL_SCHEME_SEP,
    URL_SCHEME_END_COL,
    URL_SCHEME_END_SLASH1,
    URL_SCHEME_END_SLASH2,

    /*
     * Lookahead kludge after a scheme, where the next word may be a URL_USERNAME (+URL_PASSWORD) or a
     * URL_HOSTNAME (+URL_SERVICE).
     */
    URL_USERHOST_ALNUM,
    URL_USERHOST_COLON,
    URL_USERHOST_ALNUM2,

    /* user credentials */
    URL_USERNAME,
    URL_PASSWORD_SEP,
    URL_PASSWORD,
    URL_USERNAME_END,

    /* host */
    URL_HOSTNAME,

    /* service, introduced by ':' */
    URL_SERVICE_SEP,
    URL_SERVICE,

    /* path, introduced by '/' */
    URL_PATH_START,
    URL_PATH,

    /* options, introduced by '?' and separated by '&' */
    URL_OPT_START,
    URL_OPT_KEY,
    URL_OPT_EQ,
    URL_OPT_VAL,
    URL_OPT_SEP,

    /* number of enumerators above; not a real state */
    URL_MAX,
};
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
52 |
|
14 | 53 |
/*
 * Parser state, handed to the lexer token callback through its opaque argument.
 */
struct url_state {
    /* the URL structure that parsed fields are stored into */
    struct url *url;

    /* lookahead kludge: copy of an ambiguous word, held until a later token tells what it was */
    const char *alnum;

    /* second lookahead slot -- NOTE(review): no use of this field is visible in this file, confirm */
    const char *alnum2;
};
|
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
64 |
|
17 | 65 |
static int _url_append_scheme (struct url *url, const char *data, int copy) { |
66 |
if (!url->schema) { |
|
67 |
if ((url->schema = malloc(sizeof(struct url_schema) + (1 * sizeof(const char *)))) == NULL) |
|
68 |
ERROR("malloc"); |
|
69 |
||
70 |
url->schema->count = 1; |
|
71 |
||
72 |
} else { |
|
73 |
url->schema->count++; |
|
74 |
||
75 |
// I'm starting to hate flexible array members... |
|
76 |
if ((url->schema = realloc(url->schema, sizeof(struct url_schema) + url->schema->count * sizeof(const char *))) == NULL) |
|
77 |
ERROR("realloc"); |
|
78 |
} |
|
79 |
||
80 |
if ((url->schema->list[url->schema->count - 1] = copy ? strdup(data) : data) == NULL) |
|
81 |
ERROR("strdup"); |
|
82 |
||
83 |
// k |
|
16 | 84 |
return 0; |
17 | 85 |
|
86 |
error: |
|
87 |
return -1; |
|
15 | 88 |
} |
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
89 |
|
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
90 |
static struct url_opt *_url_get_opt (struct url *url, int new) { |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
91 |
if (!url->opts) { |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
92 |
if ((url->opts = malloc(sizeof(struct url_opts) + (1 * sizeof(struct url_opt)))) == NULL) |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
93 |
ERROR("malloc"); |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
94 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
95 |
url->opts->count = 1; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
96 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
97 |
} else if (new) { |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
98 |
url->opts->count++; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
99 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
100 |
if ((url->opts = realloc(url->opts, sizeof(struct url_opts) + url->opts->count * sizeof(struct url_opt))) == NULL) |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
101 |
ERROR("realloc"); |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
102 |
} |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
103 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
104 |
// success |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
105 |
return &url->opts->list[url->opts->count - 1]; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
106 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
107 |
error: |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
108 |
return NULL; |
15 | 109 |
} |
110 |
||
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
111 |
static int _url_append_opt_key (struct url *url, const char *key) { |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
112 |
struct url_opt *opt; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
113 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
114 |
if ((opt = _url_get_opt(url, 1)) == NULL) |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
115 |
goto error; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
116 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
117 |
if ((opt->key = strdup(key)) == NULL) |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
118 |
ERROR("strdup"); |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
119 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
120 |
opt->value = NULL; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
121 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
122 |
return 0; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
123 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
124 |
error: |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
125 |
return -1; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
126 |
} |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
127 |
|
15 | 128 |
static int _url_append_opt_val (struct url *url, const char *value) { |
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
129 |
struct url_opt *opt; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
130 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
131 |
if ((opt = _url_get_opt(url, 0)) == NULL) |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
132 |
goto error; |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
133 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
134 |
if ((opt->value = strdup(value)) == NULL) |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
135 |
ERROR("strdup"); |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
136 |
|
16 | 137 |
return 0; |
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
138 |
|
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
139 |
error: |
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
140 |
return -1; |
16 | 141 |
} |
15 | 142 |
|
16 | 143 |
static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg); |
144 |
||
145 |
static struct lex url_lex = { |
|
146 |
.token_fn = url_lex_token, |
|
147 |
.char_fn = NULL, |
|
148 |
.end_fn = NULL, |
|
149 |
||
150 |
.state_count = URL_MAX, |
|
151 |
.initial_state = URL_BEGIN, |
|
152 |
.state_list = { |
|
153 |
LEX_STATE ( URL_BEGIN ) { |
|
154 |
LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
155 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
156 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
157 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
158 |
LEX_END |
|
159 |
}, |
|
160 |
||
161 |
// this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME |
|
162 |
LEX_STATE_END ( URL_BEGIN_ALNUM ) { |
|
163 |
LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME |
|
164 |
LEX_CHAR ( ':', URL_BEGIN_COLON ), |
|
165 |
LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
166 |
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
167 |
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
18
b12e78767248
url_test is starting to be properly functional
Tero Marttila <terom@fixme.fi>
parents:
17
diff
changeset
|
168 |
LEX_DEFAULT ( URL_BEGIN_ALNUM ) |
16 | 169 |
}, |
170 |
||
171 |
// this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP |
|
172 |
LEX_STATE ( URL_BEGIN_COLON ) { |
|
173 |
LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME |
|
174 |
LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
175 |
LEX_END |
|
176 |
}, |
|
177 |
||
178 |
||
179 |
LEX_STATE ( URL_SCHEME ) { |
|
180 |
LEX_ALNUM ( URL_SCHEME ), |
|
181 |
LEX_CHAR ( '+', URL_SCHEME_SEP ), |
|
182 |
LEX_CHAR ( ':', URL_SCHEME_END_COL ), |
|
183 |
LEX_END |
|
184 |
}, |
|
185 |
||
186 |
LEX_STATE ( URL_SCHEME_SEP ) { |
|
187 |
LEX_ALNUM ( URL_SCHEME ), |
|
188 |
LEX_END |
|
189 |
}, |
|
190 |
||
191 |
LEX_STATE ( URL_SCHEME_END_COL ) { |
|
192 |
LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), |
|
193 |
LEX_END |
|
194 |
}, |
|
195 |
||
196 |
LEX_STATE ( URL_SCHEME_END_SLASH1 ) { |
|
197 |
LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), |
|
198 |
LEX_END |
|
199 |
}, |
|
200 |
||
201 |
LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { |
|
202 |
LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
203 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
204 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
205 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
206 |
LEX_END |
|
207 |
}, |
|
208 |
||
209 |
// this can be URL_USERNAME or URL_HOSTNAME |
|
210 |
LEX_STATE_END ( URL_USERHOST_ALNUM ) { |
|
211 |
LEX_CHAR ( ':', URL_USERHOST_COLON ), |
|
212 |
LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
213 |
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
214 |
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
215 |
LEX_DEFAULT ( URL_USERHOST_ALNUM ), |
|
216 |
}, |
|
217 |
||
218 |
// this can be URL_USERNAME_END or URL_SERVICE_SEP |
|
219 |
LEX_STATE ( URL_USERHOST_COLON ) { |
|
220 |
LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
221 |
LEX_END |
|
222 |
}, |
|
223 |
||
224 |
// this can be URL_PASSWORD or URL_SERVICE |
|
225 |
LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { |
|
226 |
LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSSWORD |
|
227 |
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE |
|
228 |
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE |
|
229 |
LEX_DEFAULT ( URL_USERHOST_ALNUM2 ), |
|
230 |
}, |
|
231 |
||
232 |
// dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 |
|
233 |
LEX_STATE ( URL_USERNAME ) { |
|
234 |
LEX_END |
|
235 |
}, |
|
236 |
||
237 |
LEX_STATE ( URL_PASSWORD_SEP ) { |
|
238 |
LEX_END |
|
239 |
}, |
|
240 |
||
241 |
LEX_STATE ( URL_PASSWORD ) { |
|
242 |
LEX_END |
|
243 |
}, |
|
244 |
||
245 |
||
246 |
LEX_STATE_END ( URL_USERNAME_END ) { |
|
247 |
LEX_ALNUM ( URL_HOSTNAME ), |
|
248 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
249 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
250 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
251 |
LEX_END |
|
252 |
}, |
|
253 |
||
254 |
||
255 |
LEX_STATE_END ( URL_HOSTNAME ) { |
|
256 |
LEX_ALNUM ( URL_HOSTNAME ), |
|
257 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
258 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
259 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
260 |
LEX_END |
|
261 |
}, |
|
262 |
||
263 |
||
264 |
LEX_STATE ( URL_SERVICE_SEP ) { |
|
265 |
LEX_ALNUM ( URL_SERVICE ), |
|
266 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
267 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
268 |
LEX_END |
|
269 |
}, |
|
270 |
||
271 |
LEX_STATE_END ( URL_SERVICE ) { |
|
272 |
LEX_ALNUM ( URL_SERVICE ), |
|
273 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
274 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
275 |
LEX_END |
|
276 |
}, |
|
277 |
||
278 |
||
279 |
LEX_STATE_END ( URL_PATH_START ) { |
|
280 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
281 |
LEX_DEFAULT ( URL_PATH ), |
|
282 |
}, |
|
283 |
||
284 |
LEX_STATE_END ( URL_PATH ) { |
|
285 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
286 |
LEX_DEFAULT ( URL_PATH ), |
|
287 |
}, |
|
288 |
||
289 |
||
290 |
LEX_STATE_END ( URL_OPT_START ) { |
|
291 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
292 |
LEX_INVALID ( '=' ), |
|
293 |
LEX_DEFAULT ( URL_OPT_KEY ), |
|
294 |
}, |
|
295 |
||
296 |
LEX_STATE_END ( URL_OPT_KEY ) { |
|
297 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
298 |
LEX_CHAR ( '=', URL_OPT_EQ ), |
|
299 |
LEX_DEFAULT ( URL_OPT_KEY ), |
|
300 |
}, |
|
301 |
||
302 |
LEX_STATE_END ( URL_OPT_EQ ) { |
|
303 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
304 |
LEX_INVALID ( '=' ), |
|
305 |
LEX_DEFAULT ( URL_OPT_VAL ), |
|
306 |
}, |
|
307 |
||
308 |
LEX_STATE_END ( URL_OPT_VAL ) { |
|
309 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
310 |
LEX_INVALID ( '=' ), |
|
311 |
LEX_DEFAULT ( URL_OPT_VAL ), |
|
312 |
}, |
|
313 |
||
314 |
LEX_STATE_END ( URL_OPT_SEP ) { |
|
315 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
316 |
LEX_INVALID ( '=' ), |
|
317 |
LEX_DEFAULT ( URL_OPT_KEY ), |
|
318 |
}, |
|
319 |
||
320 |
LEX_STATE ( URL_ERROR ) { |
|
321 |
LEX_END |
|
322 |
}, |
|
323 |
} |
|
324 |
}; |
|
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
325 |
|
14 | 326 |
static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { |
327 |
enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; |
|
328 |
struct url_state *state = arg; |
|
15 | 329 |
const char **copy_to = NULL; |
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
330 |
|
15 | 331 |
(void) prev_token; |
332 |
||
333 |
switch (this_token) { |
|
16 | 334 |
case URL_BEGIN: |
335 |
// irrelevant |
|
336 |
break; |
|
337 |
||
15 | 338 |
case URL_BEGIN_ALNUM: |
339 |
switch (next_token) { |
|
340 |
case URL_SCHEME_SEP: |
|
341 |
// store the scheme |
|
17 | 342 |
if (_url_append_scheme(state->url, token_data, 1)) |
15 | 343 |
goto error; |
344 |
||
345 |
break; |
|
346 |
||
347 |
case URL_USERNAME_END: |
|
348 |
// store the username |
|
349 |
copy_to = &state->url->username; break; |
|
350 |
||
351 |
case URL_PATH_START: |
|
352 |
case URL_OPT_START: |
|
353 |
case LEX_EOF: |
|
354 |
// store the hostname |
|
355 |
copy_to = &state->url->hostname; break; |
|
14 | 356 |
|
15 | 357 |
case URL_BEGIN_COLON: |
358 |
// gah... |
|
359 |
copy_to = &state->alnum; break; |
|
360 |
||
361 |
||
362 |
default: |
|
363 |
FATAL("weird next token"); |
|
364 |
} |
|
365 |
||
366 |
break; |
|
367 |
||
368 |
case URL_BEGIN_COLON: |
|
369 |
switch (next_token) { |
|
370 |
case URL_SCHEME_END_SLASH1: |
|
371 |
// store the schema |
|
17 | 372 |
if (_url_append_scheme(state->url, state->alnum, 0)) |
15 | 373 |
goto error; |
17 | 374 |
|
375 |
state->alnum = NULL; |
|
15 | 376 |
|
377 |
break; |
|
378 |
||
379 |
case URL_USERHOST_ALNUM2: |
|
380 |
// gah.. |
|
381 |
break; |
|
382 |
||
383 |
default: |
|
384 |
FATAL("weird next token"); |
|
385 |
} |
|
386 |
||
387 |
break; |
|
388 |
||
389 |
case URL_SCHEME: |
|
390 |
// store the scheme |
|
17 | 391 |
if (_url_append_scheme(state->url, token_data, 1)) |
15 | 392 |
goto error; |
393 |
||
394 |
break; |
|
395 |
||
396 |
case URL_SCHEME_SEP: |
|
397 |
// ignore |
|
398 |
break; |
|
399 |
||
400 |
case URL_SCHEME_END_COL: |
|
401 |
case URL_SCHEME_END_SLASH1: |
|
402 |
case URL_SCHEME_END_SLASH2: |
|
403 |
// ignore |
|
404 |
break; |
|
405 |
||
406 |
case URL_USERHOST_ALNUM: |
|
407 |
switch (next_token) { |
|
408 |
case URL_USERNAME_END: |
|
409 |
// store the username |
|
410 |
copy_to = &state->url->username; break; |
|
411 |
||
412 |
case URL_PATH_START: |
|
413 |
case URL_OPT_START: |
|
414 |
case LEX_EOF: |
|
415 |
// store the hostname |
|
416 |
copy_to = &state->url->hostname; break; |
|
417 |
||
418 |
case URL_USERHOST_COLON: |
|
419 |
// gah... |
|
420 |
copy_to = &state->alnum; break; |
|
421 |
||
422 |
default: |
|
423 |
FATAL("weird next token"); |
|
424 |
} |
|
425 |
||
426 |
break; |
|
427 |
||
428 |
case URL_USERHOST_COLON: |
|
429 |
// ignore |
|
430 |
break; |
|
431 |
||
432 |
case URL_USERHOST_ALNUM2: |
|
433 |
switch (next_token) { |
|
434 |
case URL_USERNAME_END: |
|
435 |
// store the username and password |
|
436 |
state->url->username = state->alnum; state->alnum = NULL; |
|
437 |
copy_to = &state->url->password; |
|
438 |
||
439 |
break; |
|
440 |
||
441 |
case URL_PATH_START: |
|
442 |
case URL_OPT_START: |
|
443 |
case LEX_EOF: |
|
16 | 444 |
// store the hostname and service |
445 |
state->url->hostname = state->alnum; state->alnum = NULL; |
|
15 | 446 |
copy_to = &state->url->service; break; |
447 |
||
448 |
default: |
|
449 |
FATAL("weird next token"); |
|
450 |
} |
|
451 |
||
452 |
break; |
|
453 |
||
454 |
case URL_USERNAME: |
|
455 |
case URL_PASSWORD_SEP: |
|
456 |
case URL_PASSWORD: |
|
457 |
FATAL("these should be overshadowed"); |
|
458 |
||
459 |
case URL_USERNAME_END: |
|
460 |
// ignore |
|
461 |
break; |
|
462 |
||
463 |
case URL_HOSTNAME: |
|
464 |
// store |
|
465 |
copy_to = &state->url->hostname; break; |
|
466 |
||
467 |
case URL_SERVICE_SEP: |
|
468 |
// ignore |
|
469 |
break; |
|
470 |
||
471 |
case URL_SERVICE: |
|
472 |
// store |
|
473 |
copy_to = &state->url->service; break; |
|
474 |
||
475 |
case URL_PATH_START: |
|
476 |
// ignore |
|
477 |
break; |
|
478 |
||
479 |
case URL_PATH: |
|
480 |
// store |
|
481 |
copy_to = &state->url->path; break; |
|
482 |
||
483 |
case URL_OPT_START: |
|
484 |
// ignore |
|
485 |
break; |
|
486 |
||
487 |
case URL_OPT_KEY: |
|
488 |
// store |
|
489 |
if (_url_append_opt_key(state->url, token_data)) |
|
490 |
goto error; |
|
491 |
||
492 |
break; |
|
493 |
||
494 |
case URL_OPT_EQ: |
|
495 |
// ignore |
|
496 |
break; |
|
497 |
||
498 |
case URL_OPT_VAL: |
|
499 |
// store |
|
500 |
if (_url_append_opt_val(state->url, token_data)) |
|
501 |
goto error; |
|
502 |
||
503 |
break; |
|
504 |
||
505 |
case URL_OPT_SEP: |
|
506 |
// ignore |
|
507 |
break; |
|
508 |
||
509 |
default: |
|
16 | 510 |
ERROR("invalid token"); |
15 | 511 |
} |
512 |
||
513 |
if (copy_to) { |
|
514 |
// copy the token data |
|
515 |
if ((*copy_to = strdup(token_data)) == NULL) |
|
516 |
ERROR("strdup"); |
|
517 |
} |
|
518 |
||
519 |
// good |
|
520 |
return 0; |
|
521 |
||
522 |
error: |
|
16 | 523 |
DEBUG("token: %s -> %s -> %s: %s", |
524 |
LEX_STATE_NAME(&url_lex, prev_token), LEX_STATE_NAME(&url_lex, this_token), LEX_STATE_NAME(&url_lex, next_token), |
|
525 |
token_data |
|
526 |
); |
|
15 | 527 |
return -1; |
14 | 528 |
} |
529 |
||
530 |
||
531 |
int url_parse (struct url *url, const char *text) { |
|
532 |
struct url_state state; ZINIT(state); |
|
533 |
int ret; |
|
534 |
||
535 |
// set up state |
|
536 |
state.url = url; |
|
537 |
||
538 |
// parse it |
|
539 |
if ((ret = lexer(&url_lex, text, &state))) |
|
540 |
ERROR("invalid URL"); |
|
541 |
||
542 |
// success |
|
543 |
return 0; |
|
544 |
||
545 |
error: |
|
546 |
return -1; |
|
547 |
} |
|
548 |
||
16 | 549 |
/*
 * Write one "field=val " pair to the stream; fields without a value (NULL) are skipped entirely.
 */
static void _url_dump_part (const char *field, const char *val, FILE *stream) {
    if (!val)
        return;

    fprintf(stream, "%s=%s ", field, val);
}
|
554 |
||
555 |
void url_dump (const struct url *url, FILE *stream) { |
|
556 |
int i; |
|
557 |
||
558 |
if (url->schema) { |
|
17 | 559 |
fprintf(stream, "schema=("); |
16 | 560 |
|
561 |
for (i = 0; i < url->schema->count; i++) { |
|
562 |
if (i > 0) |
|
17 | 563 |
fprintf(stream, ","); |
16 | 564 |
|
565 |
fprintf(stream, "%s", url->schema->list[i]); |
|
566 |
} |
|
567 |
||
17 | 568 |
fprintf(stream, ") "); |
16 | 569 |
} |
570 |
||
571 |
_url_dump_part("username", url->username, stream); |
|
572 |
_url_dump_part("password", url->password, stream); |
|
573 |
_url_dump_part("hostname", url->hostname, stream); |
|
574 |
_url_dump_part("service", url->service, stream); |
|
575 |
_url_dump_part("path", url->path, stream); |
|
576 |
||
577 |
if (url->opts) { |
|
578 |
fprintf(stream, "opts: "); |
|
579 |
||
580 |
for (i = 0; i < url->opts->count; i++) { |
|
581 |
fprintf(stream, "%s=%s ", url->opts->list[i].key, url->opts->list[i].value); |
|
582 |
} |
|
583 |
} |
|
584 |
||
585 |
fprintf(stream, "\n"); |
|
586 |
} |
|
587 |