author | Tero Marttila <terom@fixme.fi> |
Thu, 09 Oct 2008 00:49:32 +0300 | |
changeset 17 | 0a024b29b16d |
parent 16 | 74fb62022fb3 |
child 18 | b12e78767248 |
permissions | -rw-r--r-- |
15 | 1 |
#define _GNU_SOURCE |
2 |
#include <stdlib.h> |
|
3 |
#include <string.h> |
|
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
4 |
|
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
5 |
#include "url.h" |
15 | 6 |
#include "lex.h" |
7 |
#include "error.h" |
|
16 | 8 |
#include "log.h" |
15 | 9 |
#include "misc.h" |
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
10 |
|
14 | 11 |
/*
 * Lexer states / token types for the URL grammar.
 *
 * The numeric values are significant: URL_BEGIN is used as the lexer's initial
 * state and URL_MAX as its state count, so the order of the enumerators must
 * not be changed.
 */
enum url_token {
    URL_INVALID,

    URL_BEGIN,

    // kludge to resolve ambiguous URL_SCHEME/URL_USERNAME+URL_PASSWORD/URL_HOSTNAME+URL_SERVICE at the beginning
    URL_BEGIN_ALNUM,
    URL_BEGIN_COLON,

    URL_SCHEME,
    URL_SCHEME_SEP,
    URL_SCHEME_END_COL,
    URL_SCHEME_END_SLASH1,
    URL_SCHEME_END_SLASH2,

    // kludge to resolve ambiguous URL_USERNAME+URL_PASSWORD/URL_HOSTNAME+URL_SERVICE after a scheme
    URL_USERHOST_ALNUM,
    URL_USERHOST_COLON,
    URL_USERHOST_ALNUM2,

    URL_USERNAME,
    URL_PASSWORD_SEP,
    URL_PASSWORD,
    URL_USERNAME_END,

    URL_HOSTNAME,

    URL_SERVICE_SEP,
    URL_SERVICE,

    URL_PATH_START,
    URL_PATH,

    URL_OPT_START,
    URL_OPT_KEY,
    URL_OPT_EQ,
    URL_OPT_VAL,
    URL_OPT_SEP,

    // number of enumerators above; not a real state
    URL_MAX,
};
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
52 |
|
14 | 53 |
/*
 * Parser state
 *
 * One instance lives for the duration of a single url_parse() call and is
 * handed to the token callback as its opaque argument.
 */
struct url_state {
    // the URL to parse into
    struct url *url;

    // our lookahead-kludge: `alnum` holds a copy of an alphanumeric token whose
    // meaning (scheme / username / hostname) is only known once later tokens
    // have been seen.
    // NOTE(review): `alnum2` is never read or written by the code visible in
    // this file - confirm whether it is still needed.
    const char *alnum, *alnum2;

};
|
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
64 |
|
17 | 65 |
static int _url_append_scheme (struct url *url, const char *data, int copy) { |
66 |
if (!url->schema) { |
|
67 |
if ((url->schema = malloc(sizeof(struct url_schema) + (1 * sizeof(const char *)))) == NULL) |
|
68 |
ERROR("malloc"); |
|
69 |
||
70 |
url->schema->count = 1; |
|
71 |
||
72 |
} else { |
|
73 |
url->schema->count++; |
|
74 |
||
75 |
// I'm starting to hate flexible array members... |
|
76 |
if ((url->schema = realloc(url->schema, sizeof(struct url_schema) + url->schema->count * sizeof(const char *))) == NULL) |
|
77 |
ERROR("realloc"); |
|
78 |
} |
|
79 |
||
80 |
if ((url->schema->list[url->schema->count - 1] = copy ? strdup(data) : data) == NULL) |
|
81 |
ERROR("strdup"); |
|
82 |
||
83 |
// k |
|
16 | 84 |
return 0; |
17 | 85 |
|
86 |
error: |
|
87 |
return -1; |
|
15 | 88 |
} |
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
89 |
|
15 | 90 |
/*
 * Record the key of a URL option.
 *
 * Placeholder: nothing is stored yet, both arguments are ignored and success
 * (0) is always returned.
 */
static int _url_append_opt_key (struct url *url, const char *key) {
    (void) url;
    (void) key;

    return 0;
}
93 |
||
94 |
/*
 * Record the value of a URL option.
 *
 * Placeholder: nothing is stored yet, both arguments are ignored and success
 * (0) is always returned.
 */
static int _url_append_opt_val (struct url *url, const char *value) {
    (void) url;
    (void) value;

    return 0;
}
|
15 | 97 |
|
16 | 98 |
static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg); |
99 |
||
100 |
static struct lex url_lex = { |
|
101 |
.token_fn = url_lex_token, |
|
102 |
.char_fn = NULL, |
|
103 |
.end_fn = NULL, |
|
104 |
||
105 |
.state_count = URL_MAX, |
|
106 |
.initial_state = URL_BEGIN, |
|
107 |
.state_list = { |
|
108 |
LEX_STATE ( URL_BEGIN ) { |
|
109 |
LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
110 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
111 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
112 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
113 |
LEX_END |
|
114 |
}, |
|
115 |
||
116 |
// this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME |
|
117 |
LEX_STATE_END ( URL_BEGIN_ALNUM ) { |
|
118 |
LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
119 |
LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME |
|
120 |
LEX_CHAR ( ':', URL_BEGIN_COLON ), |
|
121 |
LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
122 |
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
123 |
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
124 |
LEX_END |
|
125 |
}, |
|
126 |
||
127 |
// this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP |
|
128 |
LEX_STATE ( URL_BEGIN_COLON ) { |
|
129 |
LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME |
|
130 |
LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
131 |
LEX_END |
|
132 |
}, |
|
133 |
||
134 |
||
135 |
LEX_STATE ( URL_SCHEME ) { |
|
136 |
LEX_ALNUM ( URL_SCHEME ), |
|
137 |
LEX_CHAR ( '+', URL_SCHEME_SEP ), |
|
138 |
LEX_CHAR ( ':', URL_SCHEME_END_COL ), |
|
139 |
LEX_END |
|
140 |
}, |
|
141 |
||
142 |
LEX_STATE ( URL_SCHEME_SEP ) { |
|
143 |
LEX_ALNUM ( URL_SCHEME ), |
|
144 |
LEX_END |
|
145 |
}, |
|
146 |
||
147 |
LEX_STATE ( URL_SCHEME_END_COL ) { |
|
148 |
LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), |
|
149 |
LEX_END |
|
150 |
}, |
|
151 |
||
152 |
LEX_STATE ( URL_SCHEME_END_SLASH1 ) { |
|
153 |
LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), |
|
154 |
LEX_END |
|
155 |
}, |
|
156 |
||
157 |
LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { |
|
158 |
LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
159 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
160 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
161 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
162 |
LEX_END |
|
163 |
}, |
|
164 |
||
165 |
// this can be URL_USERNAME or URL_HOSTNAME |
|
166 |
LEX_STATE_END ( URL_USERHOST_ALNUM ) { |
|
167 |
LEX_CHAR ( ':', URL_USERHOST_COLON ), |
|
168 |
LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
169 |
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
170 |
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
171 |
LEX_DEFAULT ( URL_USERHOST_ALNUM ), |
|
172 |
}, |
|
173 |
||
174 |
// this can be URL_USERNAME_END or URL_SERVICE_SEP |
|
175 |
LEX_STATE ( URL_USERHOST_COLON ) { |
|
176 |
LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
177 |
LEX_END |
|
178 |
}, |
|
179 |
||
180 |
// this can be URL_PASSWORD or URL_SERVICE |
|
181 |
LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { |
|
182 |
LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSSWORD |
|
183 |
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE |
|
184 |
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE |
|
185 |
LEX_DEFAULT ( URL_USERHOST_ALNUM2 ), |
|
186 |
}, |
|
187 |
||
188 |
// dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 |
|
189 |
LEX_STATE ( URL_USERNAME ) { |
|
190 |
LEX_END |
|
191 |
}, |
|
192 |
||
193 |
LEX_STATE ( URL_PASSWORD_SEP ) { |
|
194 |
LEX_END |
|
195 |
}, |
|
196 |
||
197 |
LEX_STATE ( URL_PASSWORD ) { |
|
198 |
LEX_END |
|
199 |
}, |
|
200 |
||
201 |
||
202 |
LEX_STATE_END ( URL_USERNAME_END ) { |
|
203 |
LEX_ALNUM ( URL_HOSTNAME ), |
|
204 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
205 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
206 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
207 |
LEX_END |
|
208 |
}, |
|
209 |
||
210 |
||
211 |
LEX_STATE_END ( URL_HOSTNAME ) { |
|
212 |
LEX_ALNUM ( URL_HOSTNAME ), |
|
213 |
LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
214 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
215 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
216 |
LEX_END |
|
217 |
}, |
|
218 |
||
219 |
||
220 |
LEX_STATE ( URL_SERVICE_SEP ) { |
|
221 |
LEX_ALNUM ( URL_SERVICE ), |
|
222 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
223 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
224 |
LEX_END |
|
225 |
}, |
|
226 |
||
227 |
LEX_STATE_END ( URL_SERVICE ) { |
|
228 |
LEX_ALNUM ( URL_SERVICE ), |
|
229 |
LEX_CHAR ( '/', URL_PATH_START ), |
|
230 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
231 |
LEX_END |
|
232 |
}, |
|
233 |
||
234 |
||
235 |
LEX_STATE_END ( URL_PATH_START ) { |
|
236 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
237 |
LEX_DEFAULT ( URL_PATH ), |
|
238 |
}, |
|
239 |
||
240 |
LEX_STATE_END ( URL_PATH ) { |
|
241 |
LEX_CHAR ( '?', URL_OPT_START ), |
|
242 |
LEX_DEFAULT ( URL_PATH ), |
|
243 |
}, |
|
244 |
||
245 |
||
246 |
LEX_STATE_END ( URL_OPT_START ) { |
|
247 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
248 |
LEX_INVALID ( '=' ), |
|
249 |
LEX_DEFAULT ( URL_OPT_KEY ), |
|
250 |
}, |
|
251 |
||
252 |
LEX_STATE_END ( URL_OPT_KEY ) { |
|
253 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
254 |
LEX_CHAR ( '=', URL_OPT_EQ ), |
|
255 |
LEX_DEFAULT ( URL_OPT_KEY ), |
|
256 |
}, |
|
257 |
||
258 |
LEX_STATE_END ( URL_OPT_EQ ) { |
|
259 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
260 |
LEX_INVALID ( '=' ), |
|
261 |
LEX_DEFAULT ( URL_OPT_VAL ), |
|
262 |
}, |
|
263 |
||
264 |
LEX_STATE_END ( URL_OPT_VAL ) { |
|
265 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
266 |
LEX_INVALID ( '=' ), |
|
267 |
LEX_DEFAULT ( URL_OPT_VAL ), |
|
268 |
}, |
|
269 |
||
270 |
LEX_STATE_END ( URL_OPT_SEP ) { |
|
271 |
LEX_CHAR ( '&', URL_OPT_SEP ), |
|
272 |
LEX_INVALID ( '=' ), |
|
273 |
LEX_DEFAULT ( URL_OPT_KEY ), |
|
274 |
}, |
|
275 |
||
276 |
LEX_STATE ( URL_ERROR ) { |
|
277 |
LEX_END |
|
278 |
}, |
|
279 |
} |
|
280 |
}; |
|
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
281 |
|
14 | 282 |
static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { |
283 |
enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; |
|
284 |
struct url_state *state = arg; |
|
15 | 285 |
const char **copy_to = NULL; |
13
385b9a10d096
inital playing around with a lexer/url parser
Tero Marttila <terom@fixme.fi>
parents:
diff
changeset
|
286 |
|
15 | 287 |
(void) prev_token; |
288 |
||
289 |
switch (this_token) { |
|
16 | 290 |
case URL_BEGIN: |
291 |
// irrelevant |
|
292 |
break; |
|
293 |
||
15 | 294 |
case URL_BEGIN_ALNUM: |
295 |
switch (next_token) { |
|
296 |
case URL_SCHEME_SEP: |
|
297 |
// store the scheme |
|
17 | 298 |
if (_url_append_scheme(state->url, token_data, 1)) |
15 | 299 |
goto error; |
300 |
||
301 |
break; |
|
302 |
||
303 |
case URL_USERNAME_END: |
|
304 |
// store the username |
|
305 |
copy_to = &state->url->username; break; |
|
306 |
||
307 |
case URL_PATH_START: |
|
308 |
case URL_OPT_START: |
|
309 |
case LEX_EOF: |
|
310 |
// store the hostname |
|
311 |
copy_to = &state->url->hostname; break; |
|
14 | 312 |
|
15 | 313 |
case URL_BEGIN_COLON: |
314 |
// gah... |
|
315 |
copy_to = &state->alnum; break; |
|
316 |
||
317 |
||
318 |
default: |
|
319 |
FATAL("weird next token"); |
|
320 |
} |
|
321 |
||
322 |
break; |
|
323 |
||
324 |
case URL_BEGIN_COLON: |
|
325 |
switch (next_token) { |
|
326 |
case URL_SCHEME_END_SLASH1: |
|
327 |
// store the schema |
|
17 | 328 |
if (_url_append_scheme(state->url, state->alnum, 0)) |
15 | 329 |
goto error; |
17 | 330 |
|
331 |
state->alnum = NULL; |
|
15 | 332 |
|
333 |
break; |
|
334 |
||
335 |
case URL_USERHOST_ALNUM2: |
|
336 |
// gah.. |
|
337 |
break; |
|
338 |
||
339 |
default: |
|
340 |
FATAL("weird next token"); |
|
341 |
} |
|
342 |
||
343 |
break; |
|
344 |
||
345 |
case URL_SCHEME: |
|
346 |
// store the scheme |
|
17 | 347 |
if (_url_append_scheme(state->url, token_data, 1)) |
15 | 348 |
goto error; |
349 |
||
350 |
break; |
|
351 |
||
352 |
case URL_SCHEME_SEP: |
|
353 |
// ignore |
|
354 |
break; |
|
355 |
||
356 |
case URL_SCHEME_END_COL: |
|
357 |
case URL_SCHEME_END_SLASH1: |
|
358 |
case URL_SCHEME_END_SLASH2: |
|
359 |
// ignore |
|
360 |
break; |
|
361 |
||
362 |
case URL_USERHOST_ALNUM: |
|
363 |
switch (next_token) { |
|
364 |
case URL_USERNAME_END: |
|
365 |
// store the username |
|
366 |
copy_to = &state->url->username; break; |
|
367 |
||
368 |
case URL_PATH_START: |
|
369 |
case URL_OPT_START: |
|
370 |
case LEX_EOF: |
|
371 |
// store the hostname |
|
372 |
copy_to = &state->url->hostname; break; |
|
373 |
||
374 |
case URL_USERHOST_COLON: |
|
375 |
// gah... |
|
376 |
copy_to = &state->alnum; break; |
|
377 |
||
378 |
default: |
|
379 |
FATAL("weird next token"); |
|
380 |
} |
|
381 |
||
382 |
break; |
|
383 |
||
384 |
case URL_USERHOST_COLON: |
|
385 |
// ignore |
|
386 |
break; |
|
387 |
||
388 |
case URL_USERHOST_ALNUM2: |
|
389 |
switch (next_token) { |
|
390 |
case URL_USERNAME_END: |
|
391 |
// store the username and password |
|
392 |
state->url->username = state->alnum; state->alnum = NULL; |
|
393 |
copy_to = &state->url->password; |
|
394 |
||
395 |
break; |
|
396 |
||
397 |
case URL_PATH_START: |
|
398 |
case URL_OPT_START: |
|
399 |
case LEX_EOF: |
|
16 | 400 |
// store the hostname and service |
401 |
state->url->hostname = state->alnum; state->alnum = NULL; |
|
15 | 402 |
copy_to = &state->url->service; break; |
403 |
||
404 |
default: |
|
405 |
FATAL("weird next token"); |
|
406 |
} |
|
407 |
||
408 |
break; |
|
409 |
||
410 |
case URL_USERNAME: |
|
411 |
case URL_PASSWORD_SEP: |
|
412 |
case URL_PASSWORD: |
|
413 |
FATAL("these should be overshadowed"); |
|
414 |
||
415 |
case URL_USERNAME_END: |
|
416 |
// ignore |
|
417 |
break; |
|
418 |
||
419 |
case URL_HOSTNAME: |
|
420 |
// store |
|
421 |
copy_to = &state->url->hostname; break; |
|
422 |
||
423 |
case URL_SERVICE_SEP: |
|
424 |
// ignore |
|
425 |
break; |
|
426 |
||
427 |
case URL_SERVICE: |
|
428 |
// store |
|
429 |
copy_to = &state->url->service; break; |
|
430 |
||
431 |
case URL_PATH_START: |
|
432 |
// ignore |
|
433 |
break; |
|
434 |
||
435 |
case URL_PATH: |
|
436 |
// store |
|
437 |
copy_to = &state->url->path; break; |
|
438 |
||
439 |
case URL_OPT_START: |
|
440 |
// ignore |
|
441 |
break; |
|
442 |
||
443 |
case URL_OPT_KEY: |
|
444 |
// store |
|
445 |
if (_url_append_opt_key(state->url, token_data)) |
|
446 |
goto error; |
|
447 |
||
448 |
break; |
|
449 |
||
450 |
case URL_OPT_EQ: |
|
451 |
// ignore |
|
452 |
break; |
|
453 |
||
454 |
case URL_OPT_VAL: |
|
455 |
// store |
|
456 |
if (_url_append_opt_val(state->url, token_data)) |
|
457 |
goto error; |
|
458 |
||
459 |
break; |
|
460 |
||
461 |
case URL_OPT_SEP: |
|
462 |
// ignore |
|
463 |
break; |
|
464 |
||
465 |
default: |
|
16 | 466 |
ERROR("invalid token"); |
15 | 467 |
} |
468 |
||
469 |
if (copy_to) { |
|
470 |
// copy the token data |
|
471 |
if ((*copy_to = strdup(token_data)) == NULL) |
|
472 |
ERROR("strdup"); |
|
473 |
} |
|
474 |
||
475 |
// good |
|
476 |
return 0; |
|
477 |
||
478 |
error: |
|
16 | 479 |
DEBUG("token: %s -> %s -> %s: %s", |
480 |
LEX_STATE_NAME(&url_lex, prev_token), LEX_STATE_NAME(&url_lex, this_token), LEX_STATE_NAME(&url_lex, next_token), |
|
481 |
token_data |
|
482 |
); |
|
15 | 483 |
return -1; |
14 | 484 |
} |
485 |
||
486 |
||
487 |
int url_parse (struct url *url, const char *text) { |
|
488 |
struct url_state state; ZINIT(state); |
|
489 |
int ret; |
|
490 |
||
491 |
// set up state |
|
492 |
state.url = url; |
|
493 |
||
494 |
// parse it |
|
495 |
if ((ret = lexer(&url_lex, text, &state))) |
|
496 |
ERROR("invalid URL"); |
|
497 |
||
498 |
// success |
|
499 |
return 0; |
|
500 |
||
501 |
error: |
|
502 |
return -1; |
|
503 |
} |
|
504 |
||
16 | 505 |
/*
 * Write one "field=value " pair (note the trailing space) to `stream`.
 *
 * Nothing is written when `val` is NULL, so unset URL components are skipped.
 */
static void _url_dump_part (const char *field, const char *val, FILE *stream) {
    if (val == NULL)
        return;

    fprintf(stream, "%s=%s ", field, val);
}
|
510 |
||
511 |
void url_dump (const struct url *url, FILE *stream) { |
|
512 |
int i; |
|
513 |
||
514 |
if (url->schema) { |
|
17 | 515 |
fprintf(stream, "schema=("); |
16 | 516 |
|
517 |
for (i = 0; i < url->schema->count; i++) { |
|
518 |
if (i > 0) |
|
17 | 519 |
fprintf(stream, ","); |
16 | 520 |
|
521 |
fprintf(stream, "%s", url->schema->list[i]); |
|
522 |
} |
|
523 |
||
17 | 524 |
fprintf(stream, ") "); |
16 | 525 |
} |
526 |
||
527 |
_url_dump_part("username", url->username, stream); |
|
528 |
_url_dump_part("password", url->password, stream); |
|
529 |
_url_dump_part("hostname", url->hostname, stream); |
|
530 |
_url_dump_part("service", url->service, stream); |
|
531 |
_url_dump_part("path", url->path, stream); |
|
532 |
||
533 |
if (url->opts) { |
|
534 |
fprintf(stream, "opts: "); |
|
535 |
||
536 |
for (i = 0; i < url->opts->count; i++) { |
|
537 |
fprintf(stream, "%s=%s ", url->opts->list[i].key, url->opts->list[i].value); |
|
538 |
} |
|
539 |
} |
|
540 |
||
541 |
fprintf(stream, "\n"); |
|
542 |
} |
|
543 |