60 const char *alnum, *alnum2; |
61 const char *alnum, *alnum2; |
61 |
62 |
62 }; |
63 }; |
63 |
64 |
// Hook for recording one scheme component of the URL being parsed (url_lex_token calls it under a "store the scheme" comment).
// The body visible here is a stub: it uses neither `url` nor `data` and returns 0.
// NOTE(review): the caller tests the result for non-zero, presumably as a failure indicator — confirm.
64 static int _url_append_scheme (struct url *url, const char *data) { |
65 static int _url_append_scheme (struct url *url, const char *data) { |
65 |
66 return 0; |
66 } |
67 } |
67 |
68 |
// Stub: uses neither `url` nor `key` and returns 0.
// NOTE(review): by its name this is meant to record an option key on `url`; no caller is visible here — confirm intended contract.
68 static int _url_append_opt_key (struct url *url, const char *key) { |
69 static int _url_append_opt_key (struct url *url, const char *key) { |
69 |
70 return 0; |
70 } |
71 } |
71 |
72 |
// Stub: uses neither `url` nor `value` and returns 0.
// NOTE(review): by its name this is meant to record an option value on `url`; no caller is visible here — confirm intended contract.
72 static int _url_append_opt_val (struct url *url, const char *value) { |
73 static int _url_append_opt_val (struct url *url, const char *value) { |
73 |
74 return 0; |
74 } |
75 } |
|
76 |
|
// Forward declaration: url_lex below stores this function in .token_fn before the function itself is defined.
77 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg); |
|
78 |
|
// Lexer definition for URLs: tokens are reported to url_lex_token, while char_fn and end_fn are left NULL.
// The table is sized by URL_MAX and lexing starts in URL_BEGIN.
// NOTE(review): the LEX_* macros are defined elsewhere; LEX_STATE_END presumably marks states in which the input may end,
// and LEX_DEFAULT a catch-all transition that also closes the list (no LEX_END follows it here) — confirm against the lex header.
79 static struct lex url_lex = { |
|
80 .token_fn = url_lex_token, |
|
81 .char_fn = NULL, |
|
82 .end_fn = NULL, |
|
83 |
|
84 .state_count = URL_MAX, |
|
85 .initial_state = URL_BEGIN, |
|
86 .state_list = { |
|
87 LEX_STATE ( URL_BEGIN ) { |
|
88 LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
89 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
90 LEX_CHAR ( '/', URL_PATH_START ), |
|
91 LEX_CHAR ( '?', URL_OPT_START ), |
|
92 LEX_END |
|
93 }, |
|
94 |
|
95 // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME |
|
96 LEX_STATE_END ( URL_BEGIN_ALNUM ) { |
|
97 LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
98 LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME |
|
99 LEX_CHAR ( ':', URL_BEGIN_COLON ), |
|
100 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
101 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
102 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
103 LEX_END |
|
104 }, |
|
105 |
|
106 // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP |
|
107 LEX_STATE ( URL_BEGIN_COLON ) { |
|
108 LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME |
|
109 LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
110 LEX_END |
|
111 }, |
|
112 |
|
113 |
|
114 LEX_STATE ( URL_SCHEME ) { |
|
115 LEX_ALNUM ( URL_SCHEME ), |
|
116 LEX_CHAR ( '+', URL_SCHEME_SEP ), |
|
117 LEX_CHAR ( ':', URL_SCHEME_END_COL ), |
|
118 LEX_END |
|
119 }, |
|
120 |
|
121 LEX_STATE ( URL_SCHEME_SEP ) { |
|
122 LEX_ALNUM ( URL_SCHEME ), |
|
123 LEX_END |
|
124 }, |
|
125 |
|
126 LEX_STATE ( URL_SCHEME_END_COL ) { |
|
127 LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), |
|
128 LEX_END |
|
129 }, |
|
130 |
|
131 LEX_STATE ( URL_SCHEME_END_SLASH1 ) { |
|
132 LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), |
|
133 LEX_END |
|
134 }, |
|
135 |
|
136 LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { |
|
137 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
138 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
139 LEX_CHAR ( '/', URL_PATH_START ), |
|
140 LEX_CHAR ( '?', URL_OPT_START ), |
|
141 LEX_END |
|
142 }, |
|
143 |
|
144 // this can be URL_USERNAME or URL_HOSTNAME |
|
145 LEX_STATE_END ( URL_USERHOST_ALNUM ) { |
|
146 LEX_CHAR ( ':', URL_USERHOST_COLON ), |
|
147 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
148 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
149 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
150 LEX_DEFAULT ( URL_USERHOST_ALNUM ), |
|
151 }, |
|
152 |
|
153 // this can be URL_USERNAME_END or URL_SERVICE_SEP |
|
154 LEX_STATE ( URL_USERHOST_COLON ) { |
|
155 LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
156 LEX_END |
|
157 }, |
|
158 |
|
159 // this can be URL_PASSWORD or URL_SERVICE |
|
160 LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { |
|
161 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSWORD |
|
162 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE |
|
163 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE |
|
164 LEX_DEFAULT ( URL_USERHOST_ALNUM2 ), |
|
165 }, |
|
166 |
|
167 // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 |
|
168 LEX_STATE ( URL_USERNAME ) { |
|
169 LEX_END |
|
170 }, |
|
171 |
|
172 LEX_STATE ( URL_PASSWORD_SEP ) { |
|
173 LEX_END |
|
174 }, |
|
175 |
|
176 LEX_STATE ( URL_PASSWORD ) { |
|
177 LEX_END |
|
178 }, |
|
179 |
|
180 |
|
181 LEX_STATE_END ( URL_USERNAME_END ) { |
|
182 LEX_ALNUM ( URL_HOSTNAME ), |
|
183 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
184 LEX_CHAR ( '/', URL_PATH_START ), |
|
185 LEX_CHAR ( '?', URL_OPT_START ), |
|
186 LEX_END |
|
187 }, |
|
188 |
|
189 |
|
190 LEX_STATE_END ( URL_HOSTNAME ) { |
|
191 LEX_ALNUM ( URL_HOSTNAME ), |
|
192 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
193 LEX_CHAR ( '/', URL_PATH_START ), |
|
194 LEX_CHAR ( '?', URL_OPT_START ), |
|
195 LEX_END |
|
196 }, |
|
197 |
|
198 |
|
199 LEX_STATE ( URL_SERVICE_SEP ) { |
|
200 LEX_ALNUM ( URL_SERVICE ), |
|
201 LEX_CHAR ( '/', URL_PATH_START ), |
|
202 LEX_CHAR ( '?', URL_OPT_START ), |
|
203 LEX_END |
|
204 }, |
|
205 |
|
206 LEX_STATE_END ( URL_SERVICE ) { |
|
207 LEX_ALNUM ( URL_SERVICE ), |
|
208 LEX_CHAR ( '/', URL_PATH_START ), |
|
209 LEX_CHAR ( '?', URL_OPT_START ), |
|
210 LEX_END |
|
211 }, |
|
212 |
|
213 |
|
214 LEX_STATE_END ( URL_PATH_START ) { |
|
215 LEX_CHAR ( '?', URL_OPT_START ), |
|
216 LEX_DEFAULT ( URL_PATH ), |
|
217 }, |
|
218 |
|
219 LEX_STATE_END ( URL_PATH ) { |
|
220 LEX_CHAR ( '?', URL_OPT_START ), |
|
221 LEX_DEFAULT ( URL_PATH ), |
|
222 }, |
|
223 |
|
224 |
|
225 LEX_STATE_END ( URL_OPT_START ) { |
|
226 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
227 LEX_INVALID ( '=' ), |
|
228 LEX_DEFAULT ( URL_OPT_KEY ), |
|
229 }, |
|
230 |
|
231 LEX_STATE_END ( URL_OPT_KEY ) { |
|
232 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
233 LEX_CHAR ( '=', URL_OPT_EQ ), |
|
234 LEX_DEFAULT ( URL_OPT_KEY ), |
|
235 }, |
|
236 |
|
237 LEX_STATE_END ( URL_OPT_EQ ) { |
|
238 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
239 LEX_INVALID ( '=' ), |
|
240 LEX_DEFAULT ( URL_OPT_VAL ), |
|
241 }, |
|
242 |
|
243 LEX_STATE_END ( URL_OPT_VAL ) { |
|
244 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
245 LEX_INVALID ( '=' ), |
|
246 LEX_DEFAULT ( URL_OPT_VAL ), |
|
247 }, |
|
248 |
|
249 LEX_STATE_END ( URL_OPT_SEP ) { |
|
250 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
251 LEX_INVALID ( '=' ), |
|
252 LEX_DEFAULT ( URL_OPT_KEY ), |
|
253 }, |
|
254 |
|
255 LEX_STATE ( URL_ERROR ) { |
|
256 LEX_END |
|
257 }, |
|
258 } |
|
259 }; |
75 |
260 |
// Token callback installed as url_lex.token_fn.
// Converts the int state arguments to enum url_token and treats `arg` as a struct url_state pointer.
// Returns 0 on the normal path and -1 from the `error` label.
// NOTE(review): this_token/next_token/prev_token are presumably the state just lexed and its neighbours,
// with token_data holding the token's text — confirm against the lex API.
76 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { |
261 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { |
77 enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; |
262 enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; |
78 struct url_state *state = arg; |
263 struct url_state *state = arg; |
79 const char **copy_to = NULL; |
264 const char **copy_to = NULL; |
80 |
265 |
// explicit no-op use of prev_token (it is otherwise only read by the DEBUG call on the error path)
81 (void) prev_token; |
266 (void) prev_token; |
82 |
267 |
// dispatch on the current token's state; the inner switch refines by the following state
83 switch (this_token) { |
268 switch (this_token) { |
|
269 case URL_BEGIN: |
|
270 // irrelevant |
|
271 break; |
|
272 |
84 case URL_BEGIN_ALNUM: |
273 case URL_BEGIN_ALNUM: |
85 switch (next_token) { |
274 switch (next_token) { |
86 case URL_SCHEME_SEP: |
275 case URL_SCHEME_SEP: |
87 // store the scheme |
276 // store the scheme |
88 if (_url_append_scheme(state->url, token_data)) |
277 if (_url_append_scheme(state->url, token_data)) |
261 |
451 |
262 // good |
452 // good |
263 return 0; |
453 return 0; |
264 |
454 |
// failure path: log the prev -> this -> next state names together with the token text, then report -1
265 error: |
455 error: |
266 // XXX: error codes? |
456 DEBUG("token: %s -> %s -> %s: %s", |
|
457 LEX_STATE_NAME(&url_lex, prev_token), LEX_STATE_NAME(&url_lex, this_token), LEX_STATE_NAME(&url_lex, next_token), |
|
458 token_data |
|
459 ); |
267 return -1; |
460 return -1; |
268 } |
461 } |
269 |
462 |
// Lexer definition for URLs: tokens are reported to url_lex_token, while char_fn and end_fn are left NULL.
// The table is sized by URL_MAX; no initial state is set explicitly in this initializer.
// NOTE(review): the LEX_* macros are defined elsewhere; LEX_STATE_END presumably marks states in which the input may end,
// and LEX_DEFAULT a catch-all transition that also closes the list (no LEX_END follows it here) — confirm against the lex header.
270 static struct lex url_lex = { |
|
271 .token_fn = url_lex_token, |
|
272 .char_fn = NULL, |
|
273 .end_fn = NULL, |
|
274 |
|
275 .state_count = URL_MAX, |
|
276 .state_list = { |
|
277 LEX_STATE ( URL_BEGIN ) { |
|
278 LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
279 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
280 LEX_CHAR ( '/', URL_PATH_START ), |
|
281 LEX_CHAR ( '?', URL_OPT_START ), |
|
282 LEX_END |
|
283 }, |
|
284 |
|
285 // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME |
|
286 LEX_STATE_END ( URL_BEGIN_ALNUM ) { |
|
287 LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
288 LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME |
|
289 LEX_CHAR ( ':', URL_BEGIN_COLON ), |
|
290 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
291 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
292 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
293 LEX_END |
|
294 }, |
|
295 |
|
296 // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP |
|
297 LEX_STATE ( URL_BEGIN_COLON ) { |
|
298 LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME |
|
299 LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
300 LEX_END |
|
301 }, |
|
302 |
|
303 |
|
304 LEX_STATE ( URL_SCHEME ) { |
|
305 LEX_ALNUM ( URL_SCHEME ), |
|
306 LEX_CHAR ( '+', URL_SCHEME_SEP ), |
|
307 LEX_CHAR ( ':', URL_SCHEME_END_COL ), |
|
308 LEX_END |
|
309 }, |
|
310 |
|
311 LEX_STATE ( URL_SCHEME_SEP ) { |
|
312 LEX_ALNUM ( URL_SCHEME ), |
|
313 LEX_END |
|
314 }, |
|
315 |
|
316 LEX_STATE ( URL_SCHEME_END_COL ) { |
|
317 LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), |
|
318 LEX_END |
|
319 }, |
|
320 |
|
321 LEX_STATE ( URL_SCHEME_END_SLASH1 ) { |
|
322 LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), |
|
323 LEX_END |
|
324 }, |
|
325 |
|
326 LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { |
|
327 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
328 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
329 LEX_CHAR ( '/', URL_PATH_START ), |
|
330 LEX_CHAR ( '?', URL_OPT_START ), |
|
331 LEX_END |
|
332 }, |
|
333 |
|
334 // this can be URL_USERNAME or URL_HOSTNAME |
|
335 LEX_STATE_END ( URL_USERHOST_ALNUM ) { |
|
336 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
337 LEX_CHAR ( ':', URL_USERHOST_COLON ), |
|
338 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
339 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
340 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
341 LEX_END |
|
342 }, |
|
343 |
|
344 // this can be URL_USERNAME_END or URL_SERVICE_SEP |
|
345 LEX_STATE ( URL_USERHOST_COLON ) { |
|
346 LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
347 LEX_END |
|
348 }, |
|
349 |
|
350 // this can be URL_PASSWORD or URL_SERVICE |
|
351 LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { |
|
// NOTE(review): alnum input here moves to URL_USERHOST_ALNUM rather than staying in URL_USERHOST_ALNUM2 — looks unintended; confirm
352 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
353 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSWORD |
|
354 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE |
|
355 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE |
|
356 LEX_END |
|
357 }, |
|
358 |
|
359 // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 |
|
360 LEX_STATE ( URL_USERNAME ) { |
|
361 LEX_END |
|
362 }, |
|
363 |
|
364 LEX_STATE ( URL_PASSWORD_SEP ) { |
|
365 LEX_END |
|
366 }, |
|
367 |
|
368 LEX_STATE ( URL_PASSWORD ) { |
|
369 LEX_END |
|
370 }, |
|
371 |
|
372 |
|
373 LEX_STATE_END ( URL_USERNAME_END ) { |
|
374 LEX_ALNUM ( URL_HOSTNAME ), |
|
375 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
376 LEX_CHAR ( '/', URL_PATH_START ), |
|
377 LEX_CHAR ( '?', URL_OPT_START ), |
|
378 LEX_END |
|
379 }, |
|
380 |
|
381 |
|
382 LEX_STATE_END ( URL_HOSTNAME ) { |
|
383 LEX_ALNUM ( URL_HOSTNAME ), |
|
384 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
385 LEX_CHAR ( '/', URL_PATH_START ), |
|
386 LEX_CHAR ( '?', URL_OPT_START ), |
|
387 LEX_END |
|
388 }, |
|
389 |
|
390 |
|
391 LEX_STATE ( URL_SERVICE_SEP ) { |
|
392 LEX_ALNUM ( URL_SERVICE ), |
|
393 LEX_CHAR ( '/', URL_PATH_START ), |
|
394 LEX_CHAR ( '?', URL_OPT_START ), |
|
395 LEX_END |
|
396 }, |
|
397 |
|
398 LEX_STATE_END ( URL_SERVICE ) { |
|
399 LEX_ALNUM ( URL_SERVICE ), |
|
400 LEX_CHAR ( '/', URL_PATH_START ), |
|
401 LEX_CHAR ( '?', URL_OPT_START ), |
|
402 LEX_END |
|
403 }, |
|
404 |
|
405 |
|
406 LEX_STATE_END ( URL_PATH_START ) { |
|
407 LEX_CHAR ( '?', URL_OPT_START ), |
|
408 LEX_DEFAULT ( URL_PATH ), |
|
409 }, |
|
410 |
|
411 LEX_STATE_END ( URL_PATH ) { |
|
412 LEX_CHAR ( '?', URL_OPT_START ), |
|
413 LEX_DEFAULT ( URL_PATH ), |
|
414 }, |
|
415 |
|
416 |
|
417 LEX_STATE_END ( URL_OPT_START ) { |
|
418 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
419 LEX_INVALID ( '=' ), |
|
420 LEX_DEFAULT ( URL_OPT_KEY ), |
|
421 }, |
|
422 |
|
423 LEX_STATE_END ( URL_OPT_KEY ) { |
|
424 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
425 LEX_CHAR ( '=', URL_OPT_EQ ), |
|
426 LEX_DEFAULT ( URL_OPT_KEY ), |
|
427 }, |
|
428 |
|
429 LEX_STATE_END ( URL_OPT_EQ ) { |
|
430 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
431 LEX_INVALID ( '=' ), |
|
432 LEX_DEFAULT ( URL_OPT_VAL ), |
|
433 }, |
|
434 |
|
435 LEX_STATE_END ( URL_OPT_VAL ) { |
|
436 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
437 LEX_INVALID ( '=' ), |
|
438 LEX_DEFAULT ( URL_OPT_VAL ), |
|
439 }, |
|
440 |
|
441 LEX_STATE_END ( URL_OPT_SEP ) { |
|
442 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
443 LEX_INVALID ( '=' ), |
|
444 LEX_DEFAULT ( URL_OPT_KEY ), |
|
445 }, |
|
446 |
|
447 LEX_STATE ( URL_ERROR ) { |
|
448 LEX_END |
|
449 }, |
|
450 } |
|
451 }; |
|
452 |
463 |
453 int url_parse (struct url *url, const char *text) { |
464 int url_parse (struct url *url, const char *text) { |
454 struct url_state state; ZINIT(state); |
465 struct url_state state; ZINIT(state); |
455 int ret; |
466 int ret; |
456 |
467 |