27 URL_OPT_START, |
38 URL_OPT_START, |
28 URL_OPT_KEY, |
39 URL_OPT_KEY, |
29 URL_OPT_EQ, |
40 URL_OPT_EQ, |
30 URL_OPT_VAL, |
41 URL_OPT_VAL, |
31 URL_OPT_SEP, |
42 URL_OPT_SEP, |
|
43 |
|
44 URL_END, |
32 |
45 |
33 URL_MAX, |
46 URL_MAX, |
34 }; |
47 }; |
35 |
48 |
36 static struct lex *url_lex = { |
/*
 * Parser state.
 *
 * One instance lives on the stack of url_parse() for the duration of a parse
 * and is passed to lexer() as the opaque callback argument.
 */
struct url_state {
    /* URL being parsed; assigned by url_parse() before lexing starts */
    struct url *url;
};
|
57 |
|
58 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { |
|
59 enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; |
|
60 struct url_state *state = arg; |
|
61 |
|
62 } |
|
63 |
|
64 static int url_lex_end (int _last_token, void *arg) { |
|
65 enum url_token last_token = _last_token; |
|
66 struct url_state *state = arg; |
|
67 |
|
68 } |
|
69 |
|
70 static struct lex url_lex = { |
37 .state_count = URL_MAX, |
71 .state_count = URL_MAX, |
38 .stae_list = { |
72 .state_list = { |
39 LEX_STATE(URL_SCHEME) |
73 LEX_STATE ( URL_BEGIN ) { |
40 LEX_ALNUM ( URL_SCHEME ), |
74 LEX_ALNUM ( URL_BEGIN_ALNUM ), |
41 LEX_CHAR ( '+', URL_SCHEME_SEP ), |
75 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
42 LEX_STATE_END, |
76 LEX_CHAR ( '/', URL_PATH_START ), |
43 |
77 LEX_CHAR ( '?', URL_OPT_START ), |
44 |
78 LEX_END |
45 |
79 }, |
|
80 |
|
81 // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME |
|
82 LEX_STATE_END ( URL_BEGIN_ALNUM ) { |
|
83 LEX_ALNUM ( URL_BEGIN_ALNUM ), |
|
84 LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME |
|
85 LEX_CHAR ( ':', URL_BEGIN_COLON ), |
|
86 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
87 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
88 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
89 LEX_END |
|
90 }, |
|
91 |
|
92 // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP |
|
93 LEX_STATE ( URL_BEGIN_COLON ) { |
|
94 LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME |
|
95 LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
96 LEX_END |
|
97 }, |
|
98 |
|
99 |
|
100 LEX_STATE ( URL_SCHEME ) { |
|
101 LEX_ALNUM ( URL_SCHEME ), |
|
102 LEX_CHAR ( '+', URL_SCHEME_SEP ), |
|
103 LEX_CHAR ( ':', URL_SCHEME_END_COL ), |
|
104 LEX_END |
|
105 }, |
|
106 |
|
107 LEX_STATE ( URL_SCHEME_SEP ) { |
|
108 LEX_ALNUM ( URL_SCHEME ), |
|
109 LEX_END |
|
110 }, |
|
111 |
|
112 LEX_STATE ( URL_SCHEME_END_COL ) { |
|
113 LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), |
|
114 LEX_END |
|
115 }, |
|
116 |
|
117 LEX_STATE ( URL_SCHEME_END_SLASH1 ) { |
|
118 LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), |
|
119 LEX_END |
|
120 }, |
|
121 |
|
122 LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { |
|
123 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
124 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
125 LEX_CHAR ( '/', URL_PATH_START ), |
|
126 LEX_CHAR ( '?', URL_OPT_START ), |
|
127 LEX_END |
|
128 }, |
|
129 |
|
130 // this can be URL_USERNAME or URL_HOSTNAME |
|
131 LEX_STATE_END ( URL_USERHOST_ALNUM ) { |
|
132 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
133 LEX_CHAR ( ':', URL_USERHOST_COLON ), |
|
134 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME |
|
135 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME |
|
136 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME |
|
137 LEX_END |
|
138 } |
|
139 |
|
140 // this can be URL_USERNAME_END or URL_SERVICE_SEP |
|
141 LEX_STATE ( URL_USERHOST_COLON ) { |
|
142 LEX_ALNUM ( URL_USERHOST_ALNUM2 ), |
|
143 LEX_END |
|
144 }, |
|
145 |
|
146 // this can be URL_PASSWORD or URL_SERVICE |
|
147 LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { |
|
148 LEX_ALNUM ( URL_USERHOST_ALNUM ), |
|
149 LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSSWORD |
|
150 LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE |
|
151 LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE |
|
152 LEX_END |
|
153 }, |
|
154 |
|
155 // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 |
|
156 LEX_STATE ( URL_USERNAME ) { |
|
157 LEX_END |
|
158 }, |
|
159 |
|
160 LEX_STATE ( URL_PASSWORD_SEP ) { |
|
161 LEX_END |
|
162 }, |
|
163 |
|
164 LEX_STATE ( URL_PASSWORD ) { |
|
165 LEX_END |
|
166 }, |
|
167 |
|
168 |
|
169 LEX_STATE_END ( URL_USERNAME_END ) { |
|
170 LEX_ALNUM ( URL_HOSTNAME ), |
|
171 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
172 LEX_CHAR ( '/', URL_PATH_START ), |
|
173 LEX_CHAR ( '?', URL_OPT_START ), |
|
174 LEX_END |
|
175 }, |
|
176 |
|
177 |
|
178 LEX_STATE_END ( URL_HOSTNAME ) { |
|
179 LEX_ALNUM ( URL_HOSTNAME ), |
|
180 LEX_CHAR ( ':', URL_SERVICE_SEP ), |
|
181 LEX_CHAR ( '/', URL_PATH_START ), |
|
182 LEX_CHAR ( '?', URL_OPT_START ), |
|
183 LEX_END |
|
184 }, |
|
185 |
|
186 |
|
187 LEX_STATE ( URL_SERVICE_SEP ) { |
|
188 LEX_ALNUM ( URL_SERVICE ), |
|
189 LEX_CHAR ( '/', URL_PATH_START ), |
|
190 LEX_CHAR ( '?', URL_OPT_START ), |
|
191 LEX_END |
|
192 }, |
|
193 |
|
194 LEX_STATE_END ( URL_SERVICE ) { |
|
195 LEX_ALNUM ( URL_SERVICE ), |
|
196 LEX_CHAR ( '/', URL_PATH_START ), |
|
197 LEX_CHAR ( '?', URL_OPT_START ), |
|
198 LEX_END |
|
199 }, |
|
200 |
|
201 |
|
202 LEX_STATE_END ( URL_PATH_START ) { |
|
203 LEX_CHAR ( '?', URL_OPT_START ), |
|
204 LEX_DEFAULT ( URL_PATH ), |
|
205 }, |
|
206 |
|
207 LEX_STATE_END ( URL_PATH ) { |
|
208 LEX_CHAR ( '?', URL_OPT_START ), |
|
209 LEX_DEFAULT ( URL_PATH ), |
|
210 }, |
|
211 |
|
212 |
|
213 LEX_STATE_END ( URL_OPT_START ) { |
|
214 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
215 LEX_CHAR ( '=', URL_ERROR ), |
|
216 LEX_DEFAULT ( URL_OPT_KEY ), |
|
217 }, |
|
218 |
|
219 LEX_STATE_END ( URL_OPT_KEY ) { |
|
220 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
221 LEX_CHAR ( '=', URL_OPT_EQ ), |
|
222 LEX_DEFAULT ( URL_OPT_KEY ), |
|
223 }, |
|
224 |
|
225 LEX_STATE_END ( URL_OPT_EQ ) { |
|
226 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
227 LEX_DEFAULT ( URL_OPT_VAL ), |
|
228 }, |
|
229 |
|
230 LEX_STATE_END ( URL_OPT_VAL ) { |
|
231 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
232 LEX_DEFAULT ( URL_OPT_VAL ), |
|
233 }, |
|
234 |
|
235 LEX_STATE_END ( URL_OPT_SEP ) { |
|
236 LEX_CHAR ( '&', URL_OPT_SEP ), |
|
237 LEX_CHAR ( '=', URL_ERROR ), |
|
238 LEX_DEFAULT ( URL_OPT_KEY ), |
|
239 }, |
|
240 |
|
241 LEX_STATE ( URL_ERROR ) { |
|
242 LEX_END |
|
243 }, |
|
244 |
|
245 URL_MAX, |
46 }, |
246 }, |
|
247 |
|
248 .token_fn = url_lex_token, |
|
249 .char_fn = NULL, |
|
250 .end_fn = url_lex_end, |
|
251 }; |
|
252 |
|
253 int url_parse (struct url *url, const char *text) { |
|
254 struct url_state state; ZINIT(state); |
|
255 int ret; |
|
256 |
|
257 // set up state |
|
258 state.url = url; |
|
259 |
|
260 // parse it |
|
261 if ((ret = lexer(&url_lex, text, &state))) |
|
262 ERROR("invalid URL"); |
|
263 |
|
264 // success |
|
265 return 0; |
|
266 |
|
267 error: |
|
268 return -1; |
47 } |
269 } |
48 |
270 |