src/lib/url.c
changeset 14 115067dfba55
parent 13 385b9a10d096
child 15 a8d183e79ed9
equal deleted inserted replaced
13:385b9a10d096 14:115067dfba55
     1 
     1 
     2 #include "url.h"
     2 #include "url.h"
     3 #include "lexer.h"
     3 #include "lexer.h"
     4 
     4 
     5 enum url_tokens {
     5 enum url_token {
     6     URL_INVALID,
     6     URL_INVALID,
     7     
     7     
       
     8     URL_BEGIN,
       
     9 
       
    10     // kludge to resolve ambiguous URL_SCHEME/URL_USERNAME+URL_PASSWORD/URL_HOSTNAME+URL_SERVICE at the beginning
       
    11     URL_BEGIN_ALNUM,
       
    12     URL_BEGIN_COLON,
       
    13 
     8     URL_SCHEME,
    14     URL_SCHEME,
     9     URL_SCHEME_SEP,
    15     URL_SCHEME_SEP,
    10     URL_SCHEME_END_COL,
    16     URL_SCHEME_END_COL,
    11     URL_SCHEME_END_SLASH1,
    17     URL_SCHEME_END_SLASH1,
    12     URL_SCHEME_END_SLASH2,
    18     URL_SCHEME_END_SLASH2,
       
    19 
       
    20     // kludge to resolve ambiguous URL_USERNAME+URL_PASSWORD/URL_HOSTNAME+URL_SERVICE after a scheme 
       
    21     URL_USERHOST_ALNUM,
       
    22     URL_USERHOST_COLON,
       
    23     URL_USERHOST_ALNUM2,
    13     
    24     
    14     URL_USERNAME,
    25     URL_USERNAME,
    15     URL_PASSWORD_SEP,
    26     URL_PASSWORD_SEP,
    16     URL_PASSWORD,
    27     URL_PASSWORD,
    17     URL_USERNAME_END,
    28     URL_USERNAME_END,
    27     URL_OPT_START,
    38     URL_OPT_START,
    28     URL_OPT_KEY,
    39     URL_OPT_KEY,
    29     URL_OPT_EQ,
    40     URL_OPT_EQ,
    30     URL_OPT_VAL,
    41     URL_OPT_VAL,
    31     URL_OPT_SEP,
    42     URL_OPT_SEP,
       
    43     
       
    44     URL_END,
    32 
    45 
    33     URL_MAX,
    46     URL_MAX,
    34 };
    47 };
    35 
    48 
    36 static struct lex *url_lex = {
    49 /*
       
    50  * Parser state
       
    51  */
       
    52 struct url_state {
       
    53     struct url *url;
       
    54 
       
    55 
       
    56 };
       
    57 
       
    58 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) {
       
    59     enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token;
       
    60     struct url_state *state = arg;
       
    61 
       
    62 }
       
    63 
       
    64 static int url_lex_end (int _last_token, void *arg) {
       
    65     enum url_token last_token = _last_token;
       
    66     struct url_state *state = arg;
       
    67 
       
    68 }
       
    69 
       
    70 static struct lex url_lex = {
    37     .state_count = URL_MAX,
    71     .state_count = URL_MAX,
    38     .stae_list = {
    72     .state_list = {
    39         LEX_STATE(URL_SCHEME)
    73         LEX_STATE ( URL_BEGIN ) {
    40             LEX_ALNUM       (           URL_SCHEME          ),
    74             LEX_ALNUM       (           URL_BEGIN_ALNUM         ),
    41             LEX_CHAR        (   '+',    URL_SCHEME_SEP      ),
    75             LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
    42         LEX_STATE_END,
    76             LEX_CHAR        (   '/',    URL_PATH_START          ),
    43 
    77             LEX_CHAR        (   '?',    URL_OPT_START           ),
    44 
    78             LEX_END
    45 
    79         },
       
    80         
       
    81         // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME
       
    82         LEX_STATE_END ( URL_BEGIN_ALNUM ) {
       
    83             LEX_ALNUM       (           URL_BEGIN_ALNUM         ),
       
    84             LEX_CHAR        (   '+',    URL_SCHEME_SEP          ),  // it was URL_SCHEME
       
    85             LEX_CHAR        (   ':',    URL_BEGIN_COLON         ), 
       
    86             LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_USERNAME
       
    87             LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
       
    88             LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
       
    89             LEX_END
       
    90         },
       
    91         
       
    92         // this can be URL_SCHEME_END_COL, URL_PASSWORD_SEP or URL_SERVICE_SEP
       
    93         LEX_STATE ( URL_BEGIN_COLON ) {
       
    94             LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH1   ),  // it was URL_SCHEME
       
    95             LEX_ALNUM       (           URL_USERHOST_ALNUM2     ),
       
    96             LEX_END
       
    97         },
       
    98        
       
    99 
       
   100         LEX_STATE ( URL_SCHEME ) { 
       
   101             LEX_ALNUM       (           URL_SCHEME              ),
       
   102             LEX_CHAR        (   '+',    URL_SCHEME_SEP          ),
       
   103             LEX_CHAR        (   ':',    URL_SCHEME_END_COL      ),
       
   104             LEX_END
       
   105         },
       
   106 
       
   107         LEX_STATE ( URL_SCHEME_SEP ) {
       
   108             LEX_ALNUM       (           URL_SCHEME              ),
       
   109             LEX_END
       
   110         },
       
   111 
       
   112         LEX_STATE ( URL_SCHEME_END_COL ) {
       
   113             LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH1   ),
       
   114             LEX_END
       
   115         },
       
   116 
       
   117         LEX_STATE ( URL_SCHEME_END_SLASH1 ) {
       
   118             LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH2   ),
       
   119             LEX_END
       
   120         },
       
   121 
       
   122         LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) {
       
   123             LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
       
   124             LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
       
   125             LEX_CHAR        (   '/',    URL_PATH_START          ),
       
   126             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   127             LEX_END
       
   128         },
       
   129         
       
   130         // this can be URL_USERNAME or URL_HOSTNAME
       
   131         LEX_STATE_END ( URL_USERHOST_ALNUM ) {
       
   132             LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
       
   133             LEX_CHAR        (   ':',    URL_USERHOST_COLON      ), 
       
   134             LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_USERNAME
       
   135             LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
       
   136             LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
       
   137             LEX_END
       
   138         }
       
   139         
       
   140         // this can be URL_PASSWORD_SEP or URL_SERVICE_SEP
       
   141         LEX_STATE ( URL_USERHOST_COLON ) {
       
   142             LEX_ALNUM       (           URL_USERHOST_ALNUM2        ),
       
   143             LEX_END
       
   144         },
       
   145         
       
   146         // this can be URL_PASSWORD or URL_SERVICE
       
   147         LEX_STATE_END ( URL_USERHOST_ALNUM2 ) {
       
   148             LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
       
   149             LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_PASSWORD
       
   150             LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_SERVICE
       
   151             LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_SERVICE
       
   152             LEX_END
       
   153         },
       
   154         
       
   155         // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2
       
   156         LEX_STATE ( URL_USERNAME ) {
       
   157             LEX_END
       
   158         },
       
   159 
       
   160         LEX_STATE ( URL_PASSWORD_SEP ) {
       
   161             LEX_END
       
   162         },
       
   163 
       
   164         LEX_STATE ( URL_PASSWORD ) {
       
   165             LEX_END
       
   166         },
       
   167 
       
   168 
       
   169         LEX_STATE_END ( URL_USERNAME_END ) {
       
   170             LEX_ALNUM       (           URL_HOSTNAME            ), 
       
   171             LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
       
   172             LEX_CHAR        (   '/',    URL_PATH_START          ),
       
   173             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   174             LEX_END
       
   175         },
       
   176 
       
   177 
       
   178         LEX_STATE_END ( URL_HOSTNAME ) {
       
   179             LEX_ALNUM       (           URL_HOSTNAME            ), 
       
   180             LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
       
   181             LEX_CHAR        (   '/',    URL_PATH_START          ),
       
   182             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   183             LEX_END
       
   184         },
       
   185 
       
   186 
       
   187         LEX_STATE ( URL_SERVICE_SEP ) {
       
   188             LEX_ALNUM       (           URL_SERVICE            ), 
       
   189             LEX_CHAR        (   '/',    URL_PATH_START          ),
       
   190             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   191             LEX_END
       
   192         },
       
   193 
       
   194         LEX_STATE_END ( URL_SERVICE ) {
       
   195             LEX_ALNUM       (           URL_SERVICE            ), 
       
   196             LEX_CHAR        (   '/',    URL_PATH_START          ),
       
   197             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   198             LEX_END
       
   199         },
       
   200 
       
   201 
       
   202         LEX_STATE_END ( URL_PATH_START ) {
       
   203             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   204             LEX_DEFAULT     (           URL_PATH                ),
       
   205         },
       
   206 
       
   207         LEX_STATE_END ( URL_PATH ) {
       
   208             LEX_CHAR        (   '?',    URL_OPT_START           ),
       
   209             LEX_DEFAULT     (           URL_PATH                ),
       
   210         },
       
   211 
       
   212 
       
   213         LEX_STATE_END ( URL_OPT_START ) {
       
   214             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
       
   215             LEX_CHAR        (   '=',    URL_ERROR               ),
       
   216             LEX_DEFAULT     (           URL_OPT_KEY             ),
       
   217         },
       
   218 
       
   219         LEX_STATE_END ( URL_OPT_KEY ) {
       
   220             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
       
   221             LEX_CHAR        (   '=',    URL_OPT_EQ              ),
       
   222             LEX_DEFAULT     (           URL_OPT_KEY             ),
       
   223         },
       
   224 
       
   225         LEX_STATE_END ( URL_OPT_EQ ) {
       
   226             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
       
   227             LEX_DEFAULT     (           URL_OPT_VAL             ),
       
   228         },
       
   229 
       
   230         LEX_STATE_END ( URL_OPT_VAL ) {
       
   231             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
       
   232             LEX_DEFAULT     (           URL_OPT_VAL             ),
       
   233         },
       
   234 
       
   235         LEX_STATE_END ( URL_OPT_SEP ) {
       
   236             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
       
   237             LEX_CHAR        (   '=',    URL_ERROR               ),
       
   238             LEX_DEFAULT     (           URL_OPT_KEY             ),
       
   239         },
       
   240         
       
   241         LEX_STATE ( URL_ERROR ) {
       
   242             LEX_END
       
   243         },
       
   244 
       
   245         URL_MAX,
    46     },
   246     },
       
   247 
       
   248     .token_fn = url_lex_token,
       
   249     .char_fn = NULL,
       
   250     .end_fn = url_lex_end,
       
   251 };
       
   252 
       
   253 int url_parse (struct url *url, const char *text) {
       
   254     struct url_state state; ZINIT(state);
       
   255     int ret;
       
   256 
       
   257     // set up state
       
   258     state.url = url;
       
   259     
       
   260     // parse it
       
   261     if ((ret = lexer(&url_lex, text, &state)))
       
   262         ERROR("invalid URL");
       
   263 
       
   264     // success
       
   265     return 0;
       
   266 
       
   267 error:
       
   268     return -1;
    47 }
   269 }
    48 
   270