# HG changeset patch # User Tero Marttila # Date 1223501617 -10800 # Node ID 74fb62022fb33d23867990af2b70af1f2800436f # Parent a8d183e79ed9706ee2a77f08576a395e5a95e251 starting to work diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/lex.c --- a/src/lib/lex.c Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/lex.c Thu Oct 09 00:33:37 2008 +0300 @@ -1,7 +1,181 @@ + +#include #include "lex.h" +#include "error.h" +#include "log.h" + +#define INITIAL_BUF_SIZE 4096 int lexer (const struct lex *lex, const char *input, void *arg) { - // XXX: implement + // handling error returns + int err = -1, cb_err; + + // token buffer + char *buf = NULL, *buf_ptr; + size_t buf_size = INITIAL_BUF_SIZE; + + // state + int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL; + + // input chars + const char *c = input; + + // lookups + const struct lex_transition *trans = NULL; + + // allocate the buffer + if ((buf = malloc(sizeof(char) * buf_size)) == NULL) + goto error; + + // set buf_ptr initial position + buf_ptr = buf; + + // clear input + DEBUG("*cough*"); + DEBUGN("%s", ""); + + // process input + do { + if (*c) { + // look up the next state + for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) { + // accept defaults + if (trans->flags & LEX_TRANS_DEFAULT) + break; + + // disregard non-matches + if (trans->left > *c || *c > trans->right) + continue; + + // abort on invalids + if (trans->flags & LEX_TRANS_INVALID) + goto error; + + else { + // accept it + break; + } + } + + // did we find a transition with a valid next state? + if (!(next_state = trans->next_state)) + goto error; + + // call the char handler + if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg))) + goto error; + + } else { + // EOF! + next_state = LEX_EOF; + + // is cur_state a valid end state? + if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) + goto error; + + // note: we don't pass the NUL byte to the char handler + } + + // if this char is part of the next token... + if (next_state != cur_state) { + // terminate the buffer and reset buf_ptr + *buf_ptr = 0; buf_ptr = buf; + + // dump state transitions + DEBUGF("\n\t%25s -> %25s -> %25s", + LEX_STATE_NAME(lex, prev_state), + LEX_STATE_NAME(lex, cur_state), + LEX_STATE_NAME(lex, next_state) + ); + + // pass in the complete token to the handler + if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg))) + goto error; + + // update states + prev_state = cur_state; + cur_state = next_state; + next_state = LEX_INITIAL; + } + + // dump chars + if (next_state == LEX_INITIAL) + DEBUGN("%c", *c); + else + DEBUGNF("%c", *c); + + // store this char in the buffer + *(buf_ptr++) = *c; + + // grow the buffer if needed + if (buf_ptr - buf >= buf_size) { + // remember the offset, as buf_ptr might get invalidated if buf is moved + size_t buf_offset = buf_ptr - buf; + + // calc new size + buf_size *= 2; + + // grow/move + if ((buf = realloc(buf, buf_size)) == NULL) + goto error; + + // fix buf_ptr + buf_ptr = buf + buf_offset; + } + } while (*(c++)); + + // call the end handler + if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg))) + goto error; + + // successfully parsed! + err = 0; + +error: + DEBUGNF("\n"); + + if (cb_err) + err = cb_err; + + // dump debug info on error + if (err) { + const char *cc; + + // figure out the error + if (!buf) + WARNING("malloc/realloc"); + + else if (trans && trans->flags & LEX_TRANS_INVALID) + WARNING("hit invalid transition match"); + + else if (!next_state) + WARNING("no valid transition found"); + + else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) + WARNING("invalid end state"); + + else + WARNING("unknown error condition (!?)"); + + DEBUG("%s", input); + DEBUGN("%s", ""); + + for (cc = input; cc < c; cc++) + DEBUGNF(" "); + + DEBUGF("^\t%s -> %s -> %s", + LEX_STATE_NAME(lex, prev_state), + LEX_STATE_NAME(lex, cur_state), + LEX_STATE_NAME(lex, next_state) + ); + } + + // free stuff + free(buf); + + // return + return err; } + diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/lex.h --- a/src/lib/lex.h Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/lex.h Thu Oct 09 00:33:37 2008 +0300 @@ -17,7 +17,8 @@ */ enum lex_transition_flags { LEX_TRANS_DEFAULT = 0x01, - LEX_TRANS_FINAL = 0x02, + /* not supported + LEX_TRANS_FINAL = 0x02, */ LEX_TRANS_INVALID = 0x04, }; @@ -57,12 +58,15 @@ }; /* - * Special tokens + * Special states, these are all defined as zero */ // shows up in token_fn as the value of next_token when this_token is the last token. #define LEX_EOF 0 +// shows up as the initial value of prev_token +#define LEX_INITIAL 0 + /* * Lex machine */ @@ -80,11 +84,13 @@ int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg); /* - * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to. + * Called on every char handled by the lexer. + * + * The NUL byte at the end of the input string is not passed to char_fn (why not?). * * Return zero to have lexing continue, nonzero to stop lexing. */ - int (*char_fn) (int this_token, char token_char, void *arg); + int (*char_fn) (char token_char, int from_token, int to_token, void *arg); /* * Called when the end of input has been reached, `last_token` is the state that we terminated in. @@ -96,6 +102,9 @@ // number of states size_t state_count; + // initial state + int initial_state; + // array of lex_states, indexable by the state id. struct lex_state state_list[]; }; @@ -120,6 +129,11 @@ } /* + * Helpers for handling states + */ +#define LEX_STATE_NAME(lex, state) ((state) ? (lex)->state_list[(state) - 1].name : "...") + +/* * Lex it! * * Return zero to indiciate that the input was valid, nonzero otherwise. diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/lexer.h --- a/src/lib/lexer.h Wed Oct 08 22:05:13 2008 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -#ifndef LIB_LEXER_H -#define LIB_LEXER_H - -/* - * Simple FSM lexing - * - * The lexer is implemented as a Finite State Machine, consisting for a number of states, which then contain a set of - * transitions, which move the lexer from state to state based on each char of input at a time. - * - * Whenever the state changes, the token callback is triggered with the collected token data. - */ - -/* - * Transition flags - */ -enum lex_transition_flags { - LEX_TRANS_DEFAULT = 0x01, - LEX_TRANS_FINAL = 0x02, -}; - -/* - * A transition from one state to another. - */ -struct lex_transition { - // applies to chars [left, right] - char left, right; - - // flags from lex_transition_flags - char flags; - - // next state to enter - int next_state; -}; - -/* - * State flags - */ -enum lex_state_flags { - LEX_STATE_END = 0x01; -}; - -/* - * A state - */ -struct lex_state { - // the state name (for debugging) - const char *name; - - // flags from lex_state_flags - char flags; - - // list of transitions for this state, terminated by a transition with next_state=0 - struct lex_transition *trans_list; -}; - -/* - * Lex machine - */ -struct lex { - // number of states - size_t state_count; - - // array of lex_states, indexable by the state id. - struct lex_state *state_list; - - /* - * Core token handler. Everytime a full token is lexed (i.e. the state changes), this will be called. - * `this_token` represents the full token that was parsed, and `token_data` is the token's value. `next_token` - * is the state that terminated this token, and `prev_token` was the token before this one. - * - * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Thence, it can be - * modified, as its contents will be replaced by the next token. Hence, if you need to keep hold of it, copy it. - * - * Return zero to have lexing continue, nonzero to stop lexing. - */ - int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg); - - /* - * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to. - * - * Return zero to have lexing continue, nonzero to stop lexing. - */ - int (*char_fn) (int this_token, char token_char, void *arg); - - /* - * Called when the end of input has been reached, `last_token` is the state that we terminated in. - * - * Return zero to indiciate that the input was valid, nonzero to indicate an error. - */ - int (*end_fn) (int last_token, void *arg); -}; - -/* - * Helper macros for building the state_list - */ -#define LEX_STATE(enum_val) { #enum_val, 0, -#define LEX_STATE_END(enum_val) { #enum_val, LEX_STATE_END, - - #define LEX_CHAR(c, to) { c, c, 0, to }, - #define LEX_RANGE(l, r, to) { l, r, 0, to }, - #define LEX_ALPHA(to) LEX_RANGE('a', 'z', to), LEX_RANGE('A', 'Z', to) - #define LEX_NUMBER(to) LEX_RANGE('0', '9', to) - #define LEX_ALNUM(to) LEX_ALPHA(to), LEX_NUMBER(to), LEX_CHAR('-', to), LEX_CHAR('_', to) - #define LEX_WHITESPACE(to) LEX_CHAR(' ', to), LEX_CHAR('\n', to), LEX_CHAR('\t', to) - - #define LEX_DEFAULT(to) { 0, 0, LEX_TRANS_DEFAULT, to } \ - } - #define LEX_END { 0, 0, 0, 0 } \ - } - -/* - * Lex it! - * - * Return zero to indiciate that the input was valid, nonzero otherwise. - */ -int lexer (const struct lex *lex, const char *input, void *arg); - -#endif /* LIB_LEXER_H */ diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/log.c --- a/src/lib/log.c Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/log.c Thu Oct 09 00:33:37 2008 +0300 @@ -6,33 +6,34 @@ #include "log.h" -static void _generic_err_vargs (int use_stderr, const char *func, int perr, const char *fmt, va_list va) { - FILE *stream = use_stderr ? stderr : stdout; +static void _generic_err_vargs (int flags, const char *func, int err, const char *fmt, va_list va) { + FILE *stream = flags & LOG_DISPLAY_STDERR ? stderr : stdout; if (func) fprintf(stream, "%s: ", func); vfprintf(stream, fmt, va); - if (perr) - fprintf(stream, ": %s\n", strerror(perr > 0 ? errno : -perr)); - - fprintf(stream, "\n"); + if (flags & LOG_DISPLAY_PERR) + fprintf(stream, ": %s\n", strerror(err == 0 ? errno : -err)); + + if (!(flags & LOG_DISPLAY_NONL)) + fprintf(stream, "\n"); } -void _generic_err (int use_stderr, const char *func, int perr, const char *fmt, ...) { +void _generic_err (int flags, const char *func, int err, const char *fmt, ...) { va_list va; va_start(va, fmt); - _generic_err_vargs(use_stderr, func, perr, fmt, va); + _generic_err_vargs(flags, func, err, fmt, va); va_end(va); } -void _generic_err_exit (int use_stderr, const char *func, int perr, const char *fmt, ...) { +void _generic_err_exit (int flags, const char *func, int err, const char *fmt, ...) { va_list va; va_start(va, fmt); - _generic_err_vargs(use_stderr, func, perr, fmt, va); + _generic_err_vargs(flags, func, err, fmt, va); va_end(va); exit(EXIT_FAILURE); diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/log.h --- a/src/lib/log.h Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/log.h Thu Oct 09 00:33:37 2008 +0300 @@ -5,11 +5,21 @@ * error handling */ -void _generic_err ( /*int level, */ int use_stderr, const char *func, int perr, const char *fmt, ...) +enum log_display_flags { + LOG_DISPLAY_STDOUT = 0x00, + LOG_DISPLAY_STDERR = 0x01, + + LOG_DISPLAY_PERR = 0x02, + + LOG_DISPLAY_NONL = 0x04, +}; + + +void _generic_err (int flags, const char *func, int err, const char *fmt, ...) __attribute__ ((format (printf, 4, 5))); // needs to be defined as its own function for the noreturn attribute -void _generic_err_exit ( /* int level, */ int used_stderr, const char *func, int perr, const char *fmt, ...) +void _generic_err_exit (int flags, const char *func, int err, const char *fmt, ...) __attribute__ ((format (printf, 4, 5))) __attribute__ ((noreturn)); @@ -25,20 +35,20 @@ extern enum _debug_level _cur_debug_level; // various kinds of ways to handle an error, 2**3 of them, *g* -#define info(...) _generic_err( 0, NULL, 0, __VA_ARGS__ ) -#define error(...) _generic_err( 1, NULL, 0, __VA_ARGS__ ) -#define err_exit(...) _generic_err_exit( 1, NULL, 0, __VA_ARGS__ ) -#define perr(...) _generic_err( 1, NULL, 1, __VA_ARGS__ ) -#define perr_exit(...) _generic_err_exit( 1, NULL, 1, __VA_ARGS__ ) -#define err_func(func, ...) _generic_err( 1, func, 0, __VA_ARGS__ ) -#define err_func_exit(func, ...) _generic_err_exit( 1, func, 0, __VA_ARGS__ ) -#define perr_func(func, ...) _generic_err( 1, func, 1, __VA_ARGS__ ) -#define perr_func_exit(func, ...) _generic_err_exit( 1, func, 1, __VA_ARGS__ ) -#define eerr_func(func, err, ...) _generic_err( 1, func, err,__VA_ARGS__ ) +#define info(...) _generic_err( LOG_DISPLAY_STDOUT, NULL, 0, __VA_ARGS__ ) +#define error(...) _generic_err( LOG_DISPLAY_STDERR, NULL, 0, __VA_ARGS__ ) +#define err_exit(...) _generic_err_exit( LOG_DISPLAY_STDERR, NULL, 0, __VA_ARGS__ ) +#define perr(...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, NULL, 0, __VA_ARGS__ ) +#define perr_exit(...) _generic_err_exit( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, NULL, 0, __VA_ARGS__ ) +#define err_func(func, ...) _generic_err( LOG_DISPLAY_STDERR, func, 0, __VA_ARGS__ ) +#define err_func_exit(func, ...) _generic_err_exit( LOG_DISPLAY_STDERR, func, 0, __VA_ARGS__ ) +#define perr_func(func, ...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, func, 0, __VA_ARGS__ ) +#define perr_func_exit(func, ...) _generic_err_exit( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, func, 0, __VA_ARGS__ ) +#define eerr_func(func, err, ...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, func, err, __VA_ARGS__ ) +#define debug(func, ...) _generic_err( LOG_DISPLAY_STDERR, func, 0, __VA_ARGS__ ) +#define debug_nonl(func, ...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_NONL, func, 0, __VA_ARGS__ ) -/* - * Legacy... - */ +// logging includes errors #include "error.h" #define WARNING(...) err_func(__func__, __VA_ARGS__) @@ -46,9 +56,15 @@ #define EWARNING(err, ...) eerr_func(__func__, (err), __VA_ARGS__) #ifdef DEBUG_ENABLED -#define DEBUG(...) err_func(__func__, __VA_ARGS__) +#define DEBUG(...) debug(__func__, __VA_ARGS__) +#define DEBUGF(...) debug(NULL, __VA_ARGS__) +#define DEBUGN(...) debug_nonl(__func__, __VA_ARGS__) +#define DEBUGNF(...) debug_nonl(NULL, __VA_ARGS__) #else #define DEBUG(...) (void) (0) +#define DEBUGF(...) (void) (0) +#define DEBUGN(...) (void) (0) +#define DEBUGNF(...) (void) (0) #endif // default is to enable INFO @@ -63,7 +79,7 @@ #if INFO_ENABLED #define INFO(...) info(__VA_ARGS__) #else -#define INFO(...) (void) (0) +#define INFO(...) (void) (__VA_ARGS__) #endif #endif /* LIB_LOG_H */ diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/url.c --- a/src/lib/url.c Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/url.c Thu Oct 09 00:33:37 2008 +0300 @@ -5,6 +5,7 @@ #include "url.h" #include "lex.h" #include "error.h" +#include "log.h" #include "misc.h" enum url_token { @@ -62,16 +63,200 @@ }; static int _url_append_scheme (struct url *url, const char *data) { - + return 0; } static int _url_append_opt_key (struct url *url, const char *key) { - + return 0; } static int _url_append_opt_val (struct url *url, const char *value) { + return 0; +} -} +static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg); + +static struct lex url_lex = { + .token_fn = url_lex_token, + .char_fn = NULL, + .end_fn = NULL, + + .state_count = URL_MAX, + .initial_state = URL_BEGIN, + .state_list = { + LEX_STATE ( URL_BEGIN ) { + LEX_ALNUM ( URL_BEGIN_ALNUM ), + LEX_CHAR ( ':', URL_SERVICE_SEP ), + LEX_CHAR ( '/', URL_PATH_START ), + LEX_CHAR ( '?', URL_OPT_START ), + LEX_END + }, + + // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME + LEX_STATE_END ( URL_BEGIN_ALNUM ) { + LEX_ALNUM ( URL_BEGIN_ALNUM ), + LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME + LEX_CHAR ( ':', URL_BEGIN_COLON ), + LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME + LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME + LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME + LEX_END + }, + + // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP + LEX_STATE ( URL_BEGIN_COLON ) { + LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME + LEX_ALNUM ( URL_USERHOST_ALNUM2 ), + LEX_END + }, + + + LEX_STATE ( URL_SCHEME ) { + LEX_ALNUM ( URL_SCHEME ), + LEX_CHAR ( '+', URL_SCHEME_SEP ), + LEX_CHAR ( ':', URL_SCHEME_END_COL ), + LEX_END + }, + + LEX_STATE ( URL_SCHEME_SEP ) { + LEX_ALNUM ( URL_SCHEME ), + LEX_END + }, + + LEX_STATE ( URL_SCHEME_END_COL ) { + LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), + LEX_END + }, + + LEX_STATE ( URL_SCHEME_END_SLASH1 ) { + LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), + LEX_END + }, + + LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { + LEX_ALNUM ( URL_USERHOST_ALNUM ), + LEX_CHAR ( ':', URL_SERVICE_SEP ), + LEX_CHAR ( '/', URL_PATH_START ), + LEX_CHAR ( '?', URL_OPT_START ), + LEX_END + }, + + // this can be URL_USERNAME or URL_HOSTNAME + LEX_STATE_END ( URL_USERHOST_ALNUM ) { + LEX_CHAR ( ':', URL_USERHOST_COLON ), + LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME + LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME + LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME + LEX_DEFAULT ( URL_USERHOST_ALNUM ), + }, + + // this can be URL_USERNAME_END or URL_SERVICE_SEP + LEX_STATE ( URL_USERHOST_COLON ) { + LEX_ALNUM ( URL_USERHOST_ALNUM2 ), + LEX_END + }, + + // this can be URL_PASSWORD or URL_SERVICE + LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { + LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSSWORD + LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE + LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE + LEX_DEFAULT ( URL_USERHOST_ALNUM2 ), + }, + + // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 + LEX_STATE ( URL_USERNAME ) { + LEX_END + }, + + LEX_STATE ( URL_PASSWORD_SEP ) { + LEX_END + }, + + LEX_STATE ( URL_PASSWORD ) { + LEX_END + }, + + + LEX_STATE_END ( URL_USERNAME_END ) { + LEX_ALNUM ( URL_HOSTNAME ), + LEX_CHAR ( ':', URL_SERVICE_SEP ), + LEX_CHAR ( '/', URL_PATH_START ), + LEX_CHAR ( '?', URL_OPT_START ), + LEX_END + }, + + + LEX_STATE_END ( URL_HOSTNAME ) { + LEX_ALNUM ( URL_HOSTNAME ), + LEX_CHAR ( ':', URL_SERVICE_SEP ), + LEX_CHAR ( '/', URL_PATH_START ), + LEX_CHAR ( '?', URL_OPT_START ), + LEX_END + }, + + + LEX_STATE ( URL_SERVICE_SEP ) { + LEX_ALNUM ( URL_SERVICE ), + LEX_CHAR ( '/', URL_PATH_START ), + LEX_CHAR ( '?', URL_OPT_START ), + LEX_END + }, + + LEX_STATE_END ( URL_SERVICE ) { + LEX_ALNUM ( URL_SERVICE ), + LEX_CHAR ( '/', URL_PATH_START ), + LEX_CHAR ( '?', URL_OPT_START ), + LEX_END + }, + + + LEX_STATE_END ( URL_PATH_START ) { + LEX_CHAR ( '?', URL_OPT_START ), + LEX_DEFAULT ( URL_PATH ), + }, + + LEX_STATE_END ( URL_PATH ) { + LEX_CHAR ( '?', URL_OPT_START ), + LEX_DEFAULT ( URL_PATH ), + }, + + + LEX_STATE_END ( URL_OPT_START ) { + LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_INVALID ( '=' ), + LEX_DEFAULT ( URL_OPT_KEY ), + }, + + LEX_STATE_END ( URL_OPT_KEY ) { + LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_CHAR ( '=', URL_OPT_EQ ), + LEX_DEFAULT ( URL_OPT_KEY ), + }, + + LEX_STATE_END ( URL_OPT_EQ ) { + LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_INVALID ( '=' ), + LEX_DEFAULT ( URL_OPT_VAL ), + }, + + LEX_STATE_END ( URL_OPT_VAL ) { + LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_INVALID ( '=' ), + LEX_DEFAULT ( URL_OPT_VAL ), + }, + + LEX_STATE_END ( URL_OPT_SEP ) { + LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_INVALID ( '=' ), + LEX_DEFAULT ( URL_OPT_KEY ), + }, + + LEX_STATE ( URL_ERROR ) { + LEX_END + }, + } +}; static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; @@ -81,6 +266,10 @@ (void) prev_token; switch (this_token) { + case URL_BEGIN: + // irrelevant + break; + case URL_BEGIN_ALNUM: switch (next_token) { case URL_SCHEME_SEP: @@ -185,7 +374,8 @@ case URL_PATH_START: case URL_OPT_START: case LEX_EOF: - // store the service + // store the hostname and service + state->url->hostname = state->alnum; state->alnum = NULL; copy_to = &state->url->service; break; default: @@ -250,7 +440,7 @@ break; default: - FATAL("invalid token"); + ERROR("invalid token"); } if (copy_to) { @@ -263,192 +453,13 @@ return 0; error: - // XXX: error codes? + DEBUG("token: %s -> %s -> %s: %s", + LEX_STATE_NAME(&url_lex, prev_token), LEX_STATE_NAME(&url_lex, this_token), LEX_STATE_NAME(&url_lex, next_token), + token_data + ); return -1; } -static struct lex url_lex = { - .token_fn = url_lex_token, - .char_fn = NULL, - .end_fn = NULL, - - .state_count = URL_MAX, - .state_list = { - LEX_STATE ( URL_BEGIN ) { - LEX_ALNUM ( URL_BEGIN_ALNUM ), - LEX_CHAR ( ':', URL_SERVICE_SEP ), - LEX_CHAR ( '/', URL_PATH_START ), - LEX_CHAR ( '?', URL_OPT_START ), - LEX_END - }, - - // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME - LEX_STATE_END ( URL_BEGIN_ALNUM ) { - LEX_ALNUM ( URL_BEGIN_ALNUM ), - LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME - LEX_CHAR ( ':', URL_BEGIN_COLON ), - LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME - LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME - LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME - LEX_END - }, - - // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP - LEX_STATE ( URL_BEGIN_COLON ) { - LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME - LEX_ALNUM ( URL_USERHOST_ALNUM2 ), - LEX_END - }, - - - LEX_STATE ( URL_SCHEME ) { - LEX_ALNUM ( URL_SCHEME ), - LEX_CHAR ( '+', URL_SCHEME_SEP ), - LEX_CHAR ( ':', URL_SCHEME_END_COL ), - LEX_END - }, - - LEX_STATE ( URL_SCHEME_SEP ) { - LEX_ALNUM ( URL_SCHEME ), - LEX_END - }, - - LEX_STATE ( URL_SCHEME_END_COL ) { - LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), - LEX_END - }, - - LEX_STATE ( URL_SCHEME_END_SLASH1 ) { - LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ), - LEX_END - }, - - LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) { - LEX_ALNUM ( URL_USERHOST_ALNUM ), - LEX_CHAR ( ':', URL_SERVICE_SEP ), - LEX_CHAR ( '/', URL_PATH_START ), - LEX_CHAR ( '?', URL_OPT_START ), - LEX_END - }, - - // this can be URL_USERNAME or URL_HOSTNAME - LEX_STATE_END ( URL_USERHOST_ALNUM ) { - LEX_ALNUM ( URL_USERHOST_ALNUM ), - LEX_CHAR ( ':', URL_USERHOST_COLON ), - LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME - LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME - LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME - LEX_END - }, - - // this can be URL_USERNAME_END or URL_SERVICE_SEP - LEX_STATE ( URL_USERHOST_COLON ) { - LEX_ALNUM ( URL_USERHOST_ALNUM2 ), - LEX_END - }, - - // this can be URL_PASSWORD or URL_SERVICE - LEX_STATE_END ( URL_USERHOST_ALNUM2 ) { - LEX_ALNUM ( URL_USERHOST_ALNUM ), - LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSSWORD - LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE - LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE - LEX_END - }, - - // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2 - LEX_STATE ( URL_USERNAME ) { - LEX_END - }, - - LEX_STATE ( URL_PASSWORD_SEP ) { - LEX_END - }, - - LEX_STATE ( URL_PASSWORD ) { - LEX_END - }, - - - LEX_STATE_END ( URL_USERNAME_END ) { - LEX_ALNUM ( URL_HOSTNAME ), - LEX_CHAR ( ':', URL_SERVICE_SEP ), - LEX_CHAR ( '/', URL_PATH_START ), - LEX_CHAR ( '?', URL_OPT_START ), - LEX_END - }, - - - LEX_STATE_END ( URL_HOSTNAME ) { - LEX_ALNUM ( URL_HOSTNAME ), - LEX_CHAR ( ':', URL_SERVICE_SEP ), - LEX_CHAR ( '/', URL_PATH_START ), - LEX_CHAR ( '?', URL_OPT_START ), - LEX_END - }, - - - LEX_STATE ( URL_SERVICE_SEP ) { - LEX_ALNUM ( URL_SERVICE ), - LEX_CHAR ( '/', URL_PATH_START ), - LEX_CHAR ( '?', URL_OPT_START ), - LEX_END - }, - - LEX_STATE_END ( URL_SERVICE ) { - LEX_ALNUM ( URL_SERVICE ), - LEX_CHAR ( '/', URL_PATH_START ), - LEX_CHAR ( '?', URL_OPT_START ), - LEX_END - }, - - - LEX_STATE_END ( URL_PATH_START ) { - LEX_CHAR ( '?', URL_OPT_START ), - LEX_DEFAULT ( URL_PATH ), - }, - - LEX_STATE_END ( URL_PATH ) { - LEX_CHAR ( '?', URL_OPT_START ), - LEX_DEFAULT ( URL_PATH ), - }, - - - LEX_STATE_END ( URL_OPT_START ) { - LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_INVALID ( '=' ), - LEX_DEFAULT ( URL_OPT_KEY ), - }, - - LEX_STATE_END ( URL_OPT_KEY ) { - LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_CHAR ( '=', URL_OPT_EQ ), - LEX_DEFAULT ( URL_OPT_KEY ), - }, - - LEX_STATE_END ( URL_OPT_EQ ) { - LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_INVALID ( '=' ), - LEX_DEFAULT ( URL_OPT_VAL ), - }, - - LEX_STATE_END ( URL_OPT_VAL ) { - LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_INVALID ( '=' ), - LEX_DEFAULT ( URL_OPT_VAL ), - }, - - LEX_STATE_END ( URL_OPT_SEP ) { - LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_INVALID ( '=' ), - LEX_DEFAULT ( URL_OPT_KEY ), - }, - - LEX_STATE ( URL_ERROR ) { - LEX_END - }, - } -}; int url_parse (struct url *url, const char *text) { struct url_state state; ZINIT(state); @@ -468,3 +479,42 @@ return -1; } +static void _url_dump_part (const char *field, const char *val, FILE *stream) { + if (val) { + fprintf(stream, "%s=%s ", field, val); + } +} + +void url_dump (const struct url *url, FILE *stream) { + int i; + + if (url->schema) { + fprintf(stream, "schema="); + + for (i = 0; i < url->schema->count; i++) { + if (i > 0) + fprintf(stream, "+"); + + fprintf(stream, "%s", url->schema->list[i]); + } + + fprintf(stream, " "); + } + + _url_dump_part("username", url->username, stream); + _url_dump_part("password", url->password, stream); + _url_dump_part("hostname", url->hostname, stream); + _url_dump_part("service", url->service, stream); + _url_dump_part("path", url->path, stream); + + if (url->opts) { + fprintf(stream, "opts: "); + + for (i = 0; i < url->opts->count; i++) { + fprintf(stream, "%s=%s ", url->opts->list[i].key, url->opts->list[i].value); + } + } + + fprintf(stream, "\n"); +} + diff -r a8d183e79ed9 -r 74fb62022fb3 src/lib/url.h --- a/src/lib/url.h Wed Oct 08 22:05:13 2008 +0300 +++ b/src/lib/url.h Thu Oct 09 00:33:37 2008 +0300 @@ -13,13 +13,14 @@ */ #include +#include /* * The schema */ struct url_schema { size_t count; - const char **list; + const char *list[]; }; /* @@ -30,7 +31,7 @@ struct url_opt { const char *key; const char *value; - } **list; + } list[]; }; /* @@ -54,4 +55,9 @@ */ int url_parse (struct url *url, const char *text); +/* + * Prints a url in a debug-output format. + */ +void url_dump (const struct url *url, FILE *stream); + #endif /* LIB_URL_H */ diff -r a8d183e79ed9 -r 74fb62022fb3 src/url_test.c --- a/src/url_test.c Wed Oct 08 22:05:13 2008 +0300 +++ b/src/url_test.c Thu Oct 09 00:33:37 2008 +0300 @@ -5,8 +5,9 @@ #include "lib/url.h" -#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); return -1; } while (0) +#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); printf("\n"); return -1; } while (0) +struct url_schema basic_http = { 1, { "http" } }; struct url_test { const char *url; @@ -16,9 +17,9 @@ NULL, NULL, NULL, "localhost", "http", NULL, NULL } }, -/* { "http://example.com/path", { - { 1, { "http" } }, NULL, NULL, "example.com", NULL, "path", NULL - } }, */ + { "http://example.com/path", { + &basic_http, NULL, NULL, "example.com", NULL, "path", NULL + } }, { NULL, { } }, }; @@ -26,14 +27,14 @@ int cmp_url_str (const char *field, const char *test, const char *real) { if (!test) { if (real) - FAIL("%s: shouldn't be present", field); + FAIL("%s shouldn't be present", field); } else if (!real) { - FAIL("%s: missing", field); + FAIL("%s is missing", field); } else { if (strcmp(test, real) != 0) - FAIL("%s: differs: %s -> %s", field, test, real); + FAIL("%s differs: %s -> %s", field, test, real); } // ok @@ -94,10 +95,10 @@ FAIL("inconsistent opts count"); for (i = 0; i < test->opts->count; i++) { - if (strcmp(test->opts->list[i]->key, real->opts->list[i]->key) != 0) + if (strcmp(test->opts->list[i].key, real->opts->list[i].key) != 0) FAIL("differing scheme key #%d", i); - if (strcmp(test->opts->list[i]->value, real->opts->list[i]->value) != 0) + if (strcmp(test->opts->list[i].value, real->opts->list[i].value) != 0) FAIL("differing scheme value #%d", i); } } @@ -109,45 +110,6 @@ return -1; } -void print_url_part (const char *field, const char *val) { - if (val) { - printf("%s=%s ", field, val); - } -} - -void print_url (const struct url *url) { - int i; - - if (url->schema) { - printf("schema="); - - for (i = 0; i < url->schema->count; i++) { - if (i > 0) - printf("+"); - - printf("%s", url->schema->list[i]); - } - - printf(" "); - } - - print_url_part("username", url->username); - print_url_part("password", url->password); - print_url_part("hostname", url->hostname); - print_url_part("service", url->service); - print_url_part("path", url->path); - - if (url->opts) { - printf("opts: "); - - for (i = 0; i < url->opts->count; i++) { - printf("%s=%s ", url->opts->list[i]->key, url->opts->list[i]->value); - } - } - - printf("\n"); -} - void usage (const char *exec_name) { printf("Usage: %s\n\n\tNo arguments are accepted\n", exec_name); @@ -164,7 +126,7 @@ // run the tests for (test = url_tests; test->url; test++) { // first output the URL we are handling... - printf("%s... ", test->url); + printf("%-80s - ", test->url); fflush(stdout); // parse the URL @@ -178,14 +140,14 @@ // compare it if (cmp_url(&test->expected, &url)) { printf("\texpected: "); - print_url(&test->expected); + url_dump(&test->expected, stdout); printf("\tresult: "); - print_url(&url); + url_dump(&url, stdout); } else { printf("OK\n\t"); - print_url(&url); + url_dump(&url, stdout); } } }