--- a/src/lib/lex.c Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/lex.c Thu Oct 09 00:33:37 2008 +0300
@@ -1,7 +1,210 @@
+
+#include <stdlib.h>
#include "lex.h"
+#include "error.h"
+#include "log.h"
+
+#define INITIAL_BUF_SIZE 4096
+
+/*
+ * Runs the state machine described by `lex` over the NUL-terminated string `input`.
+ *
+ * Chars are collected into an internal buffer; every time the state changes, the collected chars are handed to
+ * lex->token_fn as one NUL-terminated token. lex->char_fn is called for each non-NUL char and lex->end_fn once after
+ * the whole input has been consumed. Any of the three callbacks may be NULL. `arg` is passed through to them as-is.
+ *
+ * Returns zero on success, the nonzero return value of a callback if one of them stopped the lexing, or -1 on any
+ * other failure (out of memory, invalid or missing transition, end of input in a state without LEX_STATE_END).
+ */
int lexer (const struct lex *lex, const char *input, void *arg) {
- // XXX: implement
+ // error returns; cb_err has to start out as zero, because the error label below reads it on every path
+ int err = -1, cb_err = 0;
+
+ // set when the input ends while in a state that is not flagged LEX_STATE_END
+ int bad_end_state = 0;
+
+ // token buffer
+ char *buf = NULL, *buf_ptr;
+ size_t buf_size = INITIAL_BUF_SIZE;
+
+ // state
+ int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL;
+
+ // input chars
+ const char *c = input;
+
+ // lookups
+ const struct lex_transition *trans = NULL;
+
+ // allocate the buffer
+ if ((buf = malloc(sizeof(char) * buf_size)) == NULL)
+ goto error;
+
+ // set buf_ptr initial position
+ buf_ptr = buf;
+
+ // clear input
+ DEBUG("*cough*");
+ DEBUGN("%s", "");
+
+ // process input
+ do {
+ if (*c) {
+ // look up the next state
+ for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) {
+ // accept defaults
+ if (trans->flags & LEX_TRANS_DEFAULT)
+ break;
+
+ // disregard non-matches
+ if (trans->left > *c || *c > trans->right)
+ continue;
+
+ // abort on invalids
+ if (trans->flags & LEX_TRANS_INVALID)
+ goto error;
+
+ else {
+ // accept it
+ break;
+ }
+ }
+
+ // did we find a transition with a valid next state?
+ if (!(next_state = trans->next_state))
+ goto error;
+
+ // call the char handler
+ if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg)))
+ goto error;
+
+ } else {
+ // EOF!
+ next_state = LEX_EOF;
+
+ // is cur_state a valid end state?
+ if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END)) {
+ // remember why we bail out, next_state alone cannot tell as LEX_EOF is zero
+ bad_end_state = 1;
+ goto error;
+ }
+
+ // note: we don't pass the NUL byte to the char handler
+ }
+
+ // if this char is part of the next token...
+ if (next_state != cur_state) {
+ // terminate the buffer and reset buf_ptr
+ *buf_ptr = 0; buf_ptr = buf;
+
+ // dump state transitions
+ DEBUGF("\n\t%25s -> %25s -> %25s",
+ LEX_STATE_NAME(lex, prev_state),
+ LEX_STATE_NAME(lex, cur_state),
+ LEX_STATE_NAME(lex, next_state)
+ );
+
+ // pass in the complete token to the handler
+ if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg)))
+ goto error;
+
+ // update states
+ prev_state = cur_state;
+ cur_state = next_state;
+ next_state = LEX_INITIAL;
+ }
+
+ // dump chars
+ if (next_state == LEX_INITIAL)
+ DEBUGN("%c", *c);
+ else
+ DEBUGNF("%c", *c);
+
+ // store this char in the buffer
+ *(buf_ptr++) = *c;
+
+ // grow the buffer if needed
+ if ((size_t) (buf_ptr - buf) >= buf_size) {
+ // remember the offset, as buf_ptr might get invalidated if buf is moved
+ size_t buf_offset = buf_ptr - buf;
+ char *new_buf;
+
+ // calc new size
+ buf_size *= 2;
+
+ // grow/move via a temporary, so that the old buffer is not leaked if realloc fails
+ if ((new_buf = realloc(buf, buf_size)) == NULL) {
+ // release the old buffer and leave buf NULL, which is what the error report below keys on
+ free(buf);
+ buf = NULL;
+ goto error;
+ }
+
+ // fix buf and buf_ptr
+ buf = new_buf;
+ buf_ptr = buf + buf_offset;
+ }
+ } while (*(c++));
+
+ // call the end handler
+ // NOTE(review): the EOF iteration above has already moved cur_state on to LEX_EOF, so end_fn gets LEX_EOF here and
+ // the last real state is in prev_state - confirm that this is what end_fn's `last_token` is meant to be
+ if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg)))
+ goto error;
+
+ // successfully parsed!
+ err = 0;
+
+error:
+ DEBUGNF("\n");
+
+ if (cb_err)
+ err = cb_err;
+
+ // dump debug info on error
+ if (err) {
+ const char *cc;
+
+ // figure out the error
+ if (!buf)
+ WARNING("malloc/realloc");
+
+ else if (cb_err)
+ WARNING("callback returned error %d", cb_err);
+
+ else if (bad_end_state)
+ WARNING("invalid end state");
+
+ else if (trans && trans->flags & LEX_TRANS_INVALID)
+ WARNING("hit invalid transition match");
+
+ else if (!next_state)
+ WARNING("no valid transition found");
+
+ else
+ WARNING("unknown error condition (!?)");
+
+ DEBUG("%s", input);
+ DEBUGN("%s", "");
+
+ for (cc = input; cc < c; cc++)
+ DEBUGNF(" ");
+
+ DEBUGF("^\t%s -> %s -> %s",
+ LEX_STATE_NAME(lex, prev_state),
+ LEX_STATE_NAME(lex, cur_state),
+ LEX_STATE_NAME(lex, next_state)
+ );
+ }
+
+ // free stuff
+ free(buf);
+
+ // return
+ return err;
}
+
--- a/src/lib/lex.h Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/lex.h Thu Oct 09 00:33:37 2008 +0300
@@ -17,7 +17,8 @@
*/
enum lex_transition_flags {
LEX_TRANS_DEFAULT = 0x01,
- LEX_TRANS_FINAL = 0x02,
+ /* not supported
+ LEX_TRANS_FINAL = 0x02, */
LEX_TRANS_INVALID = 0x04,
};
@@ -57,12 +58,15 @@
};
/*
- * Special tokens
+ * Special states, these are all defined as zero
*/
// shows up in token_fn as the value of next_token when this_token is the last token.
#define LEX_EOF 0
+// shows up as the initial value of prev_token
+#define LEX_INITIAL 0
+
/*
* Lex machine
*/
@@ -80,11 +84,13 @@
int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg);
/*
- * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to.
+ * Called on every char handled by the lexer.
+ *
+ * The NUL byte at the end of the input string is not passed to char_fn (why not?).
*
* Return zero to have lexing continue, nonzero to stop lexing.
*/
- int (*char_fn) (int this_token, char token_char, void *arg);
+ int (*char_fn) (char token_char, int from_token, int to_token, void *arg);
/*
* Called when the end of input has been reached, `last_token` is the state that we terminated in.
@@ -96,6 +102,9 @@
// number of states
size_t state_count;
+ // initial state
+ int initial_state;
+
// array of lex_states, indexable by the state id.
struct lex_state state_list[];
};
@@ -120,6 +129,11 @@
}
/*
+ * Helpers for handling states
+ */
+#define LEX_STATE_NAME(lex, state) ((state) ? (lex)->state_list[(state) - 1].name : "...")
+
+/*
* Lex it!
*
* Return zero to indiciate that the input was valid, nonzero otherwise.
--- a/src/lib/lexer.h Wed Oct 08 22:05:13 2008 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#ifndef LIB_LEXER_H
-#define LIB_LEXER_H
-
-/*
- * Simple FSM lexing
- *
- * The lexer is implemented as a Finite State Machine, consisting for a number of states, which then contain a set of
- * transitions, which move the lexer from state to state based on each char of input at a time.
- *
- * Whenever the state changes, the token callback is triggered with the collected token data.
- */
-
-/*
- * Transition flags
- */
-enum lex_transition_flags {
- LEX_TRANS_DEFAULT = 0x01,
- LEX_TRANS_FINAL = 0x02,
-};
-
-/*
- * A transition from one state to another.
- */
-struct lex_transition {
- // applies to chars [left, right]
- char left, right;
-
- // flags from lex_transition_flags
- char flags;
-
- // next state to enter
- int next_state;
-};
-
-/*
- * State flags
- */
-enum lex_state_flags {
- LEX_STATE_END = 0x01;
-};
-
-/*
- * A state
- */
-struct lex_state {
- // the state name (for debugging)
- const char *name;
-
- // flags from lex_state_flags
- char flags;
-
- // list of transitions for this state, terminated by a transition with next_state=0
- struct lex_transition *trans_list;
-};
-
-/*
- * Lex machine
- */
-struct lex {
- // number of states
- size_t state_count;
-
- // array of lex_states, indexable by the state id.
- struct lex_state *state_list;
-
- /*
- * Core token handler. Everytime a full token is lexed (i.e. the state changes), this will be called.
- * `this_token` represents the full token that was parsed, and `token_data` is the token's value. `next_token`
- * is the state that terminated this token, and `prev_token` was the token before this one.
- *
- * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Thence, it can be
- * modified, as its contents will be replaced by the next token. Hence, if you need to keep hold of it, copy it.
- *
- * Return zero to have lexing continue, nonzero to stop lexing.
- */
- int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg);
-
- /*
- * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to.
- *
- * Return zero to have lexing continue, nonzero to stop lexing.
- */
- int (*char_fn) (int this_token, char token_char, void *arg);
-
- /*
- * Called when the end of input has been reached, `last_token` is the state that we terminated in.
- *
- * Return zero to indiciate that the input was valid, nonzero to indicate an error.
- */
- int (*end_fn) (int last_token, void *arg);
-};
-
-/*
- * Helper macros for building the state_list
- */
-#define LEX_STATE(enum_val) { #enum_val, 0,
-#define LEX_STATE_END(enum_val) { #enum_val, LEX_STATE_END,
-
- #define LEX_CHAR(c, to) { c, c, 0, to },
- #define LEX_RANGE(l, r, to) { l, r, 0, to },
- #define LEX_ALPHA(to) LEX_RANGE('a', 'z', to), LEX_RANGE('A', 'Z', to)
- #define LEX_NUMBER(to) LEX_RANGE('0', '9', to)
- #define LEX_ALNUM(to) LEX_ALPHA(to), LEX_NUMBER(to), LEX_CHAR('-', to), LEX_CHAR('_', to)
- #define LEX_WHITESPACE(to) LEX_CHAR(' ', to), LEX_CHAR('\n', to), LEX_CHAR('\t', to)
-
- #define LEX_DEFAULT(to) { 0, 0, LEX_TRANS_DEFAULT, to } \
- }
- #define LEX_END { 0, 0, 0, 0 } \
- }
-
-/*
- * Lex it!
- *
- * Return zero to indiciate that the input was valid, nonzero otherwise.
- */
-int lexer (const struct lex *lex, const char *input, void *arg);
-
-#endif /* LIB_LEXER_H */
--- a/src/lib/log.c Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/log.c Thu Oct 09 00:33:37 2008 +0300
@@ -6,33 +6,45 @@
#include "log.h"
-static void _generic_err_vargs (int use_stderr, const char *func, int perr, const char *fmt, va_list va) {
- FILE *stream = use_stderr ? stderr : stdout;
+/*
+ * Common backend of the logging functions.
+ *
+ * Writes the optional "func: " prefix and the printf-style message to stderr (LOG_DISPLAY_STDERR) or stdout, then
+ * ": <strerror text>" if LOG_DISPLAY_PERR is set, and finally a newline unless LOG_DISPLAY_NONL is set.
+ *
+ * For LOG_DISPLAY_PERR, err == 0 means "use the errno value seen on entry"; any other err is handed to strerror as -err.
+ */
+static void _generic_err_vargs (int flags, const char *func, int err, const char *fmt, va_list va) {
+ FILE *stream = flags & LOG_DISPLAY_STDERR ? stderr : stdout;
+ // snapshot errno before any output, the stdio calls below are allowed to change it
+ int saved_errno = errno;
if (func)
fprintf(stream, "%s: ", func);
vfprintf(stream, fmt, va);
- if (perr)
- fprintf(stream, ": %s\n", strerror(perr > 0 ? errno : -perr));
-
- fprintf(stream, "\n");
+ // error description: the errno snapshot if err is zero, -err otherwise; the newline is left to the check below
+ if (flags & LOG_DISPLAY_PERR)
+ fprintf(stream, ": %s", strerror(err == 0 ? saved_errno : -err));
+
+ if (!(flags & LOG_DISPLAY_NONL))
+ fprintf(stream, "\n");
}
-void _generic_err (int use_stderr, const char *func, int perr, const char *fmt, ...) {
+void _generic_err (int flags, const char *func, int err, const char *fmt, ...) {
va_list va;
va_start(va, fmt);
- _generic_err_vargs(use_stderr, func, perr, fmt, va);
+ _generic_err_vargs(flags, func, err, fmt, va);
va_end(va);
}
-void _generic_err_exit (int use_stderr, const char *func, int perr, const char *fmt, ...) {
+void _generic_err_exit (int flags, const char *func, int err, const char *fmt, ...) {
va_list va;
va_start(va, fmt);
- _generic_err_vargs(use_stderr, func, perr, fmt, va);
+ _generic_err_vargs(flags, func, err, fmt, va);
va_end(va);
exit(EXIT_FAILURE);
--- a/src/lib/log.h Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/log.h Thu Oct 09 00:33:37 2008 +0300
@@ -5,11 +5,21 @@
* error handling
*/
-void _generic_err ( /*int level, */ int use_stderr, const char *func, int perr, const char *fmt, ...)
+enum log_display_flags {
+ LOG_DISPLAY_STDOUT = 0x00,
+ LOG_DISPLAY_STDERR = 0x01,
+
+ LOG_DISPLAY_PERR = 0x02,
+
+ LOG_DISPLAY_NONL = 0x04,
+};
+
+
+void _generic_err (int flags, const char *func, int err, const char *fmt, ...)
__attribute__ ((format (printf, 4, 5)));
// needs to be defined as its own function for the noreturn attribute
-void _generic_err_exit ( /* int level, */ int used_stderr, const char *func, int perr, const char *fmt, ...)
+void _generic_err_exit (int flags, const char *func, int err, const char *fmt, ...)
__attribute__ ((format (printf, 4, 5)))
__attribute__ ((noreturn));
@@ -25,20 +35,20 @@
extern enum _debug_level _cur_debug_level;
// various kinds of ways to handle an error, 2**3 of them, *g*
-#define info(...) _generic_err( 0, NULL, 0, __VA_ARGS__ )
-#define error(...) _generic_err( 1, NULL, 0, __VA_ARGS__ )
-#define err_exit(...) _generic_err_exit( 1, NULL, 0, __VA_ARGS__ )
-#define perr(...) _generic_err( 1, NULL, 1, __VA_ARGS__ )
-#define perr_exit(...) _generic_err_exit( 1, NULL, 1, __VA_ARGS__ )
-#define err_func(func, ...) _generic_err( 1, func, 0, __VA_ARGS__ )
-#define err_func_exit(func, ...) _generic_err_exit( 1, func, 0, __VA_ARGS__ )
-#define perr_func(func, ...) _generic_err( 1, func, 1, __VA_ARGS__ )
-#define perr_func_exit(func, ...) _generic_err_exit( 1, func, 1, __VA_ARGS__ )
-#define eerr_func(func, err, ...) _generic_err( 1, func, err,__VA_ARGS__ )
+#define info(...) _generic_err( LOG_DISPLAY_STDOUT, NULL, 0, __VA_ARGS__ )
+#define error(...) _generic_err( LOG_DISPLAY_STDERR, NULL, 0, __VA_ARGS__ )
+#define err_exit(...) _generic_err_exit( LOG_DISPLAY_STDERR, NULL, 0, __VA_ARGS__ )
+#define perr(...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, NULL, 0, __VA_ARGS__ )
+#define perr_exit(...) _generic_err_exit( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, NULL, 0, __VA_ARGS__ )
+#define err_func(func, ...) _generic_err( LOG_DISPLAY_STDERR, func, 0, __VA_ARGS__ )
+#define err_func_exit(func, ...) _generic_err_exit( LOG_DISPLAY_STDERR, func, 0, __VA_ARGS__ )
+#define perr_func(func, ...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, func, 0, __VA_ARGS__ )
+#define perr_func_exit(func, ...) _generic_err_exit( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, func, 0, __VA_ARGS__ )
+#define eerr_func(func, err, ...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR, func, err, __VA_ARGS__ )
+#define debug(func, ...) _generic_err( LOG_DISPLAY_STDERR, func, 0, __VA_ARGS__ )
+#define debug_nonl(func, ...) _generic_err( LOG_DISPLAY_STDERR | LOG_DISPLAY_NONL, func, 0, __VA_ARGS__ )
-/*
- * Legacy...
- */
+// logging includes errors
#include "error.h"
#define WARNING(...) err_func(__func__, __VA_ARGS__)
@@ -46,9 +56,15 @@
#define EWARNING(err, ...) eerr_func(__func__, (err), __VA_ARGS__)
#ifdef DEBUG_ENABLED
-#define DEBUG(...) err_func(__func__, __VA_ARGS__)
+#define DEBUG(...) debug(__func__, __VA_ARGS__)
+#define DEBUGF(...) debug(NULL, __VA_ARGS__)
+#define DEBUGN(...) debug_nonl(__func__, __VA_ARGS__)
+#define DEBUGNF(...) debug_nonl(NULL, __VA_ARGS__)
#else
#define DEBUG(...) (void) (0)
+#define DEBUGF(...) (void) (0)
+#define DEBUGN(...) (void) (0)
+#define DEBUGNF(...) (void) (0)
#endif
// default is to enable INFO
@@ -63,7 +79,7 @@
#if INFO_ENABLED
#define INFO(...) info(__VA_ARGS__)
#else
-#define INFO(...) (void) (0)
+#define INFO(...) (void) (__VA_ARGS__)
#endif
#endif /* LIB_LOG_H */
--- a/src/lib/url.c Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/url.c Thu Oct 09 00:33:37 2008 +0300
@@ -5,6 +5,7 @@
#include "url.h"
#include "lex.h"
#include "error.h"
+#include "log.h"
#include "misc.h"
enum url_token {
@@ -62,16 +63,200 @@
};
static int _url_append_scheme (struct url *url, const char *data) {
-
+ return 0;
}
static int _url_append_opt_key (struct url *url, const char *key) {
-
+ return 0;
}
static int _url_append_opt_val (struct url *url, const char *value) {
+ return 0;
+}
-}
+static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg);
+
+static struct lex url_lex = { // URL state machine; completed tokens go to url_lex_token, the other callbacks are unused
+ .token_fn = url_lex_token,
+ .char_fn = NULL,
+ .end_fn = NULL,
+
+ .state_count = URL_MAX,
+ .initial_state = URL_BEGIN,
+ .state_list = { // lexer() indexes this as state_list[state - 1], so entry N-1 has to describe state id N
+ LEX_STATE ( URL_BEGIN ) {
+ LEX_ALNUM ( URL_BEGIN_ALNUM ),
+ LEX_CHAR ( ':', URL_SERVICE_SEP ),
+ LEX_CHAR ( '/', URL_PATH_START ),
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_END
+ },
+
+ // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME
+ LEX_STATE_END ( URL_BEGIN_ALNUM ) {
+ LEX_ALNUM ( URL_BEGIN_ALNUM ),
+ LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME
+ LEX_CHAR ( ':', URL_BEGIN_COLON ),
+ LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME
+ LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME
+ LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME
+ LEX_END
+ },
+
+ // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP
+ LEX_STATE ( URL_BEGIN_COLON ) {
+ LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME
+ LEX_ALNUM ( URL_USERHOST_ALNUM2 ),
+ LEX_END
+ },
+
+
+ LEX_STATE ( URL_SCHEME ) {
+ LEX_ALNUM ( URL_SCHEME ),
+ LEX_CHAR ( '+', URL_SCHEME_SEP ),
+ LEX_CHAR ( ':', URL_SCHEME_END_COL ),
+ LEX_END
+ },
+
+ LEX_STATE ( URL_SCHEME_SEP ) {
+ LEX_ALNUM ( URL_SCHEME ),
+ LEX_END
+ },
+
+ LEX_STATE ( URL_SCHEME_END_COL ) {
+ LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ),
+ LEX_END
+ },
+
+ LEX_STATE ( URL_SCHEME_END_SLASH1 ) {
+ LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ),
+ LEX_END
+ },
+
+ LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) {
+ LEX_ALNUM ( URL_USERHOST_ALNUM ),
+ LEX_CHAR ( ':', URL_SERVICE_SEP ),
+ LEX_CHAR ( '/', URL_PATH_START ),
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_END
+ },
+
+ // this can be URL_USERNAME or URL_HOSTNAME
+ LEX_STATE_END ( URL_USERHOST_ALNUM ) {
+ LEX_CHAR ( ':', URL_USERHOST_COLON ),
+ LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME
+ LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME
+ LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME
+ LEX_DEFAULT ( URL_USERHOST_ALNUM ),
+ },
+
+ // this can be URL_USERNAME_END or URL_SERVICE_SEP
+ LEX_STATE ( URL_USERHOST_COLON ) {
+ LEX_ALNUM ( URL_USERHOST_ALNUM2 ),
+ LEX_END
+ },
+
+ // this can be URL_PASSWORD or URL_SERVICE
+ LEX_STATE_END ( URL_USERHOST_ALNUM2 ) {
+ LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSWORD
+ LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE
+ LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE
+ LEX_DEFAULT ( URL_USERHOST_ALNUM2 ),
+ },
+
+ // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2
+ LEX_STATE ( URL_USERNAME ) {
+ LEX_END
+ },
+
+ LEX_STATE ( URL_PASSWORD_SEP ) {
+ LEX_END
+ },
+
+ LEX_STATE ( URL_PASSWORD ) {
+ LEX_END
+ },
+
+
+ LEX_STATE_END ( URL_USERNAME_END ) {
+ LEX_ALNUM ( URL_HOSTNAME ),
+ LEX_CHAR ( ':', URL_SERVICE_SEP ),
+ LEX_CHAR ( '/', URL_PATH_START ),
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_END
+ },
+
+
+ LEX_STATE_END ( URL_HOSTNAME ) {
+ LEX_ALNUM ( URL_HOSTNAME ),
+ LEX_CHAR ( ':', URL_SERVICE_SEP ),
+ LEX_CHAR ( '/', URL_PATH_START ),
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_END
+ },
+
+
+ LEX_STATE ( URL_SERVICE_SEP ) {
+ LEX_ALNUM ( URL_SERVICE ),
+ LEX_CHAR ( '/', URL_PATH_START ),
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_END
+ },
+
+ LEX_STATE_END ( URL_SERVICE ) {
+ LEX_ALNUM ( URL_SERVICE ),
+ LEX_CHAR ( '/', URL_PATH_START ),
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_END
+ },
+
+
+ LEX_STATE_END ( URL_PATH_START ) {
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_DEFAULT ( URL_PATH ),
+ },
+
+ LEX_STATE_END ( URL_PATH ) {
+ LEX_CHAR ( '?', URL_OPT_START ),
+ LEX_DEFAULT ( URL_PATH ),
+ },
+
+
+ LEX_STATE_END ( URL_OPT_START ) {
+ LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_INVALID ( '=' ),
+ LEX_DEFAULT ( URL_OPT_KEY ),
+ },
+
+ LEX_STATE_END ( URL_OPT_KEY ) {
+ LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_CHAR ( '=', URL_OPT_EQ ),
+ LEX_DEFAULT ( URL_OPT_KEY ),
+ },
+
+ LEX_STATE_END ( URL_OPT_EQ ) {
+ LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_INVALID ( '=' ),
+ LEX_DEFAULT ( URL_OPT_VAL ),
+ },
+
+ LEX_STATE_END ( URL_OPT_VAL ) {
+ LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_INVALID ( '=' ),
+ LEX_DEFAULT ( URL_OPT_VAL ),
+ },
+
+ LEX_STATE_END ( URL_OPT_SEP ) {
+ LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_INVALID ( '=' ),
+ LEX_DEFAULT ( URL_OPT_KEY ),
+ },
+
+ LEX_STATE ( URL_ERROR ) {
+ LEX_END
+ },
+ }
+};
static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) {
enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token;
@@ -81,6 +266,10 @@
(void) prev_token;
switch (this_token) {
+ case URL_BEGIN:
+ // irrelevant
+ break;
+
case URL_BEGIN_ALNUM:
switch (next_token) {
case URL_SCHEME_SEP:
@@ -185,7 +374,8 @@
case URL_PATH_START:
case URL_OPT_START:
case LEX_EOF:
- // store the service
+ // store the hostname and service
+ state->url->hostname = state->alnum; state->alnum = NULL;
copy_to = &state->url->service; break;
default:
@@ -250,7 +440,7 @@
break;
default:
- FATAL("invalid token");
+ ERROR("invalid token");
}
if (copy_to) {
@@ -263,192 +453,13 @@
return 0;
error:
- // XXX: error codes?
+ DEBUG("token: %s -> %s -> %s: %s",
+ LEX_STATE_NAME(&url_lex, prev_token), LEX_STATE_NAME(&url_lex, this_token), LEX_STATE_NAME(&url_lex, next_token),
+ token_data
+ );
return -1;
}
-static struct lex url_lex = {
- .token_fn = url_lex_token,
- .char_fn = NULL,
- .end_fn = NULL,
-
- .state_count = URL_MAX,
- .state_list = {
- LEX_STATE ( URL_BEGIN ) {
- LEX_ALNUM ( URL_BEGIN_ALNUM ),
- LEX_CHAR ( ':', URL_SERVICE_SEP ),
- LEX_CHAR ( '/', URL_PATH_START ),
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_END
- },
-
- // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME
- LEX_STATE_END ( URL_BEGIN_ALNUM ) {
- LEX_ALNUM ( URL_BEGIN_ALNUM ),
- LEX_CHAR ( '+', URL_SCHEME_SEP ), // it was URL_SCHEME
- LEX_CHAR ( ':', URL_BEGIN_COLON ),
- LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME
- LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME
- LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME
- LEX_END
- },
-
- // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP
- LEX_STATE ( URL_BEGIN_COLON ) {
- LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ), // it was URL_SCHEME
- LEX_ALNUM ( URL_USERHOST_ALNUM2 ),
- LEX_END
- },
-
-
- LEX_STATE ( URL_SCHEME ) {
- LEX_ALNUM ( URL_SCHEME ),
- LEX_CHAR ( '+', URL_SCHEME_SEP ),
- LEX_CHAR ( ':', URL_SCHEME_END_COL ),
- LEX_END
- },
-
- LEX_STATE ( URL_SCHEME_SEP ) {
- LEX_ALNUM ( URL_SCHEME ),
- LEX_END
- },
-
- LEX_STATE ( URL_SCHEME_END_COL ) {
- LEX_CHAR ( '/', URL_SCHEME_END_SLASH1 ),
- LEX_END
- },
-
- LEX_STATE ( URL_SCHEME_END_SLASH1 ) {
- LEX_CHAR ( '/', URL_SCHEME_END_SLASH2 ),
- LEX_END
- },
-
- LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) {
- LEX_ALNUM ( URL_USERHOST_ALNUM ),
- LEX_CHAR ( ':', URL_SERVICE_SEP ),
- LEX_CHAR ( '/', URL_PATH_START ),
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_END
- },
-
- // this can be URL_USERNAME or URL_HOSTNAME
- LEX_STATE_END ( URL_USERHOST_ALNUM ) {
- LEX_ALNUM ( URL_USERHOST_ALNUM ),
- LEX_CHAR ( ':', URL_USERHOST_COLON ),
- LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_USERNAME
- LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME
- LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME
- LEX_END
- },
-
- // this can be URL_USERNAME_END or URL_SERVICE_SEP
- LEX_STATE ( URL_USERHOST_COLON ) {
- LEX_ALNUM ( URL_USERHOST_ALNUM2 ),
- LEX_END
- },
-
- // this can be URL_PASSWORD or URL_SERVICE
- LEX_STATE_END ( URL_USERHOST_ALNUM2 ) {
- LEX_ALNUM ( URL_USERHOST_ALNUM ),
- LEX_CHAR ( '@', URL_USERNAME_END ), // it was URL_PASSSWORD
- LEX_CHAR ( '/', URL_PATH_START ), // it was URL_SERVICE
- LEX_CHAR ( '?', URL_OPT_START ), // it was URL_SERVICE
- LEX_END
- },
-
- // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2
- LEX_STATE ( URL_USERNAME ) {
- LEX_END
- },
-
- LEX_STATE ( URL_PASSWORD_SEP ) {
- LEX_END
- },
-
- LEX_STATE ( URL_PASSWORD ) {
- LEX_END
- },
-
-
- LEX_STATE_END ( URL_USERNAME_END ) {
- LEX_ALNUM ( URL_HOSTNAME ),
- LEX_CHAR ( ':', URL_SERVICE_SEP ),
- LEX_CHAR ( '/', URL_PATH_START ),
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_END
- },
-
-
- LEX_STATE_END ( URL_HOSTNAME ) {
- LEX_ALNUM ( URL_HOSTNAME ),
- LEX_CHAR ( ':', URL_SERVICE_SEP ),
- LEX_CHAR ( '/', URL_PATH_START ),
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_END
- },
-
-
- LEX_STATE ( URL_SERVICE_SEP ) {
- LEX_ALNUM ( URL_SERVICE ),
- LEX_CHAR ( '/', URL_PATH_START ),
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_END
- },
-
- LEX_STATE_END ( URL_SERVICE ) {
- LEX_ALNUM ( URL_SERVICE ),
- LEX_CHAR ( '/', URL_PATH_START ),
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_END
- },
-
-
- LEX_STATE_END ( URL_PATH_START ) {
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_DEFAULT ( URL_PATH ),
- },
-
- LEX_STATE_END ( URL_PATH ) {
- LEX_CHAR ( '?', URL_OPT_START ),
- LEX_DEFAULT ( URL_PATH ),
- },
-
-
- LEX_STATE_END ( URL_OPT_START ) {
- LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_INVALID ( '=' ),
- LEX_DEFAULT ( URL_OPT_KEY ),
- },
-
- LEX_STATE_END ( URL_OPT_KEY ) {
- LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_CHAR ( '=', URL_OPT_EQ ),
- LEX_DEFAULT ( URL_OPT_KEY ),
- },
-
- LEX_STATE_END ( URL_OPT_EQ ) {
- LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_INVALID ( '=' ),
- LEX_DEFAULT ( URL_OPT_VAL ),
- },
-
- LEX_STATE_END ( URL_OPT_VAL ) {
- LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_INVALID ( '=' ),
- LEX_DEFAULT ( URL_OPT_VAL ),
- },
-
- LEX_STATE_END ( URL_OPT_SEP ) {
- LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_INVALID ( '=' ),
- LEX_DEFAULT ( URL_OPT_KEY ),
- },
-
- LEX_STATE ( URL_ERROR ) {
- LEX_END
- },
- }
-};
int url_parse (struct url *url, const char *text) {
struct url_state state; ZINIT(state);
@@ -468,3 +479,55 @@
return -1;
}
+/*
+ * Prints "field=val " to stream, or nothing at all if val is NULL.
+ */
+static void _url_dump_part (const char *field, const char *val, FILE *stream) {
+ if (val) {
+ fprintf(stream, "%s=%s ", field, val);
+ }
+}
+
+/*
+ * Writes a one-line, human-readable dump of the url to stream: the schema parts joined with '+', each of the
+ * string fields that is set as "name=value", and then the options, terminated by a newline.
+ *
+ * Fields that are NULL are left out; an option whose value is NULL is printed as just its key.
+ */
+void url_dump (const struct url *url, FILE *stream) {
+ size_t i;
+
+ if (url->schema) {
+ fprintf(stream, "schema=");
+
+ for (i = 0; i < url->schema->count; i++) {
+ if (i > 0)
+ fprintf(stream, "+");
+
+ fprintf(stream, "%s", url->schema->list[i]);
+ }
+
+ fprintf(stream, " ");
+ }
+
+ _url_dump_part("username", url->username, stream);
+ _url_dump_part("password", url->password, stream);
+ _url_dump_part("hostname", url->hostname, stream);
+ _url_dump_part("service", url->service, stream);
+ _url_dump_part("path", url->path, stream);
+
+ if (url->opts) {
+ fprintf(stream, "opts: ");
+
+ for (i = 0; i < url->opts->count; i++) {
+ // guard against a NULL value (presumably an option given without "=value"), NULL must not be passed for a %s
+ if (url->opts->list[i].value)
+ fprintf(stream, "%s=%s ", url->opts->list[i].key, url->opts->list[i].value);
+ else
+ fprintf(stream, "%s ", url->opts->list[i].key);
+ }
+ }
+
+ fprintf(stream, "\n");
+}
+
--- a/src/lib/url.h Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/url.h Thu Oct 09 00:33:37 2008 +0300
@@ -13,13 +13,14 @@
*/
#include <sys/types.h>
+#include <stdio.h>
/*
* The schema
*/
struct url_schema {
size_t count;
- const char **list;
+ const char *list[];
};
/*
@@ -30,7 +31,7 @@
struct url_opt {
const char *key;
const char *value;
- } **list;
+ } list[];
};
/*
@@ -54,4 +55,9 @@
*/
int url_parse (struct url *url, const char *text);
+/*
+ * Prints a url in a debug-output format.
+ */
+void url_dump (const struct url *url, FILE *stream);
+
#endif /* LIB_URL_H */
--- a/src/url_test.c Wed Oct 08 22:05:13 2008 +0300
+++ b/src/url_test.c Thu Oct 09 00:33:37 2008 +0300
@@ -5,8 +5,9 @@
#include "lib/url.h"
-#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); return -1; } while (0)
+#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); printf("\n"); return -1; } while (0)
+struct url_schema basic_http = { 1, { "http" } };
struct url_test {
const char *url;
@@ -16,9 +17,9 @@
NULL, NULL, NULL, "localhost", "http", NULL, NULL
} },
-/* { "http://example.com/path", {
- { 1, { "http" } }, NULL, NULL, "example.com", NULL, "path", NULL
- } }, */
+ { "http://example.com/path", {
+ &basic_http, NULL, NULL, "example.com", NULL, "path", NULL
+ } },
{ NULL, { } },
};
@@ -26,14 +27,14 @@
int cmp_url_str (const char *field, const char *test, const char *real) {
if (!test) {
if (real)
- FAIL("%s: shouldn't be present", field);
+ FAIL("%s shouldn't be present", field);
} else if (!real) {
- FAIL("%s: missing", field);
+ FAIL("%s is missing", field);
} else {
if (strcmp(test, real) != 0)
- FAIL("%s: differs: %s -> %s", field, test, real);
+ FAIL("%s differs: %s -> %s", field, test, real);
}
// ok
@@ -94,10 +95,10 @@
FAIL("inconsistent opts count");
for (i = 0; i < test->opts->count; i++) {
- if (strcmp(test->opts->list[i]->key, real->opts->list[i]->key) != 0)
+ if (strcmp(test->opts->list[i].key, real->opts->list[i].key) != 0)
FAIL("differing scheme key #%d", i);
- if (strcmp(test->opts->list[i]->value, real->opts->list[i]->value) != 0)
+ if (strcmp(test->opts->list[i].value, real->opts->list[i].value) != 0)
FAIL("differing scheme value #%d", i);
}
}
@@ -109,45 +110,6 @@
return -1;
}
-void print_url_part (const char *field, const char *val) {
- if (val) {
- printf("%s=%s ", field, val);
- }
-}
-
-void print_url (const struct url *url) {
- int i;
-
- if (url->schema) {
- printf("schema=");
-
- for (i = 0; i < url->schema->count; i++) {
- if (i > 0)
- printf("+");
-
- printf("%s", url->schema->list[i]);
- }
-
- printf(" ");
- }
-
- print_url_part("username", url->username);
- print_url_part("password", url->password);
- print_url_part("hostname", url->hostname);
- print_url_part("service", url->service);
- print_url_part("path", url->path);
-
- if (url->opts) {
- printf("opts: ");
-
- for (i = 0; i < url->opts->count; i++) {
- printf("%s=%s ", url->opts->list[i]->key, url->opts->list[i]->value);
- }
- }
-
- printf("\n");
-}
-
void usage (const char *exec_name) {
printf("Usage: %s\n\n\tNo arguments are accepted\n", exec_name);
@@ -164,7 +126,7 @@
// run the tests
for (test = url_tests; test->url; test++) {
// first output the URL we are handling...
- printf("%s... ", test->url);
+ printf("%-80s - ", test->url);
fflush(stdout);
// parse the URL
@@ -178,14 +140,14 @@
// compare it
if (cmp_url(&test->expected, &url)) {
printf("\texpected: ");
- print_url(&test->expected);
+ url_dump(&test->expected, stdout);
printf("\tresult: ");
- print_url(&url);
+ url_dump(&url, stdout);
} else {
printf("OK\n\t");
- print_url(&url);
+ url_dump(&url, stdout);
}
}
}