starting to work
authorTero Marttila <terom@fixme.fi>
Thu, 09 Oct 2008 00:33:37 +0300
changeset 16 74fb62022fb3
parent 15 a8d183e79ed9
child 17 0a024b29b16d
starting to work
src/lib/lex.c
src/lib/lex.h
src/lib/lexer.h
src/lib/log.c
src/lib/log.h
src/lib/url.c
src/lib/url.h
src/url_test.c
--- a/src/lib/lex.c	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/lex.c	Thu Oct 09 00:33:37 2008 +0300
@@ -1,7 +1,181 @@
+
+#include <stdlib.h>
 
 #include "lex.h"
+#include "error.h"
+#include "log.h"
+
+#define INITIAL_BUF_SIZE 4096
 
 int lexer (const struct lex *lex, const char *input, void *arg) {
-    // XXX: implement
+    // handling error returns
+    int err = -1, cb_err;
+    
+    // token buffer
+    char *buf = NULL, *buf_ptr;
+    size_t buf_size = INITIAL_BUF_SIZE;
+    
+    // state
+    int prev_state = LEX_INITIAL, cur_state = lex->initial_state, next_state = LEX_INITIAL;
+    
+    // input chars
+    const char *c = input;
+
+    // lookups
+    const struct lex_transition *trans = NULL;
+
+    // allocate the buffer
+    if ((buf = malloc(sizeof(char) * buf_size)) == NULL)
+        goto error;
+
+    // set buf_ptr initial position
+    buf_ptr = buf;
+    
+    // clear input
+    DEBUG("*cough*");
+    DEBUGN("%s", "");
+
+    // process input
+    do {
+        if (*c) {
+            // look up the next state
+            for (trans = lex->state_list[cur_state - 1].trans_list; trans->next_state > 0; trans++) {
+                // accept defaults
+                if (trans->flags & LEX_TRANS_DEFAULT)
+                    break;
+                
+                // disregard non-matches
+                if (trans->left > *c || *c > trans->right)
+                    continue;
+                
+                // abort on invalids
+                if (trans->flags & LEX_TRANS_INVALID)
+                    goto error;
+                
+                else {
+                    // accept it
+                    break;
+                }
+            }
+            
+            // did we find a transition with a valid next state?
+            if (!(next_state = trans->next_state))
+                goto error;
+
+            // call the char handler
+            if (lex->char_fn && (cb_err = lex->char_fn(*c, cur_state, next_state, arg)))
+                goto error;
+
+        } else {
+            // EOF!
+            next_state = LEX_EOF;
+            
+            // is cur_state a valid end state?
+            if (!(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
+                goto error;
+            
+            // note: we don't pass the NUL byte to the char handler
+        }
+
+        // if this char is part of the next token...
+        if (next_state != cur_state) {
+            // terminate the buffer and reset buf_ptr
+            *buf_ptr = 0; buf_ptr = buf;
+            
+            // dump state transitions
+            DEBUGF("\n\t%25s -> %25s -> %25s",
+                LEX_STATE_NAME(lex, prev_state),
+                LEX_STATE_NAME(lex, cur_state),
+                LEX_STATE_NAME(lex, next_state)
+            );
+
+            // pass in the complete token to the handler
+            if (lex->token_fn && (cb_err = lex->token_fn(cur_state, buf, next_state, prev_state, arg)))
+                goto error;
+
+            // update states
+            prev_state = cur_state;
+            cur_state = next_state;
+            next_state = LEX_INITIAL;
+        }
+        
+        // dump chars
+        if (next_state == LEX_INITIAL)
+            DEBUGN("%c", *c);
+        else
+            DEBUGNF("%c", *c);
+        
+        // store this char in the buffer
+        *(buf_ptr++) = *c;
+
+        // grow the buffer if needed
+        if (buf_ptr - buf >= buf_size) {
+            // remember the offset, as buf_ptr might get invalidated if buf is moved
+            size_t buf_offset = buf_ptr - buf;
+
+            // calc new size
+            buf_size *= 2;
+            
+            // grow/move
+            if ((buf = realloc(buf, buf_size)) == NULL)
+                goto error;
+            
+            // fix buf_ptr
+            buf_ptr = buf + buf_offset;
+        }
+    } while (*(c++));
+
+    // call the end handler
+    if (lex->end_fn && (cb_err = lex->end_fn(cur_state, arg)))
+        goto error;
+
+    // successfully parsed!
+    err = 0;
+
+error:
+    DEBUGNF("\n");
+    
+    if (cb_err)
+        err = cb_err;
+
+    // dump debug info on error
+    if (err) {
+        const char *cc;
+        
+        // figure out the error
+        if (!buf)
+            WARNING("malloc/realloc");
+
+        else if (trans && trans->flags & LEX_TRANS_INVALID)
+            WARNING("hit invalid transition match");
+
+        else if (!next_state)
+            WARNING("no valid transition found");
+            
+        else if (next_state == LEX_EOF && !(lex->state_list[cur_state - 1].flags & LEX_STATE_END))
+            WARNING("invalid end state");
+        
+        else
+            WARNING("unknown error condition (!?)");
+
+        DEBUG("%s", input);
+        DEBUGN("%s", "");
+
+        for (cc = input; cc < c; cc++)
+            DEBUGNF(" ");
+
+        DEBUGF("^\t%s -> %s -> %s",
+            LEX_STATE_NAME(lex, prev_state),
+            LEX_STATE_NAME(lex, cur_state),
+            LEX_STATE_NAME(lex, next_state)
+        );
+    }
+
+    // free stuff
+    free(buf);
+
+    // return
+    return err;
 }
 
+
--- a/src/lib/lex.h	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/lex.h	Thu Oct 09 00:33:37 2008 +0300
@@ -17,7 +17,8 @@
  */
 enum lex_transition_flags {
     LEX_TRANS_DEFAULT   = 0x01,
-    LEX_TRANS_FINAL     = 0x02,
+    /* not supported
+    LEX_TRANS_FINAL     = 0x02, */
     LEX_TRANS_INVALID   = 0x04,
 };
 
@@ -57,12 +58,15 @@
 };
 
 /*
- * Special tokens
+ * Special states, these are all defined as zero
  */
 
 // shows up in token_fn as the value of next_token when this_token is the last token.
 #define LEX_EOF 0
 
+// shows up as the initial value of prev_token
+#define LEX_INITIAL 0
+
 /*
  * Lex machine
  */
@@ -80,11 +84,13 @@
     int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg);
 
     /*
-     * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to.
+     * Called on every char handled by the lexer.
+     *
+     * The NUL byte at the end of the input string is not passed to char_fn (why not?).
      *
      * Return zero to have lexing continue, nonzero to stop lexing.
      */
-    int (*char_fn) (int this_token, char token_char, void *arg);
+    int (*char_fn) (char token_char, int from_token, int to_token, void *arg);
 
     /*
      * Called when the end of input has been reached, `last_token` is the state that we terminated in.
@@ -96,6 +102,9 @@
     // number of states
     size_t state_count;
 
+    // initial state
+    int initial_state;
+
     // array of lex_states, indexable by the state id.
     struct lex_state state_list[];
 };
@@ -120,6 +129,11 @@
                                   }
 
 /*
+ * Helpers for handling states
+ */ 
+#define LEX_STATE_NAME(lex, state) ((state) ? (lex)->state_list[(state) - 1].name : "...")
+
+/*
  * Lex it!
  *
  * Return zero to indiciate that the input was valid, nonzero otherwise.
--- a/src/lib/lexer.h	Wed Oct 08 22:05:13 2008 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#ifndef LIB_LEXER_H
-#define LIB_LEXER_H
-
-/*
- * Simple FSM lexing
- *
- * The lexer is implemented as a Finite State Machine, consisting for a number of states, which then contain a set of
- * transitions, which move the lexer from state to state based on each char of input at a time.
- *
- * Whenever the state changes, the token callback is triggered with the collected token data.
- */
-
-/*
- * Transition flags
- */
-enum lex_transition_flags {
-    LEX_TRANS_DEFAULT   = 0x01,
-    LEX_TRANS_FINAL     = 0x02,
-};
-
-/*
- * A transition from one state to another.
- */
-struct lex_transition {
-    // applies to chars [left, right]
-    char left, right;
-    
-    // flags from lex_transition_flags
-    char flags;
-    
-    // next state to enter
-    int next_state;
-};
-
-/*
- * State flags
- */ 
-enum lex_state_flags {
-    LEX_STATE_END       = 0x01;
-};
-
-/*
- * A state
- */
-struct lex_state {
-    // the state name (for debugging)
-    const char *name;
-
-    // flags from lex_state_flags
-    char flags;
-
-    // list of transitions for this state, terminated by a transition with next_state=0
-    struct lex_transition *trans_list;
-};
-
-/*
- * Lex machine
- */
-struct lex {
-    // number of states
-    size_t state_count;
-
-    // array of lex_states, indexable by the state id.
-    struct lex_state *state_list;
-
-    /*
-     * Core token handler. Everytime a full token is lexed (i.e. the state changes), this will be called.
-     * `this_token` represents the full token that was parsed, and `token_data` is the token's value. `next_token`
-     * is the state that terminated this token, and `prev_token` was the token before this one.
-     *
-     * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Thence, it can be
-     * modified, as its contents will be replaced by the next token. Hence, if you need to keep hold of it, copy it.
-     *
-     * Return zero to have lexing continue, nonzero to stop lexing.
-     */
-    int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg);
-
-    /*
-     * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to.
-     *
-     * Return zero to have lexing continue, nonzero to stop lexing.
-     */
-    int (*char_fn) (int this_token, char token_char, void *arg);
-
-    /*
-     * Called when the end of input has been reached, `last_token` is the state that we terminated in.
-     *
-     * Return zero to indiciate that the input was valid, nonzero to indicate an error.
-     */
-    int (*end_fn) (int last_token, void *arg);
-};
-
-/*
- * Helper macros for building the state_list
- */
-#define LEX_STATE(enum_val)     { #enum_val, 0,
-#define LEX_STATE_END(enum_val) { #enum_val, LEX_STATE_END,
-
-    #define LEX_CHAR(c, to)         { c, c, 0, to },
-    #define LEX_RANGE(l, r, to)     { l, r, 0, to },
-    #define LEX_ALPHA(to)           LEX_RANGE('a', 'z', to), LEX_RANGE('A', 'Z', to)
-    #define LEX_NUMBER(to)          LEX_RANGE('0', '9', to)
-    #define LEX_ALNUM(to)           LEX_ALPHA(to), LEX_NUMBER(to), LEX_CHAR('-', to), LEX_CHAR('_', to)
-    #define LEX_WHITESPACE(to)      LEX_CHAR(' ', to), LEX_CHAR('\n', to), LEX_CHAR('\t', to)
-
-    #define LEX_DEFAULT(to)         { 0, 0, LEX_TRANS_DEFAULT, to } \
-                                  }
-    #define LEX_END                 { 0, 0, 0, 0 } \
-                                  }
-
-/*
- * Lex it!
- *
- * Return zero to indiciate that the input was valid, nonzero otherwise.
- */
-int lexer (const struct lex *lex, const char *input, void *arg);
-
-#endif /* LIB_LEXER_H */
--- a/src/lib/log.c	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/log.c	Thu Oct 09 00:33:37 2008 +0300
@@ -6,33 +6,34 @@
 
 #include "log.h"
 
-static void _generic_err_vargs (int use_stderr, const char *func, int perr, const char *fmt, va_list va) {
-    FILE *stream = use_stderr ? stderr : stdout;
+static void _generic_err_vargs (int flags, const char *func, int err, const char *fmt, va_list va) {
+    FILE *stream = flags & LOG_DISPLAY_STDERR ? stderr : stdout;
 
     if (func)
         fprintf(stream, "%s: ", func);
     
     vfprintf(stream, fmt, va);
     
-    if (perr)
-        fprintf(stream, ": %s\n", strerror(perr > 0 ? errno : -perr));
-
-    fprintf(stream, "\n");
+    if (flags & LOG_DISPLAY_PERR)
+        fprintf(stream, ": %s\n", strerror(err == 0 ? errno : -err));
+    
+    if (!(flags & LOG_DISPLAY_NONL))
+        fprintf(stream, "\n");
 }
 
-void _generic_err (int use_stderr, const char *func, int perr, const char *fmt, ...) {
+void _generic_err (int flags, const char *func, int err, const char *fmt, ...) {
     va_list va;
 
     va_start(va, fmt);
-    _generic_err_vargs(use_stderr, func, perr, fmt, va);
+    _generic_err_vargs(flags, func, err, fmt, va);
     va_end(va);
 }
 
-void _generic_err_exit (int use_stderr, const char *func, int perr, const char *fmt, ...) {
+void _generic_err_exit (int flags, const char *func, int err, const char *fmt, ...) {
     va_list va;
 
     va_start(va, fmt);
-    _generic_err_vargs(use_stderr, func, perr, fmt, va);
+    _generic_err_vargs(flags, func, err, fmt, va);
     va_end(va);
       
     exit(EXIT_FAILURE);
--- a/src/lib/log.h	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/log.h	Thu Oct 09 00:33:37 2008 +0300
@@ -5,11 +5,21 @@
  * error handling
  */
 
-void _generic_err ( /*int level, */ int use_stderr, const char *func, int perr, const char *fmt, ...)
+enum log_display_flags {
+    LOG_DISPLAY_STDOUT =    0x00,
+    LOG_DISPLAY_STDERR =    0x01,
+
+    LOG_DISPLAY_PERR =      0x02,
+
+    LOG_DISPLAY_NONL =      0x04,
+};
+
+
+void _generic_err (int flags, const char *func, int err, const char *fmt, ...)
         __attribute__ ((format (printf, 4, 5)));
 
 // needs to be defined as its own function for the noreturn attribute
-void _generic_err_exit ( /* int level, */ int used_stderr, const char *func, int perr, const char *fmt, ...)
+void _generic_err_exit (int flags, const char *func, int err, const char *fmt, ...)
         __attribute__ ((format (printf, 4, 5)))
         __attribute__ ((noreturn));
 
@@ -25,20 +35,20 @@
 extern enum _debug_level _cur_debug_level;
 
 // various kinds of ways to handle an error, 2**3 of them, *g*
-#define info(...)                   _generic_err(       0,  NULL,   0,  __VA_ARGS__ )
-#define error(...)                  _generic_err(       1,  NULL,   0,  __VA_ARGS__ )
-#define err_exit(...)               _generic_err_exit(  1,  NULL,   0,  __VA_ARGS__ )
-#define perr(...)                   _generic_err(       1,  NULL,   1,  __VA_ARGS__ )
-#define perr_exit(...)              _generic_err_exit(  1,  NULL,   1,  __VA_ARGS__ )
-#define err_func(func, ...)         _generic_err(       1,  func,   0,  __VA_ARGS__ )
-#define err_func_exit(func, ...)    _generic_err_exit(  1,  func,   0,  __VA_ARGS__ )
-#define perr_func(func, ...)        _generic_err(       1,  func,   1,  __VA_ARGS__ )
-#define perr_func_exit(func, ...)   _generic_err_exit(  1,  func,   1,  __VA_ARGS__ )
-#define eerr_func(func, err, ...)   _generic_err(       1,  func,   err,__VA_ARGS__ )
+#define info(...)                   _generic_err(       LOG_DISPLAY_STDOUT,                     NULL, 0,    __VA_ARGS__ )
+#define error(...)                  _generic_err(       LOG_DISPLAY_STDERR,                     NULL, 0,    __VA_ARGS__ )
+#define err_exit(...)               _generic_err_exit(  LOG_DISPLAY_STDERR,                     NULL, 0,    __VA_ARGS__ )
+#define perr(...)                   _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  NULL, 0,    __VA_ARGS__ )
+#define perr_exit(...)              _generic_err_exit(  LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  NULL, 0,    __VA_ARGS__ )
+#define err_func(func, ...)         _generic_err(       LOG_DISPLAY_STDERR,                     func, 0,    __VA_ARGS__ )
+#define err_func_exit(func, ...)    _generic_err_exit(  LOG_DISPLAY_STDERR,                     func, 0,    __VA_ARGS__ )
+#define perr_func(func, ...)        _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, 0,    __VA_ARGS__ )
+#define perr_func_exit(func, ...)   _generic_err_exit(  LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, 0,    __VA_ARGS__ )
+#define eerr_func(func, err, ...)   _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, err,  __VA_ARGS__ )
+#define debug(func, ...)            _generic_err(       LOG_DISPLAY_STDERR,                     func, 0,    __VA_ARGS__ )
+#define debug_nonl(func, ...)       _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_NONL,  func, 0,    __VA_ARGS__ )
 
-/*
- * Legacy...
- */
+// logging includes errors
 #include "error.h"
 
 #define WARNING(...) err_func(__func__, __VA_ARGS__)
@@ -46,9 +56,15 @@
 #define EWARNING(err, ...) eerr_func(__func__, (err), __VA_ARGS__)
 
 #ifdef DEBUG_ENABLED
-#define DEBUG(...) err_func(__func__, __VA_ARGS__)
+#define DEBUG(...) debug(__func__, __VA_ARGS__)
+#define DEBUGF(...) debug(NULL, __VA_ARGS__)
+#define DEBUGN(...) debug_nonl(__func__, __VA_ARGS__)
+#define DEBUGNF(...) debug_nonl(NULL, __VA_ARGS__)
 #else
 #define DEBUG(...) (void) (0)
+#define DEBUGF(...) (void) (0)
+#define DEBUGN(...) (void) (0)
+#define DEBUGNF(...) (void) (0)
 #endif
 
 // default is to enable INFO
@@ -63,7 +79,7 @@
 #if INFO_ENABLED
 #define INFO(...) info(__VA_ARGS__)
 #else
-#define INFO(...) (void) (0)
+#define INFO(...) (void) (__VA_ARGS__)
 #endif
 
 #endif /* LIB_LOG_H */
--- a/src/lib/url.c	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/url.c	Thu Oct 09 00:33:37 2008 +0300
@@ -5,6 +5,7 @@
 #include "url.h"
 #include "lex.h"
 #include "error.h"
+#include "log.h"
 #include "misc.h"
 
 enum url_token {
@@ -62,16 +63,200 @@
 };
 
 static int _url_append_scheme (struct url *url, const char *data) {
-    
+    return 0;
 }
 
 static int _url_append_opt_key (struct url *url, const char *key) {
-
+    return 0;
 }
 
 static int _url_append_opt_val (struct url *url, const char *value) {
+    return 0;
+}
 
-}
+static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg);
+
+static struct lex url_lex = {
+    .token_fn = url_lex_token,
+    .char_fn = NULL,
+    .end_fn = NULL,
+
+    .state_count = URL_MAX,
+    .initial_state = URL_BEGIN,
+    .state_list = {
+        LEX_STATE ( URL_BEGIN ) {
+            LEX_ALNUM       (           URL_BEGIN_ALNUM         ),
+            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
+            LEX_CHAR        (   '/',    URL_PATH_START          ),
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_END
+        },
+        
+        // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME
+        LEX_STATE_END ( URL_BEGIN_ALNUM ) {
+            LEX_ALNUM       (           URL_BEGIN_ALNUM         ),
+            LEX_CHAR        (   '+',    URL_SCHEME_SEP          ),  // it was URL_SCHEME
+            LEX_CHAR        (   ':',    URL_BEGIN_COLON         ), 
+            LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_USERNAME
+            LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
+            LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
+            LEX_END
+        },
+        
+        // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP
+        LEX_STATE ( URL_BEGIN_COLON ) {
+            LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH1   ),  // it was URL_SCHEME
+            LEX_ALNUM       (           URL_USERHOST_ALNUM2     ),
+            LEX_END
+        },
+       
+
+        LEX_STATE ( URL_SCHEME ) { 
+            LEX_ALNUM       (           URL_SCHEME              ),
+            LEX_CHAR        (   '+',    URL_SCHEME_SEP          ),
+            LEX_CHAR        (   ':',    URL_SCHEME_END_COL      ),
+            LEX_END
+        },
+
+        LEX_STATE ( URL_SCHEME_SEP ) {
+            LEX_ALNUM       (           URL_SCHEME              ),
+            LEX_END
+        },
+
+        LEX_STATE ( URL_SCHEME_END_COL ) {
+            LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH1   ),
+            LEX_END
+        },
+
+        LEX_STATE ( URL_SCHEME_END_SLASH1 ) {
+            LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH2   ),
+            LEX_END
+        },
+
+        LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) {
+            LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
+            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
+            LEX_CHAR        (   '/',    URL_PATH_START          ),
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_END
+        },
+        
+        // this can be URL_USERNAME or URL_HOSTNAME
+        LEX_STATE_END ( URL_USERHOST_ALNUM ) {
+            LEX_CHAR        (   ':',    URL_USERHOST_COLON      ), 
+            LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_USERNAME
+            LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
+            LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
+            LEX_DEFAULT     (           URL_USERHOST_ALNUM      ),
+        },
+        
+        // this can be URL_USERNAME_END or URL_SERVICE_SEP
+        LEX_STATE ( URL_USERHOST_COLON ) {
+            LEX_ALNUM       (           URL_USERHOST_ALNUM2        ),
+            LEX_END
+        },
+        
+        // this can be URL_PASSWORD or URL_SERVICE
+        LEX_STATE_END ( URL_USERHOST_ALNUM2 ) {
+            LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_PASSSWORD
+            LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_SERVICE
+            LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_SERVICE
+            LEX_DEFAULT     (           URL_USERHOST_ALNUM2     ),
+        },
+        
+        // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2
+        LEX_STATE ( URL_USERNAME ) {
+            LEX_END
+        },
+
+        LEX_STATE ( URL_PASSWORD_SEP ) {
+            LEX_END
+        },
+
+        LEX_STATE ( URL_PASSWORD ) {
+            LEX_END
+        },
+
+
+        LEX_STATE_END ( URL_USERNAME_END ) {
+            LEX_ALNUM       (           URL_HOSTNAME            ), 
+            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
+            LEX_CHAR        (   '/',    URL_PATH_START          ),
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_END
+        },
+
+
+        LEX_STATE_END ( URL_HOSTNAME ) {
+            LEX_ALNUM       (           URL_HOSTNAME            ), 
+            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
+            LEX_CHAR        (   '/',    URL_PATH_START          ),
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_END
+        },
+
+
+        LEX_STATE ( URL_SERVICE_SEP ) {
+            LEX_ALNUM       (           URL_SERVICE            ), 
+            LEX_CHAR        (   '/',    URL_PATH_START          ),
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_END
+        },
+
+        LEX_STATE_END ( URL_SERVICE ) {
+            LEX_ALNUM       (           URL_SERVICE            ), 
+            LEX_CHAR        (   '/',    URL_PATH_START          ),
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_END
+        },
+
+
+        LEX_STATE_END ( URL_PATH_START ) {
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_DEFAULT     (           URL_PATH                ),
+        },
+
+        LEX_STATE_END ( URL_PATH ) {
+            LEX_CHAR        (   '?',    URL_OPT_START           ),
+            LEX_DEFAULT     (           URL_PATH                ),
+        },
+
+
+        LEX_STATE_END ( URL_OPT_START ) {
+            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_INVALID     (   '='                             ),
+            LEX_DEFAULT     (           URL_OPT_KEY             ),
+        },
+
+        LEX_STATE_END ( URL_OPT_KEY ) {
+            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_CHAR        (   '=',    URL_OPT_EQ              ),
+            LEX_DEFAULT     (           URL_OPT_KEY             ),
+        },
+
+        LEX_STATE_END ( URL_OPT_EQ ) {
+            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_INVALID     (   '='                             ),
+            LEX_DEFAULT     (           URL_OPT_VAL             ),
+        },
+
+        LEX_STATE_END ( URL_OPT_VAL ) {
+            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_INVALID     (   '='                             ),
+            LEX_DEFAULT     (           URL_OPT_VAL             ),
+        },
+
+        LEX_STATE_END ( URL_OPT_SEP ) {
+            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_INVALID     (   '='                             ),
+            LEX_DEFAULT     (           URL_OPT_KEY             ),
+        },
+        
+        LEX_STATE ( URL_ERROR ) {
+            LEX_END
+        },
+    }
+};
 
 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) {
     enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token;
@@ -81,6 +266,10 @@
     (void) prev_token;
     
     switch (this_token) {
+        case URL_BEGIN:
+            // irrelevant
+            break;
+
         case URL_BEGIN_ALNUM:
             switch (next_token) {
                 case URL_SCHEME_SEP:
@@ -185,7 +374,8 @@
                 case URL_PATH_START:
                 case URL_OPT_START:
                 case LEX_EOF:
-                    // store the service
+                    // store the hostname and service
+                    state->url->hostname = state->alnum; state->alnum = NULL;
                     copy_to = &state->url->service; break;
 
                 default:
@@ -250,7 +440,7 @@
             break;
         
         default:
-            FATAL("invalid token");
+            ERROR("invalid token");
     }
     
     if (copy_to) {
@@ -263,192 +453,13 @@
     return 0;
 
 error:
-    // XXX: error codes?
+    DEBUG("token: %s -> %s -> %s: %s", 
+        LEX_STATE_NAME(&url_lex, prev_token), LEX_STATE_NAME(&url_lex, this_token), LEX_STATE_NAME(&url_lex, next_token),
+        token_data
+    );
     return -1;
 }
 
-static struct lex url_lex = {
-    .token_fn = url_lex_token,
-    .char_fn = NULL,
-    .end_fn = NULL,
-
-    .state_count = URL_MAX,
-    .state_list = {
-        LEX_STATE ( URL_BEGIN ) {
-            LEX_ALNUM       (           URL_BEGIN_ALNUM         ),
-            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
-            LEX_CHAR        (   '/',    URL_PATH_START          ),
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_END
-        },
-        
-        // this can be URL_SCHEME, URL_USERNAME or URL_HOSTNAME
-        LEX_STATE_END ( URL_BEGIN_ALNUM ) {
-            LEX_ALNUM       (           URL_BEGIN_ALNUM         ),
-            LEX_CHAR        (   '+',    URL_SCHEME_SEP          ),  // it was URL_SCHEME
-            LEX_CHAR        (   ':',    URL_BEGIN_COLON         ), 
-            LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_USERNAME
-            LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
-            LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
-            LEX_END
-        },
-        
-        // this can be URL_SCHEME_END_COL, URL_USERNAME_END or URL_SERVICE_SEP
-        LEX_STATE ( URL_BEGIN_COLON ) {
-            LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH1   ),  // it was URL_SCHEME
-            LEX_ALNUM       (           URL_USERHOST_ALNUM2     ),
-            LEX_END
-        },
-       
-
-        LEX_STATE ( URL_SCHEME ) { 
-            LEX_ALNUM       (           URL_SCHEME              ),
-            LEX_CHAR        (   '+',    URL_SCHEME_SEP          ),
-            LEX_CHAR        (   ':',    URL_SCHEME_END_COL      ),
-            LEX_END
-        },
-
-        LEX_STATE ( URL_SCHEME_SEP ) {
-            LEX_ALNUM       (           URL_SCHEME              ),
-            LEX_END
-        },
-
-        LEX_STATE ( URL_SCHEME_END_COL ) {
-            LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH1   ),
-            LEX_END
-        },
-
-        LEX_STATE ( URL_SCHEME_END_SLASH1 ) {
-            LEX_CHAR        (   '/',    URL_SCHEME_END_SLASH2   ),
-            LEX_END
-        },
-
-        LEX_STATE_END ( URL_SCHEME_END_SLASH2 ) {
-            LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
-            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
-            LEX_CHAR        (   '/',    URL_PATH_START          ),
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_END
-        },
-        
-        // this can be URL_USERNAME or URL_HOSTNAME
-        LEX_STATE_END ( URL_USERHOST_ALNUM ) {
-            LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
-            LEX_CHAR        (   ':',    URL_USERHOST_COLON      ), 
-            LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_USERNAME
-            LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
-            LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
-            LEX_END
-        },
-        
-        // this can be URL_USERNAME_END or URL_SERVICE_SEP
-        LEX_STATE ( URL_USERHOST_COLON ) {
-            LEX_ALNUM       (           URL_USERHOST_ALNUM2        ),
-            LEX_END
-        },
-        
-        // this can be URL_PASSWORD or URL_SERVICE
-        LEX_STATE_END ( URL_USERHOST_ALNUM2 ) {
-            LEX_ALNUM       (           URL_USERHOST_ALNUM      ),
-            LEX_CHAR        (   '@',    URL_USERNAME_END        ),  // it was URL_PASSSWORD
-            LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_SERVICE
-            LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_SERVICE
-            LEX_END
-        },
-        
-        // dummy states, covered by URL_USERHOST_ALNUM/URL_USERHOST_COLON/URL_USERHOST_ALNUM2
-        LEX_STATE ( URL_USERNAME ) {
-            LEX_END
-        },
-
-        LEX_STATE ( URL_PASSWORD_SEP ) {
-            LEX_END
-        },
-
-        LEX_STATE ( URL_PASSWORD ) {
-            LEX_END
-        },
-
-
-        LEX_STATE_END ( URL_USERNAME_END ) {
-            LEX_ALNUM       (           URL_HOSTNAME            ), 
-            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
-            LEX_CHAR        (   '/',    URL_PATH_START          ),
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_END
-        },
-
-
-        LEX_STATE_END ( URL_HOSTNAME ) {
-            LEX_ALNUM       (           URL_HOSTNAME            ), 
-            LEX_CHAR        (   ':',    URL_SERVICE_SEP         ),
-            LEX_CHAR        (   '/',    URL_PATH_START          ),
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_END
-        },
-
-
-        LEX_STATE ( URL_SERVICE_SEP ) {
-            LEX_ALNUM       (           URL_SERVICE            ), 
-            LEX_CHAR        (   '/',    URL_PATH_START          ),
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_END
-        },
-
-        LEX_STATE_END ( URL_SERVICE ) {
-            LEX_ALNUM       (           URL_SERVICE            ), 
-            LEX_CHAR        (   '/',    URL_PATH_START          ),
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_END
-        },
-
-
-        LEX_STATE_END ( URL_PATH_START ) {
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_DEFAULT     (           URL_PATH                ),
-        },
-
-        LEX_STATE_END ( URL_PATH ) {
-            LEX_CHAR        (   '?',    URL_OPT_START           ),
-            LEX_DEFAULT     (           URL_PATH                ),
-        },
-
-
-        LEX_STATE_END ( URL_OPT_START ) {
-            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_INVALID     (   '='                             ),
-            LEX_DEFAULT     (           URL_OPT_KEY             ),
-        },
-
-        LEX_STATE_END ( URL_OPT_KEY ) {
-            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_CHAR        (   '=',    URL_OPT_EQ              ),
-            LEX_DEFAULT     (           URL_OPT_KEY             ),
-        },
-
-        LEX_STATE_END ( URL_OPT_EQ ) {
-            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_INVALID     (   '='                             ),
-            LEX_DEFAULT     (           URL_OPT_VAL             ),
-        },
-
-        LEX_STATE_END ( URL_OPT_VAL ) {
-            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_INVALID     (   '='                             ),
-            LEX_DEFAULT     (           URL_OPT_VAL             ),
-        },
-
-        LEX_STATE_END ( URL_OPT_SEP ) {
-            LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_INVALID     (   '='                             ),
-            LEX_DEFAULT     (           URL_OPT_KEY             ),
-        },
-        
-        LEX_STATE ( URL_ERROR ) {
-            LEX_END
-        },
-    }
-};
 
 int url_parse (struct url *url, const char *text) {
     struct url_state state; ZINIT(state);
@@ -468,3 +479,42 @@
     return -1;
 }
 
+static void _url_dump_part (const char *field, const char *val, FILE *stream) {
+    if (val) {
+        fprintf(stream, "%s=%s ", field, val);
+    }
+}
+
+void url_dump (const struct url *url, FILE *stream) {
+    int i;
+
+    if (url->schema) {
+        fprintf(stream, "schema=");
+
+        for (i = 0; i < url->schema->count; i++) {
+            if (i > 0)
+                fprintf(stream, "+");
+
+            fprintf(stream, "%s", url->schema->list[i]);
+        }
+
+        fprintf(stream, " ");
+    }
+
+    _url_dump_part("username", url->username, stream);
+    _url_dump_part("password", url->password, stream);
+    _url_dump_part("hostname", url->hostname, stream);
+    _url_dump_part("service", url->service, stream);
+    _url_dump_part("path", url->path, stream);
+
+    if (url->opts) {
+        fprintf(stream, "opts: ");
+
+        for (i = 0; i < url->opts->count; i++) {
+            fprintf(stream, "%s=%s ", url->opts->list[i].key, url->opts->list[i].value);
+        }
+    }
+
+    fprintf(stream, "\n");
+}
+
--- a/src/lib/url.h	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/lib/url.h	Thu Oct 09 00:33:37 2008 +0300
@@ -13,13 +13,14 @@
  */
 
 #include <sys/types.h>
+#include <stdio.h>
 
 /*
  * The schema
  */
 struct url_schema {
     size_t count;
-    const char **list;
+    const char *list[];
 };
 
 /*
@@ -30,7 +31,7 @@
     struct url_opt {
         const char *key;
         const char *value;
-    } **list;
+    } list[];
 };
 
 /*
@@ -54,4 +55,9 @@
  */
 int url_parse (struct url *url, const char *text);
 
+/*
+ * Prints a url in a debug-output format.
+ */
+void url_dump (const struct url *url, FILE *stream);
+
 #endif /* LIB_URL_H */
--- a/src/url_test.c	Wed Oct 08 22:05:13 2008 +0300
+++ b/src/url_test.c	Thu Oct 09 00:33:37 2008 +0300
@@ -5,8 +5,9 @@
 
 #include "lib/url.h"
 
-#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); return -1; } while (0)
+#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); printf("\n"); return -1; } while (0)
 
+struct url_schema basic_http = { 1, { "http" } };
 
 struct url_test {
     const char *url;
@@ -16,9 +17,9 @@
         NULL, NULL, NULL, "localhost", "http", NULL, NULL
     } },
 
-/*    {   "http://example.com/path",  {
-        { 1, { "http" } }, NULL, NULL, "example.com", NULL, "path", NULL 
-    } }, */
+    {   "http://example.com/path",  {
+        &basic_http, NULL, NULL, "example.com", NULL, "path", NULL 
+    } },
     
     {   NULL,               {   } },
 };
@@ -26,14 +27,14 @@
 int cmp_url_str (const char *field, const char *test, const char *real) {
     if (!test) {
         if (real)
-            FAIL("%s: shouldn't be present", field);
+            FAIL("%s shouldn't be present", field);
 
     } else if (!real) {
-        FAIL("%s: missing", field);
+        FAIL("%s is missing", field);
 
     } else {
         if (strcmp(test, real) != 0)
-            FAIL("%s: differs: %s -> %s", field, test, real);
+            FAIL("%s differs: %s -> %s", field, test, real);
     }
 
     // ok
@@ -94,10 +95,10 @@
             FAIL("inconsistent opts count");
         
         for (i = 0; i < test->opts->count; i++) {
-            if (strcmp(test->opts->list[i]->key, real->opts->list[i]->key) != 0)
+            if (strcmp(test->opts->list[i].key, real->opts->list[i].key) != 0)
                 FAIL("differing scheme key #%d", i);
             
-            if (strcmp(test->opts->list[i]->value, real->opts->list[i]->value) != 0)
+            if (strcmp(test->opts->list[i].value, real->opts->list[i].value) != 0)
                 FAIL("differing scheme value #%d", i);
         }
     }
@@ -109,45 +110,6 @@
     return -1;
 }
 
-void print_url_part (const char *field, const char *val) {
-    if (val) {
-        printf("%s=%s ", field, val);
-    }
-}
-
-void print_url (const struct url *url) {
-    int i;
-
-    if (url->schema) {
-        printf("schema=");
-
-        for (i = 0; i < url->schema->count; i++) {
-            if (i > 0)
-                printf("+");
-
-            printf("%s", url->schema->list[i]);
-        }
-
-        printf(" ");
-    }
-
-    print_url_part("username", url->username);
-    print_url_part("password", url->password);
-    print_url_part("hostname", url->hostname);
-    print_url_part("service", url->service);
-    print_url_part("path", url->path);
-
-    if (url->opts) {
-        printf("opts: ");
-
-        for (i = 0; i < url->opts->count; i++) {
-            printf("%s=%s ", url->opts->list[i]->key, url->opts->list[i]->value);
-        }
-    }
-
-    printf("\n");
-}
-
 void usage (const char *exec_name) {
     printf("Usage: %s\n\n\tNo arguments are accepted\n", exec_name);
 
@@ -164,7 +126,7 @@
     // run the tests
     for (test = url_tests; test->url; test++) {
         // first output the URL we are handling...
-        printf("%s... ", test->url);
+        printf("%-80s - ", test->url);
         fflush(stdout);
         
         // parse the URL
@@ -178,14 +140,14 @@
         // compare it
         if (cmp_url(&test->expected, &url)) {
             printf("\texpected: ");
-            print_url(&test->expected);
+            url_dump(&test->expected, stdout);
 
             printf("\tresult:   ");
-            print_url(&url);
+            url_dump(&url, stdout);
 
         } else {
             printf("OK\n\t");
-            print_url(&url);
+            url_dump(&url, stdout);
         }
     }
 }