look ma, it compiles\!
authorTero Marttila <terom@fixme.fi>
Wed, 08 Oct 2008 22:05:13 +0300
changeset 15 a8d183e79ed9
parent 14 115067dfba55
child 16 74fb62022fb3
look ma, it compiles\!
Makefile
src/lib/lex.c
src/lib/lex.h
src/lib/url.c
src/lib/url.h
src/url_test.c
--- a/Makefile	Tue Oct 07 20:31:35 2008 +0300
+++ b/Makefile	Wed Oct 08 22:05:13 2008 +0300
@@ -19,6 +19,7 @@
 bin/hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o
 bin/simple_hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o obj/simple.o
 bin/evpq_test: obj/evpq.o obj/lib/log.o
+bin/url_test: obj/lib/url.o obj/lib/lex.o obj/lib/log.o
 
 # computed
 LDFLAGS = ${LIBRARY_PATHS} ${LIBRARY_LIST}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/lex.c	Wed Oct 08 22:05:13 2008 +0300
@@ -0,0 +1,7 @@
+
+#include "lex.h"
+
+int lexer (const struct lex *lex, const char *input, void *arg) {
+    // XXX: implement. Until then reject every input (nonzero) instead of returning an indeterminate value.
+    return -1;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/lex.h	Wed Oct 08 22:05:13 2008 +0300
@@ -0,0 +1,129 @@
+#ifndef LIB_LEXER_H
+#define LIB_LEXER_H
+
+/*
+ * Simple FSM lexing
+ *
+ * The lexer is implemented as a Finite State Machine, consisting of a number of states, which each contain a set of
+ * transitions, which move the lexer from state to state based on each char of input at a time.
+ *
+ * Whenever the state changes, the token callback is triggered with the collected token data.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Transition flags
+ */
+enum lex_transition_flags {
+    LEX_TRANS_DEFAULT   = 1 << 0,   // set by LEX_DEFAULT() entries
+    LEX_TRANS_FINAL     = 1 << 1,   // not set by any of the helper macros in this header
+    LEX_TRANS_INVALID   = 1 << 2,   // set by LEX_INVALID() entries
+};
+
+/*
+ * A transition from one state to another.
+ */
+struct lex_transition {
+    // applies to chars in the inclusive range [left, right]; LEX_CHAR() sets both ends to the same char
+    char left, right;
+    
+    // bitmask of lex_transition_flags (0 for the plain LEX_CHAR()/LEX_RANGE() entries)
+    char flags;
+    
+    // next state to enter; the field order of this struct is relied on by the positional initializers in LEX_*()
+    int next_state;
+};
+
+/*
+ * State flags
+ */ 
+enum lex_state_flags {
+    LEX_STATE_END       = 1 << 0,   // stored in lex_state.flags by the LEX_STATE_END() helper macro
+};
+
+/*
+ * A state
+ */
+struct lex_state {
+    // the state name (for debugging); LEX_STATE()/LEX_STATE_END() fill it in by stringifying the enum constant
+    const char *name;
+
+    // bitmask of lex_state_flags
+    char flags;
+
+    // list of transitions for this state, terminated by a transition with next_state=0. NOTE(review): LEX_INVALID() entries also have next_state=0, and only 15 slots exist including the terminator — confirm both.
+    struct lex_transition trans_list[15];
+};
+
+/*
+ * Special tokens
+ */
+
+// shows up in token_fn as the value of next_token when this_token is the last token.
+#define LEX_EOF 0
+
+/*
+ * Lex machine
+ */
+struct lex {
+    /*
+     * Core token handler. Every time a full token is lexed (i.e. the state changes), this will be called.
+     * `this_token` represents the full token that was parsed, and `token_data` is the token's value. `next_token`
+     * is the state that terminated this token (LEX_EOF after the last one), and `prev_token` was the token before it.
+     *
+     * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. It may therefore be
+     * modified in place, but its contents are replaced by the next token: copy it if you need to keep hold of it.
+     *
+     * Return zero to have lexing continue, nonzero to stop lexing.
+     */
+    int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg);
+
+    /*
+     * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to.
+     *
+     * Return zero to have lexing continue, nonzero to stop lexing.
+     */
+    int (*char_fn) (int this_token, char token_char, void *arg);
+
+    /*
+     * Called when the end of input has been reached; `last_token` is the state that we terminated in.
+     *
+     * Return zero to indicate that the input was valid, nonzero to indicate an error.
+     */
+    int (*end_fn) (int last_token, void *arg);
+    
+    // number of states (entries in state_list)
+    size_t state_count;
+
+    // array of lex_states, indexable by the state id (flexible array member, so it must stay the last field)
+    struct lex_state state_list[];
+};
+
+/* Helper macros for building the state_list. LEX_STATE()/LEX_STATE_END() open a state initializer (name, flags) and
+ * the `{` written after them opens its trans_list; LEX_DEFAULT()/LEX_END emit the last transition and the `}` that
+ * closes trans_list, so the state initializer itself is closed by a `}` written at the point of use. */
+#define LEX_STATE(enum_val)     { #enum_val, 0,
+#define LEX_STATE_END(enum_val) { #enum_val, LEX_STATE_END,
+
+    #define LEX_CHAR(c, to)         { c, c, 0, to }
+    #define LEX_RANGE(l, r, to)     { l, r, 0, to }
+    #define LEX_ALPHA(to)           LEX_RANGE('a', 'z', to), LEX_RANGE('A', 'Z', to)
+    #define LEX_NUMBER(to)          LEX_RANGE('0', '9', to)
+    #define LEX_ALNUM(to)           LEX_ALPHA(to), LEX_NUMBER(to), LEX_CHAR('-', to), LEX_CHAR('_', to)
+    #define LEX_WHITESPACE(to)      LEX_CHAR(' ', to), LEX_CHAR('\n', to), LEX_CHAR('\t', to)
+    #define LEX_INVALID(c)          { c, c, LEX_TRANS_INVALID, 0 }
+
+    #define LEX_DEFAULT(to)         { 0, 0, LEX_TRANS_DEFAULT, to } \
+                                  }
+    #define LEX_END                 { 0, 0, 0, 0 } \
+                                  }
+
+/*
+ * Lex it!
+ *
+ * Return zero to indicate that the input was valid, nonzero otherwise.
+ */
+int lexer (const struct lex *lex, const char *input, void *arg);
+
+#endif /* LIB_LEXER_H */
--- a/src/lib/url.c	Tue Oct 07 20:31:35 2008 +0300
+++ b/src/lib/url.c	Wed Oct 08 22:05:13 2008 +0300
@@ -1,6 +1,11 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
 
 #include "url.h"
-#include "lexer.h"
+#include "lex.h"
+#include "error.h"
+#include "misc.h"
 
 enum url_token {
     URL_INVALID,
@@ -41,8 +46,6 @@
     URL_OPT_VAL,
     URL_OPT_SEP,
     
-    URL_END,
-
     URL_MAX,
 };
 
@@ -50,24 +53,225 @@
  * Parser state
  */
 struct url_state {
+    // the URL to parse into
     struct url *url;
+    
+    // our lookahead-kludge: url_lex_token() parks an alnum token in `alnum` until later tokens show what it was (`alnum2` is not referenced in the visible code)
+    const char *alnum, *alnum2;
+    
+};
 
+static int _url_append_scheme (struct url *url, const char *data) {
+    return -1;  // XXX: not implemented yet; fail explicitly (url_lex_token aborts on nonzero) rather than return an indeterminate value
+}
 
-};
+static int _url_append_opt_key (struct url *url, const char *key) {
+    return -1;  // XXX: not implemented yet; fail explicitly (url_lex_token aborts on nonzero) rather than return an indeterminate value
+}
+
+static int _url_append_opt_val (struct url *url, const char *value) {
+    return -1;  // XXX: not implemented yet; fail explicitly (url_lex_token aborts on nonzero) rather than return an indeterminate value
+}
 
 static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) {
     enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token;
     struct url_state *state = arg;
-
-}
+    const char **copy_to = NULL;
 
-static int url_lex_end (int _last_token, void *arg) {
-    enum url_token last_token = _last_token;
-    struct url_state *state = arg;
+    (void) prev_token;
+    
+    switch (this_token) {
+        case URL_BEGIN_ALNUM:
+            switch (next_token) {
+                case URL_SCHEME_SEP:
+                    // store the scheme
+                    if (_url_append_scheme(state->url, token_data))
+                        goto error;
+                    
+                    break;
+                
+                case URL_USERNAME_END:
+                    // store the username
+                    copy_to = &state->url->username; break;
+                
+                case URL_PATH_START:
+                case URL_OPT_START:
+                case LEX_EOF:
+                    // store the hostname
+                    copy_to = &state->url->hostname; break;
 
+                case URL_BEGIN_COLON:
+                    // gah...
+                    copy_to = &state->alnum; break;
+                
+
+                default:
+                    FATAL("weird next token");
+            }
+            
+            break;
+
+        case URL_BEGIN_COLON:
+            switch (next_token) {
+                case URL_SCHEME_END_SLASH1:
+                    // store the schema
+                    if (_url_append_scheme(state->url, token_data))
+                        goto error;
+
+                    break;
+                
+                case URL_USERHOST_ALNUM2:
+                    // gah..
+                    break;
+
+                default:
+                    FATAL("weird next token");
+            }
+
+            break;
+
+        case URL_SCHEME:
+            // store the scheme
+            if (_url_append_scheme(state->url, token_data))
+                goto error;
+
+            break;
+    
+        case URL_SCHEME_SEP:
+            // ignore
+            break;
+
+        case URL_SCHEME_END_COL:
+        case URL_SCHEME_END_SLASH1:
+        case URL_SCHEME_END_SLASH2:
+            // ignore
+            break;
+        
+        case URL_USERHOST_ALNUM:
+            switch (next_token) {
+                case URL_USERNAME_END:
+                    // store the username
+                    copy_to = &state->url->username; break;
+                
+                case URL_PATH_START:
+                case URL_OPT_START:
+                case LEX_EOF:
+                    // store the hostname
+                    copy_to = &state->url->hostname; break;
+
+                case URL_USERHOST_COLON:
+                    // gah...
+                    copy_to = &state->alnum; break;
+
+                default:
+                    FATAL("weird next token");
+            }
+            
+            break;
+
+        case URL_USERHOST_COLON:
+            // ignore
+            break;
+
+        case URL_USERHOST_ALNUM2:
+            switch (next_token) {
+                case URL_USERNAME_END:
+                    // store the username and password
+                    state->url->username = state->alnum; state->alnum = NULL;
+                    copy_to = &state->url->password;
+
+                    break;
+
+                case URL_PATH_START:
+                case URL_OPT_START:
+                case LEX_EOF:
+                    // store the service
+                    copy_to = &state->url->service; break;
+
+                default:
+                    FATAL("weird next token");
+            }
+
+            break;
+
+        case URL_USERNAME:
+        case URL_PASSWORD_SEP:
+        case URL_PASSWORD:
+            FATAL("these should be overshadowed");
+        
+        case URL_USERNAME_END:
+            // ignore
+            break;
+
+        case URL_HOSTNAME:
+            // store
+            copy_to = &state->url->hostname; break;
+
+        case URL_SERVICE_SEP:
+            // ignore
+            break;
+
+        case URL_SERVICE:
+            // store
+            copy_to = &state->url->service; break;
+        
+        case URL_PATH_START:
+            // ignore
+            break;
+
+        case URL_PATH:
+            // store
+            copy_to = &state->url->path; break;
+
+        case URL_OPT_START:
+            // ignore
+            break;
+
+        case URL_OPT_KEY:
+            // store
+            if (_url_append_opt_key(state->url, token_data))
+                goto error;
+
+            break;
+
+        case URL_OPT_EQ:
+            // ignore
+            break;
+
+        case URL_OPT_VAL:
+            // store
+            if (_url_append_opt_val(state->url, token_data))
+                goto error;
+
+            break;
+        
+        case URL_OPT_SEP:
+            // ignore
+            break;
+        
+        default:
+            FATAL("invalid token");
+    }
+    
+    if (copy_to) {
+        // copy the token data
+        if ((*copy_to = strdup(token_data)) == NULL)
+            ERROR("strdup");
+    }
+
+    // good
+    return 0;
+
+error:
+    // XXX: error codes?
+    return -1;
 }
 
 static struct lex url_lex = {
+    .token_fn = url_lex_token,
+    .char_fn = NULL,
+    .end_fn = NULL,
+
     .state_count = URL_MAX,
     .state_list = {
         LEX_STATE ( URL_BEGIN ) {
@@ -135,7 +339,7 @@
             LEX_CHAR        (   '/',    URL_PATH_START          ),  // it was URL_HOSTNAME
             LEX_CHAR        (   '?',    URL_OPT_START           ),  // it was URL_HOSTNAME
             LEX_END
-        }
+        },
         
         // this can be URL_USERNAME_END or URL_SERVICE_SEP
         LEX_STATE ( URL_USERHOST_COLON ) {
@@ -212,7 +416,7 @@
 
         LEX_STATE_END ( URL_OPT_START ) {
             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_CHAR        (   '=',    URL_ERROR               ),
+            LEX_INVALID     (   '='                             ),
             LEX_DEFAULT     (           URL_OPT_KEY             ),
         },
 
@@ -224,30 +428,26 @@
 
         LEX_STATE_END ( URL_OPT_EQ ) {
             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_INVALID     (   '='                             ),
             LEX_DEFAULT     (           URL_OPT_VAL             ),
         },
 
         LEX_STATE_END ( URL_OPT_VAL ) {
             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
+            LEX_INVALID     (   '='                             ),
             LEX_DEFAULT     (           URL_OPT_VAL             ),
         },
 
         LEX_STATE_END ( URL_OPT_SEP ) {
             LEX_CHAR        (   '&',    URL_OPT_SEP             ),
-            LEX_CHAR        (   '=',    URL_ERROR               ),
+            LEX_INVALID     (   '='                             ),
             LEX_DEFAULT     (           URL_OPT_KEY             ),
         },
         
         LEX_STATE ( URL_ERROR ) {
             LEX_END
         },
-
-        URL_MAX,
-    },
-
-    .token_fn = url_lex_token,
-    .char_fn = NULL,
-    .end_fn = url_lex_end,
+    }
 };
 
 int url_parse (struct url *url, const char *text) {
--- a/src/lib/url.h	Tue Oct 07 20:31:35 2008 +0300
+++ b/src/lib/url.h	Wed Oct 08 22:05:13 2008 +0300
@@ -12,6 +12,8 @@
  *  
  */
 
+#include <sys/types.h>
+
 /*
  * The schema
  */
@@ -28,7 +30,7 @@
     struct url_opt {
         const char *key;
         const char *value;
-    } *list;
+    } **list;
 };
 
 /*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/url_test.c	Wed Oct 08 22:05:13 2008 +0300
@@ -0,0 +1,192 @@
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lib/url.h"
+
+#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); return -1; } while (0)
+
+
+struct url_test {
+    const char *url;                // the input text handed to url_parse()
+    const struct url expected;      // the parse result that cmp_url() checks against
+} url_tests[] = {
+    {   "localhost:http",   {
+        NULL, NULL, NULL, "localhost", "http", NULL, NULL   // presumably schema, username, password, hostname, service, path, opts — confirm against struct url
+    } },
+
+/*    {   "http://example.com/path",  {
+        { 1, { "http" } }, NULL, NULL, "example.com", NULL, "path", NULL 
+    } }, */
+    
+    {   NULL,               {   } },    // terminator: main() stops at the first entry whose url is NULL
+};
+
+// Compare one string field: returns 0 if both are NULL or both are equal, else prints a FAIL message and returns -1.
+int cmp_url_str (const char *field, const char *test, const char *real) {
+    // a field the test does not expect must be absent from the result
+    if (test == NULL && real != NULL)
+        FAIL("%s: shouldn't be present", field);
+
+    // a field the test expects must be present in the result...
+    if (test != NULL && real == NULL)
+        FAIL("%s: missing", field);
+
+    // ...and carry exactly the expected text
+    if (test != NULL && real != NULL && strcmp(test, real) != 0)
+        FAIL("%s: differs: %s -> %s", field, test, real);
+
+    return 0;
+}
+
+// Compare the expected URL `test` against the parsed URL `real`, field by field.
+// Returns 0 when they match; otherwise prints a FAIL message for the first difference found and returns -1.
+int cmp_url (const struct url *test, const struct url *real) {
+    int i;
+
+    // test schema
+    if (!test->schema) {
+        if (real->schema)
+            FAIL("test has no schema, but real does");
+
+    } else if (!real->schema) {
+        FAIL("test has a schema, but real doesn't");
+
+    } else {
+        // the counts must agree, otherwise the loop below would index past the end of real's list
+        if (test->schema->count != real->schema->count)
+            FAIL("inconsistent scheme count");
+        for (i = 0; i < test->schema->count; i++) {
+            if (strcmp(test->schema->list[i], real->schema->list[i]) != 0)
+                FAIL("differing scheme #%d", i);
+        }
+    }
+
+    // test username
+    if (cmp_url_str("username", test->username, real->username))
+        goto error;
+
+    // test password
+    if (cmp_url_str("password", test->password, real->password))
+        goto error;
+
+    // test hostname
+    if (cmp_url_str("hostname", test->hostname, real->hostname))
+        goto error;
+
+    // test service
+    if (cmp_url_str("service", test->service, real->service))
+        goto error;
+
+    // test path
+    if (cmp_url_str("path", test->path, real->path))
+        goto error;
+
+    // test query
+    if (!test->opts) {
+        if (real->opts)
+            FAIL("test has no opts, but real does");
+
+    } else if (!real->opts) {
+        FAIL("test has opts, but real doesn't");
+
+    } else {
+        // the counts must agree, otherwise the loop below would index past the end of real's list
+        if (test->opts->count != real->opts->count)
+            FAIL("inconsistent opts count");
+        for (i = 0; i < test->opts->count; i++) {
+            if (strcmp(test->opts->list[i]->key, real->opts->list[i]->key) != 0)
+                FAIL("differing opt key #%d", i);
+            if (strcmp(test->opts->list[i]->value, real->opts->list[i]->value) != 0)
+                FAIL("differing opt value #%d", i);
+        }
+    }
+
+    // ok
+    return 0;
+error:
+    return -1;
+}
+
+void print_url_part (const char *field, const char *val) {
+    // absent (NULL) parts are skipped; present ones are printed to stdout as "field=val "
+    if (val != NULL)
+        printf("%s=%s ", field, val);
+}
+
+// Print the parts of `url` that are set on one stdout line: schema, then the string parts, then the opts.
+void print_url (const struct url *url) {
+    int idx;
+
+    if (url->schema != NULL) {
+        printf("schema=");
+
+        // scheme components are joined with '+'
+        for (idx = 0; idx < url->schema->count; idx++) {
+            if (idx != 0)
+                printf("+");
+            printf("%s", url->schema->list[idx]);
+        }
+        printf(" ");
+    }
+
+    // plain string parts; print_url_part() skips the ones that are NULL
+    print_url_part("username", url->username);
+    print_url_part("password", url->password);
+    print_url_part("hostname", url->hostname);
+    print_url_part("service", url->service);
+    print_url_part("path", url->path);
+
+    if (url->opts != NULL) {
+        printf("opts: ");
+
+        for (idx = 0; idx < url->opts->count; idx++)
+            printf("%s=%s ", url->opts->list[idx]->key, url->opts->list[idx]->value);
+    }
+
+    printf("\n");
+}
+
+void usage (const char *exec_name) {
+    // write the usage text to stdout, then terminate the process with a failure status
+    fprintf(stdout, "Usage: %s\n\n\tNo arguments are accepted\n", exec_name);
+    exit(EXIT_FAILURE);
+}
+
+// Run every entry of url_tests through url_parse() and cmp_url(); the exit status is EXIT_FAILURE if any entry fails.
+int main (int argc, char **argv) {
+    const struct url_test *test;
+    struct url url;
+    int status = EXIT_SUCCESS;
+
+    if (argc > 1)
+        usage(argv[0]);
+
+    // run the tests
+    for (test = url_tests; test->url; test++) {
+        // first output the URL we are handling...
+        printf("%s... ", test->url);
+        fflush(stdout);
+
+        // parse the URL into a zeroed struct
+        memset(&url, 0, sizeof(url));
+        if (url_parse(&url, test->url)) {
+            printf("FATAL: url_parse failed\n");
+            return EXIT_FAILURE;
+        }
+
+        // compare it
+        if (cmp_url(&test->expected, &url)) {
+            status = EXIT_FAILURE;  // keep running the remaining tests, but report the mismatch to the caller
+            printf("\texpected: ");
+            print_url(&test->expected);
+            printf("\tresult:   ");
+            print_url(&url);
+        } else {
+            printf("OK\n\t");
+            print_url(&url);
+        }
+    }
+    return status;
+}