# HG changeset patch # User Tero Marttila # Date 1223492713 -10800 # Node ID a8d183e79ed9706ee2a77f08576a395e5a95e251 # Parent 115067dfba554e6d8bcab914c308a9dfd08ee74f look ma, it compiles\! diff -r 115067dfba55 -r a8d183e79ed9 Makefile --- a/Makefile Tue Oct 07 20:31:35 2008 +0300 +++ b/Makefile Wed Oct 08 22:05:13 2008 +0300 @@ -19,6 +19,7 @@ bin/hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o bin/simple_hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o obj/simple.o bin/evpq_test: obj/evpq.o obj/lib/log.o +bin/url_test: obj/lib/url.o obj/lib/lex.o obj/lib/log.o # computed LDFLAGS = ${LIBRARY_PATHS} ${LIBRARY_LIST} diff -r 115067dfba55 -r a8d183e79ed9 src/lib/lex.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/lex.c Wed Oct 08 22:05:13 2008 +0300 @@ -0,0 +1,7 @@ + +#include "lex.h" + +int lexer (const struct lex *lex, const char *input, void *arg) { + // XXX: implement +} + diff -r 115067dfba55 -r a8d183e79ed9 src/lib/lex.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/lex.h Wed Oct 08 22:05:13 2008 +0300 @@ -0,0 +1,129 @@ +#ifndef LIB_LEXER_H +#define LIB_LEXER_H + +/* + * Simple FSM lexing + * + * The lexer is implemented as a Finite State Machine, consisting for a number of states, which then contain a set of + * transitions, which move the lexer from state to state based on each char of input at a time. + * + * Whenever the state changes, the token callback is triggered with the collected token data. + */ + +#include + +/* + * Transition flags + */ +enum lex_transition_flags { + LEX_TRANS_DEFAULT = 0x01, + LEX_TRANS_FINAL = 0x02, + LEX_TRANS_INVALID = 0x04, +}; + +/* + * A transition from one state to another. + */ +struct lex_transition { + // applies to chars [left, right] + char left, right; + + // flags from lex_transition_flags + char flags; + + // next state to enter + int next_state; +}; + +/* + * State flags + */ +enum lex_state_flags { + LEX_STATE_END = 0x01, +}; + +/* + * A state + */ +struct lex_state { + // the state name (for debugging) + const char *name; + + // flags from lex_state_flags + char flags; + + // list of transitions for this state, terminated by a transition with next_state=0 + struct lex_transition trans_list[15]; +}; + +/* + * Special tokens + */ + +// shows up in token_fn as the value of next_token when this_token is the last token. +#define LEX_EOF 0 + +/* + * Lex machine + */ +struct lex { + /* + * Core token handler. Everytime a full token is lexed (i.e. the state changes), this will be called. + * `this_token` represents the full token that was parsed, and `token_data` is the token's value. `next_token` + * is the state that terminated this token, and `prev_token` was the token before this one. + * + * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Thence, it can be + * modified, as its contents will be replaced by the next token. Hence, if you need to keep hold of it, copy it. + * + * Return zero to have lexing continue, nonzero to stop lexing. + */ + int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg); + + /* + * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to. + * + * Return zero to have lexing continue, nonzero to stop lexing. + */ + int (*char_fn) (int this_token, char token_char, void *arg); + + /* + * Called when the end of input has been reached, `last_token` is the state that we terminated in. + * + * Return zero to indiciate that the input was valid, nonzero to indicate an error. + */ + int (*end_fn) (int last_token, void *arg); + + // number of states + size_t state_count; + + // array of lex_states, indexable by the state id. + struct lex_state state_list[]; +}; + +/* + * Helper macros for building the state_list + */ +#define LEX_STATE(enum_val) { #enum_val, 0, +#define LEX_STATE_END(enum_val) { #enum_val, LEX_STATE_END, + + #define LEX_CHAR(c, to) { c, c, 0, to } + #define LEX_RANGE(l, r, to) { l, r, 0, to } + #define LEX_ALPHA(to) LEX_RANGE('a', 'z', to), LEX_RANGE('A', 'Z', to) + #define LEX_NUMBER(to) LEX_RANGE('0', '9', to) + #define LEX_ALNUM(to) LEX_ALPHA(to), LEX_NUMBER(to), LEX_CHAR('-', to), LEX_CHAR('_', to) + #define LEX_WHITESPACE(to) LEX_CHAR(' ', to), LEX_CHAR('\n', to), LEX_CHAR('\t', to) + #define LEX_INVALID(c) { c, c, LEX_TRANS_INVALID, 0 } + + #define LEX_DEFAULT(to) { 0, 0, LEX_TRANS_DEFAULT, to } \ + } + #define LEX_END { 0, 0, 0, 0 } \ + } + +/* + * Lex it! + * + * Return zero to indiciate that the input was valid, nonzero otherwise. + */ +int lexer (const struct lex *lex, const char *input, void *arg); + +#endif /* LIB_LEXER_H */ diff -r 115067dfba55 -r a8d183e79ed9 src/lib/url.c --- a/src/lib/url.c Tue Oct 07 20:31:35 2008 +0300 +++ b/src/lib/url.c Wed Oct 08 22:05:13 2008 +0300 @@ -1,6 +1,11 @@ +#define _GNU_SOURCE +#include +#include #include "url.h" -#include "lexer.h" +#include "lex.h" +#include "error.h" +#include "misc.h" enum url_token { URL_INVALID, @@ -41,8 +46,6 @@ URL_OPT_VAL, URL_OPT_SEP, - URL_END, - URL_MAX, }; @@ -50,24 +53,225 @@ * Parser state */ struct url_state { + // the URL to parse into struct url *url; + + // our lookahead-kludge + const char *alnum, *alnum2; + +}; +static int _url_append_scheme (struct url *url, const char *data) { + +} -}; +static int _url_append_opt_key (struct url *url, const char *key) { + +} + +static int _url_append_opt_val (struct url *url, const char *value) { + +} static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) { enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token; struct url_state *state = arg; - -} + const char **copy_to = NULL; -static int url_lex_end (int _last_token, void *arg) { - enum url_token last_token = _last_token; - struct url_state *state = arg; + (void) prev_token; + + switch (this_token) { + case URL_BEGIN_ALNUM: + switch (next_token) { + case URL_SCHEME_SEP: + // store the scheme + if (_url_append_scheme(state->url, token_data)) + goto error; + + break; + + case URL_USERNAME_END: + // store the username + copy_to = &state->url->username; break; + + case URL_PATH_START: + case URL_OPT_START: + case LEX_EOF: + // store the hostname + copy_to = &state->url->hostname; break; + case URL_BEGIN_COLON: + // gah... + copy_to = &state->alnum; break; + + + default: + FATAL("weird next token"); + } + + break; + + case URL_BEGIN_COLON: + switch (next_token) { + case URL_SCHEME_END_SLASH1: + // store the schema + if (_url_append_scheme(state->url, token_data)) + goto error; + + break; + + case URL_USERHOST_ALNUM2: + // gah.. + break; + + default: + FATAL("weird next token"); + } + + break; + + case URL_SCHEME: + // store the scheme + if (_url_append_scheme(state->url, token_data)) + goto error; + + break; + + case URL_SCHEME_SEP: + // ignore + break; + + case URL_SCHEME_END_COL: + case URL_SCHEME_END_SLASH1: + case URL_SCHEME_END_SLASH2: + // ignore + break; + + case URL_USERHOST_ALNUM: + switch (next_token) { + case URL_USERNAME_END: + // store the username + copy_to = &state->url->username; break; + + case URL_PATH_START: + case URL_OPT_START: + case LEX_EOF: + // store the hostname + copy_to = &state->url->hostname; break; + + case URL_USERHOST_COLON: + // gah... + copy_to = &state->alnum; break; + + default: + FATAL("weird next token"); + } + + break; + + case URL_USERHOST_COLON: + // ignore + break; + + case URL_USERHOST_ALNUM2: + switch (next_token) { + case URL_USERNAME_END: + // store the username and password + state->url->username = state->alnum; state->alnum = NULL; + copy_to = &state->url->password; + + break; + + case URL_PATH_START: + case URL_OPT_START: + case LEX_EOF: + // store the service + copy_to = &state->url->service; break; + + default: + FATAL("weird next token"); + } + + break; + + case URL_USERNAME: + case URL_PASSWORD_SEP: + case URL_PASSWORD: + FATAL("these should be overshadowed"); + + case URL_USERNAME_END: + // ignore + break; + + case URL_HOSTNAME: + // store + copy_to = &state->url->hostname; break; + + case URL_SERVICE_SEP: + // ignore + break; + + case URL_SERVICE: + // store + copy_to = &state->url->service; break; + + case URL_PATH_START: + // ignore + break; + + case URL_PATH: + // store + copy_to = &state->url->path; break; + + case URL_OPT_START: + // ignore + break; + + case URL_OPT_KEY: + // store + if (_url_append_opt_key(state->url, token_data)) + goto error; + + break; + + case URL_OPT_EQ: + // ignore + break; + + case URL_OPT_VAL: + // store + if (_url_append_opt_val(state->url, token_data)) + goto error; + + break; + + case URL_OPT_SEP: + // ignore + break; + + default: + FATAL("invalid token"); + } + + if (copy_to) { + // copy the token data + if ((*copy_to = strdup(token_data)) == NULL) + ERROR("strdup"); + } + + // good + return 0; + +error: + // XXX: error codes? + return -1; } static struct lex url_lex = { + .token_fn = url_lex_token, + .char_fn = NULL, + .end_fn = NULL, + .state_count = URL_MAX, .state_list = { LEX_STATE ( URL_BEGIN ) { @@ -135,7 +339,7 @@ LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME LEX_END - } + }, // this can be URL_USERNAME_END or URL_SERVICE_SEP LEX_STATE ( URL_USERHOST_COLON ) { @@ -212,7 +416,7 @@ LEX_STATE_END ( URL_OPT_START ) { LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_CHAR ( '=', URL_ERROR ), + LEX_INVALID ( '=' ), LEX_DEFAULT ( URL_OPT_KEY ), }, @@ -224,30 +428,26 @@ LEX_STATE_END ( URL_OPT_EQ ) { LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_INVALID ( '=' ), LEX_DEFAULT ( URL_OPT_VAL ), }, LEX_STATE_END ( URL_OPT_VAL ) { LEX_CHAR ( '&', URL_OPT_SEP ), + LEX_INVALID ( '=' ), LEX_DEFAULT ( URL_OPT_VAL ), }, LEX_STATE_END ( URL_OPT_SEP ) { LEX_CHAR ( '&', URL_OPT_SEP ), - LEX_CHAR ( '=', URL_ERROR ), + LEX_INVALID ( '=' ), LEX_DEFAULT ( URL_OPT_KEY ), }, LEX_STATE ( URL_ERROR ) { LEX_END }, - - URL_MAX, - }, - - .token_fn = url_lex_token, - .char_fn = NULL, - .end_fn = url_lex_end, + } }; int url_parse (struct url *url, const char *text) { diff -r 115067dfba55 -r a8d183e79ed9 src/lib/url.h --- a/src/lib/url.h Tue Oct 07 20:31:35 2008 +0300 +++ b/src/lib/url.h Wed Oct 08 22:05:13 2008 +0300 @@ -12,6 +12,8 @@ * */ +#include + /* * The schema */ @@ -28,7 +30,7 @@ struct url_opt { const char *key; const char *value; - } *list; + } **list; }; /* diff -r 115067dfba55 -r a8d183e79ed9 src/url_test.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/url_test.c Wed Oct 08 22:05:13 2008 +0300 @@ -0,0 +1,192 @@ + +#include +#include +#include + +#include "lib/url.h" + +#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); return -1; } while (0) + + +struct url_test { + const char *url; + const struct url expected; +} url_tests[] = { + { "localhost:http", { + NULL, NULL, NULL, "localhost", "http", NULL, NULL + } }, + +/* { "http://example.com/path", { + { 1, { "http" } }, NULL, NULL, "example.com", NULL, "path", NULL + } }, */ + + { NULL, { } }, +}; + +int cmp_url_str (const char *field, const char *test, const char *real) { + if (!test) { + if (real) + FAIL("%s: shouldn't be present", field); + + } else if (!real) { + FAIL("%s: missing", field); + + } else { + if (strcmp(test, real) != 0) + FAIL("%s: differs: %s -> %s", field, test, real); + } + + // ok + return 0; +} + +int cmp_url (const struct url *test, const struct url *real) { + int i; + + // test schema + if (!test->schema) { + if (real->schema) + FAIL("test has no schema, but real does"); + + } else if (!real->schema) { + FAIL("test has a schema, but real doesn't"); + + } else { + if (test->schema->count != test->schema->count) + FAIL("inconsistent scheme count"); + + for (i = 0; i < test->schema->count; i++) { + if (strcmp(test->schema->list[i], real->schema->list[i]) != 0) + FAIL("differing scheme #%d", i); + } + } + + // test username + if (cmp_url_str("username", test->username, real->username)) + goto error; + + // test password + if (cmp_url_str("password", test->password, real->password)) + goto error; + + // test hostname + if (cmp_url_str("hostname", test->hostname, real->hostname)) + goto error; + + // test service + if (cmp_url_str("service", test->service, real->service)) + goto error; + + // test path + if (cmp_url_str("path", test->path, real->path)) + goto error; + + // test query + if (!test->opts) { + if (real->opts) + FAIL("test has no opts, but real does"); + + } else if (!real->opts) { + FAIL("test has opts, but real doesn't"); + + } else { + if (test->opts->count != test->opts->count) + FAIL("inconsistent opts count"); + + for (i = 0; i < test->opts->count; i++) { + if (strcmp(test->opts->list[i]->key, real->opts->list[i]->key) != 0) + FAIL("differing scheme key #%d", i); + + if (strcmp(test->opts->list[i]->value, real->opts->list[i]->value) != 0) + FAIL("differing scheme value #%d", i); + } + } + + // ok + return 0; + +error: + return -1; +} + +void print_url_part (const char *field, const char *val) { + if (val) { + printf("%s=%s ", field, val); + } +} + +void print_url (const struct url *url) { + int i; + + if (url->schema) { + printf("schema="); + + for (i = 0; i < url->schema->count; i++) { + if (i > 0) + printf("+"); + + printf("%s", url->schema->list[i]); + } + + printf(" "); + } + + print_url_part("username", url->username); + print_url_part("password", url->password); + print_url_part("hostname", url->hostname); + print_url_part("service", url->service); + print_url_part("path", url->path); + + if (url->opts) { + printf("opts: "); + + for (i = 0; i < url->opts->count; i++) { + printf("%s=%s ", url->opts->list[i]->key, url->opts->list[i]->value); + } + } + + printf("\n"); +} + +void usage (const char *exec_name) { + printf("Usage: %s\n\n\tNo arguments are accepted\n", exec_name); + + exit(EXIT_FAILURE); +} + +int main (int argc, char **argv) { + const struct url_test *test; + struct url url; + + if (argc > 1) + usage(argv[0]); + + // run the tests + for (test = url_tests; test->url; test++) { + // first output the URL we are handling... + printf("%s... ", test->url); + fflush(stdout); + + // parse the URL + memset(&url, 0, sizeof(url)); + + if (url_parse(&url, test->url)) { + printf("FATAL: url_parse failed\n"); + return EXIT_FAILURE; + } + + // compare it + if (cmp_url(&test->expected, &url)) { + printf("\texpected: "); + print_url(&test->expected); + + printf("\tresult: "); + print_url(&url); + + } else { + printf("OK\n\t"); + print_url(&url); + } + } +} +