--- a/Makefile Tue Oct 07 20:31:35 2008 +0300
+++ b/Makefile Wed Oct 08 22:05:13 2008 +0300
@@ -19,6 +19,7 @@
bin/hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o
bin/simple_hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o obj/simple.o
bin/evpq_test: obj/evpq.o obj/lib/log.o
+bin/url_test: obj/lib/url.o obj/lib/lex.o obj/lib/log.o
# computed
LDFLAGS = ${LIBRARY_PATHS} ${LIBRARY_LIST}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/lex.c Wed Oct 08 22:05:13 2008 +0300
@@ -0,0 +1,7 @@
+
+#include "lex.h"
+
+int lexer (const struct lex *lex, const char *input, void *arg) {
+ return -1; // XXX: implement -- answer nonzero ("input not valid") until the FSM walk exists, instead of falling off the end
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/lex.h Wed Oct 08 22:05:13 2008 +0300
@@ -0,0 +1,129 @@
+#ifndef LIB_LEXER_H
+#define LIB_LEXER_H
+
+/*
+ * Simple FSM lexing
+ *
+ * The lexer is implemented as a Finite State Machine, consisting of a number of states, each of which contains a set of
+ * transitions that move the lexer from state to state, one char of input at a time.
+ *
+ * Whenever the state changes, the token callback is triggered with the collected token data.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Transition flags (stored in lex_transition.flags)
+ */
+enum lex_transition_flags {
+ LEX_TRANS_DEFAULT = 0x01, // set on the entry emitted by LEX_DEFAULT()
+ LEX_TRANS_FINAL = 0x02, // not set by any helper macro in this header
+ LEX_TRANS_INVALID = 0x04, // set on the entry emitted by LEX_INVALID()
+};
+
+/*
+ * A transition from one state to another.
+ */
+struct lex_transition {
+ // applies to chars [left, right]; LEX_CHAR() sets both ends to the same char
+ char left, right;
+
+ // flags from lex_transition_flags
+ char flags;
+
+ // next state to enter (a state id, i.e. an index into lex.state_list)
+ int next_state;
+};
+
+/*
+ * State flags (stored in lex_state.flags)
+ */
+enum lex_state_flags {
+ LEX_STATE_END = 0x01, // set by the LEX_STATE_END() helper macro, which shares this name
+};
+
+/*
+ * A state
+ */
+struct lex_state {
+ // the state name (for debugging); the LEX_STATE*() macros fill it with the stringified enum constant
+ const char *name;
+
+ // flags from lex_state_flags
+ char flags;
+
+ // list of transitions for this state, terminated by a transition with next_state=0; entries left out of an initializer are zero-filled
+ struct lex_transition trans_list[15];
+};
+
+/*
+ * Special tokens
+ */
+
+// shows up in token_fn as the value of next_token when this_token is the last token (note: numerically the same as state id 0).
+#define LEX_EOF 0
+
+/*
+ * Lex machine
+ */
+struct lex {
+ /*
+ * Core token handler. Every time a full token is lexed (i.e. the state changes), this will be called.
+ * `this_token` represents the full token that was parsed, and `token_data` is the token's value. `next_token`
+ * is the state that terminated this token, and `prev_token` was the token before this one.
+ *
+ * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Hence, it can be
+ * modified, but its contents will be replaced by the next token. So, if you need to keep hold of it, copy it.
+ *
+ * Return zero to have lexing continue, nonzero to stop lexing.
+ */
+ int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg);
+
+ /*
+ * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to.
+ *
+ * Return zero to have lexing continue, nonzero to stop lexing.
+ */
+ int (*char_fn) (int this_token, char token_char, void *arg);
+
+ /*
+ * Called when the end of input has been reached, `last_token` is the state that we terminated in.
+ *
+ * Return zero to indicate that the input was valid, nonzero to indicate an error.
+ */
+ int (*end_fn) (int last_token, void *arg);
+
+ // number of states
+ size_t state_count;
+
+ // array of lex_states, indexable by the state id.
+ struct lex_state state_list[];
+};
+
+/*
+ * Helper macros for building the state_list: LEX_STATE*() emit a state's opening brace, name and flags (the `{` written after them opens trans_list); LEX_END / LEX_DEFAULT() emit the last transition plus the `}` that closes trans_list.
+ */
+#define LEX_STATE(enum_val) { #enum_val, 0,
+#define LEX_STATE_END(enum_val) { #enum_val, LEX_STATE_END,
+
+ #define LEX_CHAR(c, to) { c, c, 0, to }
+ #define LEX_RANGE(l, r, to) { l, r, 0, to }
+ #define LEX_ALPHA(to) LEX_RANGE('a', 'z', to), LEX_RANGE('A', 'Z', to)
+ #define LEX_NUMBER(to) LEX_RANGE('0', '9', to)
+ #define LEX_ALNUM(to) LEX_ALPHA(to), LEX_NUMBER(to), LEX_CHAR('-', to), LEX_CHAR('_', to)
+ #define LEX_WHITESPACE(to) LEX_CHAR(' ', to), LEX_CHAR('\n', to), LEX_CHAR('\t', to)
+ #define LEX_INVALID(c) { c, c, LEX_TRANS_INVALID, 0 } // NOTE(review): next_state is 0 here, like the documented list terminator -- presumably the lexer must look at flags first; confirm
+
+ #define LEX_DEFAULT(to) { 0, 0, LEX_TRANS_DEFAULT, to } \
+ }
+ #define LEX_END { 0, 0, 0, 0 } \
+ }
+
+/*
+ * Lex it! Runs the machine `lex` over `input`; `arg` is the pointer handed to the callbacks as their `arg`.
+ *
+ * Return zero to indicate that the input was valid, nonzero otherwise.
+ */
+int lexer (const struct lex *lex, const char *input, void *arg);
+
+#endif /* LIB_LEXER_H */
--- a/src/lib/url.c Tue Oct 07 20:31:35 2008 +0300
+++ b/src/lib/url.c Wed Oct 08 22:05:13 2008 +0300
@@ -1,6 +1,11 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
#include "url.h"
-#include "lexer.h"
+#include "lex.h"
+#include "error.h"
+#include "misc.h"
enum url_token {
URL_INVALID,
@@ -41,8 +46,6 @@
URL_OPT_VAL,
URL_OPT_SEP,
- URL_END,
-
URL_MAX,
};
@@ -50,24 +53,225 @@
* Parser state
*/
struct url_state {
+ // the URL to parse into
struct url *url;
+
+ // our lookahead-kludge: an alnum token whose meaning depends on a later token is parked in `alnum` until then
+ const char *alnum, *alnum2; // alnum2 is not used by any code visible in this hunk
+
+};
+static int _url_append_scheme (struct url *url, const char *data) {
+ return -1; // XXX: implement -- report failure until then, so the caller never tests an indeterminate return value
+}
-};
+static int _url_append_opt_key (struct url *url, const char *key) {
+ return -1; // XXX: implement -- report failure until then, so the caller never tests an indeterminate return value
+}
+
+static int _url_append_opt_val (struct url *url, const char *value) {
+ return -1; // XXX: implement -- report failure until then, so the caller never tests an indeterminate return value
+}
static int url_lex_token (int _this_token, char *token_data, int _next_token, int _prev_token, void *arg) {
enum url_token this_token = _this_token, next_token = _next_token, prev_token = _prev_token;
struct url_state *state = arg;
-
-}
+ const char **copy_to = NULL; // the field that receives a strdup()'d copy of token_data, if any
-static int url_lex_end (int _last_token, void *arg) {
- enum url_token last_token = _last_token;
- struct url_state *state = arg;
+ (void) prev_token;
+ // lex token_fn: files token_data under the URL field that (this_token, next_token) identify; 0 = keep lexing, -1 = stop
+ switch (this_token) {
+ case URL_BEGIN_ALNUM:
+ switch (next_token) {
+ case URL_SCHEME_SEP:
+ // store the scheme
+ if (_url_append_scheme(state->url, token_data))
+ goto error;
+
+ break;
+
+ case URL_USERNAME_END:
+ // store the username
+ copy_to = &state->url->username; break;
+
+ case URL_PATH_START:
+ case URL_OPT_START:
+ case LEX_EOF:
+ // store the hostname
+ copy_to = &state->url->hostname; break;
+ case URL_BEGIN_COLON:
+ // gah... this may still be a scheme, a username or a hostname; park it until a later token tells
+ copy_to = &state->alnum; break;
+
+
+ default:
+ FATAL("weird next token");
+ }
+
+ break;
+
+ case URL_BEGIN_COLON:
+ switch (next_token) {
+ case URL_SCHEME_END_SLASH1:
+ // store the scheme
+ if (_url_append_scheme(state->url, token_data))
+ goto error;
+
+ break;
+
+ case URL_USERHOST_ALNUM2:
+ // gah.. still ambiguous; the parked alnum is filed once the ALNUM2 token ends
+ break;
+
+ default:
+ FATAL("weird next token");
+ }
+
+ break;
+
+ case URL_SCHEME:
+ // store the scheme
+ if (_url_append_scheme(state->url, token_data))
+ goto error;
+
+ break;
+
+ case URL_SCHEME_SEP:
+ // ignore
+ break;
+
+ case URL_SCHEME_END_COL:
+ case URL_SCHEME_END_SLASH1:
+ case URL_SCHEME_END_SLASH2:
+ // ignore
+ break;
+
+ case URL_USERHOST_ALNUM:
+ switch (next_token) {
+ case URL_USERNAME_END:
+ // store the username
+ copy_to = &state->url->username; break;
+
+ case URL_PATH_START:
+ case URL_OPT_START:
+ case LEX_EOF:
+ // store the hostname
+ copy_to = &state->url->hostname; break;
+
+ case URL_USERHOST_COLON:
+ // gah... username or hostname, depending on what follows the colon; park it
+ copy_to = &state->alnum; break;
+
+ default:
+ FATAL("weird next token");
+ }
+
+ break;
+
+ case URL_USERHOST_COLON:
+ // ignore
+ break;
+
+ case URL_USERHOST_ALNUM2:
+ switch (next_token) {
+ case URL_USERNAME_END:
+ // store the username and password
+ state->url->username = state->alnum; state->alnum = NULL;
+ copy_to = &state->url->password;
+
+ break;
+
+ case URL_PATH_START:
+ case URL_OPT_START:
+ case LEX_EOF:
+ // the parked alnum turned out to be the hostname; store it together with the service
+ state->url->hostname = state->alnum; state->alnum = NULL; copy_to = &state->url->service; break;
+
+ default:
+ FATAL("weird next token");
+ }
+
+ break;
+
+ case URL_USERNAME:
+ case URL_PASSWORD_SEP:
+ case URL_PASSWORD:
+ FATAL("these should be overshadowed");
+
+ case URL_USERNAME_END:
+ // ignore
+ break;
+
+ case URL_HOSTNAME:
+ // store
+ copy_to = &state->url->hostname; break;
+
+ case URL_SERVICE_SEP:
+ // ignore
+ break;
+
+ case URL_SERVICE:
+ // store
+ copy_to = &state->url->service; break;
+
+ case URL_PATH_START:
+ // ignore
+ break;
+
+ case URL_PATH:
+ // store
+ copy_to = &state->url->path; break;
+
+ case URL_OPT_START:
+ // ignore
+ break;
+
+ case URL_OPT_KEY:
+ // store
+ if (_url_append_opt_key(state->url, token_data))
+ goto error;
+
+ break;
+
+ case URL_OPT_EQ:
+ // ignore
+ break;
+
+ case URL_OPT_VAL:
+ // store
+ if (_url_append_opt_val(state->url, token_data))
+ goto error;
+
+ break;
+
+ case URL_OPT_SEP:
+ // ignore
+ break;
+
+ default:
+ FATAL("invalid token");
+ }
+
+ if (copy_to) {
+ // copy the token data (the lexer reuses its token buffer, so the pointer itself cannot be kept)
+ if ((*copy_to = strdup(token_data)) == NULL)
+ ERROR("strdup");
+ }
+
+ // good
+ return 0;
+
+error:
+ // XXX: error codes?
+ return -1;
}
static struct lex url_lex = {
+ .token_fn = url_lex_token,
+ .char_fn = NULL,
+ .end_fn = NULL,
+
.state_count = URL_MAX,
.state_list = {
LEX_STATE ( URL_BEGIN ) {
@@ -135,7 +339,7 @@
LEX_CHAR ( '/', URL_PATH_START ), // it was URL_HOSTNAME
LEX_CHAR ( '?', URL_OPT_START ), // it was URL_HOSTNAME
LEX_END
- }
+ },
// this can be URL_USERNAME_END or URL_SERVICE_SEP
LEX_STATE ( URL_USERHOST_COLON ) {
@@ -212,7 +416,7 @@
LEX_STATE_END ( URL_OPT_START ) {
LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_CHAR ( '=', URL_ERROR ),
+ LEX_INVALID ( '=' ),
LEX_DEFAULT ( URL_OPT_KEY ),
},
@@ -224,30 +428,26 @@
LEX_STATE_END ( URL_OPT_EQ ) {
LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_INVALID ( '=' ),
LEX_DEFAULT ( URL_OPT_VAL ),
},
LEX_STATE_END ( URL_OPT_VAL ) {
LEX_CHAR ( '&', URL_OPT_SEP ),
+ LEX_INVALID ( '=' ),
LEX_DEFAULT ( URL_OPT_VAL ),
},
LEX_STATE_END ( URL_OPT_SEP ) {
LEX_CHAR ( '&', URL_OPT_SEP ),
- LEX_CHAR ( '=', URL_ERROR ),
+ LEX_INVALID ( '=' ),
LEX_DEFAULT ( URL_OPT_KEY ),
},
LEX_STATE ( URL_ERROR ) {
LEX_END
},
-
- URL_MAX,
- },
-
- .token_fn = url_lex_token,
- .char_fn = NULL,
- .end_fn = url_lex_end,
+ }
};
int url_parse (struct url *url, const char *text) {
--- a/src/lib/url.h Tue Oct 07 20:31:35 2008 +0300
+++ b/src/lib/url.h Wed Oct 08 22:05:13 2008 +0300
@@ -12,6 +12,8 @@
*
*/
+#include <sys/types.h>
+
/*
* The schema
*/
@@ -28,7 +30,7 @@
struct url_opt {
const char *key;
const char *value;
- } *list;
+ } **list;
};
/*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/url_test.c Wed Oct 08 22:05:13 2008 +0300
@@ -0,0 +1,192 @@
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lib/url.h"
+// Print a printf-style failure message on its own line, then make the *calling* function return -1.
+#define FAIL(...) do { printf("FAIL: "); printf(__VA_ARGS__); printf("\n"); return -1; } while (0)
+
+
+struct url_test {
+ const char *url; // the URL text handed to url_parse(); NULL marks the end of url_tests
+ const struct url expected; // the parse result that cmp_url() checks the parsed URL against
+} url_tests[] = {
+ { "localhost:http", {
+ NULL, NULL, NULL, "localhost", "http", NULL, NULL
+ } },
+
+/* { "http://example.com/path", {
+ { 1, { "http" } }, NULL, NULL, "example.com", NULL, "path", NULL
+ } }, */
+
+ { NULL, { } }, // end-of-list sentinel: main() stops at the first entry whose url is NULL
+};
+
+int cmp_url_str (const char *field, const char *test, const char *real) {
+ // Compare one optional string field of the expected URL (`test`) with the parsed one (`real`).
+ // The first mismatch is reported through FAIL(), which returns -1; 0 means both sides agree.
+
+ if (test == NULL && real != NULL)
+ FAIL("%s: shouldn't be present", field);
+
+ if (test != NULL && real == NULL)
+ FAIL("%s: missing", field);
+
+ if (test != NULL && real != NULL && strcmp(test, real) != 0)
+ FAIL("%s: differs: %s -> %s", field, test, real);
+
+ // ok
+ return 0;
+}
+
+int cmp_url (const struct url *test, const struct url *real) {
+ int i;
+ // Field-by-field comparison of the expected URL (test) with the parsed one (real): 0 if equal, -1 at the first mismatch.
+ // test schema
+ if (!test->schema) {
+ if (real->schema)
+ FAIL("test has no schema, but real does");
+
+ } else if (!real->schema) {
+ FAIL("test has a schema, but real doesn't");
+
+ } else {
+ if (test->schema->count != real->schema->count)
+ FAIL("inconsistent scheme count");
+
+ for (i = 0; i < test->schema->count; i++) {
+ if (strcmp(test->schema->list[i], real->schema->list[i]) != 0)
+ FAIL("differing scheme #%d", i);
+ }
+ }
+
+ // test username
+ if (cmp_url_str("username", test->username, real->username))
+ goto error;
+
+ // test password
+ if (cmp_url_str("password", test->password, real->password))
+ goto error;
+
+ // test hostname
+ if (cmp_url_str("hostname", test->hostname, real->hostname))
+ goto error;
+
+ // test service
+ if (cmp_url_str("service", test->service, real->service))
+ goto error;
+
+ // test path
+ if (cmp_url_str("path", test->path, real->path))
+ goto error;
+
+ // test query
+ if (!test->opts) {
+ if (real->opts)
+ FAIL("test has no opts, but real does");
+
+ } else if (!real->opts) {
+ FAIL("test has opts, but real doesn't");
+
+ } else {
+ if (test->opts->count != real->opts->count)
+ FAIL("inconsistent opts count");
+
+ for (i = 0; i < test->opts->count; i++) {
+ if (strcmp(test->opts->list[i]->key, real->opts->list[i]->key) != 0)
+ FAIL("differing opt key #%d", i);
+
+ if (strcmp(test->opts->list[i]->value, real->opts->list[i]->value) != 0)
+ FAIL("differing opt value #%d", i);
+ }
+ }
+
+ // ok
+ return 0;
+
+error:
+ return -1;
+}
+
+void print_url_part (const char *field, const char *val) {
+ // unset (NULL) parts are left out; present ones are written to stdout as "field=value "
+ if (val != NULL)
+ printf("%s=%s ", field, val);
+}
+
+void print_url (const struct url *url) {
+ int idx;
+ // Writes the parts of the URL that are present to stdout on one line, followed by a newline.
+ if (url->schema) {
+ fputs("schema=", stdout);
+ // the individual schemes are joined with '+'
+ for (idx = 0; idx < url->schema->count; idx++) {
+ // every scheme after the first one is preceded by the separator
+ const char *sep = (idx > 0) ? "+" : "";
+
+ printf("%s%s", sep, url->schema->list[idx]);
+ }
+
+ putchar(' ');
+ }
+
+ print_url_part("username", url->username);
+ print_url_part("password", url->password);
+ print_url_part("hostname", url->hostname);
+ print_url_part("service", url->service);
+ print_url_part("path", url->path);
+
+ if (url->opts) {
+ fputs("opts: ", stdout);
+
+ for (idx = 0; idx < url->opts->count; idx++) {
+ printf("%s=%s ", url->opts->list[idx]->key, url->opts->list[idx]->value);
+ }
+ }
+
+ putchar('\n');
+}
+
+void usage (const char *exec_name) {
+ printf("Usage: %s\n\n\tNo arguments are accepted\n", exec_name);
+ // does not return: a wrong invocation ends the process with a failure status
+ exit(EXIT_FAILURE);
+}
+
+int main (int argc, char **argv) {
+ const struct url_test *test;
+ struct url url;
+ int failures = 0; // number of test URLs whose parsed result did not match the expectation
+ if (argc > 1)
+ usage(argv[0]);
+
+ // run the tests; a mismatch is reported and counted, a url_parse failure aborts the run at once
+ for (test = url_tests; test->url; test++) {
+ // first output the URL we are handling...
+ printf("%s... ", test->url);
+ fflush(stdout);
+
+ // parse the URL
+ memset(&url, 0, sizeof(url));
+
+ if (url_parse(&url, test->url)) {
+ printf("FATAL: url_parse failed\n");
+ return EXIT_FAILURE;
+ }
+
+ // compare it
+ if (cmp_url(&test->expected, &url)) {
+ printf("\texpected: ");
+ print_url(&test->expected);
+
+ printf("\tresult: ");
+ print_url(&url);
+ failures++;
+ } else {
+ printf("OK\n\t");
+ print_url(&url);
+ }
+ }
+ return failures ? EXIT_FAILURE : EXIT_SUCCESS; // let make/scripts see that a comparison failed
+}