split off dbfs components into a separate dir, improve dirop docs, error handling, etc
authorTero Marttila <terom@fixme.fi>
Wed, 15 Oct 2008 01:14:22 +0300
changeset 28 e944453ca924
parent 27 461be4cd34a3
child 29 5de62ca9a5aa
split off dbfs components into a separate dir, improve dirop docs, error handling, etc
Makefile
build/deps/dbfs/.empty_dir
obj/dbfs/.empty_dir
src/dbfs.c
src/dbfs.h
src/dbfs/common.c
src/dbfs/common.h
src/dbfs/core.c
src/dbfs/dbfs.c
src/dbfs/dirop.c
src/dbfs/ops.h
src/dirbuf.c
src/evsql.h
src/lib/error.h
src/lib/log.h
src/lib/misc.h
--- a/Makefile	Mon Oct 13 02:27:59 2008 +0300
+++ b/Makefile	Wed Oct 15 01:14:22 2008 +0300
@@ -5,13 +5,23 @@
 INCLUDE_PATHS = -I${LIBEVENT_PATH}/include -I${LIBFUSE_PATH}/include
 LDLIBS = -levent -lfuse -lpq
 
+ifdef DEBUG
+DEBUG_FLAGS = -DDEBUG_ENABLED
+else
+DEBUG_FLAGS = 
+endif
+
 # XXX: ugh... use `pkg-config fuse`
-DEFINES = -D_FILE_OFFSET_BITS=64
+DEFINES = -D_FILE_OFFSET_BITS=64 ${DEBUG_FLAGS}
 MY_CFLAGS = -Wall -g -std=gnu99
 
 BIN_NAMES = helloworld hello simple_hello evpq_test url_test dbfs
 BIN_PATHS = $(addprefix bin/,$(BIN_NAMES))
 
+# complex modules
+EVSQL_OBJS = obj/evsql.o obj/evsql_util.o obj/evpq.o
+DBFS_OBJS = obj/dbfs/dbfs.o obj/dbfs/common.o obj/dbfs/core.o obj/dbfs/dirop.o obj/dirbuf.o
+
 # first target
 all: ${BIN_PATHS}
 
@@ -21,7 +31,7 @@
 bin/simple_hello: obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o obj/simple.o
 bin/evpq_test: obj/evpq.o obj/lib/log.o
 bin/url_test: obj/lib/url.o obj/lib/lex.o obj/lib/log.o
-bin/dbfs: obj/evsql.o obj/evsql_util.o obj/evpq.o obj/evfuse.o obj/dirbuf.o obj/lib/log.o obj/lib/signals.o
+bin/dbfs: ${DBFS_OBJS} ${EVSQL_OBJS} obj/evfuse.o obj/lib/log.o obj/lib/signals.o
 
 # computed
 LDFLAGS = ${LIBRARY_PATHS} ${LIBRARY_LIST}
--- a/src/dbfs.c	Mon Oct 13 02:27:59 2008 +0300
+++ b/src/dbfs.c	Wed Oct 15 01:14:22 2008 +0300
@@ -10,764 +10,54 @@
 
 #include <event2/event.h>
 
+#include "dbfs.h"
 #include "evfuse.h"
 #include "evsql.h"
-#include "dirbuf.h"
 #include "lib/log.h"
 #include "lib/signals.h"
 #include "lib/misc.h"
 
-#define SERROR(val) do { (val); goto error; } while(0)
-
-struct dbfs {
-    struct event_base *ev_base;
-    struct signals *signals;
-    
-    const char *db_conninfo;
-    struct evsql *db;
-
-    struct evfuse *ev_fuse;
-};
-
 #define CONNINFO_DEFAULT "dbname=test"
 
-// XXX: not sure how this should work
-#define CACHE_TIMEOUT 1.0
-
-mode_t _dbfs_mode (const char *type) {
-    if (!strcmp(type, "DIR"))
-        return S_IFDIR;
-
-    if (!strcmp(type, "REG"))
-        return S_IFREG;
-
-    else {
-        WARNING("[dbfs] weird mode-type: %s", type);
-        return 0;
-    }
-}
-
-void dbfs_init (void *userdata, struct fuse_conn_info *conn) {
-    INFO("[dbfs.init] userdata=%p, conn=%p", userdata, conn);
-
-}
-
-void dbfs_destroy (void *arg) {
-    struct dbfs *ctx = arg;
-    INFO("[dbfs.destroy %p]", ctx);
-
-    // exit libevent
-    event_base_loopexit(ctx->ev_base, NULL);
-}
-
-/*
- * Check the result set.
- *
- * Returns;
- *  -1  if the query failed, the columns do not match, or there are too many/few rows (unless rows was zero)
- *  0   the results match
- *  1   there were no results
- */
-int _dbfs_check_res (const struct evsql_result_info *res, size_t rows, size_t cols) {
-    int err = 0;
-
-    // check if it failed
-    if (res->error)
-        NERROR(evsql_result_error(res));
-        
-    // not found?
-    if (evsql_result_rows(res) == 0)
-        SERROR(err = 1);
-
-    // duplicate rows?
-    if (rows && evsql_result_rows(res) != rows)
-        ERROR("wrong number of rows returned");
-    
-    // correct number of columns
-    if (evsql_result_cols(res) != cols)
-        ERROR("wrong number of columns: %zu", evsql_result_cols(res));
-
-    // good
-    return 0;
-
-error:
-    if (!err)
-        err = -1;
-
-    return err;
-}
-
-int _dbfs_stat_info (struct stat *st, const struct evsql_result_info *res, size_t row, size_t col_offset) {
-    int err = 0;
-    
-    uint16_t mode;
-    uint64_t size, nlink;
-    const char *type;
-    
-    // extract the data
-    if (0
-        ||  evsql_result_string(res, row, col_offset + 0, &type,       0 ) // inodes.type
-        ||  evsql_result_uint16(res, row, col_offset + 1, &mode,       0 ) // inodes.mode
-        ||  evsql_result_uint64(res, row, col_offset + 2, &size,       0 ) // inodes.size
-        ||  evsql_result_uint64(res, row, col_offset + 3, &nlink,      0 ) // count(*)
-    )
-        EERROR(err = EIO, "invalid db data");
-
-    INFO("\tst_mode=S_IF%s | %ho, st_nlink=%llu, st_size=%llu", type, mode, (long long unsigned int) nlink, (long long unsigned int) size);
-
-    // convert and store
-    st->st_mode = _dbfs_mode(type) | mode;
-    st->st_nlink = nlink;
-    st->st_size = size;
-    
-    // good
-    return 0;
-
-error:
-    return -1;
-}
-
-void _dbfs_lookup_result (const struct evsql_result_info *res, void *arg) {
-    struct fuse_req *req = arg;
-    struct fuse_entry_param e; ZINIT(e);
-    int err = 0;
-    
-    uint32_t ino;
-    
-    // check the results
-    if ((err = _dbfs_check_res(res, 1, 5)))
-        SERROR(err = (err ==  1 ? ENOENT : EIO));
-    
-    // get the data
-    if (0
-        ||  evsql_result_uint32(res, 0, 0, &ino,        0 ) // inodes.ino
-    )
-        EERROR(err = EIO, "invalid db data");
-        
-    INFO("[dbfs.lookup] -> ino=%u", ino);
-    
-    // stat attrs
-    if (_dbfs_stat_info(&e.attr, res, 0, 1))
-        goto error;
-
-    // other attrs
-    e.ino = e.attr.st_ino = ino;
-    e.attr_timeout = CACHE_TIMEOUT;
-    e.entry_timeout = CACHE_TIMEOUT;
-        
-    // reply
-    if ((err = fuse_reply_entry(req, &e)))
-        EERROR(err, "fuse_reply_entry");
-
-error:
-    if (err && (err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-
-    // free
-    evsql_result_free(res);
-}
-
-void dbfs_lookup (struct fuse_req *req, fuse_ino_t parent, const char *name) {
-    struct dbfs *ctx = fuse_req_userdata(req);
-    int err;
-
-    INFO("[dbfs.lookup] parent=%lu name=%s", parent, name);
-    
-    // query and params
-    const char *sql = 
-        "SELECT"
-        " inodes.ino, inodes.type, inodes.mode, inodes.size, count(*)"
-        " FROM file_tree INNER JOIN inodes ON (file_tree.inode = inodes.ino)"
-        " WHERE file_tree.parent = $1::int4 AND file_tree.name = $2::varchar"
-        " GROUP BY inodes.ino, inodes.type, inodes.mode, inodes.size";
-    
-    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
-        EVSQL_PARAM ( UINT32 ),
-        EVSQL_PARAM ( STRING ),
-
-        EVSQL_PARAMS_END
-    };
-    
-    // build params
-    if (0
-        ||  evsql_param_uint32(&params, 0, parent)
-        ||  evsql_param_string(&params, 1, name)
-    )
-        EERROR(err = EIO, "evsql_param_*");
-
-    // query
-    if (evsql_query_params(ctx->db, NULL, sql, &params, _dbfs_lookup_result, req) == NULL)
-        EERROR(err = EIO, "evsql_query_params");
-
-    // XXX: handle interrupts
-    
-    // wait
-    return;
-
-error:
-    if ((err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-}
-
-void _dbfs_getattr_result (const struct evsql_result_info *res, void *arg) {
-    struct fuse_req *req = arg;
-    struct stat st; ZINIT(st);
-    int err = 0;
-    
-    // check the results
-    if ((err = _dbfs_check_res(res, 1, 4)))
-        SERROR(err = (err ==  1 ? ENOENT : EIO));
-        
-    INFO("[dbfs.getattr %p] -> (stat follows)", req);
-    
-    // stat attrs
-    if (_dbfs_stat_info(&st, res, 0, 0))
-        goto error;
-
-    // XXX: we don't have the ino
-    st.st_ino = 0;
-
-    // reply
-    if ((err = fuse_reply_attr(req, &st, CACHE_TIMEOUT)))
-        EERROR(err, "fuse_reply_entry");
-
-error:
-    if (err && (err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-
-    // free
-    evsql_result_free(res);
-}
-
-static void dbfs_getattr (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
-    struct dbfs *ctx = fuse_req_userdata(req);
-    int err;
-    
-    (void) fi;
-
-    INFO("[dbfs.getattr %p] ino=%lu", req, ino);
-
-    const char *sql =
-        "SELECT"
-        " inodes.type, inodes.mode, inodes.size, count(*)"
-        " FROM inodes"
-        " WHERE inodes.ino = $1::int4"
-        " GROUP BY inodes.type, inodes.mode, inodes.size";
-
-    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
-        EVSQL_PARAM ( UINT32 ),
-
-        EVSQL_PARAMS_END
-    };
-
-    // build params
-    if (0
-        ||  evsql_param_uint32(&params, 0, ino)
-    )
-        SERROR(err = EIO);
-        
-    // query
-    if (evsql_query_params(ctx->db, NULL, sql, &params, _dbfs_getattr_result, req) == NULL)
-        SERROR(err = EIO);
-
-    // XXX: handle interrupts
-    
-    // wait
-    return;
-
-error:
-    if ((err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-}
-
-struct dbfs_dirop {
-    struct fuse_file_info fi;
-    struct fuse_req *req;
-
-    struct evsql_trans *trans;
-    
-    // dir/parent dir inodes
-    uint32_t ino, parent;
-    
-    // opendir has returned and releasedir hasn't been called yet
-    int open;
-
-    // for readdir
-    struct dirbuf dirbuf;
-};
-
-/*
- * Free the dirop, aborting any in-progress transaction.
- *
- * req must be NULL.
- */
-static void dbfs_dirop_free (struct dbfs_dirop *dirop) {
-    assert(dirop);
-    assert(!dirop->open);
-    assert(!dirop->req);
-
-    if (dirop->trans) {
-        WARNING("aborting transaction");
-        evsql_trans_abort(dirop->trans);
-    }
-
-    dirbuf_release(&dirop->dirbuf);
-
-    free(dirop);
-}
-
-static void dbfs_opendir_info_res (const struct evsql_result_info *res, void *arg) {
-    struct dbfs_dirop *dirop = arg;
-    struct fuse_req *req = dirop->req; dirop->req = NULL;
-    int err;
-    
-    assert(req != NULL);
-   
-    // check the results
-    if ((err = _dbfs_check_res(res, 1, 2)))
-        SERROR(err = (err ==  1 ? ENOENT : EIO));
-
-    const char *type;
-
-    // extract the data
-    if (0
-        ||  evsql_result_uint32(res, 0, 0, &dirop->parent,  1 ) // file_tree.parent
-        ||  evsql_result_string(res, 0, 1, &type,           0 ) // inodes.type
-    )
-        SERROR(err = EIO);
-
-    // is it a dir?
-    if (_dbfs_mode(type) != S_IFDIR)
-        EERROR(err = ENOTDIR, "wrong type: %s", type);
-    
-    INFO("[dbfs.opendir %p:%p] -> ino=%lu, parent=%lu, type=%s", dirop, req, (unsigned long int) dirop->ino, (unsigned long int) dirop->parent, type);
-    
-    // send the openddir reply
-    if ((err = fuse_reply_open(req, &dirop->fi)))
-        EERROR(err, "fuse_reply_open");
-    
-    // dirop is now open
-    dirop->open = 1;
-
-    // ok, wait for the opendir call
-    return;
-
-error:
-    if (err) {
-        // abort the trans
-        evsql_trans_abort(dirop->trans);
-        
-        dirop->trans = NULL;
-
-        if ((err = fuse_reply_err(req, err)))
-            EWARNING(err, "fuse_reply_err");
-    }
-    
-    // free
-    evsql_result_free(res);
-}
-
-/*
- * The opendir transaction is ready
- */
-static void dbfs_dirop_ready (struct evsql_trans *trans, void *arg) {
-    struct dbfs_dirop *dirop = arg;
-    struct fuse_req *req = dirop->req;
-    struct dbfs *ctx = fuse_req_userdata(req);
-    int err;
-
-    assert(req != NULL);
-
-    INFO("[dbfs.opendir %p:%p] -> trans=%p", dirop, req, trans);
-
-    // remember the transaction
-    dirop->trans = trans;
-    
-    // first fetch info about the dir itself
-    const char *sql =
-        "SELECT"
-        " file_tree.parent, inodes.type"
-        " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)"
-        " WHERE file_tree.inode = $1::int4";
-
-    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
-        EVSQL_PARAM ( UINT32 ),
-
-        EVSQL_PARAMS_END
-    };
-
-    // build params
-    if (0
-        ||  evsql_param_uint32(&params, 0, dirop->ino)
-    )
-        SERROR(err = EIO);
-        
-    // query
-    if (evsql_query_params(ctx->db, dirop->trans, sql, &params, dbfs_opendir_info_res, dirop) == NULL)
-        SERROR(err = EIO);
-
-    // ok, wait for the info results
-    return;
-
-error:
-    // we handle the req
-    dirop->req = NULL;
-    
-    // free the dirop
-    dbfs_dirop_free(dirop);
-    
-    if ((err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-}
-
-static void dbfs_dirop_done (struct evsql_trans *trans, void *arg) {
-    struct dbfs_dirop *dirop = arg;
-    struct fuse_req *req = dirop->req; dirop->req = NULL;
-    int err;
-    
-    assert(req != NULL);
-
-    INFO("[dbfs.releasedir %p:%p] -> OK", dirop, req);
-
-    // forget trans
-    dirop->trans = NULL;
-    
-    // just reply
-    if ((err = fuse_reply_err(req, 0)))
-        EWARNING(err, "fuse_reply_err");
-    
-    // we can free dirop
-    dbfs_dirop_free(dirop);
-}
-
-static void dbfs_dirop_error (struct evsql_trans *trans, void *arg) {
-    struct dbfs_dirop *dirop = arg;
-    int err;
-
-    INFO("[dbfs:dirop %p:%p] evsql transaction error: %s", dirop, dirop->req, evsql_trans_error(trans));
-    
-    // deassociate the trans
-    dirop->trans = NULL;
-    
-    // error out and pending req
-    if (dirop->req) {
-        if ((err = fuse_reply_err(dirop->req, EIO)))
-            EWARNING(err, "fuse_erply_err");
-
-        dirop->req = NULL;
-
-        // only free the dirop if it isn't open
-        if (!dirop->open)
-            dbfs_dirop_free(dirop);
-    }
-}
-
-static void dbfs_opendir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
-    struct dbfs *ctx = fuse_req_userdata(req);
-    struct dbfs_dirop *dirop = NULL;
-    int err;
-    
-    // allocate it
-    if ((dirop = calloc(1, sizeof(*dirop))) == NULL && (err = EIO))
-        ERROR("calloc");
-
-    INFO("[dbfs.opendir %p:%p] ino=%lu, fi=%p", dirop, req, ino, fi);
-    
-    // store the dirop
-    // copy *fi since it's on the stack
-    dirop->fi = *fi;
-    dirop->fi.fh = (uint64_t) dirop;
-    dirop->req = req;
-    dirop->ino = ino;
-
-    // start a new transaction
-    if ((dirop->trans = evsql_trans(ctx->db, EVSQL_TRANS_SERIALIZABLE, dbfs_dirop_error, dbfs_dirop_ready, dbfs_dirop_done, dirop)) == NULL)
-        SERROR(err = EIO);
-    
-    // XXX: handle interrupts
-    
-    // wait
-    return;
-
-error:
-    // we handle the req
-    dirop->req = NULL;
-
-    dbfs_dirop_free(dirop);
-
-    if ((err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-}
-
-static void dbfs_readdir_files_res (const struct evsql_result_info *res, void *arg) {
-    struct dbfs_dirop *dirop = arg;
-    struct fuse_req *req = dirop->req; dirop->req = NULL;
-    int err;
-    size_t row;
-    
-    assert(req != NULL);
-    
-    // check the results
-    if ((err = _dbfs_check_res(res, 0, 4)) < 0)
-        SERROR(err = EIO);
-        
-    INFO("[dbfs.readdir %p:%p] -> files: res_rows=%zu", dirop, req, evsql_result_rows(res));
-        
-    // iterate over the rows
-    for (row = 0; row < evsql_result_rows(res); row++) {
-        uint32_t off, ino;
-        const char *name, *type;
-
-        // extract the data
-        if (0
-            ||  evsql_result_uint32(res, row, 0, &off,          0 ) // file_tree.offset
-            ||  evsql_result_string(res, row, 1, &name,         0 ) // file_tree.name
-            ||  evsql_result_uint32(res, row, 2, &ino,          0 ) // inodes.ino
-            ||  evsql_result_string(res, row, 3, &type,         0 ) // inodes.type
-        )
-            SERROR(err = EIO);
-        
-        INFO("\t%zu: off=%lu+2, name=%s, ino=%lu, type=%s", row, (long unsigned int) off, name, (long unsigned int) ino, type);
-
-        // add to the dirbuf
-        // offsets are just offset + 2
-        if ((err = dirbuf_add(req, &dirop->dirbuf, off + 2, off + 3, name, ino, _dbfs_mode(type))) < 0 && (err = EIO))
-            ERROR("failed to add dirent for inode=%lu", (long unsigned int) ino);
-        
-        // stop if it's full
-        if (err > 0)
-            break;
-    }
-
-    // send it
-    if ((err = dirbuf_done(req, &dirop->dirbuf)))
-        EERROR(err, "failed to send buf");
-    
-    // good, fallthrough
-    err = 0;
-
-error:
-    if (err) {
-        // abort the trans
-        evsql_trans_abort(dirop->trans);
-        
-        dirop->trans = NULL;
-
-        // we handle the req
-        dirop->req = NULL;
-    
-        if ((err = fuse_reply_err(req, err)))
-            EWARNING(err, "fuse_reply_err");
-    }
-    
-    // free
-    evsql_result_free(res);
-}
-
-static void dbfs_readdir (struct fuse_req *req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
-    struct dbfs *ctx = fuse_req_userdata(req);
-    struct dbfs_dirop *dirop = (struct dbfs_dirop *) fi->fh;
-    int err;
-
-    assert(!dirop->req);
-    assert(dirop->trans);
-    assert(dirop->ino == ino);
-    
-    INFO("[dbfs.readdir %p:%p] ino=%lu, size=%zu, off=%zu, fi=%p : trans=%p", dirop, req, ino, size, off, fi, dirop->trans);
-
-    // update dirop
-    dirop->req = req;
-
-    // create the dirbuf
-    if (dirbuf_init(&dirop->dirbuf, size, off))
-        SERROR(err = EIO);
-
-    // add . and ..
-    // we set the next offset to 2, because all dirent offsets will be larger than that
-    if ((err = (0
-        ||  dirbuf_add(req, &dirop->dirbuf, 0, 1, ".",   dirop->ino,    S_IFDIR )
-        ||  dirbuf_add(req, &dirop->dirbuf, 1, 2, "..",  
-                        dirop->parent ? dirop->parent : dirop->ino,     S_IFDIR )
-    )) && (err = EIO))
-        ERROR("failed to add . and .. dirents");
-
-    // select all relevant file entries
-    const char *sql = 
-        "SELECT"
-        " file_tree.\"offset\", file_tree.name, inodes.ino, inodes.type"
-        " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)"
-        " WHERE file_tree.parent = $1::int4 AND file_tree.\"offset\" >= $2::int4"
-        " LIMIT $3::int4";
-
-    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
-        EVSQL_PARAM ( UINT32 ),
-        EVSQL_PARAM ( UINT32 ),
-        EVSQL_PARAM ( UINT32 ),
-
-        EVSQL_PARAMS_END
-    };
-
-    // adjust offset to take . and .. into account
-    if (off > 2)
-        off -= 2;
-    
-    // build params
-    if (0
-        ||  evsql_param_uint32(&params, 0, dirop->ino)
-        ||  evsql_param_uint32(&params, 1, off)
-        ||  evsql_param_uint32(&params, 2, dirbuf_estimate(&dirop->dirbuf, 0))
-    )
-        SERROR(err = EIO);
-
-    // query
-    if (evsql_query_params(ctx->db, dirop->trans, sql, &params, dbfs_readdir_files_res, dirop) == NULL)
-        SERROR(err = EIO);
-
-    // good, wait
-    return;
-
-error:
-    // we handle the req
-    dirop->req = NULL;
-
-    // abort the trans
-    evsql_trans_abort(dirop->trans); dirop->trans = NULL;
-
-    if ((err = fuse_reply_err(req, err)))
-        EWARNING(err, "fuse_reply_err");
-
-}
-
-static void dbfs_releasedir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
-    struct dbfs *ctx = fuse_req_userdata(req);
-    struct dbfs_dirop *dirop = (struct dbfs_dirop *) fi->fh;
-    int err;
-
-    (void) ctx;
-    
-    assert(!dirop->req);
-    assert(dirop->ino == ino);
-
-    INFO("[dbfs.releasedir %p:%p] ino=%lu, fi=%p : trans=%p", dirop, req, ino, fi, dirop->trans);
-
-    // update dirop. Must keep it open so that dbfs_dirop_error won't free it
-    // copy *fi since it's on the stack
-    dirop->fi = *fi;
-    dirop->fi.fh = (uint64_t) dirop;
-    dirop->req = req;
-    
-    if (dirop->trans) {
-        // we can commit the transaction, although we didn't make any changes
-        // if this fails the transaction, then dbfs_dirop_error will take care of sending the error, and dirop->req will be
-        // NULL
-        if (evsql_trans_commit(dirop->trans))
-            SERROR(err = EIO);
-
-    } else {
-        // trans failed earlier, so have releasedir just succeed
-        if ((err = fuse_reply_err(req, 0)))
-            EERROR(err, "fuse_reply_err");
-
-        // req is done
-        dirop->req = NULL;
-    }
-
-    // fall-through to cleanup
-    err = 0;
-
-error:
-    // the dirop is not open anymore and can be freed once done with
-    dirop->open = 0;
-
-    // if trans_commit triggered an error but didn't call dbfs_dirop_error, we need to take care of it
-    if (err && dirop->req) {
-        int err2;
-
-        // we handle the req
-        dirop->req = NULL;
-
-        if ((err2 = fuse_reply_err(req, err)))
-            EWARNING(err2, "fuse_reply_err");
-    } 
-    
-    // same for trans, we need to abort it if trans_commit failed and fs_dirop_error didn't get called
-    if (err && dirop->trans) {
-        dbfs_dirop_free(dirop);
-    
-    } else
-      // alternatively, if the trans error'd itself away (now or earlier), we don't need to keep the dirop around
-      // anymore now that we've checkd its state
-      if (!dirop->trans) {
-        dbfs_dirop_free(dirop);
-    }
-}
-
-struct fuse_lowlevel_ops dbfs_llops = {
-
-    .init           = dbfs_init,
-    .destroy        = dbfs_destroy,
-    
-    .lookup         = dbfs_lookup,
-
-    .getattr        = dbfs_getattr,
-
-    .opendir        = dbfs_opendir,
-    .readdir        = dbfs_readdir,
-    .releasedir     = dbfs_releasedir,
-};
-
-void dbfs_sql_error (struct evsql *evsql, void *arg) {
-    struct dbfs *ctx = arg;
-
-    // AAAAAAAAAA.... panic
-    WARNING("[dbfs] SQL error: BREAKING MAIN LOOP LIEK NAO");
-
-    event_base_loopbreak(ctx->ev_base);
-}
-
 int main (int argc, char **argv) {
+    struct event_base *ev_base = NULL;
+    struct signals *signals = NULL;
+    struct dbfs *ctx = NULL;
+    const char *db_conninfo;
     struct fuse_args fuse_args = FUSE_ARGS_INIT(argc, argv);
-    struct dbfs ctx; ZINIT(ctx);
     
     // parse args, XXX: fuse_args
-    ctx.db_conninfo = CONNINFO_DEFAULT;
+    db_conninfo = CONNINFO_DEFAULT;
     
     // init libevent
-    if ((ctx.ev_base = event_base_new()) == NULL)
+    if ((ev_base = event_base_new()) == NULL)
         ERROR("event_base_new");
     
     // setup signals
-    if ((ctx.signals = signals_default(ctx.ev_base)) == NULL)
+    if ((signals = signals_default(ev_base)) == NULL)
         ERROR("signals_default");
 
-    // open sql
-    if ((ctx.db = evsql_new_pq(ctx.ev_base, ctx.db_conninfo, dbfs_sql_error, &ctx)) == NULL)
-        ERROR("evsql_new_pq");
-
-    // open fuse
-    if ((ctx.ev_fuse = evfuse_new(ctx.ev_base, &fuse_args, &dbfs_llops, &ctx)) == NULL)
-        ERROR("evfuse_new");
+    // setup dbfs
+    if ((ctx = dbfs_open(ev_base, &fuse_args, db_conninfo)) == NULL)
+        ERROR("dbfs_open");
 
     // run libevent
     INFO("running libevent loop");
 
-    if (event_base_dispatch(ctx.ev_base))
+    if (event_base_dispatch(ev_base))
         PERROR("event_base_dispatch");
     
     // clean shutdown
 
 error :
-    // cleanup
-    if (ctx.ev_fuse)
-        evfuse_free(ctx.ev_fuse);
+    if (ctx)
+        dbfs_release(ctx);
+    
+    if (signals)
+        signals_free(signals);
 
-    // XXX: ctx.db
-    
-    if (ctx.signals)
-        signals_free(ctx.signals);
-
-    if (ctx.ev_base)
-        event_base_free(ctx.ev_base);
+    if (ev_base)
+        event_base_free(ev_base);
     
     fuse_opt_free_args(&fuse_args);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs.h	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,25 @@
+#ifndef DBFS_H
+#define DBFS_H
+
+#include "evfuse.h"
+
+/*
+ * External interface for dbfs
+ */
+
+/*
+ * Context struct.
+ */
+struct dbfs;
+
+/*
+ * Create the evsql and evfuse contexts on ev_base (the caller runs the event loop); returns NULL on failure
+ */
+struct dbfs *dbfs_open (struct event_base *ev_base, struct fuse_args *args, const char *db_conninfo);
+
+/*
+ * Release the dbfs's resources and free it
+ */
+void dbfs_release (struct dbfs *ctx);
+
+#endif /* DBFS_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs/common.c	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,79 @@
+
+#include <string.h>
+
+#include "common.h"
+// Translate the inodes.type string into the matching S_IF* file-type bits ("DIR" -> S_IFDIR, "REG" -> S_IFREG).
+mode_t _dbfs_mode (const char *type) {
+    if (!strcmp(type, "DIR"))
+        return S_IFDIR;
+
+    if (!strcmp(type, "REG"))
+        return S_IFREG;
+    // anything else is unknown: log a warning and return zero (no file-type bits)
+    else {
+        WARNING("[dbfs] weird mode-type: %s", type);
+        return 0;
+    }
+}
+// Validate a query result: returns 0 if it matches, 1 if it has zero rows, -1 for any other failure.
+int _dbfs_check_res (const struct evsql_result_info *res, size_t rows, size_t cols) {
+    int err = 0;
+
+    // check if the query itself failed
+    if (res->error)
+        NERROR(evsql_result_error(res));
+        
+    // not found? tested before the column count, so an empty result returns 1 whatever its columns are
+    if (evsql_result_rows(res) == 0)
+        SERROR(err = 1);
+
+    // wrong number of rows? only enforced when rows is nonzero
+    if (rows && evsql_result_rows(res) != rows)
+        ERROR("wrong number of rows returned");
+    
+    // correct number of columns
+    if (evsql_result_cols(res) != cols)
+        ERROR("wrong number of columns: %zu", evsql_result_cols(res));
+
+    // good
+    return 0;
+
+error:
+    if (!err)
+        err = -1;   // reached without SERROR's err = 1 (presumably via NERROR/ERROR): generic failure
+
+    return err;
+}
+// Fill st_mode, st_nlink and st_size of *st from four result columns starting at col_offset; returns 0, or -1 on bad data.
+int _dbfs_stat_info (struct stat *st, const struct evsql_result_info *res, size_t row, size_t col_offset) {
+    int err = 0;    // only assigned in the EERROR argument below; the return value is always 0 or -1
+    
+    uint16_t mode;
+    uint64_t size, nlink;
+    const char *type;
+    
+    // extract the data; the first getter that returns nonzero fails the whole lookup
+    if (0
+        ||  evsql_result_string(res, row, col_offset + 0, &type,       0 ) // inodes.type
+        ||  evsql_result_uint16(res, row, col_offset + 1, &mode,       0 ) // inodes.mode
+        ||  evsql_result_uint64(res, row, col_offset + 2, &size,       0 ) // inodes.size
+        ||  evsql_result_uint64(res, row, col_offset + 3, &nlink,      0 ) // count(*)
+    )
+        EERROR(err = EIO, "invalid db data");
+
+    INFO("\tst_mode=S_IF%s | %ho, st_nlink=%llu, st_size=%llu", type, mode, (long long unsigned int) nlink, (long long unsigned int) size);
+
+    // convert and store: file-type bits from the type string OR'd with the stored mode value
+    st->st_mode = _dbfs_mode(type) | mode;
+    st->st_nlink = nlink;
+    st->st_size = size;
+    
+    // good; st_ino and all other fields are left untouched
+    return 0;
+
+error:
+    return -1;
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs/common.h	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,62 @@
+#ifndef DBFS_COMMON_H
+#define DBFS_COMMON_H
+
+#include <sys/stat.h>
+#include <errno.h>
+
+#include <event2/event.h>
+
+#include "../evfuse.h"
+#include "../evsql.h"
+#include "../lib/log.h"
+#include "../lib/misc.h"
+
+/*
+ * Structs and functions shared between all dbfs components
+ */
+
+#define SERROR(val) do { (val); goto error; } while(0)
+
+struct dbfs {
+    struct event_base *ev_base;     // event base given to dbfs_open; not freed by dbfs_release
+    
+    const char *db_conninfo;        // conninfo string handed to evsql_new_pq; stored by pointer, not copied
+    struct evsql *db;               // handle returned by evsql_new_pq
+
+    struct evfuse *ev_fuse;         // handle returned by evfuse_new; freed with evfuse_free in dbfs_release
+};
+
+// XXX: not sure how this should work
+#define CACHE_TIMEOUT 1.0
+
+/*
+ * Convert the CHAR(4) inodes.type from SQL into a mode_t.
+ *
+ * Returns zero for unknown types.
+ */
+mode_t _dbfs_mode (const char *type);
+
+/*
+ * Check that the number of rows and columns in the result set matches what we expect.
+ *
+ * If rows is nonzero, there must be exactly that many rows (mostly useful for rows=1).
+ * The number of columns must always be given, and match.
+ *
+ * Returns:
+ *  -1  if the query failed, or the columns/rows do not match
+ *  0   the results match
+ *  1   there were no results (zero rows)
+ */
+int _dbfs_check_res (const struct evsql_result_info *res, size_t rows, size_t cols);
+
+/*
+ * Fill a `struct stat` with info retrieved from a SQL query.
+ *
+ * The result must contain four columns, starting at the given offset:
+ *  inodes.type, inodes.mode, inodes.size, count(*) AS nlink
+ *
+ * Note that this does not fill the st_ino field
+ */
+int _dbfs_stat_info (struct stat *st, const struct evsql_result_info *res, size_t row, size_t col_offset);
+
+#endif /* DBFS_COMMON_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs/core.c	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,161 @@
+
+#include "common.h"
+#include "ops.h"
+
+/*
+ * Core fs functionality like lookup, getattr
+ */
+
+// evsql result callback for dbfs_lookup (arg is the fuse_req): sends the entry reply or an errno, then frees res.
+void _dbfs_lookup_result (const struct evsql_result_info *res, void *arg) {
+    struct fuse_req *req = arg;
+    struct fuse_entry_param e; ZINIT(e);
+    int err = 0;
+    
+    uint32_t ino;
+    
+    // exactly one row of five columns is expected; zero rows means there is no such entry
+    if ((err = _dbfs_check_res(res, 1, 5)))
+        SERROR(err = (err ==  1 ? ENOENT : EIO));
+    
+    // get the data: column 0 is inodes.ino
+    if (evsql_result_uint32(res, 0, 0, &ino, 0))
+        EERROR(err = EIO, "invalid db data");
+        
+    INFO("[dbfs.lookup] -> ino=%u", ino);
+    
+    // stat attrs from columns 1..4; err is still zero here, so it must be set or the req would never be answered
+    if (_dbfs_stat_info(&e.attr, res, 0, 1))
+        SERROR(err = EIO);
+
+    // other attrs
+    e.ino = e.attr.st_ino = ino;
+    e.attr_timeout = CACHE_TIMEOUT;
+    e.entry_timeout = CACHE_TIMEOUT;
+        
+    // reply; a req takes exactly one reply, so a failed reply is only logged and never followed by fuse_reply_err
+    if ((err = fuse_reply_entry(req, &e)))
+        EWARNING(err, "fuse_reply_entry");
+    err = 0;    // the req has been consumed either way
+
+error:
+    if (err && (err = fuse_reply_err(req, err)))
+        EWARNING(err, "fuse_reply_err");
+
+    // free
+    evsql_result_free(res);
+}
+// FUSE lookup op: query the inode called `name` under `parent`; the reply is sent from _dbfs_lookup_result.
+void dbfs_lookup (struct fuse_req *req, fuse_ino_t parent, const char *name) {
+    struct dbfs *ctx = fuse_req_userdata(req);
+    int err;
+
+    INFO("[dbfs.lookup] parent=%lu name=%s", parent, name);
+    
+    // query and params: ino plus the four stat columns that _dbfs_stat_info reads at offset 1
+    const char *sql = 
+        "SELECT"
+        " inodes.ino, inodes.type, inodes.mode, inodes.size, count(*)"
+        " FROM file_tree INNER JOIN inodes ON (file_tree.inode = inodes.ino)"
+        " WHERE file_tree.parent = $1::int4 AND file_tree.name = $2::varchar"
+        " GROUP BY inodes.ino, inodes.type, inodes.mode, inodes.size";
+    // static: a single params struct is shared by every call, its values are filled in below
+    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
+        EVSQL_PARAM ( UINT32 ),
+        EVSQL_PARAM ( STRING ),
+
+        EVSQL_PARAMS_END
+    };
+    
+    // build params
+    if (0
+        ||  evsql_param_uint32(&params, 0, parent)
+        ||  evsql_param_string(&params, 1, name)
+    )
+        EERROR(err = EIO, "evsql_param_*");
+
+    // query outside of any transaction (NULL trans); req is passed along as the callback argument
+    if (evsql_query_params(ctx->db, NULL, sql, &params, _dbfs_lookup_result, req) == NULL)
+        EERROR(err = EIO, "evsql_query_params");
+
+    // XXX: handle interrupts
+    
+    // wait for _dbfs_lookup_result to answer the req
+    return;
+
+error:
+    if ((err = fuse_reply_err(req, err)))
+        EWARNING(err, "fuse_reply_err");
+}
+
+// evsql result callback for dbfs_getattr (arg is the fuse_req): sends the attr reply or an errno, then frees res.
+void _dbfs_getattr_result (const struct evsql_result_info *res, void *arg) {
+    struct fuse_req *req = arg;
+    struct stat st; ZINIT(st);
+    int err = 0;
+    
+    // exactly one row of four columns is expected; zero rows means there is no such inode
+    if ((err = _dbfs_check_res(res, 1, 4)))
+        SERROR(err = (err ==  1 ? ENOENT : EIO));
+        
+    INFO("[dbfs.getattr %p] -> (stat follows)", req);
+    
+    // stat attrs; err is still zero here, so it must be set or the req would never be answered
+    if (_dbfs_stat_info(&st, res, 0, 0))
+        SERROR(err = EIO);
+    st.st_ino = 0;  // XXX: we don't have the ino
+
+    // reply; a req takes exactly one reply, so a failed reply is only logged and never followed by fuse_reply_err
+    if ((err = fuse_reply_attr(req, &st, CACHE_TIMEOUT)))
+        EWARNING(err, "fuse_reply_attr");
+    err = 0;    // the req has been consumed either way
+
+error:
+    if (err && (err = fuse_reply_err(req, err)))
+        EWARNING(err, "fuse_reply_err");
+
+    // free
+    evsql_result_free(res);
+}
+// FUSE getattr op: query the stat columns for inode `ino`; the reply is sent from _dbfs_getattr_result.
+void dbfs_getattr (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
+    struct dbfs *ctx = fuse_req_userdata(req);
+    int err;
+    
+    (void) fi;
+
+    INFO("[dbfs.getattr %p] ino=%lu", req, ino);
+    // NOTE(review): count(*) runs over inodes only (no file_tree join), so the nlink column is presumably always 1
+    const char *sql =
+        "SELECT"
+        " inodes.type, inodes.mode, inodes.size, count(*)"
+        " FROM inodes"
+        " WHERE inodes.ino = $1::int4"
+        " GROUP BY inodes.type, inodes.mode, inodes.size";
+    // static: a single params struct is shared by every call, its value is filled in below
+    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
+        EVSQL_PARAM ( UINT32 ),
+
+        EVSQL_PARAMS_END
+    };
+
+    // build params
+    if (0
+        ||  evsql_param_uint32(&params, 0, ino)
+    )
+        SERROR(err = EIO);
+        
+    // query outside of any transaction (NULL trans); req is passed along as the callback argument
+    if (evsql_query_params(ctx->db, NULL, sql, &params, _dbfs_getattr_result, req) == NULL)
+        SERROR(err = EIO);
+
+    // XXX: handle interrupts
+    
+    // wait for _dbfs_getattr_result to answer the req
+    return;
+
+error:
+    if ((err = fuse_reply_err(req, err)))
+        EWARNING(err, "fuse_reply_err");
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs/dbfs.c	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,88 @@
+
+#include <stdlib.h>
+
+#include "../dbfs.h"
+#include "common.h"
+#include "ops.h"
+
+static struct fuse_lowlevel_ops dbfs_llops = {
+    // the FUSE lowlevel operations implemented so far; members not listed here are left zero-initialized
+    .init           = dbfs_init,
+    .destroy        = dbfs_destroy,
+    // name and attribute lookups
+    .lookup         = dbfs_lookup,
+
+    .getattr        = dbfs_getattr,
+    // directory listing
+    .opendir        = dbfs_opendir,
+    .readdir        = dbfs_readdir,
+    .releasedir     = dbfs_releasedir,
+};
+
+void dbfs_init (void *userdata, struct fuse_conn_info *conn) {
+    INFO("[dbfs.init] userdata=%p, conn=%p", userdata, conn);
+    // nothing is set up here; the call is only logged
+}
+
+void dbfs_destroy (void *arg) {
+    struct dbfs *fs = arg;
+
+    INFO("[dbfs.destroy %p]", fs);
+    // have the libevent loop exit; no timeout is given
+    event_base_loopexit(fs->ev_base, NULL);
+}
+
+
+void dbfs_sql_error (struct evsql *evsql, void *arg) {
+    struct dbfs *fs = arg;
+
+    // the evsql handle itself is not inspected; the main loop is simply stopped
+    WARNING("[dbfs] SQL error: BREAKING MAIN LOOP LIEK NAO");
+
+    event_base_loopbreak(fs->ev_base);
+}
+
+struct dbfs *dbfs_open (struct event_base *ev_base, struct fuse_args *args, const char *db_conninfo) {
+    // zero-filled, so that dbfs_release can tell which parts were actually opened
+    struct dbfs *fs = calloc(1, sizeof(*fs));
+    if (!fs)
+        ERROR("calloc");
+
+    // only the pointers are stored here, nothing is copied
+    fs->ev_base = ev_base;
+    fs->db_conninfo = db_conninfo;
+
+    // the SQL connection first, reporting its errors to dbfs_sql_error
+    fs->db = evsql_new_pq(fs->ev_base, fs->db_conninfo, dbfs_sql_error, fs);
+    if (!fs->db)
+        ERROR("evsql_new_pq");
+
+    // then the FUSE side, set up with the dbfs_llops table
+    fs->ev_fuse = evfuse_new(fs->ev_base, args, &dbfs_llops, fs);
+    if (!fs->ev_fuse)
+        ERROR("evfuse_new");
+
+    return fs;
+
+error:
+    if (fs)
+        dbfs_release(fs);
+    return NULL;
+}
+
+void dbfs_release (struct dbfs *ctx) {
+    // tear down the FUSE side, if dbfs_open got far enough to create it
+    if (ctx->ev_fuse) {
+        evfuse_free(ctx->ev_fuse);
+    
+        ctx->ev_fuse = NULL;
+    }
+
+    if (ctx->db) {
+        // XXX: not yet implemented, so the evsql handle is currently left as it is
+        // evsql_close(ctx->db);
+        // ctx->db = NULL;
+    }
+    // ctx itself is always freed and must not be used after this returns
+    free(ctx);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs/dirop.c	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,496 @@
+
+#include <stdlib.h>
+#include <assert.h>
+
+#include "common.h"
+#include "ops.h"
+#include "../dirbuf.h"
+
+/*
+ * Directory related functionality like opendir, readdir, releasedir
+ */
+
+struct dbfs_dirop {
+    struct fuse_file_info fi;   // copy of the opendir fi, with fh set to this dirop
+    struct fuse_req *req;       // the request being answered right now, NULL when there is none
+    // transaction started by opendir, reset to NULL once it is aborted, fails or completes
+    struct evsql_trans *trans;
+    
+    // dir/parent dir inodes
+    uint32_t ino, parent;
+    
+    // opendir has returned and releasedir hasn't been called yet
+    int open;
+
+    // for readdir
+    struct dirbuf dirbuf;
+};
+
+/*
+ * Free the dirop. This does not touch the transaction or the request, both must already be gone.
+ *
+ * The dirop must have had any outstanding request responded to first, must not be open, and must not have a
+ * transaction; all three are asserted.
+ * The dirbuf will be released, and the dirop free'd.
+ */
+static void _dbfs_dirop_free (struct dbfs_dirop *dirop) {
+    assert(dirop);
+    assert(!dirop->open);
+    assert(!dirop->req);
+    assert(!dirop->trans);
+    
+    // just release the dirbuf
+    dirbuf_release(&dirop->dirbuf);
+    
+    // and then free the dirop
+    free(dirop);
+}
+
+/*
+ * This will handle backend failures during requests.
+ *
+ * 1) if we have a trans, abort it
+ * 2) fail the req with EIO (mandatory, the dirop must have a req)
+ *
+ * If the dirop is open, then we don't release it, but if it's not open, then the dirop will be free'd completely.
+ * The caller's own error code is not passed in, so every failure is reported to FUSE as a generic EIO.
+ */
+static void _dbfs_dirop_fail (struct dbfs_dirop *dirop) {
+    int err;
+
+    assert(dirop->req);
+    
+    if (dirop->trans) {
+        // abort the trans
+        evsql_trans_abort(dirop->trans);
+        
+        dirop->trans = NULL;
+    }
+
+    // send an error reply; no specific error code is available here, so EIO is used
+    if ((err = fuse_reply_err(dirop->req, EIO)))
+        // XXX: handle these failures /somehow/, or requests will hang and interrupts might handle invalid dirops
+        EFATAL(err, "dbfs.fail %p:%p dirop_fail: reply with fuse_reply_err", dirop, dirop->req);
+   
+    // drop the req
+    dirop->req = NULL;
+
+    // is it open?
+    if (!dirop->open) {
+        // no, we can free it now and then forget about the whole thing
+        _dbfs_dirop_free(dirop);
+
+    } else {
+        // we need to wait for releasedir
+
+    }
+}
+
+/*
+ * Handle the results for the initial attribute lookup for the dir itself during opendir ops: store the parent
+ * inode, check that the inode is a directory, and send the opendir reply. Any failure goes to _dbfs_dirop_fail.
+ */
+static void dbfs_opendir_info_res (const struct evsql_result_info *res, void *arg) {
+    struct dbfs_dirop *dirop = arg;
+    int err;
+    // we are still inside opendir: the transaction and the request exist, but the dirop is not open yet
+    assert(dirop->trans);
+    assert(dirop->req);
+    assert(!dirop->open);
+   
+    // check the results; a return of 1 from _dbfs_check_res is reported as ENOENT, other failures as EIO
+    if ((err = _dbfs_check_res(res, 1, 2)))
+        SERROR(err = (err ==  1 ? ENOENT : EIO));
+
+    const char *type;
+    // extract the data (NOTE(review): the trailing 1/0 argument looks like a "NULL allowed" flag - confirm)
+    if (0
+        ||  evsql_result_uint32(res, 0, 0, &dirop->parent,  1 ) // file_tree.parent
+        ||  evsql_result_string(res, 0, 1, &type,           0 ) // inodes.type
+    )
+        SERROR(err = EIO);
+
+    // is it a dir?
+    if (_dbfs_mode(type) != S_IFDIR)
+        EERROR(err = ENOTDIR, "wrong type: %s", type);
+    
+    INFO("[dbfs.opendir %p:%p] -> ino=%lu, parent=%lu, type=%s", dirop, dirop->req, (unsigned long int) dirop->ino, (unsigned long int) dirop->parent, type);
+    
+    // send the opendir reply, using the fi copy whose fh points at this dirop
+    if ((err = fuse_reply_open(dirop->req, &dirop->fi)))
+        EERROR(err, "fuse_reply_open");
+    
+    // req is done
+    dirop->req = NULL;
+
+    // dirop is now open
+    dirop->open = 1;
+
+    // success, fallthrough for evsql_result_free
+    err = 0;
+
+error:
+    if (err)
+        // fail it
+        _dbfs_dirop_fail(dirop);
+    
+    // free
+    evsql_result_free(res);
+}
+
+/*
+ * The opendir transaction is ready for use. Query for the given dir's info; dbfs_opendir_info_res gets the result.
+ */
+static void dbfs_dirop_ready (struct evsql_trans *trans, void *arg) {
+    struct dbfs_dirop *dirop = arg;
+    struct dbfs *ctx = fuse_req_userdata(dirop->req);
+    int err;
+    
+    // XXX: unless we abort queries
+    assert(trans == dirop->trans);
+    assert(dirop->req);
+    assert(!dirop->open);
+
+    INFO("[dbfs.opendir %p:%p] -> trans=%p", dirop, dirop->req, trans);
+
+    // remember the transaction (the assert above already requires it to be the stored one)
+    dirop->trans = trans;
+    
+    // first fetch info about the dir itself
+    const char *sql =
+        "SELECT"
+        " file_tree.parent, inodes.type"
+        " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)"
+        " WHERE file_tree.inode = $1::int4";
+    // static, so this one parameter block is shared and re-filled by every call
+    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
+        EVSQL_PARAM ( UINT32 ),
+
+        EVSQL_PARAMS_END
+    };
+
+    // build params
+    if (0
+        ||  evsql_param_uint32(&params, 0, dirop->ino)
+    )
+        SERROR(err = EIO);
+        
+    // query
+    if (evsql_query_params(ctx->db, dirop->trans, sql, &params, dbfs_opendir_info_res, dirop) == NULL)
+        SERROR(err = EIO);
+
+    // ok, wait for the info results
+    return;
+
+error:
+    // fail it
+    _dbfs_dirop_fail(dirop);
+}
+
+/*
+ * Commit callback for the dirop transaction: the releasedir request can now be answered and the dirop dropped.
+ */
+static void dbfs_dirop_done (struct evsql_trans *trans, void *arg) {
+    struct dbfs_dirop *op = arg;
+    int reply_err;
+
+    // releasedir has already marked the dirop as closed and is waiting for its reply
+    assert(op->trans);
+    assert(op->req);
+    assert(!op->open);
+
+    INFO("[dbfs.releasedir %p:%p] -> OK", op, op->req);
+
+    // the transaction is finished, so stop referring to it
+    op->trans = NULL;
+
+    // answer the releasedir with success
+    reply_err = fuse_reply_err(op->req, 0);
+    if (reply_err)
+        // XXX: handle these failures /somehow/, or requests will hang and interrupts might handle invalid dirops
+        EFATAL(reply_err, "[dbfs.releasedir %p:%p] dirop_done: reply with fuse_reply_err", op, op->req);
+
+    // with no req and no trans left, the dirop itself can go
+    op->req = NULL;
+    _dbfs_dirop_free(op);
+}
+
+/*
+ * The dirop trans has failed, somehow, at some point, somewhere.
+ *
+ * This might happen during the opendir evsql_trans, during a readdir evsql_query, during the releasedir
+ * evsql_trans_commit, or at any point in between.
+ *
+ * 1) lose the transaction
+ * 2) if dirop has a req, we handle failing it
+ */
+static void dbfs_dirop_error (struct evsql_trans *trans, void *arg) {
+    struct dbfs_dirop *dirop = arg;
+
+    INFO("[dbfs:dirop %p:%p] evsql transaction error: %s", dirop, dirop->req, evsql_trans_error(trans));
+    
+    // disassociate the trans first, so that _dbfs_dirop_fail below will not try to abort it
+    dirop->trans = NULL;
+    
+    // if we were answering a req, error it out, and if the dirop isn't open, release it
+    // if we didn't have a req outstanding, the dirop must be open, so we wouldn't free it in any case, and must wait
+    // for the next readdir/releasedir to detect this and return an error reply
+    if (dirop->req)
+        _dbfs_dirop_fail(dirop);
+    else
+        assert(dirop->open);
+}
+
+/*
+ * Handle opendir(), this means starting a new transaction, dbfs_dirop_ready/error will continue on from there.
+ * On failure the request is answered with an error right away and no dirop is left behind.
+ * The contents of fi will be copied into the dirop, and will be used as the basis for the fuse_reply_open reply.
+ */
+void dbfs_opendir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
+    struct dbfs *ctx = fuse_req_userdata(req);
+    struct dbfs_dirop *dirop = NULL;
+    int err;
+    
+    // allocate it
+    if ((dirop = calloc(1, sizeof(*dirop))) == NULL && (err = EIO))
+        ERROR("calloc");
+
+    INFO("[dbfs.opendir %p:%p] ino=%lu, fi=%p", dirop, req, ino, fi);
+    
+    // store the dirop: copy *fi since it's on the stack, and keep the dirop pointer in the copy's fh,
+    // going through uintptr_t so that the pointer-to-integer conversion is well-defined on any pointer size
+    dirop->fi = *fi;
+    dirop->fi.fh = (uintptr_t) dirop;
+    dirop->req = req;
+    dirop->ino = ino;
+
+    // start a new transaction
+    if ((dirop->trans = evsql_trans(ctx->db, EVSQL_TRANS_SERIALIZABLE, dbfs_dirop_error, dbfs_dirop_ready, dbfs_dirop_done, dirop)) == NULL)
+        SERROR(err = EIO);
+    
+    // XXX: handle interrupts
+    
+    // wait
+    return;
+
+error:
+    if (dirop) {
+        // we can fail normally
+        _dbfs_dirop_fail(dirop);
+
+    } else {
+        // must error out manually as we couldn't alloc the context
+        if ((err = fuse_reply_err(req, err)))
+            EWARNING(err, "fuse_reply_err");
+    }
+}
+
+/*
+ * Got the list of files for our readdir() request.
+ *
+ * Fill up the dirbuf, and then send the reply.
+ * On any failure the request is failed via _dbfs_dirop_fail instead; the result is freed in both cases.
+ */
+static void dbfs_readdir_files_res (const struct evsql_result_info *res, void *arg) {
+    struct dbfs_dirop *dirop = arg;
+    int err;
+    size_t row;
+    
+    assert(dirop->req);
+    assert(dirop->trans);
+    assert(dirop->open);
+    
+    // check the results; only a negative return counts as a failure here
+    if ((err = _dbfs_check_res(res, 0, 4)) < 0)
+        SERROR(err = EIO);
+        
+    INFO("[dbfs.readdir %p:%p] -> files: res_rows=%zu", dirop, dirop->req, evsql_result_rows(res));
+        
+    // iterate over the rows
+    for (row = 0; row < evsql_result_rows(res); row++) {
+        uint32_t off, ino;
+        const char *name, *type;
+
+        // extract the data
+        if (0
+            ||  evsql_result_uint32(res, row, 0, &off,          0 ) // file_tree.offset
+            ||  evsql_result_string(res, row, 1, &name,         0 ) // file_tree.name
+            ||  evsql_result_uint32(res, row, 2, &ino,          0 ) // inodes.ino
+            ||  evsql_result_string(res, row, 3, &type,         0 ) // inodes.type
+        )
+            SERROR(err = EIO);
+        
+        INFO("\t%zu: off=%lu+2, name=%s, ino=%lu, type=%s", row, (long unsigned int) off, name, (long unsigned int) ino, type);
+
+        // add to the dirbuf
+        // dirent offsets are the file_tree offset + 2, and the next offset is one past that
+        if ((err = dirbuf_add(dirop->req, &dirop->dirbuf, off + 2, off + 3, name, ino, _dbfs_mode(type))) < 0 && (err = EIO))
+            ERROR("failed to add dirent for inode=%lu", (long unsigned int) ino);
+        
+        // stop if it's full (dirbuf_add returned a positive value)
+        if (err > 0)
+            break;
+    }
+
+    // send it
+    if ((err = dirbuf_done(dirop->req, &dirop->dirbuf)))
+        EERROR(err, "failed to send buf");
+
+    // req is done
+    dirop->req = NULL;
+    
+    // good, fallthrough
+    err = 0;
+
+error:
+    if (err)
+        _dbfs_dirop_fail(dirop);
+
+    // free
+    evsql_result_free(res);
+}
+
+/*
+ * Handle a readdir request. This will execute a SQL query inside the transaction to get the files at the given offset,
+ * and dbfs_readdir_files_res will handle the results.
+ *
+ * If trans failed earlier, detect that and return an error.
+ */
+void dbfs_readdir (struct fuse_req *req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
+    struct dbfs *ctx = fuse_req_userdata(req);
+    struct dbfs_dirop *dirop = (struct dbfs_dirop *) (uintptr_t) fi->fh;
+    int err;
+    
+    assert(dirop);
+    assert(!dirop->req);
+    assert(dirop->open);
+    assert(dirop->ino == ino);
+    
+    // store the new req
+    dirop->req = req;
+
+    // detect earlier failures
+    if (!dirop->trans && (err = EIO))
+        ERROR("dirop trans has failed");
+    
+    INFO("[dbfs.readdir %p:%p] ino=%lu, size=%zu, off=%lld, fi=%p : trans=%p", dirop, req, ino, size, (long long) off, fi, dirop->trans);
+
+    // create the dirbuf
+    if (dirbuf_init(&dirop->dirbuf, size, off))
+        SERROR(err = EIO);
+
+    // add . and ..
+    // we set the next offset to 2, because all dirent offsets will be larger than that
+    // assume that these two should *always* fit
+    if ((err = (0
+        ||  dirbuf_add(req, &dirop->dirbuf, 0, 1, ".",   dirop->ino,    S_IFDIR )
+        ||  dirbuf_add(req, &dirop->dirbuf, 1, 2, "..",  
+                        dirop->parent ? dirop->parent : dirop->ino,     S_IFDIR )
+    )) && (err = EIO))
+        ERROR("failed to add . and .. dirents");
+    // select all relevant file entries
+    // NOTE(review): there is no ORDER BY, although the LIMIT and the offset logic look like they assume one
+    const char *sql = 
+        "SELECT"
+        " file_tree.\"offset\", file_tree.name, inodes.ino, inodes.type"
+        " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)"
+        " WHERE file_tree.parent = $1::int4 AND file_tree.\"offset\" >= $2::int4"
+        " LIMIT $3::int4";
+
+    static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
+        EVSQL_PARAM ( UINT32 ),
+        EVSQL_PARAM ( UINT32 ),
+        EVSQL_PARAM ( UINT32 ),
+
+        EVSQL_PARAMS_END
+    };
+
+    // adjust offset to take . and .. into account; a request offset of 0, 1 or 2 has not passed any file yet,
+    // so all of those must start from file offset 0 (dirbuf_add skips anything below the requested offset)
+    off = (off > 2) ? off - 2 : 0;
+    
+    // build params
+    if (0
+        ||  evsql_param_uint32(&params, 0, dirop->ino)
+        ||  evsql_param_uint32(&params, 1, off)
+        ||  evsql_param_uint32(&params, 2, dirbuf_estimate(&dirop->dirbuf, 0))
+    )
+        SERROR(err = EIO);
+
+    // query
+    if (evsql_query_params(ctx->db, dirop->trans, sql, &params, dbfs_readdir_files_res, dirop) == NULL)
+        SERROR(err = EIO);
+
+    // good, wait
+    return;
+
+error:
+    _dbfs_dirop_fail(dirop);
+}
+
+/*
+ * "For every [successful] opendir call there will be exactly one releasedir call."
+ *
+ * The dirop may be in a failed state, in which case the request is failed with an error and the dirop free'd.
+ */
+void dbfs_releasedir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
+    struct dbfs *ctx = fuse_req_userdata(req);
+    struct dbfs_dirop *dirop = (struct dbfs_dirop *) (uintptr_t) fi->fh;
+    int err;
+
+    (void) ctx;
+    
+    assert(dirop);
+    assert(!dirop->req);
+    assert(dirop->ino == ino);
+    
+    // update to this req
+    dirop->req = req;
+
+    // fi is irrelevant, we don't touch the flags anyways
+    (void) fi;
+
+    // handle failed trans; err must be set here, because the error path below branches on it
+    if (!dirop->trans && (err = EIO))
+        ERROR("trans has failed");
+    
+    // log
+    INFO("[dbfs.releasedir %p:%p] ino=%lu, fi=%p : trans=%p", dirop, req, ino, fi, dirop->trans);
+    
+    // we must commit the transaction (although it was just SELECTs, no changes).
+    // Note that this might cause dbfs_dirop_error to be called, we can tell if that happened by looking at dirop->req
+    // or dirop->trans this means that we need to keep the dirop open when calling trans_commit, so that dirop_error
+    // doesn't free it out from underneath us.
+    if (evsql_trans_commit(dirop->trans))
+        SERROR(err = EIO);
+
+    // fall-through to cleanup
+    err = 0;
+
+error:
+    // the dirop is not open anymore and can be free'd:
+    // a) if we already caught an error
+    // b) if we get+send an error later on
+    // c) if we get+send the done/no-error later on
+    dirop->open = 0;
+
+    // did the commit/pre-commit-checks fail?
+    if (err) {
+        // a) the trans failed earlier (readdir), so we have a req but no trans
+        // b) the trans commit failed, dirop_error got called -> no req and no trans
+        // c) the trans commit failed, dirop_error did not get called -> have req and trans
+        // we either have a req (may or may not have trans), or we don't have a trans either
+        // i.e. there is no situation where we don't have a req but do have a trans
+
+        if (dirop->req)
+            _dbfs_dirop_fail(dirop);
+        else
+            assert(!dirop->trans);
+
+    } else {
+        // shouldn't slip by, dirop_done should not get called directly. Once it does, it will handle both.
+        assert(dirop->req);
+        assert(dirop->trans);
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dbfs/ops.h	Wed Oct 15 01:14:22 2008 +0300
@@ -0,0 +1,19 @@
+#ifndef DBFS_OPS_H
+#define DBFS_OPS_H
+/* Prototypes for the FUSE lowlevel operations of dbfs, grouped by the source file that defines them. */
+#include "../evfuse.h"
+
+/* dbfs.c: filesystem init and destroy */
+void dbfs_init (void *userdata, struct fuse_conn_info *conn);
+void dbfs_destroy (void *arg);
+
+/* core.c: name lookup and attributes */
+void dbfs_lookup (struct fuse_req *req, fuse_ino_t parent, const char *name);
+void dbfs_getattr (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+/* dirop.c: directory listing */
+void dbfs_opendir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi);
+void dbfs_readdir (struct fuse_req *req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi);
+void dbfs_releasedir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+#endif /* DBFS_OPS_H */
--- a/src/dirbuf.c	Mon Oct 13 02:27:59 2008 +0300
+++ b/src/dirbuf.c	Wed Oct 15 01:14:22 2008 +0300
@@ -6,7 +6,9 @@
 #include "lib/math.h"
 
 int dirbuf_init (struct dirbuf *buf, size_t req_size, off_t req_off) {
+    buf->buf = NULL;
     buf->len = req_size;
+    buf->off = 0;
     buf->req_off = req_off;
     
     DEBUG("\tdirbuf.init: req_size=%zu", req_size);
@@ -40,7 +42,7 @@
     size_t ent_size;
 
     DEBUG("\tdirbuf.add: req_off=%zu, buf->len=%zu, buf->off=%zu, ent_off=%zu, next_off=%zu, ent_name=`%s`, ent_ino=%lu, ent_mode=%07o",
-        req_off, buf->len, buf->off, ent_off, next_off, ent_name, ent_ino, ent_mode);
+        buf->req_off, buf->len, buf->off, ent_off, next_off, ent_name, ent_ino, ent_mode);
     
     // skip entries as needed
     if (ent_off < buf->req_off) 
--- a/src/evsql.h	Mon Oct 13 02:27:59 2008 +0300
+++ b/src/evsql.h	Wed Oct 15 01:14:22 2008 +0300
@@ -6,6 +6,7 @@
  */
 
 // XXX: libpq
+#include <stdint.h>
 #include <postgresql/libpq-fe.h>
 #include <event2/event.h>
 
@@ -189,6 +190,8 @@
  *
  * trans must be idle, just like for evsql_query.
  *
+ * done_fn will never be called directly, always via the event loop.
+ *
  * You cannot abort a COMMIT, calling trans_abort on trans after a succesful trans_commit is a FATAL error.
  */
 int evsql_trans_commit (struct evsql_trans *trans);
@@ -207,8 +210,6 @@
 // error string, meant to be called from evsql_trans_error_cb
 const char *evsql_trans_error (struct evsql_trans *trans);
 
-// commit the transaction, calling 
-
 /*
  * Param-building functions
  */
--- a/src/lib/error.h	Mon Oct 13 02:27:59 2008 +0300
+++ b/src/lib/error.h	Wed Oct 15 01:14:22 2008 +0300
@@ -8,7 +8,9 @@
 #define EERROR(_err, ...) do { eerr_func(__func__, (_err), __VA_ARGS__); goto error; } while (0)
 #define NERROR(...) do { err_func_nonl(__func__, __VA_ARGS__); goto error; } while (0)
 
+// XXX: replace with *err_func(...) + exit(EXIT_FAILURE)
 #define FATAL(...) err_func_exit(__func__, __VA_ARGS__)
 #define PFATAL(...) perr_func_exit(__func__, __VA_ARGS__)
+#define EFATAL(_err, ...) eerr_func_exit(__func__, (_err), __VA_ARGS__)
 
 #endif /* LIB_ERROR_H */
--- a/src/lib/log.h	Mon Oct 13 02:27:59 2008 +0300
+++ b/src/lib/log.h	Wed Oct 15 01:14:22 2008 +0300
@@ -25,6 +25,8 @@
         __attribute__ ((format (printf, 4, 5)))
         __attribute__ ((noreturn));
 
+static inline void debug_dummy (int dummy, ...) { /* no-op */ }
+
 enum _debug_level {
     DEBUG_FATAL,
     DEBUG_ERROR,
@@ -48,6 +50,7 @@
 #define perr_func(func, ...)        _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, 0,    __VA_ARGS__ )
 #define perr_func_exit(func, ...)   _generic_err_exit(  LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, 0,    __VA_ARGS__ )
 #define eerr_func(func, err, ...)   _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, err,  __VA_ARGS__ )
+#define eerr_func_exit(func,err,...) _generic_err_exit( LOG_DISPLAY_STDERR | LOG_DISPLAY_PERR,  func, err,  __VA_ARGS__ )
 #define debug(func, ...)            _generic_err(       LOG_DISPLAY_STDERR,                     func, 0,    __VA_ARGS__ )
 #define debug_nonl(func, ...)       _generic_err(       LOG_DISPLAY_STDERR | LOG_DISPLAY_NONL,  func, 0,    __VA_ARGS__ )
 
@@ -65,10 +68,10 @@
 #define DEBUGN(...) debug_nonl(__func__, __VA_ARGS__)
 #define DEBUGNF(...) debug_nonl(NULL, __VA_ARGS__)
 #else
-#define DEBUG(...) (void) (0)
-#define DEBUGF(...) (void) (0)
-#define DEBUGN(...) (void) (0)
-#define DEBUGNF(...) (void) (0)
+#define DEBUG(...) debug_dummy(0, __VA_ARGS__)
+#define DEBUGF(...) debug_dummy(0, __VA_ARGS__)
+#define DEBUGN(...) debug_dummy(0, __VA_ARGS__)
+#define DEBUGNF(...) debug_dummy(0, __VA_ARGS__)
 #endif
 
 // default is to enable INFO
--- a/src/lib/misc.h	Mon Oct 13 02:27:59 2008 +0300
+++ b/src/lib/misc.h	Wed Oct 15 01:14:22 2008 +0300
@@ -1,6 +1,7 @@
 #ifndef LIB_UTIL_H
 #define LIB_UTIL_H
 
+#include <string.h>
 #include <arpa/inet.h>
 
 /*