src/dbfs/dirop.c
changeset 28 e944453ca924
child 29 5de62ca9a5aa
equal deleted inserted replaced
27:461be4cd34a3 28:e944453ca924
       
     1 
       
     2 #include <stdlib.h>
       
     3 #include <assert.h>
       
     4 
       
     5 #include "common.h"
       
     6 #include "ops.h"
       
     7 #include "../dirbuf.h"
       
     8 
       
     9 /*
       
    10  * Directory related functionality like opendir, readdir, releasedir
       
    11  */
       
    12 
       
    13 struct dbfs_dirop {
       
    14     struct fuse_file_info fi;
       
    15     struct fuse_req *req;
       
    16 
       
    17     struct evsql_trans *trans;
       
    18     
       
    19     // dir/parent dir inodes
       
    20     uint32_t ino, parent;
       
    21     
       
    22     // opendir has returned and releasedir hasn't been called yet
       
    23     int open;
       
    24 
       
    25     // for readdir
       
    26     struct dirbuf dirbuf;
       
    27 };
       
    28 
       
    29 /*
       
    30  * Free the dirop, aborting any in-progress transaction.
       
    31  *
       
    32  * The dirop must any oustanding request responded to first, must not be open, and must not have a transaction.
       
    33  *
       
    34  * The dirbuf will be released, and the dirop free'd.
       
    35  */
       
    36 static void _dbfs_dirop_free (struct dbfs_dirop *dirop) {
       
    37     assert(dirop);
       
    38     assert(!dirop->open);
       
    39     assert(!dirop->req);
       
    40     assert(!dirop->trans);
       
    41     
       
    42     // just release the dirbuf
       
    43     dirbuf_release(&dirop->dirbuf);
       
    44     
       
    45     // and then free the dirop
       
    46     free(dirop);
       
    47 }
       
    48 
       
    49 /*
       
    50  * This will handle backend failures during requests.
       
    51  *
       
    52  * 1) if we have a trans, abort it
       
    53  * 2) fail the req (mandatory)
       
    54  *
       
    55  * If the dirop is open, then we don't release it, but if it's not open, then the dirop will be free'd completely.
       
    56  *
       
    57  */
       
    58 static void _dbfs_dirop_fail (struct dbfs_dirop *dirop) {
       
    59     int err;
       
    60 
       
    61     assert(dirop->req);
       
    62     
       
    63     if (dirop->trans) {
       
    64         // abort the trans
       
    65         evsql_trans_abort(dirop->trans);
       
    66         
       
    67         dirop->trans = NULL;
       
    68     }
       
    69 
       
    70     // send an error reply
       
    71     if ((err = fuse_reply_err(dirop->req, err)))
       
    72         // XXX: handle these failures /somehow/, or requests will hang and interrupts might handle invalid dirops
       
    73         EFATAL(err, "dbfs.fail %p:%p dirop_fail: reply with fuse_reply_err", dirop, dirop->req);
       
    74    
       
    75     // drop the req
       
    76     dirop->req = NULL;
       
    77 
       
    78     // is it open?
       
    79     if (!dirop->open) {
       
    80         // no, we can free it now and then forget about the whole thing
       
    81         _dbfs_dirop_free(dirop);
       
    82 
       
    83     } else {
       
    84         // we need to wait for releasedir
       
    85 
       
    86     }
       
    87 }
       
    88 
       
    89 /*
       
    90  * Handle the results for the initial attribute lookup for the dir itself during opendir ops.
       
    91  */
       
    92 static void dbfs_opendir_info_res (const struct evsql_result_info *res, void *arg) {
       
    93     struct dbfs_dirop *dirop = arg;
       
    94     int err;
       
    95     
       
    96     assert(dirop->trans);
       
    97     assert(dirop->req);
       
    98     assert(!dirop->open);
       
    99    
       
   100     // check the results
       
   101     if ((err = _dbfs_check_res(res, 1, 2)))
       
   102         SERROR(err = (err ==  1 ? ENOENT : EIO));
       
   103 
       
   104     const char *type;
       
   105 
       
   106     // extract the data
       
   107     if (0
       
   108         ||  evsql_result_uint32(res, 0, 0, &dirop->parent,  1 ) // file_tree.parent
       
   109         ||  evsql_result_string(res, 0, 1, &type,           0 ) // inodes.type
       
   110     )
       
   111         SERROR(err = EIO);
       
   112 
       
   113     // is it a dir?
       
   114     if (_dbfs_mode(type) != S_IFDIR)
       
   115         EERROR(err = ENOTDIR, "wrong type: %s", type);
       
   116     
       
   117     INFO("[dbfs.opendir %p:%p] -> ino=%lu, parent=%lu, type=%s", dirop, dirop->req, (unsigned long int) dirop->ino, (unsigned long int) dirop->parent, type);
       
   118     
       
   119     // send the openddir reply
       
   120     if ((err = fuse_reply_open(dirop->req, &dirop->fi)))
       
   121         EERROR(err, "fuse_reply_open");
       
   122     
       
   123     // req is done
       
   124     dirop->req = NULL;
       
   125 
       
   126     // dirop is now open
       
   127     dirop->open = 1;
       
   128 
       
   129     // success, fallthrough for evsql_result_free
       
   130     err = 0;
       
   131 
       
   132 error:
       
   133     if (err)
       
   134         // fail it
       
   135         _dbfs_dirop_fail(dirop);
       
   136     
       
   137     // free
       
   138     evsql_result_free(res);
       
   139 }
       
   140 
       
   141 /*
       
   142  * The opendir transaction is ready for use. Query for the given dir's info
       
   143  */
       
   144 static void dbfs_dirop_ready (struct evsql_trans *trans, void *arg) {
       
   145     struct dbfs_dirop *dirop = arg;
       
   146     struct dbfs *ctx = fuse_req_userdata(dirop->req);
       
   147     int err;
       
   148     
       
   149     // XXX: unless we abort queries
       
   150     assert(trans == dirop->trans);
       
   151     assert(dirop->req);
       
   152     assert(!dirop->open);
       
   153 
       
   154     INFO("[dbfs.opendir %p:%p] -> trans=%p", dirop, dirop->req, trans);
       
   155 
       
   156     // remember the transaction
       
   157     dirop->trans = trans;
       
   158     
       
   159     // first fetch info about the dir itself
       
   160     const char *sql =
       
   161         "SELECT"
       
   162         " file_tree.parent, inodes.type"
       
   163         " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)"
       
   164         " WHERE file_tree.inode = $1::int4";
       
   165 
       
   166     static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
       
   167         EVSQL_PARAM ( UINT32 ),
       
   168 
       
   169         EVSQL_PARAMS_END
       
   170     };
       
   171 
       
   172     // build params
       
   173     if (0
       
   174         ||  evsql_param_uint32(&params, 0, dirop->ino)
       
   175     )
       
   176         SERROR(err = EIO);
       
   177         
       
   178     // query
       
   179     if (evsql_query_params(ctx->db, dirop->trans, sql, &params, dbfs_opendir_info_res, dirop) == NULL)
       
   180         SERROR(err = EIO);
       
   181 
       
   182     // ok, wait for the info results
       
   183     return;
       
   184 
       
   185 error:
       
   186     // fail it
       
   187     _dbfs_dirop_fail(dirop);
       
   188 }
       
   189 
       
   190 /*
       
   191  * The dirop trans was committed, i.e. releasedir has completed
       
   192  */
       
   193 static void dbfs_dirop_done (struct evsql_trans *trans, void *arg) {
       
   194     struct dbfs_dirop *dirop = arg;
       
   195     int err;
       
   196     
       
   197     assert(dirop->trans);
       
   198     assert(dirop->req);
       
   199     assert(!dirop->open);   // should not be considered as open anymore at this point, as errors should release
       
   200 
       
   201     INFO("[dbfs.releasedir %p:%p] -> OK", dirop, dirop->req);
       
   202 
       
   203     // forget trans
       
   204     dirop->trans = NULL;
       
   205     
       
   206     // just reply
       
   207     if ((err = fuse_reply_err(dirop->req, 0)))
       
   208         // XXX: handle these failures /somehow/, or requests will hang and interrupts might handle invalid dirops
       
   209         EFATAL(err, "[dbfs.releasedir %p:%p] dirop_done: reply with fuse_reply_err", dirop, dirop->req);
       
   210     
       
   211     // req is done
       
   212     dirop->req = NULL;
       
   213 
       
   214     // then we can just free dirop
       
   215     _dbfs_dirop_free(dirop);
       
   216 }
       
   217 
       
   218 /*
       
   219  * The dirop trans has failed, somehow, at some point, some where.
       
   220  *
       
   221  * This might happend during the opendir evsql_trans, during a readdir evsql_query, during the releasedir
       
   222  * evsql_trans_commit, or at any point in between.
       
   223  *
       
   224  * 1) loose the transaction
       
   225  * 2) if dirop has a req, we handle failing it
       
   226  */
       
   227 static void dbfs_dirop_error (struct evsql_trans *trans, void *arg) {
       
   228     struct dbfs_dirop *dirop = arg;
       
   229 
       
   230     INFO("[dbfs:dirop %p:%p] evsql transaction error: %s", dirop, dirop->req, evsql_trans_error(trans));
       
   231     
       
   232     // deassociate the trans
       
   233     dirop->trans = NULL;
       
   234     
       
   235     // if we were answering a req, error it out, and if the dirop isn't open, release it
       
   236     // if we didn't have a req outstanding, the dirop must be open, so we wouldn't free it in any case, and must wait
       
   237     // for the next readdir/releasedir to detect this and return an error reply
       
   238     if (dirop->req)
       
   239         _dbfs_dirop_fail(dirop);
       
   240     else
       
   241         assert(dirop->open);
       
   242 }
       
   243 
       
   244 /*
       
   245  * Handle opendir(), this means starting a new transaction, dbfs_dirop_ready/error will continue on from there.
       
   246  *
       
   247  * The contents of fi will be copied into the dirop, and will be used as the basis for the fuse_reply_open reply.
       
   248  */
       
   249 void dbfs_opendir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
       
   250     struct dbfs *ctx = fuse_req_userdata(req);
       
   251     struct dbfs_dirop *dirop = NULL;
       
   252     int err;
       
   253     
       
   254     // allocate it
       
   255     if ((dirop = calloc(1, sizeof(*dirop))) == NULL && (err = EIO))
       
   256         ERROR("calloc");
       
   257 
       
   258     INFO("[dbfs.opendir %p:%p] ino=%lu, fi=%p", dirop, req, ino, fi);
       
   259     
       
   260     // store the dirop
       
   261     // copy *fi since it's on the stack
       
   262     dirop->fi = *fi;
       
   263     dirop->fi.fh = (uint64_t) dirop;
       
   264     dirop->req = req;
       
   265     dirop->ino = ino;
       
   266 
       
   267     // start a new transaction
       
   268     if ((dirop->trans = evsql_trans(ctx->db, EVSQL_TRANS_SERIALIZABLE, dbfs_dirop_error, dbfs_dirop_ready, dbfs_dirop_done, dirop)) == NULL)
       
   269         SERROR(err = EIO);
       
   270     
       
   271     // XXX: handle interrupts
       
   272     
       
   273     // wait
       
   274     return;
       
   275 
       
   276 error:
       
   277     if (dirop) {
       
   278         // we can fail normally
       
   279         _dbfs_dirop_fail(dirop);
       
   280 
       
   281     } else {
       
   282         // must error out manually as we couldn't alloc the context
       
   283         if ((err = fuse_reply_err(req, err)))
       
   284             EWARNING(err, "fuse_reply_err");
       
   285     }
       
   286 }
       
   287 
       
   288 /*
       
   289  * Got the list of files for our readdir() request.
       
   290  *
       
   291  * Fill up the dirbuf, and then send the reply.
       
   292  *
       
   293  */
       
   294 static void dbfs_readdir_files_res (const struct evsql_result_info *res, void *arg) {
       
   295     struct dbfs_dirop *dirop = arg;
       
   296     int err;
       
   297     size_t row;
       
   298     
       
   299     assert(dirop->req);
       
   300     assert(dirop->trans);
       
   301     assert(dirop->open);
       
   302     
       
   303     // check the results
       
   304     if ((err = _dbfs_check_res(res, 0, 4)) < 0)
       
   305         SERROR(err = EIO);
       
   306         
       
   307     INFO("[dbfs.readdir %p:%p] -> files: res_rows=%zu", dirop, dirop->req, evsql_result_rows(res));
       
   308         
       
   309     // iterate over the rows
       
   310     for (row = 0; row < evsql_result_rows(res); row++) {
       
   311         uint32_t off, ino;
       
   312         const char *name, *type;
       
   313 
       
   314         // extract the data
       
   315         if (0
       
   316             ||  evsql_result_uint32(res, row, 0, &off,          0 ) // file_tree.offset
       
   317             ||  evsql_result_string(res, row, 1, &name,         0 ) // file_tree.name
       
   318             ||  evsql_result_uint32(res, row, 2, &ino,          0 ) // inodes.ino
       
   319             ||  evsql_result_string(res, row, 3, &type,         0 ) // inodes.type
       
   320         )
       
   321             SERROR(err = EIO);
       
   322         
       
   323         INFO("\t%zu: off=%lu+2, name=%s, ino=%lu, type=%s", row, (long unsigned int) off, name, (long unsigned int) ino, type);
       
   324 
       
   325         // add to the dirbuf
       
   326         // offsets are just offset + 2
       
   327         if ((err = dirbuf_add(dirop->req, &dirop->dirbuf, off + 2, off + 3, name, ino, _dbfs_mode(type))) < 0 && (err = EIO))
       
   328             ERROR("failed to add dirent for inode=%lu", (long unsigned int) ino);
       
   329         
       
   330         // stop if it's full
       
   331         if (err > 0)
       
   332             break;
       
   333     }
       
   334 
       
   335     // send it
       
   336     if ((err = dirbuf_done(dirop->req, &dirop->dirbuf)))
       
   337         EERROR(err, "failed to send buf");
       
   338 
       
   339     // req is done
       
   340     dirop->req = NULL;
       
   341     
       
   342     // good, fallthrough
       
   343     err = 0;
       
   344 
       
   345 error:
       
   346     if (err)
       
   347         _dbfs_dirop_fail(dirop);
       
   348 
       
   349     // free
       
   350     evsql_result_free(res);
       
   351 }
       
   352 
       
   353 /*
       
   354  * Handle a readdir request. This will execute a SQL query inside the transaction to get the files at the given offset,
       
   355  * and _dbfs_readdir_res will handle the results.
       
   356  *
       
   357  * If trans failed earlier, detect that and return an error.
       
   358  */
       
   359 void dbfs_readdir (struct fuse_req *req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
       
   360     struct dbfs *ctx = fuse_req_userdata(req);
       
   361     struct dbfs_dirop *dirop = (struct dbfs_dirop *) fi->fh;
       
   362     int err;
       
   363     
       
   364     assert(dirop);
       
   365     assert(!dirop->req);
       
   366     assert(dirop->open);
       
   367     assert(dirop->ino == ino);
       
   368     
       
   369     // store the new req
       
   370     dirop->req = req;
       
   371 
       
   372     // detect earlier failures
       
   373     if (!dirop->trans && (err = EIO))
       
   374         ERROR("dirop trans has failed");
       
   375     
       
   376     INFO("[dbfs.readdir %p:%p] ino=%lu, size=%zu, off=%zu, fi=%p : trans=%p", dirop, req, ino, size, off, fi, dirop->trans);
       
   377 
       
   378     // create the dirbuf
       
   379     if (dirbuf_init(&dirop->dirbuf, size, off))
       
   380         SERROR(err = EIO);
       
   381 
       
   382     // add . and ..
       
   383     // we set the next offset to 2, because all dirent offsets will be larger than that
       
   384     // assume that these two should *always* fit
       
   385     if ((err = (0
       
   386         ||  dirbuf_add(req, &dirop->dirbuf, 0, 1, ".",   dirop->ino,    S_IFDIR )
       
   387         ||  dirbuf_add(req, &dirop->dirbuf, 1, 2, "..",  
       
   388                         dirop->parent ? dirop->parent : dirop->ino,     S_IFDIR )
       
   389     )) && (err = EIO))
       
   390         ERROR("failed to add . and .. dirents");
       
   391 
       
   392     // select all relevant file entries
       
   393     const char *sql = 
       
   394         "SELECT"
       
   395         " file_tree.\"offset\", file_tree.name, inodes.ino, inodes.type"
       
   396         " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)"
       
   397         " WHERE file_tree.parent = $1::int4 AND file_tree.\"offset\" >= $2::int4"
       
   398         " LIMIT $3::int4";
       
   399 
       
   400     static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) {
       
   401         EVSQL_PARAM ( UINT32 ),
       
   402         EVSQL_PARAM ( UINT32 ),
       
   403         EVSQL_PARAM ( UINT32 ),
       
   404 
       
   405         EVSQL_PARAMS_END
       
   406     };
       
   407 
       
   408     // adjust offset to take . and .. into account
       
   409     if (off > 2)
       
   410         off -= 2;
       
   411     
       
   412     // build params
       
   413     if (0
       
   414         ||  evsql_param_uint32(&params, 0, dirop->ino)
       
   415         ||  evsql_param_uint32(&params, 1, off)
       
   416         ||  evsql_param_uint32(&params, 2, dirbuf_estimate(&dirop->dirbuf, 0))
       
   417     )
       
   418         SERROR(err = EIO);
       
   419 
       
   420     // query
       
   421     if (evsql_query_params(ctx->db, dirop->trans, sql, &params, dbfs_readdir_files_res, dirop) == NULL)
       
   422         SERROR(err = EIO);
       
   423 
       
   424     // good, wait
       
   425     return;
       
   426 
       
   427 error:
       
   428     _dbfs_dirop_fail(dirop);
       
   429 }
       
   430 
       
   431 /*
       
   432  * "For every [succesfull] opendir call there will be exactly one releasedir call."
       
   433  *
       
   434  * The dirop may be in a failed state.
       
   435  */
       
   436 void dbfs_releasedir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) {
       
   437     struct dbfs *ctx = fuse_req_userdata(req);
       
   438     struct dbfs_dirop *dirop = (struct dbfs_dirop *) fi->fh;
       
   439     int err;
       
   440 
       
   441     (void) ctx;
       
   442     
       
   443     assert(dirop);
       
   444     assert(!dirop->req);
       
   445     assert(dirop->ino == ino);
       
   446     
       
   447     // update to this req
       
   448     dirop->req = req;
       
   449 
       
   450     // fi is irrelevant, we don't touch the flags anyways
       
   451     (void) fi;
       
   452 
       
   453     // handle failed trans
       
   454     if (!dirop->trans)
       
   455         ERROR("trans has failed");
       
   456     
       
   457     // log
       
   458     INFO("[dbfs.releasedir %p:%p] ino=%lu, fi=%p : trans=%p", dirop, req, ino, fi, dirop->trans);
       
   459     
       
   460     // we must commit the transaction (although it was jut SELECTs, no changes).
       
   461     // Note that this might cause dbfs_dirop_error to be called, we can tell if that happaned by looking at dirop->req
       
   462     // or dirop->trans this means that we need to keep the dirop open when calling trans_commit, so that dirop_error
       
   463     // doesn't free it out from underneath us.
       
   464     if (evsql_trans_commit(dirop->trans))
       
   465         SERROR(err = EIO);
       
   466 
       
   467     // fall-through to cleanup
       
   468     err = 0;
       
   469 
       
   470 error:
       
   471     // the dirop is not open anymore and can be free'd:
       
   472     // a) if we already caught an error
       
   473     // b) if we get+send an error later on
       
   474     // c) if we get+send the done/no-error later on
       
   475     dirop->open = 0;
       
   476 
       
   477     // did the commit/pre-commit-checks fail?
       
   478     if (err) {
       
   479         // a) the trans failed earlier (readdir), so we have a req but no trans
       
   480         // b) the trans commit failed, dirop_error got called -> no req and no trans
       
   481         // c) the trans commit failed, dirop_error did not get called -> have req and trans
       
   482         // we either have a req (may or may not have trans), or we don't have a trans either
       
   483         // i.e. there is no situation where we don't have a req but do have a trans
       
   484 
       
   485         if (dirop->req)
       
   486             _dbfs_dirop_fail(dirop);
       
   487         else
       
   488             assert(!dirop->trans);
       
   489 
       
   490     } else {
       
   491         // shouldn't slip by, dirop_done should not get called directly. Once it does, it will handle both.
       
   492         assert(dirop->req);
       
   493         assert(dirop->trans);
       
   494     }
       
   495 }
       
   496