|
1 |
|
2 #include <stdlib.h> |
|
3 #include <assert.h> |
|
4 |
|
5 #include "common.h" |
|
6 #include "ops.h" |
|
7 #include "../dirbuf.h" |
|
8 |
|
9 /* |
|
10 * Directory related functionality like opendir, readdir, releasedir |
|
11 */ |
|
12 |
|
13 struct dbfs_dirop { |
|
14 struct fuse_file_info fi; |
|
15 struct fuse_req *req; |
|
16 |
|
17 struct evsql_trans *trans; |
|
18 |
|
19 // dir/parent dir inodes |
|
20 uint32_t ino, parent; |
|
21 |
|
22 // opendir has returned and releasedir hasn't been called yet |
|
23 int open; |
|
24 |
|
25 // for readdir |
|
26 struct dirbuf dirbuf; |
|
27 }; |
|
28 |
|
29 /* |
|
30 * Free the dirop, aborting any in-progress transaction. |
|
31 * |
|
32 * The dirop must any oustanding request responded to first, must not be open, and must not have a transaction. |
|
33 * |
|
34 * The dirbuf will be released, and the dirop free'd. |
|
35 */ |
|
36 static void _dbfs_dirop_free (struct dbfs_dirop *dirop) { |
|
37 assert(dirop); |
|
38 assert(!dirop->open); |
|
39 assert(!dirop->req); |
|
40 assert(!dirop->trans); |
|
41 |
|
42 // just release the dirbuf |
|
43 dirbuf_release(&dirop->dirbuf); |
|
44 |
|
45 // and then free the dirop |
|
46 free(dirop); |
|
47 } |
|
48 |
|
49 /* |
|
50 * This will handle backend failures during requests. |
|
51 * |
|
52 * 1) if we have a trans, abort it |
|
53 * 2) fail the req (mandatory) |
|
54 * |
|
55 * If the dirop is open, then we don't release it, but if it's not open, then the dirop will be free'd completely. |
|
56 * |
|
57 */ |
|
58 static void _dbfs_dirop_fail (struct dbfs_dirop *dirop) { |
|
59 int err; |
|
60 |
|
61 assert(dirop->req); |
|
62 |
|
63 if (dirop->trans) { |
|
64 // abort the trans |
|
65 evsql_trans_abort(dirop->trans); |
|
66 |
|
67 dirop->trans = NULL; |
|
68 } |
|
69 |
|
70 // send an error reply |
|
71 if ((err = fuse_reply_err(dirop->req, err))) |
|
72 // XXX: handle these failures /somehow/, or requests will hang and interrupts might handle invalid dirops |
|
73 EFATAL(err, "dbfs.fail %p:%p dirop_fail: reply with fuse_reply_err", dirop, dirop->req); |
|
74 |
|
75 // drop the req |
|
76 dirop->req = NULL; |
|
77 |
|
78 // is it open? |
|
79 if (!dirop->open) { |
|
80 // no, we can free it now and then forget about the whole thing |
|
81 _dbfs_dirop_free(dirop); |
|
82 |
|
83 } else { |
|
84 // we need to wait for releasedir |
|
85 |
|
86 } |
|
87 } |
|
88 |
|
89 /* |
|
90 * Handle the results for the initial attribute lookup for the dir itself during opendir ops. |
|
91 */ |
|
92 static void dbfs_opendir_info_res (const struct evsql_result_info *res, void *arg) { |
|
93 struct dbfs_dirop *dirop = arg; |
|
94 int err; |
|
95 |
|
96 assert(dirop->trans); |
|
97 assert(dirop->req); |
|
98 assert(!dirop->open); |
|
99 |
|
100 // check the results |
|
101 if ((err = _dbfs_check_res(res, 1, 2))) |
|
102 SERROR(err = (err == 1 ? ENOENT : EIO)); |
|
103 |
|
104 const char *type; |
|
105 |
|
106 // extract the data |
|
107 if (0 |
|
108 || evsql_result_uint32(res, 0, 0, &dirop->parent, 1 ) // file_tree.parent |
|
109 || evsql_result_string(res, 0, 1, &type, 0 ) // inodes.type |
|
110 ) |
|
111 SERROR(err = EIO); |
|
112 |
|
113 // is it a dir? |
|
114 if (_dbfs_mode(type) != S_IFDIR) |
|
115 EERROR(err = ENOTDIR, "wrong type: %s", type); |
|
116 |
|
117 INFO("[dbfs.opendir %p:%p] -> ino=%lu, parent=%lu, type=%s", dirop, dirop->req, (unsigned long int) dirop->ino, (unsigned long int) dirop->parent, type); |
|
118 |
|
119 // send the openddir reply |
|
120 if ((err = fuse_reply_open(dirop->req, &dirop->fi))) |
|
121 EERROR(err, "fuse_reply_open"); |
|
122 |
|
123 // req is done |
|
124 dirop->req = NULL; |
|
125 |
|
126 // dirop is now open |
|
127 dirop->open = 1; |
|
128 |
|
129 // success, fallthrough for evsql_result_free |
|
130 err = 0; |
|
131 |
|
132 error: |
|
133 if (err) |
|
134 // fail it |
|
135 _dbfs_dirop_fail(dirop); |
|
136 |
|
137 // free |
|
138 evsql_result_free(res); |
|
139 } |
|
140 |
|
141 /* |
|
142 * The opendir transaction is ready for use. Query for the given dir's info |
|
143 */ |
|
144 static void dbfs_dirop_ready (struct evsql_trans *trans, void *arg) { |
|
145 struct dbfs_dirop *dirop = arg; |
|
146 struct dbfs *ctx = fuse_req_userdata(dirop->req); |
|
147 int err; |
|
148 |
|
149 // XXX: unless we abort queries |
|
150 assert(trans == dirop->trans); |
|
151 assert(dirop->req); |
|
152 assert(!dirop->open); |
|
153 |
|
154 INFO("[dbfs.opendir %p:%p] -> trans=%p", dirop, dirop->req, trans); |
|
155 |
|
156 // remember the transaction |
|
157 dirop->trans = trans; |
|
158 |
|
159 // first fetch info about the dir itself |
|
160 const char *sql = |
|
161 "SELECT" |
|
162 " file_tree.parent, inodes.type" |
|
163 " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)" |
|
164 " WHERE file_tree.inode = $1::int4"; |
|
165 |
|
166 static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) { |
|
167 EVSQL_PARAM ( UINT32 ), |
|
168 |
|
169 EVSQL_PARAMS_END |
|
170 }; |
|
171 |
|
172 // build params |
|
173 if (0 |
|
174 || evsql_param_uint32(¶ms, 0, dirop->ino) |
|
175 ) |
|
176 SERROR(err = EIO); |
|
177 |
|
178 // query |
|
179 if (evsql_query_params(ctx->db, dirop->trans, sql, ¶ms, dbfs_opendir_info_res, dirop) == NULL) |
|
180 SERROR(err = EIO); |
|
181 |
|
182 // ok, wait for the info results |
|
183 return; |
|
184 |
|
185 error: |
|
186 // fail it |
|
187 _dbfs_dirop_fail(dirop); |
|
188 } |
|
189 |
|
190 /* |
|
191 * The dirop trans was committed, i.e. releasedir has completed |
|
192 */ |
|
193 static void dbfs_dirop_done (struct evsql_trans *trans, void *arg) { |
|
194 struct dbfs_dirop *dirop = arg; |
|
195 int err; |
|
196 |
|
197 assert(dirop->trans); |
|
198 assert(dirop->req); |
|
199 assert(!dirop->open); // should not be considered as open anymore at this point, as errors should release |
|
200 |
|
201 INFO("[dbfs.releasedir %p:%p] -> OK", dirop, dirop->req); |
|
202 |
|
203 // forget trans |
|
204 dirop->trans = NULL; |
|
205 |
|
206 // just reply |
|
207 if ((err = fuse_reply_err(dirop->req, 0))) |
|
208 // XXX: handle these failures /somehow/, or requests will hang and interrupts might handle invalid dirops |
|
209 EFATAL(err, "[dbfs.releasedir %p:%p] dirop_done: reply with fuse_reply_err", dirop, dirop->req); |
|
210 |
|
211 // req is done |
|
212 dirop->req = NULL; |
|
213 |
|
214 // then we can just free dirop |
|
215 _dbfs_dirop_free(dirop); |
|
216 } |
|
217 |
|
218 /* |
|
219 * The dirop trans has failed, somehow, at some point, some where. |
|
220 * |
|
221 * This might happend during the opendir evsql_trans, during a readdir evsql_query, during the releasedir |
|
222 * evsql_trans_commit, or at any point in between. |
|
223 * |
|
224 * 1) loose the transaction |
|
225 * 2) if dirop has a req, we handle failing it |
|
226 */ |
|
227 static void dbfs_dirop_error (struct evsql_trans *trans, void *arg) { |
|
228 struct dbfs_dirop *dirop = arg; |
|
229 |
|
230 INFO("[dbfs:dirop %p:%p] evsql transaction error: %s", dirop, dirop->req, evsql_trans_error(trans)); |
|
231 |
|
232 // deassociate the trans |
|
233 dirop->trans = NULL; |
|
234 |
|
235 // if we were answering a req, error it out, and if the dirop isn't open, release it |
|
236 // if we didn't have a req outstanding, the dirop must be open, so we wouldn't free it in any case, and must wait |
|
237 // for the next readdir/releasedir to detect this and return an error reply |
|
238 if (dirop->req) |
|
239 _dbfs_dirop_fail(dirop); |
|
240 else |
|
241 assert(dirop->open); |
|
242 } |
|
243 |
|
244 /* |
|
245 * Handle opendir(), this means starting a new transaction, dbfs_dirop_ready/error will continue on from there. |
|
246 * |
|
247 * The contents of fi will be copied into the dirop, and will be used as the basis for the fuse_reply_open reply. |
|
248 */ |
|
249 void dbfs_opendir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) { |
|
250 struct dbfs *ctx = fuse_req_userdata(req); |
|
251 struct dbfs_dirop *dirop = NULL; |
|
252 int err; |
|
253 |
|
254 // allocate it |
|
255 if ((dirop = calloc(1, sizeof(*dirop))) == NULL && (err = EIO)) |
|
256 ERROR("calloc"); |
|
257 |
|
258 INFO("[dbfs.opendir %p:%p] ino=%lu, fi=%p", dirop, req, ino, fi); |
|
259 |
|
260 // store the dirop |
|
261 // copy *fi since it's on the stack |
|
262 dirop->fi = *fi; |
|
263 dirop->fi.fh = (uint64_t) dirop; |
|
264 dirop->req = req; |
|
265 dirop->ino = ino; |
|
266 |
|
267 // start a new transaction |
|
268 if ((dirop->trans = evsql_trans(ctx->db, EVSQL_TRANS_SERIALIZABLE, dbfs_dirop_error, dbfs_dirop_ready, dbfs_dirop_done, dirop)) == NULL) |
|
269 SERROR(err = EIO); |
|
270 |
|
271 // XXX: handle interrupts |
|
272 |
|
273 // wait |
|
274 return; |
|
275 |
|
276 error: |
|
277 if (dirop) { |
|
278 // we can fail normally |
|
279 _dbfs_dirop_fail(dirop); |
|
280 |
|
281 } else { |
|
282 // must error out manually as we couldn't alloc the context |
|
283 if ((err = fuse_reply_err(req, err))) |
|
284 EWARNING(err, "fuse_reply_err"); |
|
285 } |
|
286 } |
|
287 |
|
288 /* |
|
289 * Got the list of files for our readdir() request. |
|
290 * |
|
291 * Fill up the dirbuf, and then send the reply. |
|
292 * |
|
293 */ |
|
294 static void dbfs_readdir_files_res (const struct evsql_result_info *res, void *arg) { |
|
295 struct dbfs_dirop *dirop = arg; |
|
296 int err; |
|
297 size_t row; |
|
298 |
|
299 assert(dirop->req); |
|
300 assert(dirop->trans); |
|
301 assert(dirop->open); |
|
302 |
|
303 // check the results |
|
304 if ((err = _dbfs_check_res(res, 0, 4)) < 0) |
|
305 SERROR(err = EIO); |
|
306 |
|
307 INFO("[dbfs.readdir %p:%p] -> files: res_rows=%zu", dirop, dirop->req, evsql_result_rows(res)); |
|
308 |
|
309 // iterate over the rows |
|
310 for (row = 0; row < evsql_result_rows(res); row++) { |
|
311 uint32_t off, ino; |
|
312 const char *name, *type; |
|
313 |
|
314 // extract the data |
|
315 if (0 |
|
316 || evsql_result_uint32(res, row, 0, &off, 0 ) // file_tree.offset |
|
317 || evsql_result_string(res, row, 1, &name, 0 ) // file_tree.name |
|
318 || evsql_result_uint32(res, row, 2, &ino, 0 ) // inodes.ino |
|
319 || evsql_result_string(res, row, 3, &type, 0 ) // inodes.type |
|
320 ) |
|
321 SERROR(err = EIO); |
|
322 |
|
323 INFO("\t%zu: off=%lu+2, name=%s, ino=%lu, type=%s", row, (long unsigned int) off, name, (long unsigned int) ino, type); |
|
324 |
|
325 // add to the dirbuf |
|
326 // offsets are just offset + 2 |
|
327 if ((err = dirbuf_add(dirop->req, &dirop->dirbuf, off + 2, off + 3, name, ino, _dbfs_mode(type))) < 0 && (err = EIO)) |
|
328 ERROR("failed to add dirent for inode=%lu", (long unsigned int) ino); |
|
329 |
|
330 // stop if it's full |
|
331 if (err > 0) |
|
332 break; |
|
333 } |
|
334 |
|
335 // send it |
|
336 if ((err = dirbuf_done(dirop->req, &dirop->dirbuf))) |
|
337 EERROR(err, "failed to send buf"); |
|
338 |
|
339 // req is done |
|
340 dirop->req = NULL; |
|
341 |
|
342 // good, fallthrough |
|
343 err = 0; |
|
344 |
|
345 error: |
|
346 if (err) |
|
347 _dbfs_dirop_fail(dirop); |
|
348 |
|
349 // free |
|
350 evsql_result_free(res); |
|
351 } |
|
352 |
|
353 /* |
|
354 * Handle a readdir request. This will execute a SQL query inside the transaction to get the files at the given offset, |
|
355 * and _dbfs_readdir_res will handle the results. |
|
356 * |
|
357 * If trans failed earlier, detect that and return an error. |
|
358 */ |
|
359 void dbfs_readdir (struct fuse_req *req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) { |
|
360 struct dbfs *ctx = fuse_req_userdata(req); |
|
361 struct dbfs_dirop *dirop = (struct dbfs_dirop *) fi->fh; |
|
362 int err; |
|
363 |
|
364 assert(dirop); |
|
365 assert(!dirop->req); |
|
366 assert(dirop->open); |
|
367 assert(dirop->ino == ino); |
|
368 |
|
369 // store the new req |
|
370 dirop->req = req; |
|
371 |
|
372 // detect earlier failures |
|
373 if (!dirop->trans && (err = EIO)) |
|
374 ERROR("dirop trans has failed"); |
|
375 |
|
376 INFO("[dbfs.readdir %p:%p] ino=%lu, size=%zu, off=%zu, fi=%p : trans=%p", dirop, req, ino, size, off, fi, dirop->trans); |
|
377 |
|
378 // create the dirbuf |
|
379 if (dirbuf_init(&dirop->dirbuf, size, off)) |
|
380 SERROR(err = EIO); |
|
381 |
|
382 // add . and .. |
|
383 // we set the next offset to 2, because all dirent offsets will be larger than that |
|
384 // assume that these two should *always* fit |
|
385 if ((err = (0 |
|
386 || dirbuf_add(req, &dirop->dirbuf, 0, 1, ".", dirop->ino, S_IFDIR ) |
|
387 || dirbuf_add(req, &dirop->dirbuf, 1, 2, "..", |
|
388 dirop->parent ? dirop->parent : dirop->ino, S_IFDIR ) |
|
389 )) && (err = EIO)) |
|
390 ERROR("failed to add . and .. dirents"); |
|
391 |
|
392 // select all relevant file entries |
|
393 const char *sql = |
|
394 "SELECT" |
|
395 " file_tree.\"offset\", file_tree.name, inodes.ino, inodes.type" |
|
396 " FROM file_tree LEFT OUTER JOIN inodes ON (file_tree.inode = inodes.ino)" |
|
397 " WHERE file_tree.parent = $1::int4 AND file_tree.\"offset\" >= $2::int4" |
|
398 " LIMIT $3::int4"; |
|
399 |
|
400 static struct evsql_query_params params = EVSQL_PARAMS(EVSQL_FMT_BINARY) { |
|
401 EVSQL_PARAM ( UINT32 ), |
|
402 EVSQL_PARAM ( UINT32 ), |
|
403 EVSQL_PARAM ( UINT32 ), |
|
404 |
|
405 EVSQL_PARAMS_END |
|
406 }; |
|
407 |
|
408 // adjust offset to take . and .. into account |
|
409 if (off > 2) |
|
410 off -= 2; |
|
411 |
|
412 // build params |
|
413 if (0 |
|
414 || evsql_param_uint32(¶ms, 0, dirop->ino) |
|
415 || evsql_param_uint32(¶ms, 1, off) |
|
416 || evsql_param_uint32(¶ms, 2, dirbuf_estimate(&dirop->dirbuf, 0)) |
|
417 ) |
|
418 SERROR(err = EIO); |
|
419 |
|
420 // query |
|
421 if (evsql_query_params(ctx->db, dirop->trans, sql, ¶ms, dbfs_readdir_files_res, dirop) == NULL) |
|
422 SERROR(err = EIO); |
|
423 |
|
424 // good, wait |
|
425 return; |
|
426 |
|
427 error: |
|
428 _dbfs_dirop_fail(dirop); |
|
429 } |
|
430 |
|
431 /* |
|
432 * "For every [succesfull] opendir call there will be exactly one releasedir call." |
|
433 * |
|
434 * The dirop may be in a failed state. |
|
435 */ |
|
436 void dbfs_releasedir (struct fuse_req *req, fuse_ino_t ino, struct fuse_file_info *fi) { |
|
437 struct dbfs *ctx = fuse_req_userdata(req); |
|
438 struct dbfs_dirop *dirop = (struct dbfs_dirop *) fi->fh; |
|
439 int err; |
|
440 |
|
441 (void) ctx; |
|
442 |
|
443 assert(dirop); |
|
444 assert(!dirop->req); |
|
445 assert(dirop->ino == ino); |
|
446 |
|
447 // update to this req |
|
448 dirop->req = req; |
|
449 |
|
450 // fi is irrelevant, we don't touch the flags anyways |
|
451 (void) fi; |
|
452 |
|
453 // handle failed trans |
|
454 if (!dirop->trans) |
|
455 ERROR("trans has failed"); |
|
456 |
|
457 // log |
|
458 INFO("[dbfs.releasedir %p:%p] ino=%lu, fi=%p : trans=%p", dirop, req, ino, fi, dirop->trans); |
|
459 |
|
460 // we must commit the transaction (although it was jut SELECTs, no changes). |
|
461 // Note that this might cause dbfs_dirop_error to be called, we can tell if that happaned by looking at dirop->req |
|
462 // or dirop->trans this means that we need to keep the dirop open when calling trans_commit, so that dirop_error |
|
463 // doesn't free it out from underneath us. |
|
464 if (evsql_trans_commit(dirop->trans)) |
|
465 SERROR(err = EIO); |
|
466 |
|
467 // fall-through to cleanup |
|
468 err = 0; |
|
469 |
|
470 error: |
|
471 // the dirop is not open anymore and can be free'd: |
|
472 // a) if we already caught an error |
|
473 // b) if we get+send an error later on |
|
474 // c) if we get+send the done/no-error later on |
|
475 dirop->open = 0; |
|
476 |
|
477 // did the commit/pre-commit-checks fail? |
|
478 if (err) { |
|
479 // a) the trans failed earlier (readdir), so we have a req but no trans |
|
480 // b) the trans commit failed, dirop_error got called -> no req and no trans |
|
481 // c) the trans commit failed, dirop_error did not get called -> have req and trans |
|
482 // we either have a req (may or may not have trans), or we don't have a trans either |
|
483 // i.e. there is no situation where we don't have a req but do have a trans |
|
484 |
|
485 if (dirop->req) |
|
486 _dbfs_dirop_fail(dirop); |
|
487 else |
|
488 assert(!dirop->trans); |
|
489 |
|
490 } else { |
|
491 // shouldn't slip by, dirop_done should not get called directly. Once it does, it will handle both. |
|
492 assert(dirop->req); |
|
493 assert(dirop->trans); |
|
494 } |
|
495 } |
|
496 |