diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2019-03-01 08:50:49 -0500 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2019-03-01 08:51:44 -0500 |
commit | b921fb2f43869ed12822ee99a80ccba9fd67c640 (patch) | |
tree | 374a249116e60fbe38be6c098f36fcd8d1a193c9 | |
parent | b59a6f99a18246b08beb7997d9d2f59bac14d2de (diff) | |
download | bfs-b921fb2f43869ed12822ee99a80ccba9fd67c640.tar.xz |
Implement -unique
Closes #48
-rw-r--r-- | bfs.1 | 5 | ||||
-rw-r--r-- | cmdline.h | 2 | ||||
-rw-r--r-- | eval.c | 43 | ||||
-rw-r--r-- | parse.c | 26 | ||||
-rwxr-xr-x | tests.sh | 16 | ||||
-rw-r--r-- | tests/test_L_unique.out | 1 | ||||
-rw-r--r-- | tests/test_L_unique_loops.out | 3 | ||||
-rw-r--r-- | tests/test_unique.out | 2 |
8 files changed, 95 insertions, 3 deletions
@@ -255,6 +255,11 @@ regexes (default: see .B \-regextype .IR help ). +.TP +.B \-unique +Skip any files that have already been seen. +Particularly useful along with +.BR \-L . .PP .B \-warn .br @@ -96,6 +96,8 @@ struct cmdline { bool xargs_safe; /** Whether to ignore deletions that race with bfs. */ bool ignore_races; + /** Whether to only return unique files. */ + bool unique; /** The command line expression. */ struct expr *expr; @@ -29,6 +29,7 @@ #include "posix1e.h" #include "printf.h" #include "stat.h" +#include "trie.h" #include "util.h" #include <assert.h> #include <dirent.h> @@ -1134,6 +1135,30 @@ bool eval_comma(const struct expr *expr, struct eval_state *state) { return eval_expr(expr->rhs, state); } +/** Check if we've seen a file before. */ +static bool eval_file_unique(struct eval_state *state, struct trie *seen) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + // Glue the device and inode numbers together as a unique ID + unsigned char id[sizeof(statbuf->dev) + sizeof(statbuf->ino)]; + memcpy(id, &statbuf->dev, sizeof(statbuf->dev)); + memcpy(id + sizeof(statbuf->dev), &statbuf->ino, sizeof(statbuf->ino)); + + int ret = trie_meminsert(seen, id, sizeof(id)); + if (ret > 0) { + state->action = BFTW_SKIP_SUBTREE; + return false; + } else if (ret < 0) { + eval_report_error(state); + return false; + } else { + return true; + } +} + /** * Dump the bftw_typeflag for -D search. */ @@ -1193,6 +1218,8 @@ static const char *dump_bftw_action(enum bftw_action action) { struct callback_args { /** The parsed command line. */ const struct cmdline *cmdline; + /** The set of seen files. */ + struct trie *seen; /** Eventual return value from eval_cmdline(). */ int ret; /** Whether to quit immediately. */ @@ -1228,6 +1255,12 @@ static enum bftw_action cmdline_callback(struct BFTW *ftwbuf, void *ptr) { goto done; } + if (cmdline->unique) { + if (!eval_file_unique(&state, args->seen)) { + goto done; + } + } + if (cmdline->xargs_safe && strpbrk(ftwbuf->path, " \t\n\'\"\\")) { args->ret = EXIT_FAILURE; eval_error(&state, "Path is not safe for xargs.\n"); @@ -1357,6 +1390,12 @@ int eval_cmdline(const struct cmdline *cmdline) { .quit = false, }; + struct trie seen; + if (cmdline->unique) { + trie_init(&seen); + args.seen = &seen; + } + for (struct root *root = cmdline->roots; root && !args.quit; root = root->next) { if (cmdline->debug & DEBUG_SEARCH) { fprintf(stderr, "bftw(\"%s\", cmdline_callback, %d, ", root->path, nopenfd); @@ -1378,5 +1417,9 @@ int eval_cmdline(const struct cmdline *cmdline) { dump_cmdline(cmdline, true); } + if (cmdline->unique) { + trie_destroy(&seen); + } + return args.ret; } @@ -965,7 +965,7 @@ static struct expr *parse_optlevel(struct parser_state *state, int arg1, int arg */ static struct expr *parse_follow(struct parser_state *state, int flags, int option) { struct cmdline *cmdline = state->cmdline; - cmdline->flags &= ~(BFTW_COMFOLLOW | BFTW_LOGICAL | BFTW_DETECT_CYCLES); + cmdline->flags &= ~(BFTW_COMFOLLOW | BFTW_LOGICAL); cmdline->flags |= flags; if (option) { return parse_nullary_positional_option(state); @@ -1428,6 +1428,14 @@ fail: } /** + * Parse -unique. + */ +static struct expr *parse_unique(struct parser_state *state, int arg1, int arg2) { + state->cmdline->unique = true; + return parse_nullary_option(state); +} + +/** * Parse -used N. */ static struct expr *parse_used(struct parser_state *state, int arg1, int arg2) { @@ -2408,6 +2416,8 @@ static struct expr *parse_help(struct parser_state *state, int arg1, int arg2) { cfprintf(cout, " ${blu}-regextype${rs} ${bld}TYPE${rs}\n"); cfprintf(cout, " Use ${bld}TYPE${rs}-flavored regexes (default: ${bld}posix-basic${rs}; see ${blu}-regextype${rs}" " ${bld}help${rs})\n"); + cfprintf(cout, " ${blu}-unique{rs}\n"); + cfprintf(cout, " Skip any files that have already been seen\n"); cfprintf(cout, " ${blu}-warn${rs}\n"); cfprintf(cout, " ${blu}-nowarn${rs}\n"); cfprintf(cout, " Turn on or off warnings about the command line\n\n"); @@ -2590,7 +2600,7 @@ static const struct table_entry parse_table[] = { {"-O", true, parse_optlevel}, {"-P", false, parse_follow, 0, false}, {"-H", false, parse_follow, BFTW_COMFOLLOW, false}, - {"-L", false, parse_follow, BFTW_LOGICAL | BFTW_DETECT_CYCLES, false}, + {"-L", false, parse_follow, BFTW_LOGICAL, false}, {"-X", false, parse_xargs_safe}, {"-a"}, {"-acl", false, parse_acl}, @@ -2615,7 +2625,7 @@ static const struct table_entry parse_table[] = { {"-f", false, parse_f}, {"-false", false, parse_const, false}, {"-fls", false, parse_fls}, - {"-follow", false, parse_follow, BFTW_LOGICAL | BFTW_DETECT_CYCLES, true}, + {"-follow", false, parse_follow, BFTW_LOGICAL, true}, {"-fprint", false, parse_fprint}, {"-fprint0", false, parse_fprint0}, {"-fprintf", false, parse_fprintf}, @@ -2673,6 +2683,7 @@ static const struct table_entry parse_table[] = { {"-true", false, parse_const, true}, {"-type", false, parse_type, false}, {"-uid", false, parse_user}, + {"-unique", false, parse_unique}, {"-used", false, parse_used}, {"-user", false, parse_user}, {"-version", false, parse_version}, @@ -3061,6 +3072,9 @@ void dump_cmdline(const struct cmdline *cmdline, bool verbose) { if (cmdline->maxdepth != INT_MAX) { cfprintf(cerr, "${blu}-maxdepth${rs} ${bld}%d${rs} ", cmdline->maxdepth); } + if (cmdline->unique) { + cfprintf(cerr, "${blu}-unique${rs} "); + } dump_expr(cerr, cmdline->expr, verbose); @@ -3123,6 +3137,7 @@ struct cmdline *parse_cmdline(int argc, char *argv[]) { cmdline->debug = 0; cmdline->xargs_safe = false; cmdline->ignore_races = false; + cmdline->unique = false; cmdline->expr = &expr_true; cmdline->open_files = NULL; cmdline->nopen_files = 0; @@ -3193,6 +3208,11 @@ struct cmdline *parse_cmdline(int argc, char *argv[]) { } } + if ((cmdline->flags & BFTW_LOGICAL) && !cmdline->unique) { + // We need bftw() to detect cycles unless -unique does it for us + cmdline->flags |= BFTW_DETECT_CYCLES; + } + if (cmdline->debug & DEBUG_TREE) { dump_cmdline(cmdline, false); } @@ -648,6 +648,10 @@ bfs_tests=( test_type_multi + test_unique + test_L_unique + test_L_unique_loops + test_xtype_multi test_xtype_reorder @@ -1979,6 +1983,18 @@ function test_closed_stderr() { ! invoke_bfs basic >&- 2>&- } +function test_unique() { + bfs_diff links/{file,symlink,hardlink} -unique +} + +function test_L_unique() { + bfs_diff -L links/{file,symlink,hardlink} -unique +} + +function test_L_unique_loops() { + bfs_diff -L loops/deeply/nested -unique +} + if [ -t 1 -a ! "$VERBOSE" ]; then in_place=yes fi diff --git a/tests/test_L_unique.out b/tests/test_L_unique.out new file mode 100644 index 0000000..c94c48e --- /dev/null +++ b/tests/test_L_unique.out @@ -0,0 +1 @@ +links/file diff --git a/tests/test_L_unique_loops.out b/tests/test_L_unique_loops.out new file mode 100644 index 0000000..dad0a98 --- /dev/null +++ b/tests/test_L_unique_loops.out @@ -0,0 +1,3 @@ +loops/deeply/nested +loops/deeply/nested/dir +loops/deeply/nested/loop diff --git a/tests/test_unique.out b/tests/test_unique.out new file mode 100644 index 0000000..289cbde --- /dev/null +++ b/tests/test_unique.out @@ -0,0 +1,2 @@ +links/file +links/symlink |