From b921fb2f43869ed12822ee99a80ccba9fd67c640 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Fri, 1 Mar 2019 08:50:49 -0500 Subject: Implement -unique Closes #48 --- bfs.1 | 5 +++++ cmdline.h | 2 ++ eval.c | 43 +++++++++++++++++++++++++++++++++++++++++++ parse.c | 26 +++++++++++++++++++++++--- tests.sh | 16 ++++++++++++++++ tests/test_L_unique.out | 1 + tests/test_L_unique_loops.out | 3 +++ tests/test_unique.out | 2 ++ 8 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/test_L_unique.out create mode 100644 tests/test_L_unique_loops.out create mode 100644 tests/test_unique.out diff --git a/bfs.1 b/bfs.1 index e33513e..a67c254 100644 --- a/bfs.1 +++ b/bfs.1 @@ -255,6 +255,11 @@ regexes (default: see .B \-regextype .IR help ). +.TP +.B \-unique +Skip any files that have already been seen. +Particularly useful along with +.BR \-L . .PP .B \-warn .br diff --git a/cmdline.h b/cmdline.h index 4a1ef3b..3a8c064 100644 --- a/cmdline.h +++ b/cmdline.h @@ -96,6 +96,8 @@ struct cmdline { bool xargs_safe; /** Whether to ignore deletions that race with bfs. */ bool ignore_races; + /** Whether to only return unique files. */ + bool unique; /** The command line expression. */ struct expr *expr; diff --git a/eval.c b/eval.c index b41ebf6..432adfc 100644 --- a/eval.c +++ b/eval.c @@ -29,6 +29,7 @@ #include "posix1e.h" #include "printf.h" #include "stat.h" +#include "trie.h" #include "util.h" #include #include @@ -1134,6 +1135,30 @@ bool eval_comma(const struct expr *expr, struct eval_state *state) { return eval_expr(expr->rhs, state); } +/** Check if we've seen a file before. */ +static bool eval_file_unique(struct eval_state *state, struct trie *seen) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + // Glue the device and inode numbers together as a unique ID + unsigned char id[sizeof(statbuf->dev) + sizeof(statbuf->ino)]; + memcpy(id, &statbuf->dev, sizeof(statbuf->dev)); + memcpy(id + sizeof(statbuf->dev), &statbuf->ino, sizeof(statbuf->ino)); + + int ret = trie_meminsert(seen, id, sizeof(id)); + if (ret > 0) { + state->action = BFTW_SKIP_SUBTREE; + return false; + } else if (ret < 0) { + eval_report_error(state); + return false; + } else { + return true; + } +} + /** * Dump the bftw_typeflag for -D search. */ @@ -1193,6 +1218,8 @@ static const char *dump_bftw_action(enum bftw_action action) { struct callback_args { /** The parsed command line. */ const struct cmdline *cmdline; + /** The set of seen files. */ + struct trie *seen; /** Eventual return value from eval_cmdline(). */ int ret; /** Whether to quit immediately. */ @@ -1228,6 +1255,12 @@ static enum bftw_action cmdline_callback(struct BFTW *ftwbuf, void *ptr) { goto done; } + if (cmdline->unique) { + if (!eval_file_unique(&state, args->seen)) { + goto done; + } + } + if (cmdline->xargs_safe && strpbrk(ftwbuf->path, " \t\n\'\"\\")) { args->ret = EXIT_FAILURE; eval_error(&state, "Path is not safe for xargs.\n"); @@ -1357,6 +1390,12 @@ int eval_cmdline(const struct cmdline *cmdline) { .quit = false, }; + struct trie seen; + if (cmdline->unique) { + trie_init(&seen); + args.seen = &seen; + } + for (struct root *root = cmdline->roots; root && !args.quit; root = root->next) { if (cmdline->debug & DEBUG_SEARCH) { fprintf(stderr, "bftw(\"%s\", cmdline_callback, %d, ", root->path, nopenfd); @@ -1378,5 +1417,9 @@ int eval_cmdline(const struct cmdline *cmdline) { dump_cmdline(cmdline, true); } + if (cmdline->unique) { + trie_destroy(&seen); + } + return args.ret; } diff --git a/parse.c b/parse.c index 43699a6..780f678 100644 --- a/parse.c +++ b/parse.c @@ -965,7 +965,7 @@ static struct expr *parse_optlevel(struct parser_state *state, int arg1, int arg */ static struct expr *parse_follow(struct parser_state *state, int flags, int option) { struct cmdline *cmdline = state->cmdline; - cmdline->flags &= ~(BFTW_COMFOLLOW | BFTW_LOGICAL | BFTW_DETECT_CYCLES); + cmdline->flags &= ~(BFTW_COMFOLLOW | BFTW_LOGICAL); cmdline->flags |= flags; if (option) { return parse_nullary_positional_option(state); @@ -1427,6 +1427,14 @@ fail: return NULL; } +/** + * Parse -unique. + */ +static struct expr *parse_unique(struct parser_state *state, int arg1, int arg2) { + state->cmdline->unique = true; + return parse_nullary_option(state); +} + /** * Parse -used N. */ @@ -2408,6 +2416,8 @@ static struct expr *parse_help(struct parser_state *state, int arg1, int arg2) { cfprintf(cout, " ${blu}-regextype${rs} ${bld}TYPE${rs}\n"); cfprintf(cout, " Use ${bld}TYPE${rs}-flavored regexes (default: ${bld}posix-basic${rs}; see ${blu}-regextype${rs}" " ${bld}help${rs})\n"); + cfprintf(cout, " ${blu}-unique{rs}\n"); + cfprintf(cout, " Skip any files that have already been seen\n"); cfprintf(cout, " ${blu}-warn${rs}\n"); cfprintf(cout, " ${blu}-nowarn${rs}\n"); cfprintf(cout, " Turn on or off warnings about the command line\n\n"); @@ -2590,7 +2600,7 @@ static const struct table_entry parse_table[] = { {"-O", true, parse_optlevel}, {"-P", false, parse_follow, 0, false}, {"-H", false, parse_follow, BFTW_COMFOLLOW, false}, - {"-L", false, parse_follow, BFTW_LOGICAL | BFTW_DETECT_CYCLES, false}, + {"-L", false, parse_follow, BFTW_LOGICAL, false}, {"-X", false, parse_xargs_safe}, {"-a"}, {"-acl", false, parse_acl}, @@ -2615,7 +2625,7 @@ static const struct table_entry parse_table[] = { {"-f", false, parse_f}, {"-false", false, parse_const, false}, {"-fls", false, parse_fls}, - {"-follow", false, parse_follow, BFTW_LOGICAL | BFTW_DETECT_CYCLES, true}, + {"-follow", false, parse_follow, BFTW_LOGICAL, true}, {"-fprint", false, parse_fprint}, {"-fprint0", false, parse_fprint0}, {"-fprintf", false, parse_fprintf}, @@ -2673,6 +2683,7 @@ static const struct table_entry parse_table[] = { {"-true", false, parse_const, true}, {"-type", false, parse_type, false}, {"-uid", false, parse_user}, + {"-unique", false, parse_unique}, {"-used", false, parse_used}, {"-user", false, parse_user}, {"-version", false, parse_version}, @@ -3061,6 +3072,9 @@ void dump_cmdline(const struct cmdline *cmdline, bool verbose) { if (cmdline->maxdepth != INT_MAX) { cfprintf(cerr, "${blu}-maxdepth${rs} ${bld}%d${rs} ", cmdline->maxdepth); } + if (cmdline->unique) { + cfprintf(cerr, "${blu}-unique${rs} "); + } dump_expr(cerr, cmdline->expr, verbose); @@ -3123,6 +3137,7 @@ struct cmdline *parse_cmdline(int argc, char *argv[]) { cmdline->debug = 0; cmdline->xargs_safe = false; cmdline->ignore_races = false; + cmdline->unique = false; cmdline->expr = &expr_true; cmdline->open_files = NULL; cmdline->nopen_files = 0; @@ -3193,6 +3208,11 @@ struct cmdline *parse_cmdline(int argc, char *argv[]) { } } + if ((cmdline->flags & BFTW_LOGICAL) && !cmdline->unique) { + // We need bftw() to detect cycles unless -unique does it for us + cmdline->flags |= BFTW_DETECT_CYCLES; + } + if (cmdline->debug & DEBUG_TREE) { dump_cmdline(cmdline, false); } diff --git a/tests.sh b/tests.sh index 189d324..f26e888 100755 --- a/tests.sh +++ b/tests.sh @@ -648,6 +648,10 @@ bfs_tests=( test_type_multi + test_unique + test_L_unique + test_L_unique_loops + test_xtype_multi test_xtype_reorder @@ -1979,6 +1983,18 @@ function test_closed_stderr() { ! invoke_bfs basic >&- 2>&- } +function test_unique() { + bfs_diff links/{file,symlink,hardlink} -unique +} + +function test_L_unique() { + bfs_diff -L links/{file,symlink,hardlink} -unique +} + +function test_L_unique_loops() { + bfs_diff -L loops/deeply/nested -unique +} + if [ -t 1 -a ! "$VERBOSE" ]; then in_place=yes fi diff --git a/tests/test_L_unique.out b/tests/test_L_unique.out new file mode 100644 index 0000000..c94c48e --- /dev/null +++ b/tests/test_L_unique.out @@ -0,0 +1 @@ +links/file diff --git a/tests/test_L_unique_loops.out b/tests/test_L_unique_loops.out new file mode 100644 index 0000000..dad0a98 --- /dev/null +++ b/tests/test_L_unique_loops.out @@ -0,0 +1,3 @@ +loops/deeply/nested +loops/deeply/nested/dir +loops/deeply/nested/loop diff --git a/tests/test_unique.out b/tests/test_unique.out new file mode 100644 index 0000000..289cbde --- /dev/null +++ b/tests/test_unique.out @@ -0,0 +1,2 @@ +links/file +links/symlink -- cgit v1.2.3