From 352b9eaf4479d204c617c68744de9ba075078308 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Sat, 21 Mar 2020 11:48:06 -0400 Subject: Implement -s flag from FreeBSD find to sort results --- bfs.1 | 7 +++++ bftw.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++-- bftw.h | 2 ++ parse.c | 11 +++++++ tests.sh | 40 +++++++++++++++++++++++++ tests/test_S_bfs.out | 19 ++++++++++++ tests/test_S_dfs.out | 19 ++++++++++++ tests/test_S_ids.out | 19 ++++++++++++ tests/test_s.out | 11 +++++++ 9 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 tests/test_S_bfs.out create mode 100644 tests/test_S_dfs.out create mode 100644 tests/test_S_ids.out create mode 100644 tests/test_s.out diff --git a/bfs.1 b/bfs.1 index 5322f3c..64c03b2 100644 --- a/bfs.1 +++ b/bfs.1 @@ -101,6 +101,13 @@ names. Search in post-order (same as .BR \-depth ). .TP +.B \-s +Visit directory entries in sorted order. +The sorting takes place within each directory separately, which makes it different from +.B bfs ... | +.BR sort , +but still provides a deterministic ordering. +.TP .B \-x Don't descend into other mount points (same as \fB\-xdev\fR). .TP diff --git a/bftw.c b/bftw.c index f0225f6..35f086d 100644 --- a/bftw.c +++ b/bftw.c @@ -549,6 +549,67 @@ static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { return file; } +/** The split phase of mergesort. */ +static struct bftw_file **bftw_sort_split(struct bftw_file **head, struct bftw_file **tail) { + struct bftw_file **tortoise = head, **hare = head; + + while (*hare != *tail) { + tortoise = &(*tortoise)->next; + hare = &(*hare)->next; + if (*hare != *tail) { + hare = &(*hare)->next; + } + } + + return tortoise; +} + +/** The merge phase of mergesort. */ +static struct bftw_file **bftw_sort_merge(struct bftw_file **head, struct bftw_file **mid, struct bftw_file **tail) { + struct bftw_file *left = *head, *right = *mid, *end = *tail; + *mid = NULL; + *tail = NULL; + + while (left || right) { + struct bftw_file *next; + if (left && (!right || strcoll(left->name, right->name) <= 0)) { + next = left; + left = left->next; + } else { + next = right; + right = right->next; + } + + *head = next; + head = &next->next; + } + + *head = end; + return head; +} + +/** + * Sort a (sub-)list of files. + * + * @param head + * The head of the (sub-)list to sort. + * @param tail + * The tail of the (sub-)list to sort. + * @return + * The new tail of the (sub-)list. + */ +static struct bftw_file **bftw_sort_files(struct bftw_file **head, struct bftw_file **tail) { + struct bftw_file **mid = bftw_sort_split(head, tail); + if (*mid == *head || *mid == *tail) { + return tail; + } + + mid = bftw_sort_files(head, mid); + tail = bftw_sort_files(mid, tail); + + return bftw_sort_merge(head, mid, tail); +} + /** * A directory reader. */ @@ -635,6 +696,8 @@ struct bftw_state { struct bftw_cache cache; /** The queue of directories left to explore. */ struct bftw_queue queue; + /** The start of the current batch of files. */ + struct bftw_file **batch; /** The current path. */ char *path; @@ -672,6 +735,7 @@ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *arg } bftw_queue_init(&state->queue); + state->batch = NULL; state->path = dstralloc(0); if (!state->path) { @@ -1345,6 +1409,14 @@ static void bftw_batch_start(struct bftw_state *state) { if (state->strategy == BFTW_DFS) { state->queue.target = &state->queue.head; } + state->batch = state->queue.target; +} + +/** Finish adding a batch of files. */ +static void bftw_batch_finish(struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + state->queue.target = bftw_sort_files(state->batch, state->queue.target); + } } /** @@ -1362,6 +1434,7 @@ static int bftw_batch(const struct bftw_args *args) { goto done; } } + bftw_batch_finish(&state); while (bftw_pop(&state) > 0) { enum bftw_release_flags relflags = BFTW_VISIT_ALL; @@ -1384,6 +1457,7 @@ static int bftw_batch(const struct bftw_args *args) { goto done; } } + bftw_batch_finish(&state); if (bftw_release_reader(&state, relflags) == BFTW_STOP) { goto done; @@ -1499,7 +1573,7 @@ static int bftw_ids(const struct bftw_args *args) { while (ret == 0 && !state.quit && !state.bottom) { state.bottom = true; - ret = bftw_stream(&ids_args); + ret = args->flags & BFTW_SORT ? bftw_batch(&ids_args) : bftw_stream(&ids_args); ++state.depth; } @@ -1508,7 +1582,7 @@ static int bftw_ids(const struct bftw_args *args) { while (ret == 0 && !state.quit && state.depth > 0) { --state.depth; - ret = bftw_stream(&ids_args); + ret = args->flags & BFTW_SORT ? bftw_batch(&ids_args) : bftw_stream(&ids_args); } } @@ -1525,7 +1599,10 @@ static int bftw_ids(const struct bftw_args *args) { int bftw(const struct bftw_args *args) { switch (args->strategy) { case BFTW_BFS: - return bftw_stream(args); + if (!(args->flags & BFTW_SORT)) { + return bftw_stream(args); + } + // Fallthrough case BFTW_DFS: return bftw_batch(args); case BFTW_IDS: diff --git a/bftw.h b/bftw.h index 06fdedb..c08544d 100644 --- a/bftw.h +++ b/bftw.h @@ -188,6 +188,8 @@ enum bftw_flags { BFTW_MOUNT = 1 << 6, /** Skip the descendents of mount points. */ BFTW_XDEV = 1 << 7, + /** Sort directory entries before processing them. */ + BFTW_SORT = 1 << 8, }; /** diff --git a/parse.c b/parse.c index 5851430..8e52999 100644 --- a/parse.c +++ b/parse.c @@ -2220,6 +2220,14 @@ list_types: return NULL; } +/** + * Parse -s. + */ +static struct expr *parse_s(struct parser_state *state, int arg1, int arg2) { + state->cmdline->flags |= BFTW_SORT; + return parse_nullary_flag(state); +} + /** * Parse -samefile FILE. */ @@ -2651,6 +2659,8 @@ static struct expr *parse_help(struct parser_state *state, int arg1, int arg2) { cfprintf(cout, " Filter out files with non-${ex}xargs${rs}-safe names\n"); cfprintf(cout, " ${cyn}-d${rs}\n"); cfprintf(cout, " Search in post-order (same as ${blu}-depth${rs})\n"); + cfprintf(cout, " ${cyn}-s${rs}\n"); + cfprintf(cout, " Visit directory entries in sorted order\n"); cfprintf(cout, " ${cyn}-x${rs}\n"); cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs})\n"); @@ -2991,6 +3001,7 @@ static const struct table_entry parse_table[] = { {"-regex", parse_regex, 0}, {"-regextype", parse_regextype}, {"-rm", parse_delete}, + {"-s", parse_s}, {"-samefile", parse_samefile}, {"-since", parse_since, BFS_STAT_MTIME}, {"-size", parse_size}, diff --git a/tests.sh b/tests.sh index 711bbb6..4b0771e 100755 --- a/tests.sh +++ b/tests.sh @@ -272,6 +272,8 @@ bsd_tests=( test_f + test_s + test_double_dash test_flag_double_dash @@ -591,6 +593,12 @@ bfs_tests=( test_expr_flag_path test_expr_path_flag + # Flags + + test_S_bfs + test_S_dfs + test_S_ids + # Primaries test_color @@ -1745,6 +1753,16 @@ function test_f() { bfs_diff -f '-' -f '(' } +function test_s() { + invoke_bfs -s weirdnames -maxdepth 1 >"$TMP/test_s.out" + + if [ "$UPDATE" ]; then + cp {"$TMP","$TESTS"}/test_s.out + else + diff -u {"$TESTS","$TMP"}/test_s.out + fi +} + function test_hidden() { bfs_diff weirdnames -hidden } @@ -2568,6 +2586,28 @@ function test_help() { return 0 } +function test_S() { + invoke_bfs -S "$1" -s basic >"$TMP/test_S_$1.out" + + if [ "$UPDATE" ]; then + cp {"$TMP","$TESTS"}/"test_S_$1.out" + else + diff -u {"$TESTS","$TMP"}/"test_S_$1.out" + fi +} + +function test_S_bfs() { + test_S bfs +} + +function test_S_dfs() { + test_S dfs +} + +function test_S_ids() { + test_S ids +} + BOL= EOL='\n' diff --git a/tests/test_S_bfs.out b/tests/test_S_bfs.out new file mode 100644 index 0000000..bb3cd8d --- /dev/null +++ b/tests/test_S_bfs.out @@ -0,0 +1,19 @@ +basic +basic/a +basic/b +basic/c +basic/e +basic/g +basic/i +basic/j +basic/k +basic/l +basic/c/d +basic/e/f +basic/g/h +basic/j/foo +basic/k/foo +basic/l/foo +basic/k/foo/bar +basic/l/foo/bar +basic/l/foo/bar/baz diff --git a/tests/test_S_dfs.out b/tests/test_S_dfs.out new file mode 100644 index 0000000..a7ccfe4 --- /dev/null +++ b/tests/test_S_dfs.out @@ -0,0 +1,19 @@ +basic +basic/a +basic/b +basic/c +basic/c/d +basic/e +basic/e/f +basic/g +basic/g/h +basic/i +basic/j +basic/j/foo +basic/k +basic/k/foo +basic/k/foo/bar +basic/l +basic/l/foo +basic/l/foo/bar +basic/l/foo/bar/baz diff --git a/tests/test_S_ids.out b/tests/test_S_ids.out new file mode 100644 index 0000000..bb3cd8d --- /dev/null +++ b/tests/test_S_ids.out @@ -0,0 +1,19 @@ +basic +basic/a +basic/b +basic/c +basic/e +basic/g +basic/i +basic/j +basic/k +basic/l +basic/c/d +basic/e/f +basic/g/h +basic/j/foo +basic/k/foo +basic/l/foo +basic/k/foo/bar +basic/l/foo/bar +basic/l/foo/bar/baz diff --git a/tests/test_s.out b/tests/test_s.out new file mode 100644 index 0000000..e736cb5 --- /dev/null +++ b/tests/test_s.out @@ -0,0 +1,11 @@ +weirdnames +weirdnames/ +weirdnames/! +weirdnames/!- +weirdnames/( +weirdnames/(- +weirdnames/) +weirdnames/, +weirdnames/- +weirdnames/... +weirdnames/\ -- cgit v1.2.3