Implement exponential deepening search

author: Tavian Barnes <tavianator@tavianator.com> 2020-06-12 16:02:07 -0400
committer: Tavian Barnes <tavianator@tavianator.com> 2020-06-16 09:02:11 -0400
commit: bf063268ea9a11bc5413864626a4b945b1ecf80b (patch)
tree: 5b12dd6d045b6a7ea8df8e7845df9e7d2a675ba7
parent: 8cf40eeaf953b1c2f5c097623572948c4630ee39 (diff)
download: bfs-bf063268ea9a11bc5413864626a4b945b1ecf80b.tar.xz
6 files changed, 114 insertions, 31 deletions
diff --git a/Makefile b/Makefile
index 3844966..72abf89 100644
--- a/Makefile
+++ b/Makefile
@@ -135,7 +135,7 @@ tests/xtimegm: time.o tests/xtimegm.o
 %.o: %.c
 	$(CC) $(ALL_CFLAGS) -c $< -o $@
 
-check: check-trie check-xtimegm check-bfs check-dfs check-ids
+check: check-trie check-xtimegm check-bfs check-dfs check-ids check-eds
 
 check-trie: tests/trie
 	$<
diff --git a/RELEASES.md b/RELEASES.md
index efb10ef..e8e0b71 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -6,7 +6,7 @@
 
 **Unreleased**
 
-- New `-exclude <expression>` syntax to more easily and reliably filter out paths ([#8]).
+- [#8]: New `-exclude <expression>` syntax to more easily and reliably filter out paths.
   For example:
 
       bfs -name config -exclude -name .git
@@ -24,12 +24,17 @@
       # -exclude applies even to paths below the minimum depth:
       bfs -mindepth 3 -name config -exclude -name .git
 
-- `-nohidden` is now equivalent to `-exclude -hidden`.
+- [#30]: `-nohidden` is now equivalent to `-exclude -hidden`.
   This changes the behavior of command lines like
 
       bfs -type f -nohidden
 
-  to do what was intended ([#30]).
+  to do what was intended.
+
+- Optimized the iterative deepening (`-S ids`) implementation
+
+- Added a new search strategy: exponential deepening search (`-S eds`).
+  This strategy provides many of the benefits of iterative deepening, but much faster due to fewer re-traversals.
 
 - Fixed an optimizer bug that could skip `-empty`/`-xtype` if they didn't always lead to an action
 
diff --git a/bfs.1 b/bfs.1
index 20e1c77..4c78bde 100644
--- a/bfs.1
+++ b/bfs.1
@@ -126,20 +126,21 @@ Turn on a debugging flag (see
 .RS
 Enable optimization level
 .I N
-(default: 3)
+(default:
+.IR 3 ).
 .TP
-\fB\-O\fI0\fR
+.BI \-O 0
 Disable all optimizations.
 .TP
-\fB\-O\fI1\fR
+.BI \-O 1
 Basic logical simplifications.
 .TP
-\fB\-O\fI2\fR
+.BI \-O 2
 All
 .BI \-O 1
 optimizations, plus dead code elimination and data flow analysis.
 .TP
-\fB\-O\fI3\fR
+.BI \-O 3
 All
 .BI \-O 2
 optimizations, plus re-order expressions to reduce expected cost.
@@ -147,15 +148,34 @@ optimizations, plus re-order expressions to reduce expected cost.
 \fB\-O\fI4\fR/\fB\-O\fIfast\fR
 All optimizations, including aggressive optimizations that may alter the observed behavior in corner cases.
 .RE
+.PP
+\fB\-S \fIbfs\fR|\fIdfs\fR|\fIids\fR|\fIeds\fR
+.RS
+Choose the search strategy.
 .TP
-\fB\-S \fIbfs\fR|\fIdfs\fR|\fIids\fR
-Use
-.IR b readth- f irst/ d epth- f irst/ i terative
-.IR d eepening
-.IR s earch
-(default:
+.I bfs
+Breadth-first search (the default).
+.TP
+.I dfs
+Depth-first search.
+Uses less memory than breadth-first search, but is typically slower to return relevant results.
+.TP
+.I ids
+Iterative deepening search.
+Performs repeated depth-first searches with increasing depth limits.
+This gives results in the same order as breadth-first search, but with the reduced memory consumption of depth-first search.
+Tends to be very slow in practice, so use it only if you absolutely need breadth-first ordering, but
+.B \-S
+.I bfs
+consumes too much memory.
+.TP
+.I eds
+Exponential deepening search.
+A compromise between breadth- and depth-first search, which searches exponentially increasing depth ranges (e.g 0-1, 1-2, 2-4, 4-8, etc.).
+Provides many of the benefits of breadth-first search with depth-first's reduced memory consumption.
+Typically far faster than
 .B \-S
-.IR bfs ).
+.IR ids .
 .RE
 .SH OPERATORS
 .TP
diff --git a/bftw.c b/bftw.c
index 09c26aa..b5c50bb 100644
--- a/bftw.c
+++ b/bftw.c
@@ -1498,8 +1498,12 @@ struct bftw_ids_state {
 	void *ptr;
 	/** Which visit this search corresponds to. */
 	enum bftw_visit visit;
-	/** The current target depth. */
-	size_t depth;
+	/** Whether to override the bftw_visit. */
+	bool force_visit;
+	/** The current minimum depth (inclusive). */
+	size_t min_depth;
+	/** The current maximum depth (exclusive). */
+	size_t max_depth;
 	/** The set of pruned paths. */
 	struct trie pruned;
 	/** An error code to report. */
@@ -1514,18 +1518,20 @@ struct bftw_ids_state {
 static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) {
 	struct bftw_ids_state *state = ptr;
 
-	struct BFTW *mutbuf = (struct BFTW *)ftwbuf;
-	mutbuf->visit = state->visit;
+	if (state->force_visit) {
+		struct BFTW *mutbuf = (struct BFTW *)ftwbuf;
+		mutbuf->visit = state->visit;
+	}
 
 	if (ftwbuf->typeflag == BFTW_ERROR) {
-		if (state->depth - ftwbuf->depth <= 1) {
+		if (ftwbuf->depth + 1 >= state->min_depth) {
 			return state->delegate(ftwbuf, state->ptr);
 		} else {
 			return BFTW_PRUNE;
 		}
 	}
 
-	if (ftwbuf->depth < state->depth) {
+	if (ftwbuf->depth < state->min_depth) {
 		if (trie_find_str(&state->pruned, ftwbuf->path)) {
 			return BFTW_PRUNE;
 		} else {
@@ -1537,11 +1543,14 @@ static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr)
 		}
 	}
 
-	enum bftw_action ret = state->delegate(ftwbuf, state->ptr);
+	enum bftw_action ret = BFTW_CONTINUE;
+	if (ftwbuf->visit == state->visit) {
+		ret = state->delegate(ftwbuf, state->ptr);
+	}
 
 	switch (ret) {
 	case BFTW_CONTINUE:
-		if (ftwbuf->typeflag == BFTW_DIR) {
+		if (ftwbuf->typeflag == BFTW_DIR && ftwbuf->depth + 1 >= state->max_depth) {
 			state->bottom = false;
 			ret = BFTW_PRUNE;
 		}
@@ -1568,7 +1577,9 @@ static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *s
 	state->delegate = args->callback;
 	state->ptr = args->ptr;
 	state->visit = BFTW_PRE;
-	state->depth = 0;
+	state->force_visit = false;
+	state->min_depth = 0;
+	state->max_depth = 1;
 	trie_init(&state->pruned);
 	state->error = 0;
 	state->bottom = false;
@@ -1613,14 +1624,17 @@ static int bftw_ids(const struct bftw_args *args) {
 			state.quit = true;
 		}
 
-		++state.depth;
+		++state.min_depth;
+		++state.max_depth;
 	}
 
 	if (args->flags & BFTW_DEPTH) {
 		state.visit = BFTW_POST;
+		state.force_visit = true;
 
-		while (!state.quit && state.depth > 0) {
-			--state.depth;
+		while (!state.quit && state.min_depth > 0) {
+			--state.max_depth;
+			--state.min_depth;
 
 			if (bftw_auto(&ids_args) != 0) {
 				state.error = errno;
@@ -1632,6 +1646,39 @@ static int bftw_ids(const struct bftw_args *args) {
 	return bftw_ids_finish(&state);
 }
 
+/**
+ * Exponential deepening bftw() wrapper.
+ */
+static int bftw_eds(const struct bftw_args *args) {
+	struct bftw_ids_state state;
+	struct bftw_args ids_args;
+	bftw_ids_init(args, &state, &ids_args);
+
+	while (!state.quit && !state.bottom) {
+		state.bottom = true;
+
+		if (bftw_auto(&ids_args) != 0) {
+			state.error = errno;
+			state.quit = true;
+		}
+
+		state.min_depth = state.max_depth;
+		state.max_depth *= 2;
+	}
+
+	if (!state.quit && (args->flags & BFTW_DEPTH)) {
+		state.visit = BFTW_POST;
+		state.min_depth = 0;
+		ids_args.flags |= BFTW_DEPTH;
+
+		if (bftw_auto(&ids_args) != 0) {
+			state.error = errno;
+		}
+	}
+
+	return bftw_ids_finish(&state);
+}
+
 int bftw(const struct bftw_args *args) {
 	switch (args->strategy) {
 	case BFTW_BFS:
@@ -1640,6 +1687,8 @@ int bftw(const struct bftw_args *args) {
 		return bftw_batch(args);
 	case BFTW_IDS:
 		return bftw_ids(args);
+	case BFTW_EDS:
+		return bftw_eds(args);
 	}
 
 	errno = EINVAL;
diff --git a/bftw.h b/bftw.h
index c08544d..4aecaf7 100644
--- a/bftw.h
+++ b/bftw.h
@@ -202,6 +202,8 @@ enum bftw_strategy {
 	BFTW_DFS,
 	/** Iterative deepening search. */
 	BFTW_IDS,
+	/** Exponential deepening search. */
+	BFTW_EDS,
 };
 
 /**
diff --git a/parse.c b/parse.c
index 0faedf4..f990a69 100644
--- a/parse.c
+++ b/parse.c
@@ -2261,6 +2261,8 @@ static struct expr *parse_search_strategy(struct parser_state *state, int arg1,
 		cmdline->strategy = BFTW_DFS;
 	} else if (strcmp(arg, "ids") == 0) {
 		cmdline->strategy = BFTW_IDS;
+	} else if (strcmp(arg, "eds") == 0) {
+		cmdline->strategy = BFTW_EDS;
 	} else if (strcmp(arg, "help") == 0) {
 		state->just_info = true;
 		cfile = cmdline->cout;
@@ -2277,6 +2279,7 @@ list_strategies:
 	cfprintf(cfile, "  ${bld}bfs${rs}: breadth-first search\n");
 	cfprintf(cfile, "  ${bld}dfs${rs}: depth-first search\n");
 	cfprintf(cfile, "  ${bld}ids${rs}: iterative deepening search\n");
+	cfprintf(cfile, "  ${bld}eds${rs}: exponential deepening search\n");
 	return NULL;
 }
 
@@ -2657,9 +2660,10 @@ static struct expr *parse_help(struct parser_state *state, int arg1, int arg2) {
 	cfprintf(cout, "  ${cyn}-D${rs} ${bld}FLAG${rs}\n");
 	cfprintf(cout, "      Turn on a debugging flag (see ${cyn}-D${rs} ${bld}help${rs})\n");
 	cfprintf(cout, "  ${cyn}-O${bld}N${rs}\n");
-	cfprintf(cout, "      Enable optimization level ${bld}N${rs} (default: 3)\n");
-	cfprintf(cout, "  ${cyn}-S${rs} ${bld}bfs${rs}|${bld}dfs${rs}|${bld}ids${rs}\n");
-	cfprintf(cout, "      Use ${bld}b${rs}readth-${bld}f${rs}irst/${bld}d${rs}epth-${bld}f${rs}irst/${bld}i${rs}terative ${bld}d${rs}eepening ${bld}s${rs}earch (default: ${cyn}-S${rs} ${bld}bfs${rs})\n\n");
+	cfprintf(cout, "      Enable optimization level ${bld}N${rs} (default: ${bld}3${rs})\n");
+	cfprintf(cout, "  ${cyn}-S${rs} ${bld}bfs${rs}|${bld}dfs${rs}|${bld}ids${rs}|${bld}eds${rs}\n");
+	cfprintf(cout, "      Use ${bld}b${rs}readth-${bld}f${rs}irst/${bld}d${rs}epth-${bld}f${rs}irst/${bld}i${rs}terative/${bld}e${rs}xponential ${bld}d${rs}eepening ${bld}s${rs}earch\n");
+	cfprintf(cout, "      (default: ${cyn}-S${rs} ${bld}bfs${rs})\n\n");
 
 	cfprintf(cout, "${bld}Operators:${rs}\n\n");
 
@@ -3408,6 +3412,9 @@ void dump_cmdline(const struct cmdline *cmdline, enum debug_flags flag) {
 	case BFTW_IDS:
 		strategy = "ids";
 		break;
+	case BFTW_EDS:
+		strategy = "eds";
+		break;
 	}
 	assert(strategy);
 	cfprintf(cerr, "${cyn}-S${rs} ${bld}%s${rs} ", strategy);
author	Tavian Barnes <tavianator@tavianator.com>	2020-06-12 16:02:07 -0400
committer	Tavian Barnes <tavianator@tavianator.com>	2020-06-16 09:02:11 -0400
commit	bf063268ea9a11bc5413864626a4b945b1ecf80b (patch)
tree	5b12dd6d045b6a7ea8df8e7845df9e7d2a675ba7
parent	8cf40eeaf953b1c2f5c097623572948c4630ee39 (diff)
download	bfs-bf063268ea9a11bc5413864626a4b945b1ecf80b.tar.xz