summaryrefslogtreecommitdiffstats
path: root/src/opt.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/opt.c')
-rw-r--r--src/opt.c2299
1 files changed, 2299 insertions, 0 deletions
diff --git a/src/opt.c b/src/opt.c
new file mode 100644
index 0000000..883d598
--- /dev/null
+++ b/src/opt.c
@@ -0,0 +1,2299 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * The expression optimizer. Different optimization levels are supported:
+ *
+ * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo.
+ *
+ * -O2: dead code elimination and data flow analysis. struct df_domain is used
+ * to record data flow facts that are true at various points of evaluation.
+ * Specifically, struct df_domain records the state before an expression is
+ * evaluated (opt->before), and after an expression returns true
+ * (opt->after_true) or false (opt->after_false). Additionally, opt->impure
+ * records the possible state before any expression with side effects is
+ * evaluated.
+ *
+ * -O3: expression re-ordering to reduce expected cost. In an expression like
+ * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can
+ * be re-ordered to (-bar -and -foo). This is profitable if the expected cost
+ * is lower for the re-ordered expression, for example if -foo is very slow or
+ * -bar is likely to return false.
+ *
+ * -O4/-Ofast: aggressive optimizations that may affect correctness in corner
+ * cases. The main effect is to use opt->impure to determine if any side-
+ * effects are reachable at all, skipping the traversal if not.
+ */
+
+#include "prelude.h"
+#include "opt.h"
+#include "bftw.h"
+#include "bit.h"
+#include "color.h"
+#include "ctx.h"
+#include "diag.h"
+#include "dir.h"
+#include "eval.h"
+#include "exec.h"
+#include "expr.h"
+#include "list.h"
+#include "pwcache.h"
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static char *fake_and_arg = "-and";
+static char *fake_or_arg = "-or";
+static char *fake_not_arg = "-not";
+
+/**
+ * The data flow domain for predicates.
+ */
+enum df_pred {
+ /** The bottom state (unreachable). */
+ PRED_BOTTOM = 0,
+ /** The predicate is known to be false. */
+ PRED_FALSE = 1 << false,
+ /** The predicate is known to be true. */
+ PRED_TRUE = 1 << true,
+ /** The top state (unknown). */
+ PRED_TOP = PRED_FALSE | PRED_TRUE,
+};
+
+/** Make a predicate known. */
+static void constrain_pred(enum df_pred *pred, bool value) {
+ *pred &= 1 << value;
+}
+
+/** Compute the join (union) of two predicates. */
+static void pred_join(enum df_pred *dest, enum df_pred src) {
+ *dest |= src;
+}
+
+/**
+ * Types of predicates we track.
+ */
+enum pred_type {
+ /** -readable */
+ READABLE_PRED,
+ /** -writable */
+ WRITABLE_PRED,
+ /** -executable */
+ EXECUTABLE_PRED,
+ /** -acl */
+ ACL_PRED,
+ /** -capable */
+ CAPABLE_PRED,
+ /** -empty */
+ EMPTY_PRED,
+ /** -hidden */
+ HIDDEN_PRED,
+ /** -nogroup */
+ NOGROUP_PRED,
+ /** -nouser */
+ NOUSER_PRED,
+ /** -sparse */
+ SPARSE_PRED,
+ /** -xattr */
+ XATTR_PRED,
+ /** The number of pred_types. */
+ PRED_TYPES,
+};
+
+/** Get the name of a predicate type. */
+static const char *pred_type_name(enum pred_type type) {
+ switch (type) {
+ case READABLE_PRED:
+ return "-readable";
+ case WRITABLE_PRED:
+ return "-writable";
+ case EXECUTABLE_PRED:
+ return "-executable";
+ case ACL_PRED:
+ return "-acl";
+ case CAPABLE_PRED:
+ return "-capable";
+ case EMPTY_PRED:
+ return "-empty";
+ case HIDDEN_PRED:
+ return "-hidden";
+ case NOGROUP_PRED:
+ return "-nogroup";
+ case NOUSER_PRED:
+ return "-nouser";
+ case SPARSE_PRED:
+ return "-sparse";
+ case XATTR_PRED:
+ return "-xattr";
+
+ case PRED_TYPES:
+ break;
+ }
+
+ bfs_bug("Unknown predicate %d", (int)type);
+ return "???";
+}
+
+/**
+ * A contrained integer range.
+ */
+struct df_range {
+ /** The (inclusive) minimum value. */
+ long long min;
+ /** The (inclusive) maximum value. */
+ long long max;
+};
+
+/** Initialize an empty range. */
+static void range_init_bottom(struct df_range *range) {
+ range->min = LLONG_MAX;
+ range->max = LLONG_MIN;
+}
+
+/** Check if a range is empty. */
+static bool range_is_bottom(const struct df_range *range) {
+ return range->min > range->max;
+}
+
+/** Initialize a full range. */
+static void range_init_top(struct df_range *range) {
+ // All ranges we currently track are non-negative
+ range->min = 0;
+ range->max = LLONG_MAX;
+}
+
+/** Check for an infinite range. */
+static bool range_is_top(const struct df_range *range) {
+ return range->min == 0 && range->max == LLONG_MAX;
+}
+
+/** Compute the minimum of two values. */
+static long long min_value(long long a, long long b) {
+ if (a < b) {
+ return a;
+ } else {
+ return b;
+ }
+}
+
+/** Compute the maximum of two values. */
+static long long max_value(long long a, long long b) {
+ if (a > b) {
+ return a;
+ } else {
+ return b;
+ }
+}
+
+/** Constrain the minimum of a range. */
+static void constrain_min(struct df_range *range, long long value) {
+ range->min = max_value(range->min, value);
+}
+
+/** Contrain the maximum of a range. */
+static void constrain_max(struct df_range *range, long long value) {
+ range->max = min_value(range->max, value);
+}
+
+/** Remove a single value from a range. */
+static void range_remove(struct df_range *range, long long value) {
+ if (range->min == value) {
+ if (range->min == LLONG_MAX) {
+ range->max = LLONG_MIN;
+ } else {
+ ++range->min;
+ }
+ }
+
+ if (range->max == value) {
+ if (range->max == LLONG_MIN) {
+ range->min = LLONG_MAX;
+ } else {
+ --range->max;
+ }
+ }
+}
+
+/** Compute the union of two ranges. */
+static void range_join(struct df_range *dest, const struct df_range *src) {
+ dest->min = min_value(dest->min, src->min);
+ dest->max = max_value(dest->max, src->max);
+}
+
+/**
+ * Types of ranges we track.
+ */
+enum range_type {
+ /** Search tree depth. */
+ DEPTH_RANGE,
+ /** Group ID. */
+ GID_RANGE,
+ /** Inode number. */
+ INUM_RANGE,
+ /** Hard link count. */
+ LINKS_RANGE,
+ /** File size. */
+ SIZE_RANGE,
+ /** User ID. */
+ UID_RANGE,
+ /** The number of range_types. */
+ RANGE_TYPES,
+};
+
+/** Get the name of a range type. */
+static const char *range_type_name(enum range_type type) {
+ switch (type) {
+ case DEPTH_RANGE:
+ return "-depth";
+ case GID_RANGE:
+ return "-gid";
+ case INUM_RANGE:
+ return "-inum";
+ case LINKS_RANGE:
+ return "-links";
+ case SIZE_RANGE:
+ return "-size";
+ case UID_RANGE:
+ return "-uid";
+
+ case RANGE_TYPES:
+ break;
+ }
+
+ bfs_bug("Unknown range %d", (int)type);
+ return "???";
+}
+
+/**
+ * The data flow analysis domain.
+ */
+struct df_domain {
+ /** The predicates we track. */
+ enum df_pred preds[PRED_TYPES];
+
+ /** The value ranges we track. */
+ struct df_range ranges[RANGE_TYPES];
+
+ /** Bitmask of possible -types. */
+ unsigned int types;
+ /** Bitmask of possible -xtypes. */
+ unsigned int xtypes;
+};
+
+/** Set a data flow value to bottom. */
+static void df_init_bottom(struct df_domain *value) {
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ value->preds[i] = PRED_BOTTOM;
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ range_init_bottom(&value->ranges[i]);
+ }
+
+ value->types = 0;
+ value->xtypes = 0;
+}
+
+/** Determine whether a fact set is impossible. */
+static bool df_is_bottom(const struct df_domain *value) {
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ if (range_is_bottom(&value->ranges[i])) {
+ return true;
+ }
+ }
+
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ if (value->preds[i] == PRED_BOTTOM) {
+ return true;
+ }
+ }
+
+ if (!value->types || !value->xtypes) {
+ return true;
+ }
+
+ return false;
+}
+
+/** Initialize some data flow value. */
+static void df_init_top(struct df_domain *value) {
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ value->preds[i] = PRED_TOP;
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ range_init_top(&value->ranges[i]);
+ }
+
+ value->types = ~0;
+ value->xtypes = ~0;
+}
+
+/** Check for the top element. */
+static bool df_is_top(const struct df_domain *value) {
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ if (value->preds[i] != PRED_TOP) {
+ return false;
+ }
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ if (!range_is_top(&value->ranges[i])) {
+ return false;
+ }
+ }
+
+ if (value->types != ~0U) {
+ return false;
+ }
+
+ if (value->xtypes != ~0U) {
+ return false;
+ }
+
+ return true;
+}
+
+/** Compute the union of two fact sets. */
+static void df_join(struct df_domain *dest, const struct df_domain *src) {
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ pred_join(&dest->preds[i], src->preds[i]);
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ range_join(&dest->ranges[i], &src->ranges[i]);
+ }
+
+ dest->types |= src->types;
+ dest->xtypes |= src->xtypes;
+}
+
+/**
+ * Optimizer state.
+ */
+struct bfs_opt {
+ /** The context we're optimizing. */
+ struct bfs_ctx *ctx;
+ /** Optimization level (ctx->optlevel). */
+ int level;
+ /** Recursion depth. */
+ int depth;
+
+ /** Whether to produce warnings. */
+ bool warn;
+ /** Whether the result of this expression is ignored. */
+ bool ignore_result;
+
+ /** Data flow state before this expression is evaluated. */
+ struct df_domain before;
+ /** Data flow state after this expression returns true. */
+ struct df_domain after_true;
+ /** Data flow state after this expression returns false. */
+ struct df_domain after_false;
+ /** Data flow state before any side-effecting expressions are evaluated. */
+ struct df_domain *impure;
+};
+
+/** Log an optimization. */
+attr(printf(2, 3))
+static bool opt_debug(struct bfs_opt *opt, const char *format, ...) {
+ if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) {
+ for (int i = 0; i < opt->depth; ++i) {
+ cfprintf(opt->ctx->cerr, "│ ");
+ }
+
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/** Log a recursive call. */
+attr(printf(2, 3))
+static bool opt_enter(struct bfs_opt *opt, const char *format, ...) {
+ int depth = opt->depth;
+ if (depth > 0) {
+ --opt->depth;
+ }
+
+ bool debug = opt_debug(opt, "%s", depth > 0 ? "├─╮ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+
+ opt->depth = depth + 1;
+ return debug;
+}
+
+/** Log a recursive return. */
+attr(printf(2, 3))
+static bool opt_leave(struct bfs_opt *opt, const char *format, ...) {
+ bool debug = false;
+ int depth = opt->depth;
+
+ if (format) {
+ if (depth > 1) {
+ opt->depth -= 2;
+ }
+
+ debug = opt_debug(opt, "%s", depth > 1 ? "├─╯ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+ }
+
+ opt->depth = depth - 1;
+ return debug;
+}
+
+/** Log a shallow visit. */
+attr(printf(2, 3))
+static bool opt_visit(struct bfs_opt *opt, const char *format, ...) {
+ int depth = opt->depth;
+ if (depth > 0) {
+ --opt->depth;
+ }
+
+ bool debug = opt_debug(opt, "%s", depth > 0 ? "├─◯ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+
+ opt->depth = depth;
+ return debug;
+}
+
+/** Log the deletion of an expression. */
+attr(printf(2, 3))
+static bool opt_delete(struct bfs_opt *opt, const char *format, ...) {
+ int depth = opt->depth;
+
+ if (depth > 0) {
+ --opt->depth;
+ }
+
+ bool debug = opt_debug(opt, "%s", depth > 0 ? "├─✘ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+
+ opt->depth = depth;
+ return debug;
+}
+
+typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...);
+
+/** Print a df_pred. */
+static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) {
+ dump(opt, "${blu}%s${rs}: ", pred_type_name(type));
+
+ FILE *file = opt->ctx->cerr->file;
+ switch (value->preds[type]) {
+ case PRED_BOTTOM:
+ fprintf(file, "⊥\n");
+ break;
+ case PRED_TOP:
+ fprintf(file, "⊤\n");
+ break;
+ case PRED_TRUE:
+ fprintf(file, "true\n");
+ break;
+ case PRED_FALSE:
+ fprintf(file, "false\n");
+ break;
+ }
+}
+
+/** Print a df_range. */
+static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) {
+ dump(opt, "${blu}%s${rs}: ", range_type_name(type));
+
+ FILE *file = opt->ctx->cerr->file;
+ const struct df_range *range = &value->ranges[type];
+ if (range_is_bottom(range)) {
+ fprintf(file, "⊥\n");
+ } else if (range_is_top(range)) {
+ fprintf(file, "⊤\n");
+ } else if (range->min == range->max) {
+ fprintf(file, "%lld\n", range->min);
+ } else {
+ if (range->min == LLONG_MIN) {
+ fprintf(file, "(-∞, ");
+ } else {
+ fprintf(file, "[%lld, ", range->min);
+ }
+ if (range->max == LLONG_MAX) {
+ fprintf(file, "∞)\n");
+ } else {
+ fprintf(file, "%lld]\n", range->max);
+ }
+ }
+}
+
+/** Print a set of types. */
+static void types_dump(dump_fn *dump, struct bfs_opt *opt, const char *name, unsigned int types) {
+ dump(opt, "${blu}%s${rs}: ", name);
+
+ FILE *file = opt->ctx->cerr->file;
+ if (types == 0) {
+ fprintf(file, " ⊥\n");
+ } else if (types == ~0U) {
+ fprintf(file, " ⊤\n");
+ } else if (count_ones(types) < count_ones(~types)) {
+ fprintf(file, " 0x%X\n", types);
+ } else {
+ fprintf(file, "~0x%X\n", ~types);
+ }
+}
+
+/** Calculate the number of lines of df_dump() output. */
+static int df_dump_lines(const struct df_domain *value) {
+ int lines = 0;
+
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ lines += value->preds[i] != PRED_TOP;
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ lines += !range_is_top(&value->ranges[i]);
+ }
+
+ lines += value->types != ~0U;
+ lines += value->xtypes != ~0U;
+
+ return lines;
+}
+
+/** Get the right debugging function for a df_dump() line. */
+static dump_fn *df_dump_line(int lines, int *line) {
+ ++*line;
+
+ if (lines == 1) {
+ return opt_visit;
+ } else if (*line == 1) {
+ return opt_enter;
+ } else if (*line == lines) {
+ return opt_leave;
+ } else {
+ return opt_debug;
+ }
+}
+
+/** Print a data flow value. */
+static void df_dump(struct bfs_opt *opt, const char *str, const struct df_domain *value) {
+ if (df_is_bottom(value)) {
+ opt_debug(opt, "%s: ⊥\n", str);
+ return;
+ } else if (df_is_top(value)) {
+ opt_debug(opt, "%s: ⊤\n", str);
+ return;
+ }
+
+ if (!opt_debug(opt, "%s:\n", str)) {
+ return;
+ }
+
+ int lines = df_dump_lines(value);
+ int line = 0;
+
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ if (value->preds[i] != PRED_TOP) {
+ pred_dump(df_dump_line(lines, &line), opt, value, i);
+ }
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ if (!range_is_top(&value->ranges[i])) {
+ range_dump(df_dump_line(lines, &line), opt, value, i);
+ }
+ }
+
+ if (value->types != ~0U) {
+ types_dump(df_dump_line(lines, &line), opt, "-type", value->types);
+ }
+
+ if (value->xtypes != ~0U) {
+ types_dump(df_dump_line(lines, &line), opt, "-xtype", value->xtypes);
+ }
+}
+
+/** Check if an expression is constant. */
+static bool is_const(const struct bfs_expr *expr) {
+ return expr->eval_fn == eval_true || expr->eval_fn == eval_false;
+}
+
+/** Warn about an expression. */
+attr(printf(3, 4))
+static void opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) {
+ if (!opt->warn) {
+ return;
+ }
+
+ if (bfs_expr_is_parent(expr) || is_const(expr)) {
+ return;
+ }
+
+ if (bfs_expr_warning(opt->ctx, expr)) {
+ va_list args;
+ va_start(args, format);
+ bfs_vwarning(opt->ctx, format, args);
+ va_end(args);
+ }
+}
+
+/** Remove and return an expression's children. */
+static void foster_children(struct bfs_expr *expr, struct bfs_exprs *children) {
+ bfs_assert(bfs_expr_is_parent(expr));
+
+ SLIST_INIT(children);
+ SLIST_EXTEND(children, &expr->children);
+
+ expr->persistent_fds = 0;
+ expr->ephemeral_fds = 0;
+ expr->pure = true;
+}
+
+/** Return an expression's only child. */
+static struct bfs_expr *only_child(struct bfs_expr *expr) {
+ bfs_assert(bfs_expr_is_parent(expr));
+ struct bfs_expr *child = bfs_expr_children(expr);
+ bfs_assert(child && !child->next);
+ return child;
+}
+
+/** Foster an expression's only child. */
+static struct bfs_expr *foster_only_child(struct bfs_expr *expr) {
+ struct bfs_expr *child = only_child(expr);
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+ return child;
+}
+
+/** An expression visitor. */
+struct visitor;
+
+/** An expression-visiting function. */
+typedef struct bfs_expr *visit_fn(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor);
+
+/** An entry in a visitor lookup table. */
+struct visitor_table {
+ /** The evaluation function to match on. */
+ bfs_eval_fn *eval_fn;
+ /** The visitor function. */
+ visit_fn *visit;
+};
+
+/** Look up a visitor in a table. */
+static visit_fn *look_up_visitor(const struct bfs_expr *expr, const struct visitor_table table[]) {
+ for (size_t i = 0; table[i].eval_fn; ++i) {
+ if (expr->eval_fn == table[i].eval_fn) {
+ return table[i].visit;
+ }
+ }
+
+ return NULL;
+}
+
+struct visitor {
+ /** The name of this visitor. */
+ const char *name;
+
+ /** A function to call before visiting children. */
+ visit_fn *enter;
+ /** The default visitor. */
+ visit_fn *visit;
+ /** A function to call after visiting children. */
+ visit_fn *leave;
+
+ /** A visitor lookup table. */
+ const struct visitor_table *table;
+};
+
+/** Recursive visitor implementation. */
+static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor);
+
+/** Visit a negation. */
+static struct bfs_expr *visit_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_expr *rhs = foster_only_child(expr);
+
+ struct bfs_opt nested = *opt;
+ rhs = visit_deep(&nested, rhs, visitor);
+ if (!rhs) {
+ return NULL;
+ }
+
+ opt->after_true = nested.after_false;
+ opt->after_false = nested.after_true;
+
+ bfs_expr_append(expr, rhs);
+ return expr;
+}
+
+/** Visit a conjunction. */
+static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ // Base case (-and) == (-true)
+ df_init_bottom(&opt->after_false);
+ struct bfs_opt nested = *opt;
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ if (SLIST_EMPTY(&children)) {
+ nested.ignore_result = opt->ignore_result;
+ } else {
+ nested.ignore_result = false;
+ }
+
+ child = visit_deep(&nested, child, visitor);
+ if (!child) {
+ return NULL;
+ }
+
+ df_join(&opt->after_false, &nested.after_false);
+ nested.before = nested.after_true;
+
+ bfs_expr_append(expr, child);
+ }
+
+ opt->after_true = nested.after_true;
+
+ return expr;
+}
+
+/** Visit a disjunction. */
+static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ // Base case (-or) == (-false)
+ df_init_bottom(&opt->after_true);
+ struct bfs_opt nested = *opt;
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ if (SLIST_EMPTY(&children)) {
+ nested.ignore_result = opt->ignore_result;
+ } else {
+ nested.ignore_result = false;
+ }
+
+ child = visit_deep(&nested, child, visitor);
+ if (!child) {
+ return NULL;
+ }
+
+ df_join(&opt->after_true, &nested.after_true);
+ nested.before = nested.after_false;
+
+ bfs_expr_append(expr, child);
+ }
+
+ opt->after_false = nested.after_false;
+
+ return expr;
+}
+
+/** Visit a comma expression. */
+static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ struct bfs_opt nested = *opt;
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ if (SLIST_EMPTY(&children)) {
+ nested.ignore_result = opt->ignore_result;
+ } else {
+ nested.ignore_result = true;
+ }
+
+ child = visit_deep(&nested, child, visitor);
+ if (!child) {
+ return NULL;
+ }
+
+ nested.before = nested.after_true;
+ df_join(&nested.before, &nested.after_false);
+
+ bfs_expr_append(expr, child);
+ }
+
+ opt->after_true = nested.after_true;
+ opt->after_false = nested.after_false;
+
+ return expr;
+}
+
+/** Default enter() function. */
+static struct bfs_expr *visit_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ opt_enter(opt, "%pe\n", expr);
+ opt->after_true = opt->before;
+ opt->after_false = opt->before;
+ return expr;
+}
+
+/** Default leave() function. */
+static struct bfs_expr *visit_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ opt_leave(opt, "%pe\n", expr);
+ return expr;
+}
+
+static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ bool entered = false;
+
+ visit_fn *enter = visitor->enter ? visitor->enter : visit_enter;
+ visit_fn *leave = visitor->leave ? visitor->leave : visit_leave;
+
+ static const struct visitor_table table[] = {
+ {eval_not, visit_not},
+ {eval_and, visit_and},
+ {eval_or, visit_or},
+ {eval_comma, visit_comma},
+ {NULL, NULL},
+ };
+ visit_fn *recursive = look_up_visitor(expr, table);
+ if (recursive) {
+ if (!entered) {
+ expr = enter(opt, expr, visitor);
+ if (!expr) {
+ return NULL;
+ }
+ entered = true;
+ }
+
+ expr = recursive(opt, expr, visitor);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ visit_fn *general = visitor->visit;
+ if (general) {
+ if (!entered) {
+ expr = enter(opt, expr, visitor);
+ if (!expr) {
+ return NULL;
+ }
+ entered = true;
+ }
+
+ expr = general(opt, expr, visitor);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ visit_fn *specific = look_up_visitor(expr, visitor->table);
+ if (specific) {
+ if (!entered) {
+ expr = enter(opt, expr, visitor);
+ if (!expr) {
+ return NULL;
+ }
+ entered = true;
+ }
+
+ expr = specific(opt, expr, visitor);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ if (entered) {
+ expr = leave(opt, expr, visitor);
+ } else {
+ opt_visit(opt, "%pe\n", expr);
+ }
+
+ return expr;
+}
+
+/** Visit an expression recursively. */
+static struct bfs_expr *visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ opt_enter(opt, "%s()\n", visitor->name);
+ expr = visit_deep(opt, expr, visitor);
+ opt_leave(opt, "\n");
+ return expr;
+}
+
+/** Visit an expression non-recursively. */
+static struct bfs_expr *visit_shallow(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ visit_fn *general = visitor->visit;
+ if (expr && general) {
+ expr = general(opt, expr, visitor);
+ }
+
+ if (!expr) {
+ return NULL;
+ }
+
+ visit_fn *specific = look_up_visitor(expr, visitor->table);
+ if (specific) {
+ expr = specific(opt, expr, visitor);
+ }
+
+ return expr;
+}
+
+/** Annotate -{execut,read,writ}able. */
+static struct bfs_expr *annotate_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ expr->probability = 1.0;
+ if (expr->num & R_OK) {
+ expr->probability *= 0.99;
+ }
+ if (expr->num & W_OK) {
+ expr->probability *= 0.8;
+ }
+ if (expr->num & X_OK) {
+ expr->probability *= 0.2;
+ }
+
+ return expr;
+}
+
+/** Annotate -empty. */
+static struct bfs_expr *annotate_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (opt->level >= 4) {
+ // Since -empty attempts to open and read directories, it may
+ // have side effects such as reporting permission errors, and
+ // thus shouldn't be re-ordered without aggressive optimizations
+ expr->pure = true;
+ }
+
+ return expr;
+}
+
+/** Annotate -exec. */
+static struct bfs_expr *annotate_exec(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (expr->exec->flags & BFS_EXEC_MULTI) {
+ expr->always_true = true;
+ } else {
+ expr->cost = 1000000.0;
+ }
+
+ return expr;
+}
+
+/** Annotate -name/-lname/-path. */
+static struct bfs_expr *annotate_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (expr->literal) {
+ expr->probability = 0.1;
+ } else {
+ expr->probability = 0.5;
+ }
+
+ return expr;
+}
+
+/** Annotate -f?print. */
+static struct bfs_expr *annotate_fprint(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ const struct colors *colors = expr->cfile->colors;
+ expr->calls_stat = colors && colors_need_stat(colors);
+ return expr;
+}
+
+/** Estimate probability for -x?type. */
+static void estimate_type_probability(struct bfs_expr *expr) {
+ unsigned int types = expr->num;
+
+ expr->probability = 0.0;
+ if (types & (1 << BFS_BLK)) {
+ expr->probability += 0.00000721183;
+ }
+ if (types & (1 << BFS_CHR)) {
+ expr->probability += 0.0000499855;
+ }
+ if (types & (1 << BFS_DIR)) {
+ expr->probability += 0.114475;
+ }
+ if (types & (1 << BFS_DOOR)) {
+ expr->probability += 0.000001;
+ }
+ if (types & (1 << BFS_FIFO)) {
+ expr->probability += 0.00000248684;
+ }
+ if (types & (1 << BFS_REG)) {
+ expr->probability += 0.859772;
+ }
+ if (types & (1 << BFS_LNK)) {
+ expr->probability += 0.0256816;
+ }
+ if (types & (1 << BFS_SOCK)) {
+ expr->probability += 0.0000116881;
+ }
+ if (types & (1 << BFS_WHT)) {
+ expr->probability += 0.000001;
+ }
+}
+
+/** Annotate -type. */
+static struct bfs_expr *annotate_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ estimate_type_probability(expr);
+ return expr;
+}
+
+/** Annotate -xtype. */
+static struct bfs_expr *annotate_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (opt->level >= 4) {
+ // Since -xtype dereferences symbolic links, it may have side
+ // effects such as reporting permission errors, and thus
+ // shouldn't be re-ordered without aggressive optimizations
+ expr->pure = true;
+ }
+
+ estimate_type_probability(expr);
+ return expr;
+}
+
+/** Annotate a negation. */
+static struct bfs_expr *annotate_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_expr *rhs = only_child(expr);
+ expr->pure = rhs->pure;
+ expr->always_true = rhs->always_false;
+ expr->always_false = rhs->always_true;
+ expr->cost = rhs->cost;
+ expr->probability = 1.0 - rhs->probability;
+ return expr;
+}
+
+/** Annotate a conjunction. */
+static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ expr->pure = true;
+ expr->always_true = true;
+ expr->always_false = false;
+ expr->cost = 0.0;
+ expr->probability = 1.0;
+
+ for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+ expr->pure &= child->pure;
+ expr->always_true &= child->always_true;
+ expr->always_false |= child->always_false;
+ expr->cost += expr->probability * child->cost;
+ expr->probability *= child->probability;
+ }
+
+ return expr;
+}
+
+/** Annotate a disjunction. */
+static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ expr->pure = true;
+ expr->always_true = false;
+ expr->always_false = true;
+ expr->cost = 0.0;
+
+ float false_prob = 1.0;
+ for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+ expr->pure &= child->pure;
+ expr->always_true |= child->always_true;
+ expr->always_false &= child->always_false;
+ expr->cost += false_prob * child->cost;
+ false_prob *= (1.0 - child->probability);
+ }
+ expr->probability = 1.0 - false_prob;
+
+ return expr;
+}
+
+/** Annotate a comma expression. */
+static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ expr->pure = true;
+ expr->cost = 0.0;
+
+ for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+ expr->pure &= child->pure;
+ expr->always_true = child->always_true;
+ expr->always_false = child->always_false;
+ expr->cost += child->cost;
+ expr->probability = child->probability;
+ }
+
+ return expr;
+}
+
+/** Annotate an arbitrary expression. */
+static struct bfs_expr *annotate_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ /** Table of pure expressions. */
+ static bfs_eval_fn *const pure[] = {
+ eval_access,
+ eval_acl,
+ eval_capable,
+ eval_depth,
+ eval_false,
+ eval_flags,
+ eval_fstype,
+ eval_gid,
+ eval_hidden,
+ eval_inum,
+ eval_links,
+ eval_lname,
+ eval_name,
+ eval_newer,
+ eval_nogroup,
+ eval_nouser,
+ eval_path,
+ eval_perm,
+ eval_regex,
+ eval_samefile,
+ eval_size,
+ eval_sparse,
+ eval_time,
+ eval_true,
+ eval_type,
+ eval_uid,
+ eval_used,
+ eval_xattr,
+ eval_xattrname,
+ };
+
+ expr->pure = false;
+ for (size_t i = 0; i < countof(pure); ++i) {
+ if (expr->eval_fn == pure[i]) {
+ expr->pure = true;
+ break;
+ }
+ }
+
+ /** Table of always-true expressions. */
+ static bfs_eval_fn *const always_true[] = {
+ eval_fls,
+ eval_fprint,
+ eval_fprint0,
+ eval_fprintf,
+ eval_fprintx,
+ eval_limit,
+ eval_prune,
+ eval_true,
+ // Non-returning
+ eval_exit,
+ eval_quit,
+ };
+
+ expr->always_true = false;
+ for (size_t i = 0; i < countof(always_true); ++i) {
+ if (expr->eval_fn == always_true[i]) {
+ expr->always_true = true;
+ break;
+ }
+ }
+
+ /** Table of always-false expressions. */
+ static bfs_eval_fn *const always_false[] = {
+ eval_false,
+ // Non-returning
+ eval_exit,
+ eval_quit,
+ };
+
+ expr->always_false = false;
+ for (size_t i = 0; i < countof(always_false); ++i) {
+ if (expr->eval_fn == always_false[i]) {
+ expr->always_false = true;
+ break;
+ }
+ }
+
+ /** Table of stat-calling primaries. */
+ static bfs_eval_fn *const calls_stat[] = {
+ eval_empty,
+ eval_flags,
+ eval_fls,
+ eval_fprintf,
+ eval_fstype,
+ eval_gid,
+ eval_inum,
+ eval_links,
+ eval_newer,
+ eval_nogroup,
+ eval_nouser,
+ eval_perm,
+ eval_samefile,
+ eval_size,
+ eval_sparse,
+ eval_time,
+ eval_uid,
+ eval_used,
+ eval_xattr,
+ eval_xattrname,
+ };
+
+ expr->calls_stat = false;
+ for (size_t i = 0; i < countof(calls_stat); ++i) {
+ if (expr->eval_fn == calls_stat[i]) {
+ expr->calls_stat = true;
+ break;
+ }
+ }
+
+#define FAST_COST 40.0
+#define FNMATCH_COST 400.0
+#define STAT_COST 1000.0
+#define PRINT_COST 20000.0
+
+ /** Table of expression costs. */
+ static const struct {
+ bfs_eval_fn *eval_fn;
+ float cost;
+ } costs[] = {
+ {eval_access, STAT_COST},
+ {eval_acl, STAT_COST},
+ {eval_capable, STAT_COST},
+ {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat()
+ {eval_flags, STAT_COST},
+ {eval_fls, PRINT_COST},
+ {eval_fprint, PRINT_COST},
+ {eval_fprint0, PRINT_COST},
+ {eval_fprintf, PRINT_COST},
+ {eval_fprintx, PRINT_COST},
+ {eval_fstype, STAT_COST},
+ {eval_gid, STAT_COST},
+ {eval_inum, STAT_COST},
+ {eval_links, STAT_COST},
+ {eval_lname, FNMATCH_COST},
+ {eval_name, FNMATCH_COST},
+ {eval_newer, STAT_COST},
+ {eval_nogroup, STAT_COST},
+ {eval_nouser, STAT_COST},
+ {eval_path, FNMATCH_COST},
+ {eval_perm, STAT_COST},
+ {eval_samefile, STAT_COST},
+ {eval_size, STAT_COST},
+ {eval_sparse, STAT_COST},
+ {eval_time, STAT_COST},
+ {eval_uid, STAT_COST},
+ {eval_used, STAT_COST},
+ {eval_xattr, STAT_COST},
+ {eval_xattrname, STAT_COST},
+ };
+
+ expr->cost = FAST_COST;
+ for (size_t i = 0; i < countof(costs); ++i) {
+ if (expr->eval_fn == costs[i].eval_fn) {
+ expr->cost = costs[i].cost;
+ break;
+ }
+ }
+
+ /** Table of expression probabilities. */
+ static const struct {
+ /** The evaluation function with this cost. */
+ bfs_eval_fn *eval_fn;
+ /** The matching probability. */
+ float probability;
+ } probs[] = {
+ {eval_acl, 0.00002},
+ {eval_capable, 0.000002},
+ {eval_empty, 0.01},
+ {eval_false, 0.0},
+ {eval_hidden, 0.01},
+ {eval_nogroup, 0.01},
+ {eval_nouser, 0.01},
+ {eval_samefile, 0.01},
+ {eval_true, 1.0},
+ {eval_xattr, 0.01},
+ {eval_xattrname, 0.01},
+ };
+
+ expr->probability = 0.5;
+ for (size_t i = 0; i < countof(probs); ++i) {
+ if (expr->eval_fn == probs[i].eval_fn) {
+ expr->probability = probs[i].probability;
+ break;
+ }
+ }
+
+ return expr;
+}
+
+/**
+ * Annotating visitor.
+ */
+static const struct visitor annotate = {
+ .name = "annotate",
+ .visit = annotate_visit,
+ .table = (const struct visitor_table[]) {
+ {eval_access, annotate_access},
+ {eval_empty, annotate_empty},
+ {eval_exec, annotate_exec},
+ {eval_fprint, annotate_fprint},
+ {eval_lname, annotate_fnmatch},
+ {eval_name, annotate_fnmatch},
+ {eval_path, annotate_fnmatch},
+ {eval_type, annotate_type},
+ {eval_xtype, annotate_xtype},
+
+ {eval_not, annotate_not},
+ {eval_and, annotate_and},
+ {eval_or, annotate_or},
+ {eval_comma, annotate_comma},
+
+ {NULL, NULL},
+ },
+};
+
+/** Create a constant expression. */
+static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) {
+ static bfs_eval_fn *const fns[] = {eval_false, eval_true};
+ static char *fake_args[] = {"-false", "-true"};
+
+ struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value]);
+ return visit_shallow(opt, expr, &annotate);
+}
+
+/** Negate an expression, keeping it canonical. */
+static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr, char **argv) {
+ if (expr->eval_fn == eval_not) {
+ return only_child(expr);
+ } else if (expr->eval_fn == eval_true) {
+ return opt_const(opt, false);
+ } else if (expr->eval_fn == eval_false) {
+ return opt_const(opt, true);
+ }
+
+ struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv);
+ if (!ret) {
+ return NULL;
+ }
+
+ bfs_expr_append(ret, expr);
+ return visit_shallow(opt, ret, &annotate);
+}
+
+/** Sink negations into a conjunction/disjunction using De Morgan's laws. */
+static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *expr) {
+ opt_debug(opt, "De Morgan's laws\n");
+
+ char **argv = expr->argv;
+ expr = only_child(expr);
+ opt_enter(opt, "%pe\n", expr);
+
+ if (expr->eval_fn == eval_and) {
+ expr->eval_fn = eval_or;
+ expr->argv = &fake_or_arg;
+ } else {
+ bfs_assert(expr->eval_fn == eval_or);
+ expr->eval_fn = eval_and;
+ expr->argv = &fake_and_arg;
+ }
+
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ struct bfs_expr *child;
+ while ((child = SLIST_POP(&children))) {
+ opt_enter(opt, "%pe\n", child);
+
+ child = negate_expr(opt, child, argv);
+ if (!child) {
+ return NULL;
+ }
+
+ opt_leave(opt, "%pe\n", child);
+ bfs_expr_append(expr, child);
+ }
+
+ opt_leave(opt, "%pe\n", expr);
+ return visit_shallow(opt, expr, &annotate);
+}
+
+/** Sink a negation into a comma expression. */
+static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) {
+ bfs_assert(expr->eval_fn == eval_comma);
+
+ opt_enter(opt, "%pe\n", expr);
+
+ char **argv = expr->argv;
+ expr = only_child(expr);
+
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ struct bfs_expr *child;
+ while ((child = SLIST_POP(&children))) {
+ if (SLIST_EMPTY(&children)) {
+ opt_enter(opt, "%pe\n", child);
+ opt_debug(opt, "sink\n");
+
+ child = negate_expr(opt, child, argv);
+ if (!child) {
+ return NULL;
+ }
+
+ opt_leave(opt, "%pe\n", child);
+ } else {
+ opt_visit(opt, "%pe\n", child);
+ }
+
+ bfs_expr_append(expr, child);
+ }
+
+ opt_leave(opt, "%pe\n", expr);
+ return visit_shallow(opt, expr, &annotate);
+}
+
+/** Canonicalize a negation. */
+static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_expr *rhs = only_child(expr);
+
+ if (rhs->eval_fn == eval_not) {
+ opt_debug(opt, "double negation\n");
+ rhs = only_child(expr);
+ return only_child(rhs);
+ } else if (rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) {
+ return sink_not_andor(opt, expr);
+ } else if (rhs->eval_fn == eval_comma) {
+ return sink_not_comma(opt, expr);
+ } else if (is_const(rhs)) {
+ opt_debug(opt, "constant propagation\n");
+ return opt_const(opt, rhs->eval_fn == eval_false);
+ } else {
+ return expr;
+ }
+}
+
+/** Canonicalize an associative operator. */
+static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ struct bfs_exprs flat;
+ SLIST_INIT(&flat);
+
+ struct bfs_expr *child;
+ while ((child = SLIST_POP(&children))) {
+ if (child->eval_fn == expr->eval_fn) {
+ struct bfs_expr *head = SLIST_HEAD(&child->children);
+ struct bfs_expr *tail = SLIST_TAIL(&child->children);
+
+ if (!head) {
+ opt_delete(opt, "%pe [empty]\n", child);
+ } else {
+ opt_enter(opt, "%pe\n", child);
+ opt_debug(opt, "associativity\n");
+ if (head == tail) {
+ opt_leave(opt, "%pe\n", head);
+ } else if (head->next == tail) {
+ opt_leave(opt, "%pe %pe\n", head, tail);
+ } else {
+ opt_leave(opt, "%pe ... %pe\n", head, tail);
+ }
+ }
+
+ SLIST_EXTEND(&flat, &child->children);
+ } else {
+ opt_visit(opt, "%pe\n", child);
+ SLIST_APPEND(&flat, child);
+ }
+ }
+
+ bfs_expr_extend(expr, &flat);
+
+ return visit_shallow(opt, expr, &annotate);
+}
+
+/**
+ * Canonicalizing visitor.
+ */
+static const struct visitor canonicalize = {
+ .name = "canonicalize",
+ .table = (const struct visitor_table[]) {
+ {eval_not, canonicalize_not},
+ {eval_and, canonicalize_assoc},
+ {eval_or, canonicalize_assoc},
+ {eval_comma, canonicalize_assoc},
+ {NULL, NULL},
+ },
+};
+
+/** Calculate the cost of an ordered pair of expressions. */
+static float expr_cost(const struct bfs_expr *parent, const struct bfs_expr *lhs, const struct bfs_expr *rhs) {
+ // https://cs.stackexchange.com/a/66921/21004
+ float prob = lhs->probability;
+ if (parent->eval_fn == eval_or) {
+ prob = 1.0 - prob;
+ }
+ return lhs->cost + prob * rhs->cost;
+}
+
+/** Sort a block of expressions. */
+static void sort_exprs(struct bfs_opt *opt, struct bfs_expr *parent, struct bfs_exprs *exprs) {
+ if (!exprs->head || !exprs->head->next) {
+ return;
+ }
+
+ struct bfs_exprs left, right;
+ SLIST_INIT(&left);
+ SLIST_INIT(&right);
+
+ // Split
+ for (struct bfs_expr *hare = exprs->head; hare && (hare = hare->next); hare = hare->next) {
+ struct bfs_expr *tortoise = SLIST_POP(exprs);
+ SLIST_APPEND(&left, tortoise);
+ }
+ SLIST_EXTEND(&right, exprs);
+
+ // Recurse
+ sort_exprs(opt, parent, &left);
+ sort_exprs(opt, parent, &right);
+
+ // Merge
+ while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) {
+ struct bfs_expr *lhs = left.head;
+ struct bfs_expr *rhs = right.head;
+
+ float cost = expr_cost(parent, lhs, rhs);
+ float swapped = expr_cost(parent, rhs, lhs);
+
+ if (cost <= swapped) {
+ SLIST_POP(&left);
+ SLIST_APPEND(exprs, lhs);
+ } else {
+ opt_enter(opt, "%pe %pe [${ylw}%g${rs}]\n", lhs, rhs, cost);
+ SLIST_POP(&right);
+ SLIST_APPEND(exprs, rhs);
+ opt_leave(opt, "%pe %pe [${ylw}%g${rs}]\n", rhs, lhs, swapped);
+ }
+ }
+ SLIST_EXTEND(exprs, &left);
+ SLIST_EXTEND(exprs, &right);
+}
+
+/** Reorder children to reduce cost. */
+static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ // Split into blocks of consecutive pure/impure expressions, and sort
+ // the pure blocks
+ struct bfs_exprs pure;
+ SLIST_INIT(&pure);
+
+ struct bfs_expr *child;
+ while ((child = SLIST_POP(&children))) {
+ if (child->pure) {
+ SLIST_APPEND(&pure, child);
+ } else {
+ sort_exprs(opt, expr, &pure);
+ bfs_expr_extend(expr, &pure);
+ bfs_expr_append(expr, child);
+ }
+ }
+ sort_exprs(opt, expr, &pure);
+ bfs_expr_extend(expr, &pure);
+
+ return visit_shallow(opt, expr, &annotate);
+}
+
+/**
+ * Reordering visitor.
+ */
+static const struct visitor reorder = {
+ .name = "reorder",
+ .table = (const struct visitor_table[]) {
+ {eval_and, reorder_andor},
+ {eval_or, reorder_andor},
+ {NULL, NULL},
+ },
+};
+
+/** Transfer function for simple predicates. */
+static void data_flow_pred(struct bfs_opt *opt, enum pred_type pred, bool value) {
+ constrain_pred(&opt->after_true.preds[pred], value);
+ constrain_pred(&opt->after_false.preds[pred], !value);
+}
+
+/** Transfer function for icmp-style ([+-]N) expressions. */
+static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) {
+ struct df_range *true_range = &opt->after_true.ranges[type];
+ struct df_range *false_range = &opt->after_false.ranges[type];
+ long long value = expr->num;
+
+ switch (expr->int_cmp) {
+ case BFS_INT_EQUAL:
+ constrain_min(true_range, value);
+ constrain_max(true_range, value);
+ range_remove(false_range, value);
+ break;
+
+ case BFS_INT_LESS:
+ constrain_min(false_range, value);
+ constrain_max(true_range, value);
+ range_remove(true_range, value);
+ break;
+
+ case BFS_INT_GREATER:
+ constrain_max(false_range, value);
+ constrain_min(true_range, value);
+ range_remove(true_range, value);
+ break;
+ }
+}
+
+/** Transfer function for -{execut,read,writ}able. */
+static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (expr->num & R_OK) {
+ data_flow_pred(opt, READABLE_PRED, true);
+ }
+ if (expr->num & W_OK) {
+ data_flow_pred(opt, WRITABLE_PRED, true);
+ }
+ if (expr->num & X_OK) {
+ data_flow_pred(opt, EXECUTABLE_PRED, true);
+ }
+
+ return expr;
+}
+
+/** Transfer function for -gid. */
+static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct df_range *range = &opt->after_true.ranges[GID_RANGE];
+ if (range->min == range->max) {
+ gid_t gid = range->min;
+ bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid);
+ if (errno == 0) {
+ data_flow_pred(opt, NOGROUP_PRED, nogroup);
+ }
+ }
+
+ return expr;
+}
+
+/** Transfer function for -inum. */
+static struct bfs_expr *data_flow_inum(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct df_range *range = &opt->after_true.ranges[INUM_RANGE];
+ if (range->min == range->max) {
+ expr->probability = 0.01;
+ } else {
+ expr->probability = 0.5;
+ }
+
+ return expr;
+}
+
+/** Transfer function for -links. */
+static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct df_range *range = &opt->after_true.ranges[LINKS_RANGE];
+ if (1 >= range->min && 1 <= range->max) {
+ expr->probability = 0.99;
+ } else {
+ expr->probability = 0.5;
+ }
+
+ return expr;
+}
+
+/** Transfer function for -samefile. */
+static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct df_range *true_range = &opt->after_true.ranges[INUM_RANGE];
+ constrain_min(true_range, expr->ino);
+ constrain_max(true_range, expr->ino);
+
+ struct df_range *false_range = &opt->after_false.ranges[INUM_RANGE];
+ range_remove(false_range, expr->ino);
+
+ return expr;
+}
+
+/** Transfer function for -size. */
+static struct bfs_expr *data_flow_size(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct df_range *range = &opt->after_true.ranges[SIZE_RANGE];
+ if (range->min == range->max) {
+ expr->probability = 0.01;
+ } else {
+ expr->probability = 0.5;
+ }
+
+ return expr;
+}
+
+/** Transfer function for -type. */
+static struct bfs_expr *data_flow_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ opt->after_true.types &= expr->num;
+ opt->after_false.types &= ~expr->num;
+ return expr;
+}
+
+/** Transfer function for -uid. */
+static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct df_range *range = &opt->after_true.ranges[UID_RANGE];
+ if (range->min == range->max) {
+ uid_t uid = range->min;
+ bool nouser = !bfs_getpwuid(opt->ctx->users, uid);
+ if (errno == 0) {
+ data_flow_pred(opt, NOUSER_PRED, nouser);
+ }
+ }
+
+ return expr;
+}
+
+/** Transfer function for -xtype. */
+static struct bfs_expr *data_flow_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ opt->after_true.xtypes &= expr->num;
+ opt->after_false.xtypes &= ~expr->num;
+ return expr;
+}
+
+/** Data flow visitor entry. */
+static struct bfs_expr *data_flow_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ visit_enter(opt, expr, visitor);
+
+ df_dump(opt, "before", &opt->before);
+
+ if (!bfs_expr_is_parent(expr) && !expr->pure) {
+ df_join(opt->impure, &opt->before);
+ df_dump(opt, "impure", opt->impure);
+ }
+
+ return expr;
+}
+
+/** Data flow visitor exit. */
+static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (expr->always_true) {
+ expr->probability = 1.0;
+ df_init_bottom(&opt->after_false);
+ }
+
+ if (expr->always_false) {
+ expr->probability = 0.0;
+ df_init_bottom(&opt->after_true);
+ }
+
+ df_dump(opt, "after true", &opt->after_true);
+ df_dump(opt, "after false", &opt->after_false);
+
+ if (df_is_bottom(&opt->after_false)) {
+ if (!expr->pure) {
+ expr->always_true = true;
+ expr->probability = 0.0;
+ } else if (expr->eval_fn != eval_true) {
+ opt_warning(opt, expr, "This expression is always true.\n\n");
+ opt_debug(opt, "pure, always true\n");
+ expr = opt_const(opt, true);
+ if (!expr) {
+ return NULL;
+ }
+ }
+ }
+
+ if (df_is_bottom(&opt->after_true)) {
+ if (!expr->pure) {
+ expr->always_false = true;
+ expr->probability = 0.0;
+ } else if (expr->eval_fn != eval_false) {
+ opt_warning(opt, expr, "This expression is always false.\n\n");
+ opt_debug(opt, "pure, always false\n");
+ expr = opt_const(opt, false);
+ if (!expr) {
+ return NULL;
+ }
+ }
+ }
+
+ return visit_leave(opt, expr, visitor);
+}
+
+/** Data flow visitor function. */
+static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (opt->ignore_result && expr->pure) {
+ opt_debug(opt, "ignored result\n");
+ opt_warning(opt, expr, "The result of this expression is ignored.\n\n");
+ expr = opt_const(opt, false);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ if (df_is_bottom(&opt->before)) {
+ opt_debug(opt, "unreachable\n");
+ opt_warning(opt, expr, "This expression is unreachable.\n\n");
+ expr = opt_const(opt, false);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ /** Table of simple predicates. */
+ static const struct {
+ bfs_eval_fn *eval_fn;
+ enum pred_type pred;
+ } preds[] = {
+ {eval_acl, ACL_PRED},
+ {eval_capable, CAPABLE_PRED},
+ {eval_empty, EMPTY_PRED},
+ {eval_hidden, HIDDEN_PRED},
+ {eval_nogroup, NOGROUP_PRED},
+ {eval_nouser, NOUSER_PRED},
+ {eval_sparse, SPARSE_PRED},
+ {eval_xattr, XATTR_PRED},
+ };
+
+ for (size_t i = 0; i < countof(preds); ++i) {
+ if (preds[i].eval_fn == expr->eval_fn) {
+ data_flow_pred(opt, preds[i].pred, true);
+ break;
+ }
+ }
+
+ /** Table of simple range comparisons. */
+ static const struct {
+ bfs_eval_fn *eval_fn;
+ enum range_type range;
+ } ranges[] = {
+ {eval_depth, DEPTH_RANGE},
+ {eval_gid, GID_RANGE},
+ {eval_inum, INUM_RANGE},
+ {eval_links, LINKS_RANGE},
+ {eval_size, SIZE_RANGE},
+ {eval_uid, UID_RANGE},
+ };
+
+ for (size_t i = 0; i < countof(ranges); ++i) {
+ if (ranges[i].eval_fn == expr->eval_fn) {
+ data_flow_icmp(opt, expr, ranges[i].range);
+ break;
+ }
+ }
+
+ return expr;
+}
+
+/**
+ * Data flow visitor.
+ */
+static const struct visitor data_flow = {
+ .name = "data_flow",
+ .enter = data_flow_enter,
+ .visit = data_flow_visit,
+ .leave = data_flow_leave,
+ .table = (const struct visitor_table[]) {
+ {eval_access, data_flow_access},
+ {eval_gid, data_flow_gid},
+ {eval_inum, data_flow_inum},
+ {eval_links, data_flow_links},
+ {eval_samefile, data_flow_samefile},
+ {eval_size, data_flow_size},
+ {eval_type, data_flow_type},
+ {eval_uid, data_flow_uid},
+ {eval_xtype, data_flow_xtype},
+ {NULL, NULL},
+ },
+};
+
+/** Simplify a negation. */
+static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ if (opt->ignore_result) {
+ opt_debug(opt, "ignored result\n");
+ expr = only_child(expr);
+ }
+
+ return expr;
+}
+
+/** Lift negations out of a conjunction/disjunction using De Morgan's laws. */
+static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) {
+ // Only lift negations if it would reduce the number of (-not) expressions
+ size_t added = 0, removed = 0;
+ for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+ if (child->eval_fn == eval_not) {
+ ++removed;
+ } else {
+ ++added;
+ }
+ }
+ if (added >= removed) {
+ return visit_shallow(opt, expr, &annotate);
+ }
+
+ opt_debug(opt, "De Morgan's laws\n");
+
+ if (expr->eval_fn == eval_and) {
+ expr->eval_fn = eval_or;
+ expr->argv = &fake_or_arg;
+ } else {
+ bfs_assert(expr->eval_fn == eval_or);
+ expr->eval_fn = eval_and;
+ expr->argv = &fake_and_arg;
+ }
+
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ struct bfs_expr *child;
+ while ((child = SLIST_POP(&children))) {
+ opt_enter(opt, "%pe\n", child);
+
+ child = negate_expr(opt, child, &fake_not_arg);
+ if (!child) {
+ return NULL;
+ }
+
+ opt_leave(opt, "%pe\n", child);
+ bfs_expr_append(expr, child);
+ }
+
+ expr = visit_shallow(opt, expr, &annotate);
+ return negate_expr(opt, expr, &fake_not_arg);
+}
+
+/** Get the first ignorable expression in a conjunction/disjunction. */
+static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *expr) {
+ if (opt->level < 2 || !opt->ignore_result) {
+ return NULL;
+ }
+
+ struct bfs_expr *ret = NULL;
+ for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+ if (!child->pure) {
+ ret = NULL;
+ } else if (!ret) {
+ ret = child;
+ }
+ }
+
+ return ret;
+}
+
+/** Simplify a conjunction. */
+static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_expr *ignorable = first_ignorable(opt, expr);
+ bool ignore = false;
+
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ if (child == ignorable) {
+ ignore = true;
+ }
+
+ if (ignore) {
+ opt_delete(opt, "%pe [ignored result]\n", child);
+ opt_warning(opt, child, "The result of this expression is ignored.\n\n");
+ continue;
+ }
+
+ if (child->eval_fn == eval_true) {
+ opt_delete(opt, "%pe [conjunction elimination]\n", child);
+ continue;
+ }
+
+ opt_visit(opt, "%pe\n", child);
+ bfs_expr_append(expr, child);
+
+ if (child->always_false) {
+ while ((child = SLIST_POP(&children))) {
+ opt_delete(opt, "%pe [short-circuit]\n", child);
+ }
+ }
+ }
+
+ struct bfs_expr *child = bfs_expr_children(expr);
+ if (!child) {
+ opt_debug(opt, "nullary identity\n");
+ return opt_const(opt, true);
+ } else if (!child->next) {
+ opt_debug(opt, "unary identity\n");
+ return only_child(expr);
+ }
+
+ return lift_andor_not(opt, expr);
+}
+
+/** Simplify a disjunction. */
+static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_expr *ignorable = first_ignorable(opt, expr);
+ bool ignore = false;
+
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ if (child == ignorable) {
+ ignore = true;
+ }
+
+ if (ignore) {
+ opt_delete(opt, "%pe [ignored result]\n", child);
+ opt_warning(opt, child, "The result of this expression is ignored.\n\n");
+ continue;
+ }
+
+ if (child->eval_fn == eval_false) {
+ opt_delete(opt, "%pe [disjunctive syllogism]\n", child);
+ continue;
+ }
+
+ opt_visit(opt, "%pe\n", child);
+ bfs_expr_append(expr, child);
+
+ if (child->always_true) {
+ while ((child = SLIST_POP(&children))) {
+ opt_delete(opt, "%pe [short-circuit]\n", child);
+ }
+ }
+ }
+
+ struct bfs_expr *child = bfs_expr_children(expr);
+ if (!child) {
+ opt_debug(opt, "nullary identity\n");
+ return opt_const(opt, false);
+ } else if (!child->next) {
+ opt_debug(opt, "unary identity\n");
+ return only_child(expr);
+ }
+
+ return lift_andor_not(opt, expr);
+}
+
+/** Simplify a comma expression. */
+static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ if (opt->level >= 2 && child->pure && !SLIST_EMPTY(&children)) {
+ opt_delete(opt, "%pe [ignored result]\n", child);
+ opt_warning(opt, child, "The result of this expression is ignored.\n\n");
+ continue;
+ }
+
+ opt_visit(opt, "%pe\n", child);
+ bfs_expr_append(expr, child);
+ }
+
+ struct bfs_expr *child = bfs_expr_children(expr);
+ if (child && !child->next) {
+ opt_debug(opt, "unary identity\n");
+ return only_child(expr);
+ }
+
+ return expr;
+}
+
+/**
+ * Logical simplification visitor.
+ */
+static const struct visitor simplify = {
+ .name = "simplify",
+ .table = (const struct visitor_table[]) {
+ {eval_not, simplify_not},
+ {eval_and, simplify_and},
+ {eval_or, simplify_or},
+ {eval_comma, simplify_comma},
+ {NULL, NULL},
+ },
+};
+
+/** Optimize an expression. */
+static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) {
+ opt_enter(opt, "pass 0:\n");
+ expr = visit(opt, expr, &annotate);
+ opt_leave(opt, NULL);
+
+ /** Table of optimization passes. */
+ static const struct {
+ /** Minimum optlevel for this pass. */
+ int level;
+ /** The visitor for this pass. */
+ const struct visitor *visitor;
+ } passes[] = {
+ {1, &canonicalize},
+ {3, &reorder},
+ {2, &data_flow},
+ {1, &simplify},
+ };
+
+ struct df_domain impure;
+
+ for (int i = 0; i < 3; ++i) {
+ struct bfs_opt nested = *opt;
+ nested.impure = &impure;
+ impure = *opt->impure;
+
+ opt_enter(&nested, "pass %d:\n", i + 1);
+
+ for (size_t j = 0; j < countof(passes); ++j) {
+ if (opt->level < passes[j].level) {
+ continue;
+ }
+
+ // Skip reordering the first time through the passes, to
+ // make warnings more understandable
+ if (passes[j].visitor == &reorder) {
+ if (i == 0) {
+ continue;
+ } else {
+ nested.warn = false;
+ }
+ }
+
+ expr = visit(&nested, expr, passes[j].visitor);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ opt_leave(&nested, NULL);
+
+ if (!bfs_expr_is_parent(expr)) {
+ break;
+ }
+ }
+
+ *opt->impure = impure;
+ return expr;
+}
+
+/** Estimate the odds of an expression calling stat(). */
+static float expr_stat_odds(struct bfs_expr *expr) {
+ if (expr->calls_stat) {
+ return 1.0;
+ }
+
+ float nostat_odds = 1.0;
+ float reached_odds = 1.0;
+ for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+ float child_odds = expr_stat_odds(child);
+ nostat_odds *= 1.0 - reached_odds * child_odds;
+
+ if (expr->eval_fn == eval_and) {
+ reached_odds *= child->probability;
+ } else if (expr->eval_fn == eval_or) {
+ reached_odds *= 1.0 - child->probability;
+ }
+ }
+
+ return 1.0 - nostat_odds;
+}
+
+/** Estimate the odds of calling stat(). */
+static float estimate_stat_odds(struct bfs_ctx *ctx) {
+ if (ctx->unique) {
+ return 1.0;
+ }
+
+ float nostat_odds = 1.0 - expr_stat_odds(ctx->exclude);
+
+ float reached_odds = 1.0 - ctx->exclude->probability;
+ float expr_odds = expr_stat_odds(ctx->expr);
+ nostat_odds *= 1.0 - reached_odds * expr_odds;
+
+ return 1.0 - nostat_odds;
+}
+
+int bfs_optimize(struct bfs_ctx *ctx) {
+ bfs_ctx_dump(ctx, DEBUG_OPT);
+
+ struct df_domain impure;
+ df_init_bottom(&impure);
+
+ struct bfs_opt opt = {
+ .ctx = ctx,
+ .level = ctx->optlevel,
+ .depth = 0,
+ .warn = ctx->warn,
+ .ignore_result = false,
+ .impure = &impure,
+ };
+ df_init_top(&opt.before);
+
+ ctx->exclude = optimize(&opt, ctx->exclude);
+ if (!ctx->exclude) {
+ return -1;
+ }
+
+ // Only non-excluded files are evaluated
+ opt.before = opt.after_false;
+ opt.ignore_result = true;
+
+ struct df_range *depth = &opt.before.ranges[DEPTH_RANGE];
+ if (ctx->mindepth > 0) {
+ constrain_min(depth, ctx->mindepth);
+ }
+ if (ctx->maxdepth < INT_MAX) {
+ constrain_max(depth, ctx->maxdepth);
+ }
+
+ ctx->expr = optimize(&opt, ctx->expr);
+ if (!ctx->expr) {
+ return -1;
+ }
+
+ if (opt.level >= 2 && df_is_bottom(&impure)) {
+ bfs_warning(ctx, "This command won't do anything.\n\n");
+ }
+
+ const struct df_range *impure_depth = &impure.ranges[DEPTH_RANGE];
+ long long mindepth = impure_depth->min;
+ long long maxdepth = impure_depth->max;
+
+ opt_enter(&opt, "post-process:\n");
+
+ if (opt.level >= 2 && mindepth > ctx->mindepth) {
+ if (mindepth > INT_MAX) {
+ mindepth = INT_MAX;
+ }
+ opt_enter(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth);
+ ctx->mindepth = mindepth;
+ opt_leave(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth);
+ }
+
+ if (opt.level >= 4 && maxdepth < ctx->maxdepth) {
+ if (maxdepth < INT_MIN) {
+ maxdepth = INT_MIN;
+ }
+ opt_enter(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth);
+ ctx->maxdepth = maxdepth;
+ opt_leave(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth);
+ }
+
+ if (opt.level >= 3) {
+ // bfs_eval() can do lazy stat() calls, but only on one thread.
+ float lazy_cost = estimate_stat_odds(ctx);
+ // bftw() can do eager stat() calls in parallel
+ float eager_cost = 1.0 / ctx->threads;
+
+ if (eager_cost <= lazy_cost) {
+ opt_enter(&opt, "lazy stat cost: ${ylw}%g${rs}\n", lazy_cost);
+ ctx->flags |= BFTW_STAT;
+ opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost);
+ }
+
+ }
+
+ opt_leave(&opt, NULL);
+
+ return 0;
+}