diff options
Diffstat (limited to 'src/opt.c')
-rw-r--r-- | src/opt.c | 1088 |
1 files changed, 1088 insertions, 0 deletions
diff --git a/src/opt.c b/src/opt.c new file mode 100644 index 0000000..f8c0ba3 --- /dev/null +++ b/src/opt.c @@ -0,0 +1,1088 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The expression optimizer. Different optimization levels are supported: + * + * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo. + * + * -O2: dead code elimination and data flow analysis. struct opt_facts is used + * to record data flow facts that are true at various points of evaluation. + * Specifically, struct opt_facts records the facts that must be true before an + * expression is evaluated (state->facts), and those that must be true after the + * expression is evaluated, given that it returns true (state->facts_when_true) + * or false (state->facts_when_true). Additionally, state->facts_when_impure + * records the possible data flow facts before any expressions with side effects + * are evaluated. + * + * -O3: expression re-ordering to reduce expected cost. In an expression like + * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can + * be re-ordered to (-bar -and -foo). This is profitable if the expected cost + * is lower for the re-ordered expression, for example if -foo is very slow or + * -bar is likely to return false. + * + * -O4/-Ofast: aggressive optimizations that may affect correctness in corner + * cases. The main effect is to use facts_when_impure to determine if any side- + * effects are reachable at all, and skipping the traversal if not. + */ + +#include "opt.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "eval.h" +#include "expr.h" +#include "pwcache.h" +#include "util.h" +#include <assert.h> +#include <limits.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +static char *fake_and_arg = "-a"; +static char *fake_or_arg = "-o"; +static char *fake_not_arg = "!"; + +/** + * A contrained integer range. + */ +struct range { + /** The (inclusive) minimum value. */ + long long min; + /** The (inclusive) maximum value. */ + long long max; +}; + +/** Compute the minimum of two values. */ +static long long min_value(long long a, long long b) { + if (a < b) { + return a; + } else { + return b; + } +} + +/** Compute the maximum of two values. */ +static long long max_value(long long a, long long b) { + if (a > b) { + return a; + } else { + return b; + } +} + +/** Constrain the minimum of a range. */ +static void constrain_min(struct range *range, long long value) { + range->min = max_value(range->min, value); +} + +/** Contrain the maximum of a range. */ +static void constrain_max(struct range *range, long long value) { + range->max = min_value(range->max, value); +} + +/** Remove a single value from a range. */ +static void range_remove(struct range *range, long long value) { + if (range->min == value) { + if (range->min == LLONG_MAX) { + range->max = LLONG_MIN; + } else { + ++range->min; + } + } + + if (range->max == value) { + if (range->max == LLONG_MIN) { + range->min = LLONG_MAX; + } else { + --range->max; + } + } +} + +/** Compute the union of two ranges. */ +static void range_union(struct range *result, const struct range *lhs, const struct range *rhs) { + result->min = min_value(lhs->min, rhs->min); + result->max = max_value(lhs->max, rhs->max); +} + +/** Check if a range contains no values. */ +static bool range_is_impossible(const struct range *range) { + return range->min > range->max; +} + +/** Set a range to contain no values. */ +static void set_range_impossible(struct range *range) { + range->min = LLONG_MAX; + range->max = LLONG_MIN; +} + +/** + * Types of ranges we track. + */ +enum range_type { + /** Search tree depth. */ + DEPTH_RANGE, + /** Group ID. */ + GID_RANGE, + /** Inode number. */ + INUM_RANGE, + /** Hard link count. */ + LINKS_RANGE, + /** File size. */ + SIZE_RANGE, + /** User ID. */ + UID_RANGE, + /** The number of range_types. */ + RANGE_TYPES, +}; + +/** + * A possibly-known value of a predicate. + */ +enum known_pred { + /** The state is impossible to reach. */ + PRED_IMPOSSIBLE = -2, + /** The value of the predicate is not known. */ + PRED_UNKNOWN = -1, + /** The predicate is known to be false. */ + PRED_FALSE = false, + /** The predicate is known to be true. */ + PRED_TRUE = true, +}; + +/** Make a predicate known. */ +static void constrain_pred(enum known_pred *pred, bool value) { + if (*pred == PRED_UNKNOWN) { + *pred = value; + } else if (*pred == !value) { + *pred = PRED_IMPOSSIBLE; + } +} + +/** Compute the union of two known predicates. */ +static enum known_pred pred_union(enum known_pred lhs, enum known_pred rhs) { + if (lhs == PRED_IMPOSSIBLE) { + return rhs; + } else if (rhs == PRED_IMPOSSIBLE) { + return lhs; + } else if (lhs == rhs) { + return lhs; + } else { + return PRED_UNKNOWN; + } +} + +/** + * Types of predicates we track. + */ +enum pred_type { + /** -readable */ + READABLE_PRED, + /** -writable */ + WRITABLE_PRED, + /** -executable */ + EXECUTABLE_PRED, + /** -acl */ + ACL_PRED, + /** -capable */ + CAPABLE_PRED, + /** -empty */ + EMPTY_PRED, + /** -hidden */ + HIDDEN_PRED, + /** -nogroup */ + NOGROUP_PRED, + /** -nouser */ + NOUSER_PRED, + /** -sparse */ + SPARSE_PRED, + /** -xattr */ + XATTR_PRED, + /** The number of pred_types. */ + PRED_TYPES, +}; + +/** + * Data flow facts about an evaluation point. + */ +struct opt_facts { + /** The value ranges we track. */ + struct range ranges[RANGE_TYPES]; + + /** The predicates we track. */ + enum known_pred preds[PRED_TYPES]; + + /** Bitmask of possible file types. */ + unsigned int types; + /** Bitmask of possible link target types. */ + unsigned int xtypes; +}; + +/** Initialize some data flow facts. */ +static void facts_init(struct opt_facts *facts) { + for (int i = 0; i < RANGE_TYPES; ++i) { + struct range *range = &facts->ranges[i]; + range->min = 0; // All ranges we currently track are non-negative + range->max = LLONG_MAX; + } + + for (int i = 0; i < PRED_TYPES; ++i) { + facts->preds[i] = PRED_UNKNOWN; + } + + facts->types = ~0; + facts->xtypes = ~0; +} + +/** Compute the union of two fact sets. */ +static void facts_union(struct opt_facts *result, const struct opt_facts *lhs, const struct opt_facts *rhs) { + for (int i = 0; i < RANGE_TYPES; ++i) { + range_union(&result->ranges[i], &lhs->ranges[i], &rhs->ranges[i]); + } + + for (int i = 0; i < PRED_TYPES; ++i) { + result->preds[i] = pred_union(lhs->preds[i], rhs->preds[i]); + } + + result->types = lhs->types | rhs->types; + result->xtypes = lhs->xtypes | rhs->xtypes; +} + +/** Determine whether a fact set is impossible. */ +static bool facts_are_impossible(const struct opt_facts *facts) { + for (int i = 0; i < RANGE_TYPES; ++i) { + if (range_is_impossible(&facts->ranges[i])) { + return true; + } + } + + for (int i = 0; i < PRED_TYPES; ++i) { + if (facts->preds[i] == PRED_IMPOSSIBLE) { + return true; + } + } + + if (!facts->types || !facts->xtypes) { + return true; + } + + return false; +} + +/** Set some facts to be impossible. */ +static void set_facts_impossible(struct opt_facts *facts) { + for (int i = 0; i < RANGE_TYPES; ++i) { + set_range_impossible(&facts->ranges[i]); + } + + for (int i = 0; i < PRED_TYPES; ++i) { + facts->preds[i] = PRED_IMPOSSIBLE; + } + + facts->types = 0; + facts->xtypes = 0; +} + +/** + * Optimizer state. + */ +struct opt_state { + /** The context we're optimizing. */ + const struct bfs_ctx *ctx; + + /** Data flow facts before this expression is evaluated. */ + struct opt_facts facts; + /** Data flow facts after this expression returns true. */ + struct opt_facts facts_when_true; + /** Data flow facts after this expression returns false. */ + struct opt_facts facts_when_false; + /** Data flow facts before any side-effecting expressions are evaluated. */ + struct opt_facts *facts_when_impure; +}; + +/** Log an optimization. */ +BFS_FORMATTER(3, 4) +static bool opt_debug(const struct opt_state *state, int level, const char *format, ...) { + assert(state->ctx->optlevel >= level); + + if (bfs_debug(state->ctx, DEBUG_OPT, "${cyn}-O%d${rs}: ", level)) { + va_list args; + va_start(args, format); + cvfprintf(state->ctx->cerr, format, args); + va_end(args); + return true; + } else { + return false; + } +} + +/** Warn about an expression. */ +BFS_FORMATTER(3, 4) +static void opt_warning(const struct opt_state *state, const struct bfs_expr *expr, const char *format, ...) { + if (bfs_expr_warning(state->ctx, expr)) { + va_list args; + va_start(args, format); + bfs_warning(state->ctx, format, args); + va_end(args); + } +} + +/** Extract a child expression, freeing the outer expression. */ +static struct bfs_expr *extract_child_expr(struct bfs_expr *expr, struct bfs_expr **child) { + struct bfs_expr *ret = *child; + *child = NULL; + bfs_expr_free(expr); + return ret; +} + +/** + * Negate an expression. + */ +static struct bfs_expr *negate_expr(struct bfs_expr *rhs, char **argv) { + if (rhs->eval_fn == eval_not) { + return extract_child_expr(rhs, &rhs->rhs); + } + + struct bfs_expr *expr = bfs_expr_new(eval_not, 1, argv); + if (!expr) { + bfs_expr_free(rhs); + return NULL; + } + + if (argv == &fake_not_arg) { + expr->synthetic = true; + } + + expr->lhs = NULL; + expr->rhs = rhs; + return expr; +} + +static struct bfs_expr *optimize_not_expr(const struct opt_state *state, struct bfs_expr *expr); +static struct bfs_expr *optimize_and_expr(const struct opt_state *state, struct bfs_expr *expr); +static struct bfs_expr *optimize_or_expr(const struct opt_state *state, struct bfs_expr *expr); + +/** + * Apply De Morgan's laws. + */ +static struct bfs_expr *de_morgan(const struct opt_state *state, struct bfs_expr *expr, char **argv) { + bool debug = opt_debug(state, 1, "De Morgan's laws: %pe ", expr); + + struct bfs_expr *parent = negate_expr(expr, argv); + if (!parent) { + return NULL; + } + + bool has_parent = true; + if (parent->eval_fn != eval_not) { + expr = parent; + has_parent = false; + } + + assert(expr->eval_fn == eval_and || expr->eval_fn == eval_or); + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + expr->synthetic = true; + + expr->lhs = negate_expr(expr->lhs, argv); + expr->rhs = negate_expr(expr->rhs, argv); + if (!expr->lhs || !expr->rhs) { + bfs_expr_free(parent); + return NULL; + } + + if (debug) { + cfprintf(state->ctx->cerr, "<==> %pe\n", parent); + } + + if (expr->lhs->eval_fn == eval_not) { + expr->lhs = optimize_not_expr(state, expr->lhs); + } + if (expr->rhs->eval_fn == eval_not) { + expr->rhs = optimize_not_expr(state, expr->rhs); + } + if (!expr->lhs || !expr->rhs) { + bfs_expr_free(parent); + return NULL; + } + + if (expr->eval_fn == eval_and) { + expr = optimize_and_expr(state, expr); + } else { + expr = optimize_or_expr(state, expr); + } + if (has_parent) { + parent->rhs = expr; + } else { + parent = expr; + } + if (!expr) { + bfs_expr_free(parent); + return NULL; + } + + if (has_parent) { + parent = optimize_not_expr(state, parent); + } + return parent; +} + +/** Optimize an expression recursively. */ +static struct bfs_expr *optimize_expr_recursive(struct opt_state *state, struct bfs_expr *expr); + +/** + * Optimize a negation. + */ +static struct bfs_expr *optimize_not_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_not); + + struct bfs_expr *rhs = expr->rhs; + + int optlevel = state->ctx->optlevel; + if (optlevel >= 1) { + if (rhs == &bfs_true) { + opt_debug(state, 1, "constant propagation: %pe <==> %pe\n", expr, &bfs_false); + bfs_expr_free(expr); + return &bfs_false; + } else if (rhs == &bfs_false) { + opt_debug(state, 1, "constant propagation: %pe <==> %pe\n", expr, &bfs_true); + bfs_expr_free(expr); + return &bfs_true; + } else if (rhs->eval_fn == eval_not) { + opt_debug(state, 1, "double negation: %pe <==> %pe\n", expr, rhs->rhs); + return extract_child_expr(expr, &rhs->rhs); + } else if (bfs_expr_never_returns(rhs)) { + opt_debug(state, 1, "reachability: %pe <==> %pe\n", expr, rhs); + return extract_child_expr(expr, &expr->rhs); + } else if ((rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) + && (rhs->lhs->eval_fn == eval_not || rhs->rhs->eval_fn == eval_not)) { + return de_morgan(state, expr, expr->argv); + } + } + + expr->pure = rhs->pure; + expr->always_true = rhs->always_false; + expr->always_false = rhs->always_true; + expr->cost = rhs->cost; + expr->probability = 1.0 - rhs->probability; + + return expr; +} + +/** Optimize a negation recursively. */ +static struct bfs_expr *optimize_not_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state rhs_state = *state; + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + state->facts_when_true = rhs_state.facts_when_false; + state->facts_when_false = rhs_state.facts_when_true; + + return optimize_not_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Optimize a conjunction. */ +static struct bfs_expr *optimize_and_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_and); + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + const struct bfs_ctx *ctx = state->ctx; + int optlevel = ctx->optlevel; + if (optlevel >= 1) { + if (lhs == &bfs_true) { + opt_debug(state, 1, "conjunction elimination: %pe <==> %pe\n", expr, rhs); + return extract_child_expr(expr, &expr->rhs); + } else if (rhs == &bfs_true) { + opt_debug(state, 1, "conjunction elimination: %pe <==> %pe\n", expr, lhs); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs->always_false) { + opt_debug(state, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); + opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs->always_true && rhs == &bfs_false) { + bool debug = opt_debug(state, 1, "strength reduction: %pe <==> ", expr); + struct bfs_expr *ret = extract_child_expr(expr, &expr->lhs); + ret = negate_expr(ret, &fake_not_arg); + if (debug && ret) { + cfprintf(ctx->cerr, "%pe\n", ret); + } + return ret; + } else if (optlevel >= 2 && lhs->pure && rhs == &bfs_false) { + opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); + opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); + return extract_child_expr(expr, &expr->rhs); + } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { + return de_morgan(state, expr, expr->lhs->argv); + } + } + + expr->pure = lhs->pure && rhs->pure; + expr->always_true = lhs->always_true && rhs->always_true; + expr->always_false = lhs->always_false || rhs->always_false; + expr->cost = lhs->cost + lhs->probability*rhs->cost; + expr->probability = lhs->probability*rhs->probability; + + return expr; +} + +/** Optimize a conjunction recursively. */ +static struct bfs_expr *optimize_and_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state lhs_state = *state; + expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); + if (!expr->lhs) { + goto fail; + } + + struct opt_state rhs_state = *state; + rhs_state.facts = lhs_state.facts_when_true; + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + state->facts_when_true = rhs_state.facts_when_true; + facts_union(&state->facts_when_false, &lhs_state.facts_when_false, &rhs_state.facts_when_false); + + return optimize_and_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Optimize a disjunction. */ +static struct bfs_expr *optimize_or_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_or); + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + const struct bfs_ctx *ctx = state->ctx; + int optlevel = ctx->optlevel; + if (optlevel >= 1) { + if (lhs->always_true) { + opt_debug(state, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); + opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs == &bfs_false) { + opt_debug(state, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, rhs); + return extract_child_expr(expr, &expr->rhs); + } else if (rhs == &bfs_false) { + opt_debug(state, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, lhs); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs->always_false && rhs == &bfs_true) { + bool debug = opt_debug(state, 1, "strength reduction: %pe <==> ", expr); + struct bfs_expr *ret = extract_child_expr(expr, &expr->lhs); + ret = negate_expr(ret, &fake_not_arg); + if (debug && ret) { + cfprintf(ctx->cerr, "%pe\n", ret); + } + return ret; + } else if (optlevel >= 2 && lhs->pure && rhs == &bfs_true) { + opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); + opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); + return extract_child_expr(expr, &expr->rhs); + } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { + return de_morgan(state, expr, expr->lhs->argv); + } + } + + expr->pure = lhs->pure && rhs->pure; + expr->always_true = lhs->always_true || rhs->always_true; + expr->always_false = lhs->always_false && rhs->always_false; + expr->cost = lhs->cost + (1 - lhs->probability)*rhs->cost; + expr->probability = lhs->probability + rhs->probability - lhs->probability*rhs->probability; + + return expr; +} + +/** Optimize a disjunction recursively. */ +static struct bfs_expr *optimize_or_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state lhs_state = *state; + expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); + if (!expr->lhs) { + goto fail; + } + + struct opt_state rhs_state = *state; + rhs_state.facts = lhs_state.facts_when_false; + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + facts_union(&state->facts_when_true, &lhs_state.facts_when_true, &rhs_state.facts_when_true); + state->facts_when_false = rhs_state.facts_when_false; + + return optimize_or_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Optimize an expression in an ignored-result context. */ +static struct bfs_expr *ignore_result(const struct opt_state *state, struct bfs_expr *expr) { + int optlevel = state->ctx->optlevel; + + if (optlevel >= 1) { + while (true) { + if (expr->eval_fn == eval_not) { + opt_debug(state, 1, "ignored result: %pe --> %pe\n", expr, expr->rhs); + opt_warning(state, expr, "The result of this expression is ignored.\n\n"); + expr = extract_child_expr(expr, &expr->rhs); + } else if (optlevel >= 2 + && (expr->eval_fn == eval_and || expr->eval_fn == eval_or || expr->eval_fn == eval_comma) + && expr->rhs->pure) { + opt_debug(state, 2, "ignored result: %pe --> %pe\n", expr, expr->lhs); + opt_warning(state, expr->rhs, "The result of this expression is ignored.\n\n"); + expr = extract_child_expr(expr, &expr->lhs); + } else { + break; + } + } + + if (optlevel >= 2 && expr->pure && expr != &bfs_false) { + opt_debug(state, 2, "ignored result: %pe --> %pe\n", expr, &bfs_false); + opt_warning(state, expr, "The result of this expression is ignored.\n\n"); + bfs_expr_free(expr); + expr = &bfs_false; + } + } + + return expr; +} + +/** Optimize a comma expression. */ +static struct bfs_expr *optimize_comma_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_comma); + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + int optlevel = state->ctx->optlevel; + if (optlevel >= 1) { + lhs = expr->lhs = ignore_result(state, lhs); + + if (bfs_expr_never_returns(lhs)) { + opt_debug(state, 1, "reachability: %pe <==> %pe\n", expr, lhs); + opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); + return extract_child_expr(expr, &expr->lhs); + } else if ((lhs->always_true && rhs == &bfs_true) + || (lhs->always_false && rhs == &bfs_false)) { + opt_debug(state, 1, "redundancy elimination: %pe <==> %pe\n", expr, lhs); + return extract_child_expr(expr, &expr->lhs); + } else if (optlevel >= 2 && lhs->pure) { + opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); + opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); + return extract_child_expr(expr, &expr->rhs); + } + } + + expr->pure = lhs->pure && rhs->pure; + expr->always_true = bfs_expr_never_returns(lhs) || rhs->always_true; + expr->always_false = bfs_expr_never_returns(lhs) || rhs->always_false; + expr->cost = lhs->cost + rhs->cost; + expr->probability = rhs->probability; + + return expr; +} + +/** Optimize a comma expression recursively. */ +static struct bfs_expr *optimize_comma_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state lhs_state = *state; + expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); + if (!expr->lhs) { + goto fail; + } + + struct opt_state rhs_state = *state; + facts_union(&rhs_state.facts, &lhs_state.facts_when_true, &lhs_state.facts_when_false); + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + return optimize_comma_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Infer data flow facts about a predicate. */ +static void infer_pred_facts(struct opt_state *state, enum pred_type pred) { + constrain_pred(&state->facts_when_true.preds[pred], true); + constrain_pred(&state->facts_when_false.preds[pred], false); +} + +/** Infer data flow facts about an -{execut,read,writ}able expression. */ +static void infer_access_facts(struct opt_state *state, const struct bfs_expr *expr) { + if (expr->num & R_OK) { + infer_pred_facts(state, READABLE_PRED); + } + if (expr->num & W_OK) { + infer_pred_facts(state, WRITABLE_PRED); + } + if (expr->num & X_OK) { + infer_pred_facts(state, EXECUTABLE_PRED); + } +} + +/** Infer data flow facts about an icmp-style ([+-]N) expression. */ +static void infer_icmp_facts(struct opt_state *state, const struct bfs_expr *expr, enum range_type type) { + struct range *range_when_true = &state->facts_when_true.ranges[type]; + struct range *range_when_false = &state->facts_when_false.ranges[type]; + long long value = expr->num; + + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + constrain_min(range_when_true, value); + constrain_max(range_when_true, value); + range_remove(range_when_false, value); + break; + + case BFS_INT_LESS: + constrain_min(range_when_false, value); + constrain_max(range_when_true, value); + range_remove(range_when_true, value); + break; + + case BFS_INT_GREATER: + constrain_max(range_when_false, value); + constrain_min(range_when_true, value); + range_remove(range_when_true, value); + break; + } +} + +/** Infer data flow facts about a -gid expression. */ +static void infer_gid_facts(struct opt_state *state, const struct bfs_expr *expr) { + infer_icmp_facts(state, expr, GID_RANGE); + + const struct bfs_groups *groups = bfs_ctx_groups(state->ctx); + struct range *range = &state->facts_when_true.ranges[GID_RANGE]; + if (groups && range->min == range->max) { + gid_t gid = range->min; + bool nogroup = !bfs_getgrgid(groups, gid); + constrain_pred(&state->facts_when_true.preds[NOGROUP_PRED], nogroup); + } +} + +/** Infer data flow facts about a -uid expression. */ +static void infer_uid_facts(struct opt_state *state, const struct bfs_expr *expr) { + infer_icmp_facts(state, expr, UID_RANGE); + + const struct bfs_users *users = bfs_ctx_users(state->ctx); + struct range *range = &state->facts_when_true.ranges[UID_RANGE]; + if (users && range->min == range->max) { + uid_t uid = range->min; + bool nouser = !bfs_getpwuid(users, uid); + constrain_pred(&state->facts_when_true.preds[NOUSER_PRED], nouser); + } +} + +/** Infer data flow facts about a -samefile expression. */ +static void infer_samefile_facts(struct opt_state *state, const struct bfs_expr *expr) { + struct range *range_when_true = &state->facts_when_true.ranges[INUM_RANGE]; + constrain_min(range_when_true, expr->ino); + constrain_max(range_when_true, expr->ino); +} + +/** Infer data flow facts about a -type expression. */ +static void infer_type_facts(struct opt_state *state, const struct bfs_expr *expr) { + state->facts_when_true.types &= expr->num; + state->facts_when_false.types &= ~expr->num; +} + +/** Infer data flow facts about an -xtype expression. */ +static void infer_xtype_facts(struct opt_state *state, const struct bfs_expr *expr) { + state->facts_when_true.xtypes &= expr->num; + state->facts_when_false.xtypes &= ~expr->num; +} + +static struct bfs_expr *optimize_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + int optlevel = state->ctx->optlevel; + + state->facts_when_true = state->facts; + state->facts_when_false = state->facts; + + if (optlevel >= 2 && facts_are_impossible(&state->facts)) { + opt_debug(state, 2, "reachability: %pe --> %pe\n", expr, &bfs_false); + opt_warning(state, expr, "This expression is unreachable.\n\n"); + bfs_expr_free(expr); + expr = &bfs_false; + goto done; + } + + if (!bfs_expr_has_children(expr) && !expr->pure) { + facts_union(state->facts_when_impure, state->facts_when_impure, &state->facts); + } + + if (expr->eval_fn == eval_access) { + infer_access_facts(state, expr); + } else if (expr->eval_fn == eval_acl) { + infer_pred_facts(state, ACL_PRED); + } else if (expr->eval_fn == eval_capable) { + infer_pred_facts(state, CAPABLE_PRED); + } else if (expr->eval_fn == eval_depth) { + infer_icmp_facts(state, expr, DEPTH_RANGE); + } else if (expr->eval_fn == eval_empty) { + infer_pred_facts(state, EMPTY_PRED); + } else if (expr->eval_fn == eval_gid) { + infer_gid_facts(state, expr); + } else if (expr->eval_fn == eval_hidden) { + infer_pred_facts(state, HIDDEN_PRED); + } else if (expr->eval_fn == eval_inum) { + infer_icmp_facts(state, expr, INUM_RANGE); + } else if (expr->eval_fn == eval_links) { + infer_icmp_facts(state, expr, LINKS_RANGE); + } else if (expr->eval_fn == eval_nogroup) { + infer_pred_facts(state, NOGROUP_PRED); + } else if (expr->eval_fn == eval_nouser) { + infer_pred_facts(state, NOUSER_PRED); + } else if (expr->eval_fn == eval_samefile) { + infer_samefile_facts(state, expr); + } else if (expr->eval_fn == eval_size) { + infer_icmp_facts(state, expr, SIZE_RANGE); + } else if (expr->eval_fn == eval_sparse) { + infer_pred_facts(state, SPARSE_PRED); + } else if (expr->eval_fn == eval_type) { + infer_type_facts(state, expr); + } else if (expr->eval_fn == eval_uid) { + infer_uid_facts(state, expr); + } else if (expr->eval_fn == eval_xattr) { + infer_pred_facts(state, XATTR_PRED); + } else if (expr->eval_fn == eval_xtype) { + infer_xtype_facts(state, expr); + } else if (expr->eval_fn == eval_not) { + expr = optimize_not_expr_recursive(state, expr); + } else if (expr->eval_fn == eval_and) { + expr = optimize_and_expr_recursive(state, expr); + } else if (expr->eval_fn == eval_or) { + expr = optimize_or_expr_recursive(state, expr); + } else if (expr->eval_fn == eval_comma) { + expr = optimize_comma_expr_recursive(state, expr); + } + + if (!expr) { + goto done; + } + + if (bfs_expr_has_children(expr)) { + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + if (rhs) { + expr->persistent_fds = rhs->persistent_fds; + expr->ephemeral_fds = rhs->ephemeral_fds; + } + if (lhs) { + expr->persistent_fds += lhs->persistent_fds; + if (lhs->ephemeral_fds > expr->ephemeral_fds) { + expr->ephemeral_fds = lhs->ephemeral_fds; + } + } + } + + if (expr->always_true) { + set_facts_impossible(&state->facts_when_false); + } + if (expr->always_false) { + set_facts_impossible(&state->facts_when_true); + } + + if (optlevel < 2 || expr == &bfs_true || expr == &bfs_false) { + goto done; + } + + if (facts_are_impossible(&state->facts_when_true)) { + if (expr->pure) { + opt_debug(state, 2, "data flow: %pe --> %pe\n", expr, &bfs_false); + opt_warning(state, expr, "This expression is always false.\n\n"); + bfs_expr_free(expr); + expr = &bfs_false; + } else { + expr->always_false = true; + expr->probability = 0.0; + } + } else if (facts_are_impossible(&state->facts_when_false)) { + if (expr->pure) { + opt_debug(state, 2, "data flow: %pe --> %pe\n", expr, &bfs_true); + opt_warning(state, expr, "This expression is always true.\n\n"); + bfs_expr_free(expr); + expr = &bfs_true; + } else { + expr->always_true = true; + expr->probability = 1.0; + } + } + +done: + return expr; +} + +/** Swap the children of a binary expression if it would reduce the cost. */ +static bool reorder_expr(const struct opt_state *state, struct bfs_expr *expr, float swapped_cost) { + if (swapped_cost < expr->cost) { + bool debug = opt_debug(state, 3, "cost: %pe <==> ", expr); + struct bfs_expr *lhs = expr->lhs; + expr->lhs = expr->rhs; + expr->rhs = lhs; + if (debug) { + cfprintf(state->ctx->cerr, "%pe (~${ylw}%g${rs} --> ~${ylw}%g${rs})\n", expr, expr->cost, swapped_cost); + } + expr->cost = swapped_cost; + return true; + } else { + return false; + } +} + +/** + * Recursively reorder sub-expressions to reduce the overall cost. + * + * @param expr + * The expression to optimize. + * @return + * Whether any subexpression was reordered. + */ +static bool reorder_expr_recursive(const struct opt_state *state, struct bfs_expr *expr) { + if (!bfs_expr_has_children(expr)) { + return false; + } + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + bool ret = false; + if (lhs) { + ret |= reorder_expr_recursive(state, lhs); + } + if (rhs) { + ret |= reorder_expr_recursive(state, rhs); + } + + if (expr->eval_fn == eval_and || expr->eval_fn == eval_or) { + if (lhs->pure && rhs->pure) { + float rhs_prob = expr->eval_fn == eval_and ? rhs->probability : 1.0 - rhs->probability; + float swapped_cost = rhs->cost + rhs_prob*lhs->cost; + ret |= reorder_expr(state, expr, swapped_cost); + } + } + + return ret; +} + +/** + * Optimize a top-level expression. + */ +static struct bfs_expr *optimize_expr(struct opt_state *state, struct bfs_expr *expr) { + struct opt_facts saved_impure = *state->facts_when_impure; + + expr = optimize_expr_recursive(state, expr); + if (!expr) { + return NULL; + } + + if (state->ctx->optlevel >= 3 && reorder_expr_recursive(state, expr)) { + // Re-do optimizations to account for the new ordering + *state->facts_when_impure = saved_impure; + expr = optimize_expr_recursive(state, expr); + if (!expr) { + return NULL; + } + } + + return expr; +} + +int bfs_optimize(struct bfs_ctx *ctx) { + bfs_ctx_dump(ctx, DEBUG_OPT); + + struct opt_facts facts_when_impure; + set_facts_impossible(&facts_when_impure); + + struct opt_state state = { + .ctx = ctx, + .facts_when_impure = &facts_when_impure, + }; + facts_init(&state.facts); + + ctx->exclude = optimize_expr(&state, ctx->exclude); + if (!ctx->exclude) { + return -1; + } + + // Only non-excluded files are evaluated + state.facts = state.facts_when_false; + + struct range *depth = &state.facts.ranges[DEPTH_RANGE]; + constrain_min(depth, ctx->mindepth); + constrain_max(depth, ctx->maxdepth); + + ctx->expr = optimize_expr(&state, ctx->expr); + if (!ctx->expr) { + return -1; + } + + ctx->expr = ignore_result(&state, ctx->expr); + + if (facts_are_impossible(&facts_when_impure)) { + bfs_warning(ctx, "This command won't do anything.\n\n"); + } + + const struct range *depth_when_impure = &facts_when_impure.ranges[DEPTH_RANGE]; + long long mindepth = depth_when_impure->min; + long long maxdepth = depth_when_impure->max; + + int optlevel = ctx->optlevel; + + if (optlevel >= 2 && mindepth > ctx->mindepth) { + if (mindepth > INT_MAX) { + mindepth = INT_MAX; + } + ctx->mindepth = mindepth; + opt_debug(&state, 2, "data flow: mindepth --> %d\n", ctx->mindepth); + } + + if (optlevel >= 4 && maxdepth < ctx->maxdepth) { + if (maxdepth < INT_MIN) { + maxdepth = INT_MIN; + } + ctx->maxdepth = maxdepth; + opt_debug(&state, 4, "data flow: maxdepth --> %d\n", ctx->maxdepth); + } + + return 0; +} |