From 4a36bb92a5bbdc41965a6d2c6eae6cdca5983474 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Sun, 7 Jan 2024 12:19:17 -0500 Subject: expr: Make expressions variadic Rather than only unary/binary expressions, we now support an arbitrary number of children. The optimizer has been re-written almost completely and now supports optimal reordering of longer expression chains, rather than just arm-swapping. Fixes #85. --- src/opt.c | 2468 ++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 1642 insertions(+), 826 deletions(-) (limited to 'src/opt.c') diff --git a/src/opt.c b/src/opt.c index 1dc985a..7203c61 100644 --- a/src/opt.c +++ b/src/opt.c @@ -21,11 +21,12 @@ * -bar is likely to return false. * * -O4/-Ofast: aggressive optimizations that may affect correctness in corner - * cases. The main effect is to use impure to determine if any side-effects are - * reachable at all, and skipping the traversal if not. + * cases. The main effect is to use opt->impure to determine if any side- + * effects are reachable at all, skipping the traversal if not. */ #include "opt.h" +#include "bit.h" #include "color.h" #include "config.h" #include "ctx.h" @@ -33,6 +34,7 @@ #include "eval.h" #include "exec.h" #include "expr.h" +#include "list.h" #include "pwcache.h" #include #include @@ -41,9 +43,9 @@ #include #include -static char *fake_and_arg = "-a"; -static char *fake_or_arg = "-o"; -static char *fake_not_arg = "!"; +static char *fake_and_arg = "-and"; +static char *fake_or_arg = "-or"; +static char *fake_not_arg = "-not"; /** * The data flow domain for predicates. @@ -69,6 +71,70 @@ static void pred_join(enum df_pred *dest, enum df_pred src) { *dest |= src; } +/** + * Types of predicates we track. + */ +enum pred_type { + /** -readable */ + READABLE_PRED, + /** -writable */ + WRITABLE_PRED, + /** -executable */ + EXECUTABLE_PRED, + /** -acl */ + ACL_PRED, + /** -capable */ + CAPABLE_PRED, + /** -empty */ + EMPTY_PRED, + /** -hidden */ + HIDDEN_PRED, + /** -nogroup */ + NOGROUP_PRED, + /** -nouser */ + NOUSER_PRED, + /** -sparse */ + SPARSE_PRED, + /** -xattr */ + XATTR_PRED, + /** The number of pred_types. */ + PRED_TYPES, +}; + +/** Get the name of a predicate type. */ +static const char *pred_type_name(enum pred_type type) { + switch (type) { + case READABLE_PRED: + return "-readable"; + case WRITABLE_PRED: + return "-writable"; + case EXECUTABLE_PRED: + return "-executable"; + case ACL_PRED: + return "-acl"; + case CAPABLE_PRED: + return "-capable"; + case EMPTY_PRED: + return "-empty"; + case HIDDEN_PRED: + return "-hidden"; + case NOGROUP_PRED: + return "-nogroup"; + case NOUSER_PRED: + return "-nouser"; + case SPARSE_PRED: + return "-sparse"; + case XATTR_PRED: + return "-xattr"; + + case PRED_TYPES: + break; + } + + bfs_bug("Unknown predicate %d", (int)type); + return "???"; +} + /** * A contrained integer range. */ @@ -97,6 +163,11 @@ static void range_init_top(struct df_range *range) { range->max = LLONG_MAX; } +/** Check for an infinite range. */ +static bool range_is_top(const struct df_range *range) { + return range->min == 0 && range->max == LLONG_MAX; +} + /** Compute the minimum of two values. */ static long long min_value(long long a, long long b) { if (a < b) { @@ -170,35 +241,29 @@ enum range_type { RANGE_TYPES, }; -/** - * Types of predicates we track. - */ -enum pred_type { - /** -readable */ - READABLE_PRED, - /** -writable */ - WRITABLE_PRED, - /** -executable */ - EXECUTABLE_PRED, - /** -acl */ - ACL_PRED, - /** -capable */ - CAPABLE_PRED, - /** -empty */ - EMPTY_PRED, - /** -hidden */ - HIDDEN_PRED, - /** -nogroup */ - NOGROUP_PRED, - /** -nouser */ - NOUSER_PRED, - /** -sparse */ - SPARSE_PRED, - /** -xattr */ - XATTR_PRED, - /** The number of pred_types. */ - PRED_TYPES, -}; +/** Get the name of a range type. */ +static const char *range_type_name(enum range_type type) { + switch (type) { + case DEPTH_RANGE: + return "-depth"; + case GID_RANGE: + return "-gid"; + case INUM_RANGE: + return "-inum"; + case LINKS_RANGE: + return "-links"; + case SIZE_RANGE: + return "-size"; + case UID_RANGE: + return "-uid"; + + case RANGE_TYPES: + break; + } + + bfs_bug("Unknown range %d", (int)type); + return "???"; +} /** * The data flow analysis domain. @@ -210,9 +275,9 @@ struct df_domain { /** The value ranges we track. */ struct df_range ranges[RANGE_TYPES]; - /** Bitmask of possible file types. */ + /** Bitmask of possible -types. */ unsigned int types; - /** Bitmask of possible link target types. */ + /** Bitmask of possible -xtypes. */ unsigned int xtypes; }; @@ -265,6 +330,31 @@ static void df_init_top(struct df_domain *value) { value->xtypes = ~0; } +/** Check for the top element. */ +static bool df_is_top(const struct df_domain *value) { + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] != PRED_TOP) { + return false; + } + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + if (!range_is_top(&value->ranges[i])) { + return false; + } + } + + if (value->types != ~0U) { + return false; + } + + if (value->xtypes != ~0U) { + return false; + } + + return true; +} + /** Compute the union of two fact sets. */ static void df_join(struct df_domain *dest, const struct df_domain *src) { for (int i = 0; i < PRED_TYPES; ++i) { @@ -285,6 +375,15 @@ static void df_join(struct df_domain *dest, const struct df_domain *src) { struct bfs_opt { /** The context we're optimizing. */ struct bfs_ctx *ctx; + /** Optimization level (ctx->optlevel). */ + int level; + /** Recursion depth. */ + int depth; + + /** Whether to produce warnings. */ + bool warn; + /** Whether the result of this expression is ignored. */ + bool ignore_result; /** Data flow state before this expression is evaluated. */ struct df_domain before; @@ -296,18 +395,14 @@ struct bfs_opt { struct df_domain *impure; }; -/** Constrain the value of a predicate. */ -static void opt_constrain_pred(struct bfs_opt *opt, enum pred_type type, bool value) { - constrain_pred(&opt->after_true.preds[type], value); - constrain_pred(&opt->after_false.preds[type], !value); -} - /** Log an optimization. */ -attr(printf(3, 4)) -static bool opt_debug(const struct bfs_opt *opt, int level, const char *format, ...) { - bfs_assert(opt->ctx->optlevel >= level); +attr(printf(2, 3)) +static bool opt_debug(struct bfs_opt *opt, const char *format, ...) { + if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) { + for (int i = 0; i < opt->depth; ++i) { + cfprintf(opt->ctx->cerr, "│ "); + } - if (bfs_debug(opt->ctx, DEBUG_OPT, "${cyn}-O%d${rs}: ", level)) { va_list args; va_start(args, format); cvfprintf(opt->ctx->cerr, format, args); @@ -318,450 +413,577 @@ static bool opt_debug(const struct bfs_opt *opt, int level, const char *format, } } -/** Warn about an expression. */ -attr(printf(3, 4)) -static void opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) { - if (bfs_expr_warning(opt->ctx, expr)) { +/** Log a recursive call. */ +attr(printf(2, 3)) +static bool opt_enter(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + if (depth > 0) { + --opt->depth; + } + + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─╮ " : ""); + if (debug) { va_list args; va_start(args, format); - bfs_vwarning(opt->ctx, format, args); + cvfprintf(opt->ctx->cerr, format, args); va_end(args); } -} -/** Create a constant expression. */ -static struct bfs_expr *opt_const(const struct bfs_opt *opt, bool value) { - static bfs_eval_fn *fns[] = {eval_false, eval_true}; - static char *fake_args[] = {"-false", "-true"}; - return bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value]); + opt->depth = depth + 1; + return debug; } -/** - * Negate an expression. - */ -static struct bfs_expr *negate_expr(const struct bfs_opt *opt, struct bfs_expr *rhs, char **argv) { - if (rhs->eval_fn == eval_not) { - return rhs->rhs; - } +/** Log a recursive return. */ +attr(printf(2, 3)) +static bool opt_leave(struct bfs_opt *opt, const char *format, ...) { + bool debug = false; + int depth = opt->depth; - struct bfs_expr *expr = bfs_expr_new(opt->ctx, eval_not, 1, argv); - if (!expr) { - return NULL; + if (format) { + if (depth > 1) { + opt->depth -= 2; + } + + debug = opt_debug(opt, "%s", depth > 1 ? "├─╯ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } } - expr->lhs = NULL; - expr->rhs = rhs; - return expr; + opt->depth = depth - 1; + return debug; } -static struct bfs_expr *optimize_not_expr(const struct bfs_opt *opt, struct bfs_expr *expr); -static struct bfs_expr *optimize_and_expr(const struct bfs_opt *opt, struct bfs_expr *expr); -static struct bfs_expr *optimize_or_expr(const struct bfs_opt *opt, struct bfs_expr *expr); - -/** - * Apply De Morgan's laws. - */ -static struct bfs_expr *de_morgan(const struct bfs_opt *opt, struct bfs_expr *expr, char **argv) { - bool debug = opt_debug(opt, 1, "De Morgan's laws: %pe ", expr); - - struct bfs_expr *parent = negate_expr(opt, expr, argv); - if (!parent) { - return NULL; +/** Log a shallow visit. */ +attr(printf(2, 3)) +static bool opt_visit(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + if (depth > 0) { + --opt->depth; } - bool has_parent = true; - if (parent->eval_fn != eval_not) { - expr = parent; - has_parent = false; + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─◯ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); } - bfs_assert(expr->eval_fn == eval_and || expr->eval_fn == eval_or); - if (expr->eval_fn == eval_and) { - expr->eval_fn = eval_or; - expr->argv = &fake_or_arg; - } else { - expr->eval_fn = eval_and; - expr->argv = &fake_and_arg; - } + opt->depth = depth; + return debug; +} - expr->lhs = negate_expr(opt, expr->lhs, argv); - expr->rhs = negate_expr(opt, expr->rhs, argv); - if (!expr->lhs || !expr->rhs) { - return NULL; +/** Log the deletion of an expression. */ +attr(printf(2, 3)) +static bool opt_delete(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + + if (depth > 0) { + --opt->depth; } + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─✘ " : ""); if (debug) { - cfprintf(opt->ctx->cerr, "<==> %pe\n", parent); + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); } - if (expr->lhs->eval_fn == eval_not) { - expr->lhs = optimize_not_expr(opt, expr->lhs); - } - if (expr->rhs->eval_fn == eval_not) { - expr->rhs = optimize_not_expr(opt, expr->rhs); - } - if (!expr->lhs || !expr->rhs) { - return NULL; - } + opt->depth = depth; + return debug; +} - if (expr->eval_fn == eval_and) { - expr = optimize_and_expr(opt, expr); - } else { - expr = optimize_or_expr(opt, expr); +typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...); + +/** Print a df_pred. */ +static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) { + dump(opt, "${blu}%s${rs}: ", pred_type_name(type)); + + FILE *file = opt->ctx->cerr->file; + switch (value->preds[type]) { + case PRED_BOTTOM: + fprintf(file, "⊥\n"); + break; + case PRED_TOP: + fprintf(file, "⊤\n"); + break; + case PRED_TRUE: + fprintf(file, "true\n"); + break; + case PRED_FALSE: + fprintf(file, "false\n"); + break; } - if (has_parent) { - parent->rhs = expr; +} + +/** Print a df_range. */ +static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) { + dump(opt, "${blu}%s${rs}: ", range_type_name(type)); + + FILE *file = opt->ctx->cerr->file; + const struct df_range *range = &value->ranges[type]; + if (range_is_bottom(range)) { + fprintf(file, "⊥\n"); + } else if (range_is_top(range)) { + fprintf(file, "⊤\n"); + } else if (range->min == range->max) { + fprintf(file, "%lld\n", range->min); } else { - parent = expr; - } - if (!expr) { - return NULL; + if (range->min == LLONG_MIN) { + fprintf(file, "(-∞, "); + } else { + fprintf(file, "[%lld, ", range->min); + } + if (range->max == LLONG_MAX) { + fprintf(file, "∞)\n"); + } else { + fprintf(file, "%lld]\n", range->max); + } } +} - if (has_parent) { - parent = optimize_not_expr(opt, parent); +/** Print a set of types. */ +static void types_dump(dump_fn *dump, struct bfs_opt *opt, const char *name, unsigned int types) { + dump(opt, "${blu}%s${rs}: ", name); + + FILE *file = opt->ctx->cerr->file; + if (types == 0) { + fprintf(file, " ⊥\n"); + } else if (types == ~0U) { + fprintf(file, " ⊤\n"); + } else if (count_ones(types) < count_ones(~types)) { + fprintf(file, " 0x%X\n", types); + } else { + fprintf(file, "~0x%X\n", ~types); } - return parent; } -/** Optimize an expression recursively. */ -static struct bfs_expr *optimize_expr_recursive(struct bfs_opt *opt, struct bfs_expr *expr); +/** Calculate the number of lines of df_dump() output. */ +static int df_dump_lines(const struct df_domain *value) { + int lines = 0; -/** - * Optimize a negation. - */ -static struct bfs_expr *optimize_not_expr(const struct bfs_opt *opt, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_not); - - struct bfs_expr *rhs = expr->rhs; - - int optlevel = opt->ctx->optlevel; - if (optlevel >= 1) { - if (rhs->eval_fn == eval_true || rhs->eval_fn == eval_false) { - struct bfs_expr *ret = opt_const(opt, rhs->eval_fn == eval_false); - opt_debug(opt, 1, "constant propagation: %pe <==> %pe\n", expr, ret); - return ret; - } else if (rhs->eval_fn == eval_not) { - opt_debug(opt, 1, "double negation: %pe <==> %pe\n", expr, rhs->rhs); - return rhs->rhs; - } else if (bfs_expr_never_returns(rhs)) { - opt_debug(opt, 1, "reachability: %pe <==> %pe\n", expr, rhs); - return expr->rhs; - } else if ((rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) - && (rhs->lhs->eval_fn == eval_not || rhs->rhs->eval_fn == eval_not)) { - return de_morgan(opt, expr, expr->argv); - } + for (int i = 0; i < PRED_TYPES; ++i) { + lines += value->preds[i] != PRED_TOP; } - expr->pure = rhs->pure; - expr->always_true = rhs->always_false; - expr->always_false = rhs->always_true; - expr->cost = rhs->cost; - expr->probability = 1.0 - rhs->probability; + for (int i = 0; i < RANGE_TYPES; ++i) { + lines += !range_is_top(&value->ranges[i]); + } - return expr; + lines += value->types != ~0U; + lines += value->xtypes != ~0U; + + return lines; } -/** Optimize a negation recursively. */ -static struct bfs_expr *optimize_not_expr_recursive(struct bfs_opt *opt, struct bfs_expr *expr) { - struct bfs_opt rhs_state = *opt; - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - return NULL; +/** Get the right debugging function for a df_dump() line. */ +static dump_fn *df_dump_line(int lines, int *line) { + ++*line; + + if (lines == 1) { + return opt_visit; + } else if (*line == 1) { + return opt_enter; + } else if (*line == lines) { + return opt_leave; + } else { + return opt_debug; } +} - opt->after_true = rhs_state.after_false; - opt->after_false = rhs_state.after_true; - - return optimize_not_expr(opt, expr); -} - -/** Optimize a conjunction. */ -static struct bfs_expr *optimize_and_expr(const struct bfs_opt *opt, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_and); - - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - - const struct bfs_ctx *ctx = opt->ctx; - int optlevel = ctx->optlevel; - if (optlevel >= 1) { - if (lhs->eval_fn == eval_true) { - opt_debug(opt, 1, "conjunction elimination: %pe <==> %pe\n", expr, rhs); - return expr->rhs; - } else if (rhs->eval_fn == eval_true) { - opt_debug(opt, 1, "conjunction elimination: %pe <==> %pe\n", expr, lhs); - return expr->lhs; - } else if (lhs->always_false) { - opt_debug(opt, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); - opt_warning(opt, expr->rhs, "This expression is unreachable.\n\n"); - return expr->lhs; - } else if (lhs->always_true && rhs->eval_fn == eval_false) { - bool debug = opt_debug(opt, 1, "strength reduction: %pe <==> ", expr); - struct bfs_expr *ret = expr->lhs; - ret = negate_expr(opt, ret, &fake_not_arg); - if (debug && ret) { - cfprintf(ctx->cerr, "%pe\n", ret); - } - return ret; - } else if (optlevel >= 2 && lhs->pure && rhs->eval_fn == eval_false) { - opt_debug(opt, 2, "purity: %pe <==> %pe\n", expr, rhs); - opt_warning(opt, expr->lhs, "The result of this expression is ignored.\n\n"); - return expr->rhs; - } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { - return de_morgan(opt, expr, expr->lhs->argv); - } +/** Print a data flow value. */ +static void df_dump(struct bfs_opt *opt, const char *str, const struct df_domain *value) { + if (df_is_bottom(value)) { + opt_debug(opt, "%s: ⊥\n", str); + return; + } else if (df_is_top(value)) { + opt_debug(opt, "%s: ⊤\n", str); + return; } - expr->pure = lhs->pure && rhs->pure; - expr->always_true = lhs->always_true && rhs->always_true; - expr->always_false = lhs->always_false || rhs->always_false; - expr->cost = lhs->cost + lhs->probability * rhs->cost; - expr->probability = lhs->probability * rhs->probability; + if (!opt_debug(opt, "%s:\n", str)) { + return; + } - return expr; -} + int lines = df_dump_lines(value); + int line = 0; -/** Optimize a conjunction recursively. */ -static struct bfs_expr *optimize_and_expr_recursive(struct bfs_opt *opt, struct bfs_expr *expr) { - struct bfs_opt lhs_state = *opt; - expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); - if (!expr->lhs) { - return NULL; + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] != PRED_TOP) { + pred_dump(df_dump_line(lines, &line), opt, value, i); + } } - struct bfs_opt rhs_state = *opt; - rhs_state.before = lhs_state.after_true; - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - return NULL; + for (int i = 0; i < RANGE_TYPES; ++i) { + if (!range_is_top(&value->ranges[i])) { + range_dump(df_dump_line(lines, &line), opt, value, i); + } } - opt->after_true = rhs_state.after_true; - opt->after_false = lhs_state.after_false; - df_join(&opt->after_false, &rhs_state.after_false); - - return optimize_and_expr(opt, expr); -} - -/** Optimize a disjunction. */ -static struct bfs_expr *optimize_or_expr(const struct bfs_opt *opt, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_or); - - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - - const struct bfs_ctx *ctx = opt->ctx; - int optlevel = ctx->optlevel; - if (optlevel >= 1) { - if (lhs->always_true) { - opt_debug(opt, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); - opt_warning(opt, expr->rhs, "This expression is unreachable.\n\n"); - return expr->lhs; - } else if (lhs->eval_fn == eval_false) { - opt_debug(opt, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, rhs); - return expr->rhs; - } else if (rhs->eval_fn == eval_false) { - opt_debug(opt, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, lhs); - return expr->lhs; - } else if (lhs->always_false && rhs->eval_fn == eval_true) { - bool debug = opt_debug(opt, 1, "strength reduction: %pe <==> ", expr); - struct bfs_expr *ret = expr->lhs; - ret = negate_expr(opt, ret, &fake_not_arg); - if (debug && ret) { - cfprintf(ctx->cerr, "%pe\n", ret); - } - return ret; - } else if (optlevel >= 2 && lhs->pure && rhs->eval_fn == eval_true) { - opt_debug(opt, 2, "purity: %pe <==> %pe\n", expr, rhs); - opt_warning(opt, expr->lhs, "The result of this expression is ignored.\n\n"); - return expr->rhs; - } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { - return de_morgan(opt, expr, expr->lhs->argv); - } + if (value->types != ~0U) { + types_dump(df_dump_line(lines, &line), opt, "-type", value->types); } - expr->pure = lhs->pure && rhs->pure; - expr->always_true = lhs->always_true || rhs->always_true; - expr->always_false = lhs->always_false && rhs->always_false; - expr->cost = lhs->cost + (1 - lhs->probability) * rhs->cost; - expr->probability = lhs->probability + rhs->probability - lhs->probability * rhs->probability; + if (value->xtypes != ~0U) { + types_dump(df_dump_line(lines, &line), opt, "-xtype", value->xtypes); + } +} - return expr; +/** Check if an expression is constant. */ +static bool is_const(const struct bfs_expr *expr) { + return expr->eval_fn == eval_true || expr->eval_fn == eval_false; } -/** Optimize a disjunction recursively. */ -static struct bfs_expr *optimize_or_expr_recursive(struct bfs_opt *opt, struct bfs_expr *expr) { - struct bfs_opt lhs_state = *opt; - expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); - if (!expr->lhs) { - return NULL; +/** Warn about an expression. */ +attr(printf(3, 4)) +static void opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) { + if (!opt->warn) { + return; } - struct bfs_opt rhs_state = *opt; - rhs_state.before = lhs_state.after_false; - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - return NULL; + if (bfs_expr_is_parent(expr) || is_const(expr)) { + return; } - opt->after_false = rhs_state.after_false; - opt->after_true = lhs_state.after_true; - df_join(&opt->after_true, &rhs_state.after_true); - - return optimize_or_expr(opt, expr); + if (bfs_expr_warning(opt->ctx, expr)) { + va_list args; + va_start(args, format); + bfs_vwarning(opt->ctx, format, args); + va_end(args); + } } -/** Optimize an expression in an ignored-result context. */ -static struct bfs_expr *ignore_result(const struct bfs_opt *opt, struct bfs_expr *expr) { - int optlevel = opt->ctx->optlevel; +/** Remove and return an expression's children. */ +static void foster_children(struct bfs_expr *expr, struct bfs_exprs *children) { + bfs_assert(bfs_expr_is_parent(expr)); - if (optlevel >= 1) { - while (true) { - if (expr->eval_fn == eval_not) { - opt_debug(opt, 1, "ignored result: %pe --> %pe\n", expr, expr->rhs); - opt_warning(opt, expr, "The result of this expression is ignored.\n\n"); - expr = expr->rhs; - } else if (optlevel >= 2 - && (expr->eval_fn == eval_and || expr->eval_fn == eval_or || expr->eval_fn == eval_comma) - && expr->rhs->pure) { - opt_debug(opt, 2, "ignored result: %pe --> %pe\n", expr, expr->lhs); - opt_warning(opt, expr->rhs, "The result of this expression is ignored.\n\n"); - expr = expr->lhs; - } else { - break; - } - } + SLIST_INIT(children); + SLIST_EXTEND(children, &expr->children); - if (optlevel >= 2 && expr->pure && expr->eval_fn != eval_false) { - struct bfs_expr *ret = opt_const(opt, false); - opt_debug(opt, 2, "ignored result: %pe --> %pe\n", expr, ret); - opt_warning(opt, expr, "The result of this expression is ignored.\n\n"); - return ret; - } - } + expr->persistent_fds = 0; + expr->ephemeral_fds = 0; + expr->pure = true; +} - return expr; +/** Return an expression's only child. */ +static struct bfs_expr *only_child(struct bfs_expr *expr) { + bfs_assert(bfs_expr_is_parent(expr)); + struct bfs_expr *child = bfs_expr_children(expr); + bfs_assert(child && !child->next); + return child; } -/** Optimize a comma expression. */ -static struct bfs_expr *optimize_comma_expr(const struct bfs_opt *opt, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_comma); +/** Foster an expression's only child. */ +static struct bfs_expr *foster_only_child(struct bfs_expr *expr) { + struct bfs_expr *child = only_child(expr); + struct bfs_exprs children; + foster_children(expr, &children); + return child; +} - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - - int optlevel = opt->ctx->optlevel; - if (optlevel >= 1) { - lhs = expr->lhs = ignore_result(opt, lhs); - - if (bfs_expr_never_returns(lhs)) { - opt_debug(opt, 1, "reachability: %pe <==> %pe\n", expr, lhs); - opt_warning(opt, expr->rhs, "This expression is unreachable.\n\n"); - return expr->lhs; - } else if ((lhs->always_true && rhs->eval_fn == eval_true) - || (lhs->always_false && rhs->eval_fn == eval_false)) { - opt_debug(opt, 1, "redundancy elimination: %pe <==> %pe\n", expr, lhs); - return expr->lhs; - } else if (optlevel >= 2 && lhs->pure) { - opt_debug(opt, 2, "purity: %pe <==> %pe\n", expr, rhs); - opt_warning(opt, expr->lhs, "The result of this expression is ignored.\n\n"); - return expr->rhs; +/** An expression visitor. */ +struct visitor; + +/** An expression-visiting function. */ +typedef struct bfs_expr *visit_fn(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); + +/** An entry in a visitor lookup table. */ +struct visitor_table { + /** The evaluation function to match on. */ + bfs_eval_fn *eval_fn; + /** The visitor function. */ + visit_fn *visit; +}; + +/** Look up a visitor in a table. */ +static visit_fn *look_up_visitor(const struct bfs_expr *expr, const struct visitor_table table[]) { + for (size_t i = 0; table[i].eval_fn; ++i) { + if (expr->eval_fn == table[i].eval_fn) { + return table[i].visit; } } - expr->pure = lhs->pure && rhs->pure; - expr->always_true = bfs_expr_never_returns(lhs) || rhs->always_true; - expr->always_false = bfs_expr_never_returns(lhs) || rhs->always_false; - expr->cost = lhs->cost + rhs->cost; - expr->probability = rhs->probability; - - return expr; + return NULL; } -/** Optimize a comma expression recursively. */ -static struct bfs_expr *optimize_comma_expr_recursive(struct bfs_opt *opt, struct bfs_expr *expr) { - struct bfs_opt lhs_state = *opt; - expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); - if (!expr->lhs) { - return NULL; - } +struct visitor { + /** The name of this visitor. */ + const char *name; + + /** A function to call before visiting children. */ + visit_fn *enter; + /** The default visitor. */ + visit_fn *visit; + /** A function to call after visiting children. */ + visit_fn *leave; + + /** A visitor lookup table. */ + struct visitor_table table[]; +}; - struct bfs_opt rhs_state = *opt; - rhs_state.before = lhs_state.after_true; - df_join(&rhs_state.before, &lhs_state.after_false); +/** Recursive visitor implementation. */ +static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { +/** Visit a negation. */ +static struct bfs_expr *visit_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = foster_only_child(expr); + + struct bfs_opt nested = *opt; + rhs = visit_deep(&nested, rhs, visitor); + if (!rhs) { return NULL; } - return optimize_comma_expr(opt, expr); + opt->after_true = nested.after_false; + opt->after_false = nested.after_true; + + bfs_expr_append(expr, rhs); + return expr; } -/** Optimize an icmp-style ([+-]N) expression. */ -static void optimize_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) { - struct df_range *true_range = &opt->after_true.ranges[type]; - struct df_range *false_range = &opt->after_false.ranges[type]; - long long value = expr->num; +/** Visit a conjunction. */ +static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); - switch (expr->int_cmp) { - case BFS_INT_EQUAL: - constrain_min(true_range, value); - constrain_max(true_range, value); - range_remove(false_range, value); - break; + // Base case (-and) == (-true) + df_init_bottom(&opt->after_false); + struct bfs_opt nested = *opt; - case BFS_INT_LESS: - constrain_min(false_range, value); - constrain_max(true_range, value); - range_remove(true_range, value); - break; + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); - case BFS_INT_GREATER: - constrain_max(false_range, value); - constrain_min(true_range, value); - range_remove(true_range, value); - break; - } -} + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = false; + } -/** Optimize -{execut,read,writ}able. */ -static struct bfs_expr *optimize_access(struct bfs_opt *opt, struct bfs_expr *expr) { - expr->probability = 1.0; + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } - if (expr->num & R_OK) { - opt_constrain_pred(opt, READABLE_PRED, true); - expr->probability *= 0.99; - } + df_join(&opt->after_false, &nested.after_false); + nested.before = nested.after_true; - if (expr->num & W_OK) { - opt_constrain_pred(opt, WRITABLE_PRED, true); - expr->probability *= 0.8; + bfs_expr_append(expr, child); } - if (expr->num & X_OK) { - opt_constrain_pred(opt, EXECUTABLE_PRED, true); - expr->probability *= 0.2; - } + opt->after_true = nested.after_true; return expr; } -/** Optimize -empty. */ -static struct bfs_expr *optimize_empty(struct bfs_opt *opt, struct bfs_expr *expr) { - if (opt->ctx->optlevel >= 4) { - // Since -empty attempts to open and read directories, it may - // have side effects such as reporting permission errors, and - // thus shouldn't be re-ordered without aggressive optimizations - expr->pure = true; - } +/** Visit a disjunction. */ +static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); - return expr; -} + // Base case (-or) == (-false) + df_init_bottom(&opt->after_true); + struct bfs_opt nested = *opt; -/** Optimize -{exec,ok}{,dir}. */ -static struct bfs_expr *optimize_exec(struct bfs_opt *opt, struct bfs_expr *expr) { + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = false; + } + + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } + + df_join(&opt->after_true, &nested.after_true); + nested.before = nested.after_false; + + bfs_expr_append(expr, child); + } + + opt->after_false = nested.after_false; + + return expr; +} + +/** Visit a comma expression. */ +static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_opt nested = *opt; + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = true; + } + + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } + + nested.before = nested.after_true; + df_join(&nested.before, &nested.after_false); + + bfs_expr_append(expr, child); + } + + opt->after_true = nested.after_true; + opt->after_false = nested.after_false; + + return expr; +} + +/** Default enter() function. */ +static struct bfs_expr *visit_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_enter(opt, "%pe\n", expr); + opt->after_true = opt->before; + opt->after_false = opt->before; + return expr; +} + +/** Default leave() function. */ +static struct bfs_expr *visit_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_leave(opt, "%pe\n", expr); + return expr; +} + +static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + bool entered = false; + + visit_fn *enter = visitor->enter ? visitor->enter : visit_enter; + visit_fn *leave = visitor->leave ? visitor->leave : visit_leave; + + static const struct visitor_table table[] = { + {eval_not, visit_not}, + {eval_and, visit_and}, + {eval_or, visit_or}, + {eval_comma, visit_comma}, + {NULL, NULL}, + }; + visit_fn *recursive = look_up_visitor(expr, table); + if (recursive) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = recursive(opt, expr, visitor); + if (!expr) { + return NULL; + } + } + + visit_fn *general = visitor->visit; + if (general) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = general(opt, expr, visitor); + if (!expr) { + return NULL; + } + } + + visit_fn *specific = look_up_visitor(expr, visitor->table); + if (specific) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = specific(opt, expr, visitor); + if (!expr) { + return NULL; + } + } + + if (entered) { + expr = leave(opt, expr, visitor); + } else { + opt_visit(opt, "%pe\n", expr); + } + + return expr; +} + +/** Visit an expression recursively. */ +static struct bfs_expr *visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_enter(opt, "%s()\n", visitor->name); + expr = visit_deep(opt, expr, visitor); + opt_leave(opt, "\n"); + return expr; +} + +/** Visit an expression non-recursively. */ +static struct bfs_expr *visit_shallow(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + visit_fn *general = visitor->visit; + if (expr && general) { + expr = general(opt, expr, visitor); + } + + visit_fn *specific = look_up_visitor(expr, visitor->table); + if (expr && specific) { + expr = specific(opt, expr, visitor); + } + + return expr; +} + +/** Annotate -{execut,read,writ}able. */ +static struct bfs_expr *annotate_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->probability = 1.0; + if (expr->num & R_OK) { + expr->probability *= 0.99; + } + if (expr->num & W_OK) { + expr->probability *= 0.8; + } + if (expr->num & X_OK) { + expr->probability *= 0.2; + } + + return expr; +} + +/** Annotate -empty. */ +static struct bfs_expr *annotate_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->level >= 4) { + // Since -empty attempts to open and read directories, it may + // have side effects such as reporting permission errors, and + // thus shouldn't be re-ordered without aggressive optimizations + expr->pure = true; + } + + return expr; +} + +/** Annotate -exec. */ +static struct bfs_expr *annotate_exec(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->exec->flags & BFS_EXEC_MULTI) { expr->always_true = true; } else { @@ -771,33 +993,650 @@ static struct bfs_expr *optimize_exec(struct bfs_opt *opt, struct bfs_expr *expr return expr; } -/** Optimize -name/-lname/-path. */ -static struct bfs_expr *optimize_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr) { - if (strchr(expr->argv[1], '*')) { +/** Annotate -name/-lname/-path. */ +static struct bfs_expr *annotate_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->literal) { + expr->probability = 0.1; + } else { expr->probability = 0.5; + } + + return expr; +} + +/** Estimate probability for -x?type. */ +static void estimate_type_probability(struct bfs_expr *expr) { + unsigned int types = expr->num; + + expr->probability = 0.0; + if (types & (1 << BFS_BLK)) { + expr->probability += 0.00000721183; + } + if (types & (1 << BFS_CHR)) { + expr->probability += 0.0000499855; + } + if (types & (1 << BFS_DIR)) { + expr->probability += 0.114475; + } + if (types & (1 << BFS_DOOR)) { + expr->probability += 0.000001; + } + if (types & (1 << BFS_FIFO)) { + expr->probability += 0.00000248684; + } + if (types & (1 << BFS_REG)) { + expr->probability += 0.859772; + } + if (types & (1 << BFS_LNK)) { + expr->probability += 0.0256816; + } + if (types & (1 << BFS_SOCK)) { + expr->probability += 0.0000116881; + } + if (types & (1 << BFS_WHT)) { + expr->probability += 0.000001; + } +} + +/** Annotate -type. */ +static struct bfs_expr *annotate_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + estimate_type_probability(expr); + return expr; +} + +/** Annotate -xtype. */ +static struct bfs_expr *annotate_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->level >= 4) { + // Since -xtype dereferences symbolic links, it may have side + // effects such as reporting permission errors, and thus + // shouldn't be re-ordered without aggressive optimizations + expr->pure = true; + } + + estimate_type_probability(expr); + return expr; +} + +/** Annotate a negation. */ +static struct bfs_expr *annotate_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = only_child(expr); + expr->pure = rhs->pure; + expr->always_true = rhs->always_false; + expr->always_false = rhs->always_true; + expr->cost = rhs->cost; + expr->probability = 1.0 - rhs->probability; + return expr; +} + +/** Annotate a conjunction. */ +static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->always_true = true; + expr->always_false = false; + expr->cost = 0.0; + expr->probability = 1.0; + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + expr->pure &= child->pure; + expr->always_true &= child->always_true; + expr->always_false |= child->always_false; + expr->cost += expr->probability * child->cost; + expr->probability *= child->probability; + } + + return expr; +} + +/** Annotate a disjunction. */ +static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->always_true = false; + expr->always_false = true; + expr->cost = 0.0; + + float false_prob = 1.0; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + expr->pure &= child->pure; + expr->always_true |= child->always_true; + expr->always_false &= child->always_false; + expr->cost += false_prob * child->cost; + false_prob *= (1.0 - child->probability); + } + expr->probability = 1.0 - false_prob; + + return expr; +} + +/** Annotate a comma expression. */ +static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->cost = 0.0; + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + expr->pure &= child->pure; + expr->always_true = child->always_true; + expr->always_false = child->always_false; + expr->cost += child->cost; + expr->probability = child->probability; + } + + return expr; +} + +/** Annotate an arbitrary expression. */ +static struct bfs_expr *annotate_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + /** Table of pure expressions. */ + static bfs_eval_fn *const pure[] = { + eval_access, + eval_acl, + eval_capable, + eval_depth, + eval_false, + eval_flags, + eval_fstype, + eval_gid, + eval_hidden, + eval_inum, + eval_links, + eval_lname, + eval_name, + eval_newer, + eval_nogroup, + eval_nouser, + eval_path, + eval_perm, + eval_regex, + eval_samefile, + eval_size, + eval_sparse, + eval_time, + eval_true, + eval_type, + eval_uid, + eval_used, + eval_xattr, + eval_xattrname, + }; + + expr->pure = false; + for (size_t i = 0; i < countof(pure); ++i) { + if (expr->eval_fn == pure[i]) { + expr->pure = true; + break; + } + } + + /** Table of always-true expressions. */ + static bfs_eval_fn *const always_true[] = { + eval_fls, + eval_fprint, + eval_fprint0, + eval_fprintf, + eval_fprintx, + eval_prune, + eval_true, + // Non-returning + eval_exit, + eval_quit, + }; + + expr->always_true = false; + for (size_t i = 0; i < countof(always_true); ++i) { + if (expr->eval_fn == always_true[i]) { + expr->always_true = true; + break; + } + } + + /** Table of always-false expressions. */ + static bfs_eval_fn *const always_false[] = { + eval_false, + // Non-returning + eval_exit, + eval_quit, + }; + + expr->always_false = false; + for (size_t i = 0; i < countof(always_false); ++i) { + if (expr->eval_fn == always_false[i]) { + expr->always_false = true; + break; + } + } + +#define FAST_COST 40.0 +#define FNMATCH_COST 400.0 +#define STAT_COST 1000.0 +#define PRINT_COST 20000.0 + + /** Table of expression costs. */ + static const struct { + bfs_eval_fn *eval_fn; + float cost; + } costs[] = { + {eval_access, STAT_COST}, + {eval_acl, STAT_COST}, + {eval_capable, STAT_COST}, + {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat() + {eval_fls, PRINT_COST}, + {eval_fprint, PRINT_COST}, + {eval_fprint0, PRINT_COST}, + {eval_fprintf, PRINT_COST}, + {eval_fprintx, PRINT_COST}, + {eval_fstype, STAT_COST}, + {eval_gid, STAT_COST}, + {eval_inum, STAT_COST}, + {eval_links, STAT_COST}, + {eval_lname, FNMATCH_COST}, + {eval_name, FNMATCH_COST}, + {eval_newer, STAT_COST}, + {eval_nogroup, STAT_COST}, + {eval_nouser, STAT_COST}, + {eval_path, FNMATCH_COST}, + {eval_perm, STAT_COST}, + {eval_samefile, STAT_COST}, + {eval_size, STAT_COST}, + {eval_sparse, STAT_COST}, + {eval_time, STAT_COST}, + {eval_uid, STAT_COST}, + {eval_used, STAT_COST}, + {eval_xattr, STAT_COST}, + {eval_xattrname, STAT_COST}, + }; + + expr->cost = FAST_COST; + for (size_t i = 0; i < countof(costs); ++i) { + if (expr->eval_fn == costs[i].eval_fn) { + expr->cost = costs[i].cost; + break; + } + } + + /** Table of expression probabilities. */ + static const struct { + /** The evaluation function with this cost. */ + bfs_eval_fn *eval_fn; + /** The matching probability. */ + float probability; + } probs[] = { + {eval_acl, 0.00002}, + {eval_capable, 0.000002}, + {eval_empty, 0.01}, + {eval_false, 0.0}, + {eval_hidden, 0.01}, + {eval_nogroup, 0.01}, + {eval_nouser, 0.01}, + {eval_samefile, 0.01}, + {eval_true, 1.0}, + {eval_xattr, 0.01}, + {eval_xattrname, 0.01}, + }; + + expr->probability = 0.5; + for (size_t i = 0; i < countof(probs); ++i) { + if (expr->eval_fn == probs[i].eval_fn) { + expr->probability = probs[i].probability; + break; + } + } + + return expr; +} + +/** + * Annotating visitor. + */ +static const struct visitor annotate = { + .name = "annotate", + .visit = annotate_visit, + .table = { + {eval_access, annotate_access}, + {eval_empty, annotate_empty}, + {eval_exec, annotate_exec}, + {eval_lname, annotate_fnmatch}, + {eval_name, annotate_fnmatch}, + {eval_path, annotate_fnmatch}, + {eval_type, annotate_type}, + {eval_xtype, annotate_xtype}, + + {eval_not, annotate_not}, + {eval_and, annotate_and}, + {eval_or, annotate_or}, + {eval_comma, annotate_comma}, + + {NULL, NULL}, + }, +}; + +/** Create a constant expression. */ +static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) { + static bfs_eval_fn *const fns[] = {eval_false, eval_true}; + static char *fake_args[] = {"-false", "-true"}; + + struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value]); + return visit_shallow(opt, expr, &annotate); +} + +/** Negate an expression, keeping it canonical. */ +static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr, char **argv) { + if (expr->eval_fn == eval_not) { + return only_child(expr); + } else if (expr->eval_fn == eval_true) { + return opt_const(opt, false); + } else if (expr->eval_fn == eval_false) { + return opt_const(opt, true); + } + + struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv); + if (!ret) { + return NULL; + } + + bfs_expr_append(ret, expr); + return visit_shallow(opt, ret, &annotate); +} + +/** Sink negations into a conjunction/disjunction using De Morgan's laws. */ +static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *expr) { + opt_debug(opt, "De Morgan's laws\n"); + + char **argv = expr->argv; + expr = only_child(expr); + opt_enter(opt, "%pe\n", expr); + + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; } else { - expr->probability = 0.1; + bfs_assert(expr->eval_fn == eval_or); + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + opt_enter(opt, "%pe\n", child); + + child = negate_expr(opt, child, argv); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + bfs_expr_append(expr, child); + } + + opt_leave(opt, "%pe\n", expr); + return visit_shallow(opt, expr, &annotate); +} + +/** Sink a negation into a comma expression. */ +static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) { + bfs_assert(expr->eval_fn == eval_comma); + + opt_enter(opt, "%pe\n", expr); + + char **argv = expr->argv; + expr = only_child(expr); + + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + if (SLIST_EMPTY(&children)) { + opt_enter(opt, "%pe\n", child); + opt_debug(opt, "sink\n"); + + child = negate_expr(opt, child, argv); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + } else { + opt_visit(opt, "%pe\n", child); + } + + bfs_expr_append(expr, child); + } + + opt_leave(opt, "%pe\n", expr); + return visit_shallow(opt, expr, &annotate); +} + +/** Canonicalize a negation. */ +static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = only_child(expr); + + if (rhs->eval_fn == eval_not) { + opt_debug(opt, "double negation\n"); + rhs = only_child(expr); + return only_child(rhs); + } else if (rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) { + return sink_not_andor(opt, expr); + } else if (rhs->eval_fn == eval_comma) { + return sink_not_comma(opt, expr); + } else if (is_const(rhs)) { + opt_debug(opt, "constant propagation\n"); + return opt_const(opt, rhs->eval_fn == eval_false); + } else { + return expr; + } +} + +/** Canonicalize an associative operator. */ +static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_exprs flat; + SLIST_INIT(&flat); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + if (child->eval_fn == expr->eval_fn) { + struct bfs_expr *head = SLIST_HEAD(&child->children); + struct bfs_expr *tail = SLIST_TAIL(&child->children); + + if (!head) { + opt_delete(opt, "%pe [empty]\n", child); + } else { + opt_enter(opt, "%pe\n", child); + opt_debug(opt, "associativity\n"); + if (head == tail) { + opt_leave(opt, "%pe\n", head); + } else if (head->next == tail) { + opt_leave(opt, "%pe %pe\n", head, tail); + } else { + opt_leave(opt, "%pe ... %pe\n", head, tail); + } + } + + SLIST_EXTEND(&flat, &child->children); + } else { + opt_visit(opt, "%pe\n", child); + SLIST_APPEND(&flat, child); + } + } + + bfs_expr_extend(expr, &flat); + + return visit_shallow(opt, expr, &annotate); +} + +/** + * Canonicalizing visitor. + */ +static const struct visitor canonicalize = { + .name = "canonicalize", + .table = { + {eval_not, canonicalize_not}, + {eval_and, canonicalize_assoc}, + {eval_or, canonicalize_assoc}, + {eval_comma, canonicalize_assoc}, + {NULL, NULL}, + }, +}; + +/** Calculate the cost of an ordered pair of expressions. */ +static float expr_cost(const struct bfs_expr *parent, const struct bfs_expr *lhs, const struct bfs_expr *rhs) { + // https://cs.stackexchange.com/a/66921/21004 + float prob = lhs->probability; + if (parent->eval_fn == eval_or) { + prob = 1.0 - prob; + } + return lhs->cost + prob * rhs->cost; +} + +/** Sort a block of expressions. */ +static void sort_exprs(struct bfs_opt *opt, struct bfs_expr *parent, struct bfs_exprs *exprs) { + if (!exprs->head || !exprs->head->next) { + return; + } + + struct bfs_exprs left, right; + SLIST_INIT(&left); + SLIST_INIT(&right); + + // Split + for (struct bfs_expr *hare = exprs->head; hare && (hare = hare->next); hare = hare->next) { + struct bfs_expr *tortoise = SLIST_POP(exprs); + SLIST_APPEND(&left, tortoise); + } + SLIST_EXTEND(&right, exprs); + + // Recurse + sort_exprs(opt, parent, &left); + sort_exprs(opt, parent, &right); + + // Merge + while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { + struct bfs_expr *lhs = left.head; + struct bfs_expr *rhs = right.head; + + float cost = expr_cost(parent, lhs, rhs); + float swapped = expr_cost(parent, rhs, lhs); + + if (cost <= swapped) { + SLIST_POP(&left); + SLIST_APPEND(exprs, lhs); + } else { + opt_enter(opt, "%pe %pe [${ylw}%g${rs}]\n", lhs, rhs, cost); + SLIST_POP(&right); + SLIST_APPEND(exprs, rhs); + opt_leave(opt, "%pe %pe [${ylw}%g${rs}]\n", rhs, lhs, swapped); + } + } + SLIST_EXTEND(exprs, &left); + SLIST_EXTEND(exprs, &right); +} + +/** Reorder children to reduce cost. */ +static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + // Split into blocks of consecutive pure/impure expressions, and sort + // the pure blocks + struct bfs_exprs pure; + SLIST_INIT(&pure); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + if (child->pure) { + SLIST_APPEND(&pure, child); + } else { + sort_exprs(opt, expr, &pure); + bfs_expr_extend(expr, &pure); + bfs_expr_append(expr, child); + } + } + sort_exprs(opt, expr, &pure); + bfs_expr_extend(expr, &pure); + + return visit_shallow(opt, expr, &annotate); +} + +/** + * Reordering visitor. + */ +static const struct visitor reorder = { + .name = "reorder", + .table = { + {eval_and, reorder_andor}, + {eval_or, reorder_andor}, + {NULL, NULL}, + }, +}; + +/** Transfer function for simple predicates. */ +static void data_flow_pred(struct bfs_opt *opt, enum pred_type pred, bool value) { + constrain_pred(&opt->after_true.preds[pred], value); + constrain_pred(&opt->after_false.preds[pred], !value); +} + +/** Transfer function for icmp-style ([+-]N) expressions. */ +static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) { + struct df_range *true_range = &opt->after_true.ranges[type]; + struct df_range *false_range = &opt->after_false.ranges[type]; + long long value = expr->num; + + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + constrain_min(true_range, value); + constrain_max(true_range, value); + range_remove(false_range, value); + break; + + case BFS_INT_LESS: + constrain_min(false_range, value); + constrain_max(true_range, value); + range_remove(true_range, value); + break; + + case BFS_INT_GREATER: + constrain_max(false_range, value); + constrain_min(true_range, value); + range_remove(true_range, value); + break; + } +} + +/** Transfer function for -{execut,read,writ}able. */ +static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->num & R_OK) { + data_flow_pred(opt, READABLE_PRED, true); + } + if (expr->num & W_OK) { + data_flow_pred(opt, WRITABLE_PRED, true); + } + if (expr->num & X_OK) { + data_flow_pred(opt, EXECUTABLE_PRED, true); } return expr; } -/** Optimize -gid. */ -static struct bfs_expr *optimize_gid(struct bfs_opt *opt, struct bfs_expr *expr) { +/** Transfer function for -gid. */ +static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[GID_RANGE]; if (range->min == range->max) { gid_t gid = range->min; bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid); if (errno == 0) { - opt_constrain_pred(opt, NOGROUP_PRED, nogroup); + data_flow_pred(opt, NOGROUP_PRED, nogroup); } } return expr; } -/** Optimize -inum. */ -static struct bfs_expr *optimize_inum(struct bfs_opt *opt, struct bfs_expr *expr) { +/** Transfer function for -inum. */ +static struct bfs_expr *data_flow_inum(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[INUM_RANGE]; if (range->min == range->max) { expr->probability = 0.01; @@ -808,8 +1647,8 @@ static struct bfs_expr *optimize_inum(struct bfs_opt *opt, struct bfs_expr *expr return expr; } -/** Optimize -links. */ -static struct bfs_expr *optimize_links(struct bfs_opt *opt, struct bfs_expr *expr) { +/** Transfer function for -links. */ +static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[LINKS_RANGE]; if (1 >= range->min && 1 <= range->max) { expr->probability = 0.99; @@ -820,30 +1659,20 @@ static struct bfs_expr *optimize_links(struct bfs_opt *opt, struct bfs_expr *exp return expr; } -/** Optimize -uid. */ -static struct bfs_expr *optimize_uid(struct bfs_opt *opt, struct bfs_expr *expr) { - struct df_range *range = &opt->after_true.ranges[UID_RANGE]; - if (range->min == range->max) { - uid_t uid = range->min; - bool nouser = !bfs_getpwuid(opt->ctx->users, uid); - if (errno == 0) { - opt_constrain_pred(opt, NOUSER_PRED, nouser); - } - } +/** Transfer function for -samefile. */ +static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *true_range = &opt->after_true.ranges[INUM_RANGE]; + constrain_min(true_range, expr->ino); + constrain_max(true_range, expr->ino); - return expr; -} + struct df_range *false_range = &opt->after_false.ranges[INUM_RANGE]; + range_remove(false_range, expr->ino); -/** Optimize -samefile. */ -static struct bfs_expr *optimize_samefile(struct bfs_opt *opt, struct bfs_expr *expr) { - struct df_range *range = &opt->after_true.ranges[INUM_RANGE]; - constrain_min(range, expr->ino); - constrain_max(range, expr->ino); return expr; } -/** Optimize -size. */ -static struct bfs_expr *optimize_size(struct bfs_opt *opt, struct bfs_expr *expr) { +/** Transfer function for -size. */ +static struct bfs_expr *data_flow_size(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[SIZE_RANGE]; if (range->min == range->max) { expr->probability = 0.01; @@ -854,473 +1683,449 @@ static struct bfs_expr *optimize_size(struct bfs_opt *opt, struct bfs_expr *expr return expr; } -/** Estimate probability for -x?type. */ -static void estimate_type_probability(struct bfs_expr *expr) { - unsigned int types = expr->num; - - expr->probability = 0.0; - if (types & (1 << BFS_BLK)) { - expr->probability += 0.00000721183; - } - if (types & (1 << BFS_CHR)) { - expr->probability += 0.0000499855; - } - if (types & (1 << BFS_DIR)) { - expr->probability += 0.114475; - } - if (types & (1 << BFS_DOOR)) { - expr->probability += 0.000001; - } - if (types & (1 << BFS_FIFO)) { - expr->probability += 0.00000248684; - } - if (types & (1 << BFS_REG)) { - expr->probability += 0.859772; - } - if (types & (1 << BFS_LNK)) { - expr->probability += 0.0256816; - } - if (types & (1 << BFS_SOCK)) { - expr->probability += 0.0000116881; - } - if (types & (1 << BFS_WHT)) { - expr->probability += 0.000001; - } -} - -/** Optimize -type. */ -static struct bfs_expr *optimize_type(struct bfs_opt *opt, struct bfs_expr *expr) { +/** Transfer function for -type. */ +static struct bfs_expr *data_flow_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt->after_true.types &= expr->num; opt->after_false.types &= ~expr->num; - - estimate_type_probability(expr); - return expr; } -/** Optimize -xtype. */ -static struct bfs_expr *optimize_xtype(struct bfs_opt *opt, struct bfs_expr *expr) { - if (opt->ctx->optlevel >= 4) { - // Since -xtype dereferences symbolic links, it may have side - // effects such as reporting permission errors, and thus - // shouldn't be re-ordered without aggressive optimizations - expr->pure = true; +/** Transfer function for -uid. */ +static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[UID_RANGE]; + if (range->min == range->max) { + uid_t uid = range->min; + bool nouser = !bfs_getpwuid(opt->ctx->users, uid); + if (errno == 0) { + data_flow_pred(opt, NOUSER_PRED, nouser); + } } + return expr; +} + +/** Transfer function for -xtype. */ +static struct bfs_expr *data_flow_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt->after_true.xtypes &= expr->num; opt->after_false.xtypes &= ~expr->num; - - estimate_type_probability(expr); - return expr; } -/** - * Table of pure expressions. - */ -static bfs_eval_fn *const opt_pure[] = { - eval_access, - eval_acl, - eval_capable, - eval_depth, - eval_false, - eval_flags, - eval_fstype, - eval_gid, - eval_hidden, - eval_inum, - eval_links, - eval_lname, - eval_name, - eval_newer, - eval_nogroup, - eval_nouser, - eval_path, - eval_perm, - eval_regex, - eval_samefile, - eval_size, - eval_sparse, - eval_time, - eval_true, - eval_type, - eval_uid, - eval_used, - eval_xattr, - eval_xattrname, -}; - -/** - * Table of always-true expressions. - */ -static bfs_eval_fn *const opt_always_true[] = { - eval_fls, - eval_fprint, - eval_fprint0, - eval_fprintf, - eval_fprintx, - eval_prune, - eval_true, - - // Non-returning - eval_exit, - eval_quit, -}; - -/** - * Table of always-false expressions. - */ -static bfs_eval_fn *const opt_always_false[] = { - eval_false, - - // Non-returning - eval_exit, - eval_quit, -}; - -#define FAST_COST 40.0 -#define FNMATCH_COST 400.0 -#define STAT_COST 1000.0 -#define PRINT_COST 20000.0 - -/** - * Table of expression costs. - */ -static const struct { - /** The evaluation function with this cost. */ - bfs_eval_fn *eval_fn; - /** The matching cost. */ - float cost; -} opt_costs[] = { - {eval_access, STAT_COST}, - {eval_acl, STAT_COST}, - {eval_capable, STAT_COST}, - {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat() - {eval_fls, PRINT_COST}, - {eval_fprint, PRINT_COST}, - {eval_fprint0, PRINT_COST}, - {eval_fprintf, PRINT_COST}, - {eval_fprintx, PRINT_COST}, - {eval_fstype, STAT_COST}, - {eval_gid, STAT_COST}, - {eval_inum, STAT_COST}, - {eval_links, STAT_COST}, - {eval_lname, FNMATCH_COST}, - {eval_name, FNMATCH_COST}, - {eval_newer, STAT_COST}, - {eval_nogroup, STAT_COST}, - {eval_nouser, STAT_COST}, - {eval_path, FNMATCH_COST}, - {eval_perm, STAT_COST}, - {eval_samefile, STAT_COST}, - {eval_size, STAT_COST}, - {eval_sparse, STAT_COST}, - {eval_time, STAT_COST}, - {eval_uid, STAT_COST}, - {eval_used, STAT_COST}, - {eval_xattr, STAT_COST}, - {eval_xattrname, STAT_COST}, -}; - -/** - * Table of expression probabilities. - */ -static const struct { - /** The evaluation function with this cost. */ - bfs_eval_fn *eval_fn; - /** The matching probability. */ - float probability; -} opt_probs[] = { - {eval_acl, 0.00002}, - {eval_capable, 0.000002}, - {eval_empty, 0.01}, - {eval_false, 0.0}, - {eval_hidden, 0.01}, - {eval_nogroup, 0.01}, - {eval_nouser, 0.01}, - {eval_samefile, 0.01}, - {eval_true, 1.0}, - {eval_xattr, 0.01}, - {eval_xattrname, 0.01}, -}; +/** Data flow visitor entry. */ +static struct bfs_expr *data_flow_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + visit_enter(opt, expr, visitor); -/** - * Table of simple predicates. - */ -static const struct { - /** The evaluation function this optimizer applies to. */ - bfs_eval_fn *eval_fn; - /** The corresponding predicate. */ - enum pred_type pred; -} opt_preds[] = { - {eval_acl, ACL_PRED}, - {eval_capable, CAPABLE_PRED}, - {eval_empty, EMPTY_PRED}, - {eval_hidden, HIDDEN_PRED}, - {eval_nogroup, NOGROUP_PRED}, - {eval_nouser, NOUSER_PRED}, - {eval_sparse, SPARSE_PRED}, - {eval_xattr, XATTR_PRED}, -}; + df_dump(opt, "before", &opt->before); -/** - * Table of simple range comparisons. - */ -static const struct { - /** The evaluation function this optimizer applies to. */ - bfs_eval_fn *eval_fn; - /** The corresponding range. */ - enum range_type range; -} opt_ranges[] = { - {eval_depth, DEPTH_RANGE}, - {eval_gid, GID_RANGE}, - {eval_inum, INUM_RANGE}, - {eval_links, LINKS_RANGE}, - {eval_size, SIZE_RANGE}, - {eval_uid, UID_RANGE}, -}; + if (!bfs_expr_is_parent(expr) && !expr->pure) { + df_join(opt->impure, &opt->before); + df_dump(opt, "impure", opt->impure); + } -/** Signature for custom optimizer functions. */ -typedef struct bfs_expr *bfs_opt_fn(struct bfs_opt *opt, struct bfs_expr *expr); + return expr; +} -/** Table of custom optimizer functions. */ -static const struct { - /** The evaluation function this optimizer applies to. */ - bfs_eval_fn *eval_fn; - /** The corresponding optimizer function. */ - bfs_opt_fn *opt_fn; -} opt_fns[] = { - // Primaries - {eval_access, optimize_access}, - {eval_empty, optimize_empty}, - {eval_exec, optimize_exec}, - {eval_gid, optimize_gid}, - {eval_inum, optimize_inum}, - {eval_links, optimize_links}, - {eval_lname, optimize_fnmatch}, - {eval_name, optimize_fnmatch}, - {eval_path, optimize_fnmatch}, - {eval_samefile, optimize_samefile}, - {eval_size, optimize_size}, - {eval_type, optimize_type}, - {eval_uid, optimize_uid}, - {eval_xtype, optimize_xtype}, - - // Operators - {eval_and, optimize_and_expr_recursive}, - {eval_comma, optimize_comma_expr_recursive}, - {eval_not, optimize_not_expr_recursive}, - {eval_or, optimize_or_expr_recursive}, -}; +/** Data flow visitor exit. */ +static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->always_true) { + expr->probability = 1.0; + df_init_bottom(&opt->after_false); + } -/** - * Look up the appropriate optimizer for an expression and call it. - */ -static struct bfs_expr *optimize_expr_lookup(struct bfs_opt *opt, struct bfs_expr *expr) { - for (size_t i = 0; i < countof(opt_pure); ++i) { - if (opt_pure[i] == expr->eval_fn) { - expr->pure = true; - break; - } + if (expr->always_false) { + expr->probability = 0.0; + df_init_bottom(&opt->after_true); } - for (size_t i = 0; i < countof(opt_always_true); ++i) { - if (opt_always_true[i] == expr->eval_fn) { + df_dump(opt, "after true", &opt->after_true); + df_dump(opt, "after false", &opt->after_false); + + if (df_is_bottom(&opt->after_false)) { + if (!expr->pure) { expr->always_true = true; - break; + expr->probability = 0.0; + } else if (expr->eval_fn != eval_true) { + opt_warning(opt, expr, "This expression is always true.\n\n"); + opt_debug(opt, "pure, always true\n"); + expr = opt_const(opt, true); + if (!expr) { + return NULL; + } } } - for (size_t i = 0; i < countof(opt_always_false); ++i) { - if (opt_always_false[i] == expr->eval_fn) { + if (df_is_bottom(&opt->after_true)) { + if (!expr->pure) { expr->always_false = true; - break; + expr->probability = 0.0; + } else if (expr->eval_fn != eval_false) { + opt_warning(opt, expr, "This expression is always false.\n\n"); + opt_debug(opt, "pure, always false\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; + } } } - expr->cost = FAST_COST; - for (size_t i = 0; i < countof(opt_costs); ++i) { - if (opt_costs[i].eval_fn == expr->eval_fn) { - expr->cost = opt_costs[i].cost; - break; + return visit_leave(opt, expr, visitor); +} + +/** Data flow visitor function. */ +static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->ignore_result && expr->pure) { + opt_debug(opt, "ignored result\n"); + opt_warning(opt, expr, "The result of this expression is ignored.\n\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; } } - for (size_t i = 0; i < countof(opt_probs); ++i) { - if (opt_probs[i].eval_fn == expr->eval_fn) { - expr->probability = opt_probs[i].probability; - break; + if (df_is_bottom(&opt->before)) { + opt_debug(opt, "unreachable\n"); + opt_warning(opt, expr, "This expression is unreachable.\n\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; } } - for (size_t i = 0; i < countof(opt_preds); ++i) { - if (opt_preds[i].eval_fn == expr->eval_fn) { - opt_constrain_pred(opt, opt_preds[i].pred, true); + /** Table of simple predicates. */ + static const struct { + bfs_eval_fn *eval_fn; + enum pred_type pred; + } preds[] = { + {eval_acl, ACL_PRED}, + {eval_capable, CAPABLE_PRED}, + {eval_empty, EMPTY_PRED}, + {eval_hidden, HIDDEN_PRED}, + {eval_nogroup, NOGROUP_PRED}, + {eval_nouser, NOUSER_PRED}, + {eval_sparse, SPARSE_PRED}, + {eval_xattr, XATTR_PRED}, + }; + + for (size_t i = 0; i < countof(preds); ++i) { + if (preds[i].eval_fn == expr->eval_fn) { + data_flow_pred(opt, preds[i].pred, true); break; } } - for (size_t i = 0; i < countof(opt_ranges); ++i) { - if (opt_ranges[i].eval_fn == expr->eval_fn) { - optimize_icmp(opt, expr, opt_ranges[i].range); + /** Table of simple range comparisons. */ + static const struct { + bfs_eval_fn *eval_fn; + enum range_type range; + } ranges[] = { + {eval_depth, DEPTH_RANGE}, + {eval_gid, GID_RANGE}, + {eval_inum, INUM_RANGE}, + {eval_links, LINKS_RANGE}, + {eval_size, SIZE_RANGE}, + {eval_uid, UID_RANGE}, + }; + + for (size_t i = 0; i < countof(ranges); ++i) { + if (ranges[i].eval_fn == expr->eval_fn) { + data_flow_icmp(opt, expr, ranges[i].range); break; } } - for (size_t i = 0; i < countof(opt_fns); ++i) { - if (opt_fns[i].eval_fn == expr->eval_fn) { - return opt_fns[i].opt_fn(opt, expr); - } + return expr; +} + +/** + * Data flow visitor. + */ +static const struct visitor data_flow = { + .name = "data_flow", + .enter = data_flow_enter, + .visit = data_flow_visit, + .leave = data_flow_leave, + .table = { + {eval_access, data_flow_access}, + {eval_gid, data_flow_gid}, + {eval_inum, data_flow_inum}, + {eval_links, data_flow_links}, + {eval_samefile, data_flow_samefile}, + {eval_size, data_flow_size}, + {eval_type, data_flow_type}, + {eval_uid, data_flow_uid}, + {eval_xtype, data_flow_xtype}, + {NULL, NULL}, + }, +}; + +/** Simplify a negation. */ +static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->ignore_result) { + opt_debug(opt, "ignored result\n"); + expr = only_child(expr); } return expr; } -static struct bfs_expr *optimize_expr_recursive(struct bfs_opt *opt, struct bfs_expr *expr) { - int optlevel = opt->ctx->optlevel; +/** Lift negations out of a conjunction/disjunction using De Morgan's laws. */ +static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) { + // Only lift negations if it would reduce the number of (-not) expressions + size_t added = 0, removed = 0; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (child->eval_fn == eval_not) { + ++removed; + } else { + ++added; + } + } + if (added >= removed) { + return visit_shallow(opt, expr, &annotate); + } - opt->after_true = opt->before; - opt->after_false = opt->before; + opt_debug(opt, "De Morgan's laws\n"); - if (optlevel >= 2 && df_is_bottom(&opt->before)) { - struct bfs_expr *ret = opt_const(opt, false); - opt_debug(opt, 2, "reachability: %pe --> %pe\n", expr, ret); - opt_warning(opt, expr, "This expression is unreachable.\n\n"); - return ret; + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + bfs_assert(expr->eval_fn == eval_or); + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + opt_enter(opt, "%pe\n", child); + + child = negate_expr(opt, child, &fake_not_arg); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + bfs_expr_append(expr, child); } - expr = optimize_expr_lookup(opt, expr); - if (!expr) { + expr = visit_shallow(opt, expr, &annotate); + return negate_expr(opt, expr, &fake_not_arg); +} + +/** Get the first ignorable expression in a conjunction/disjunction. */ +static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *expr) { + if (opt->level < 2 || !opt->ignore_result) { return NULL; } - if (bfs_expr_is_parent(expr)) { - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - if (rhs) { - expr->persistent_fds = rhs->persistent_fds; - expr->ephemeral_fds = rhs->ephemeral_fds; + struct bfs_expr *ret = NULL; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (!child->pure) { + ret = NULL; + } else if (!ret) { + ret = child; + } + } + + return ret; +} + +/** Simplify a conjunction. */ +static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *ignorable = first_ignorable(opt, expr); + bool ignore = false; + + struct bfs_exprs children; + foster_children(expr, &children); + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (child == ignorable) { + ignore = true; + } + + if (ignore) { + opt_delete(opt, "%pe [ignored result]\n", child); + opt_warning(opt, child, "The result of this expression is ignored.\n\n"); + continue; + } + + if (child->eval_fn == eval_true) { + opt_delete(opt, "%pe [conjunction elimination]\n", child); + continue; } - if (lhs) { - expr->persistent_fds += lhs->persistent_fds; - if (lhs->ephemeral_fds > expr->ephemeral_fds) { - expr->ephemeral_fds = lhs->ephemeral_fds; + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + + if (child->always_false) { + while ((child = SLIST_POP(&children))) { + opt_delete(opt, "%pe [short-circuit]\n", child); } } - } else if (!expr->pure) { - df_join(opt->impure, &opt->before); } - if (expr->always_true) { - expr->probability = 1.0; - df_init_bottom(&opt->after_false); - } - if (expr->always_false) { - expr->probability = 0.0; - df_init_bottom(&opt->after_true); + struct bfs_expr *child = bfs_expr_children(expr); + if (!child) { + opt_debug(opt, "nullary identity\n"); + return opt_const(opt, true); + } else if (!child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); } - if (optlevel < 2 || expr->eval_fn == eval_true || expr->eval_fn == eval_false) { - return expr; - } + return lift_andor_not(opt, expr); +} - if (df_is_bottom(&opt->after_true)) { - if (expr->pure) { - struct bfs_expr *ret = opt_const(opt, false); - opt_warning(opt, expr, "This expression is always false.\n\n"); - opt_debug(opt, 2, "data flow: %pe --> %pe\n", expr, ret); - return ret; - } else { - expr->always_false = true; - expr->probability = 0.0; +/** Simplify a disjunction. */ +static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *ignorable = first_ignorable(opt, expr); + bool ignore = false; + + struct bfs_exprs children; + foster_children(expr, &children); + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (child == ignorable) { + ignore = true; } - } else if (df_is_bottom(&opt->after_false)) { - if (expr->pure) { - struct bfs_expr *ret = opt_const(opt, true); - opt_debug(opt, 2, "data flow: %pe --> %pe\n", expr, ret); - opt_warning(opt, expr, "This expression is always true.\n\n"); - return ret; - } else { - expr->always_true = true; - expr->probability = 1.0; + + if (ignore) { + opt_delete(opt, "%pe [ignored result]\n", child); + opt_warning(opt, child, "The result of this expression is ignored.\n\n"); + continue; } - } - return expr; -} + if (child->eval_fn == eval_false) { + opt_delete(opt, "%pe [disjunctive syllogism]\n", child); + continue; + } -/** Swap the children of a binary expression if it would reduce the cost. */ -static bool reorder_expr(const struct bfs_opt *opt, struct bfs_expr *expr, float swapped_cost) { - if (swapped_cost < expr->cost) { - bool debug = opt_debug(opt, 3, "cost: %pe <==> ", expr); - struct bfs_expr *lhs = expr->lhs; - expr->lhs = expr->rhs; - expr->rhs = lhs; - if (debug) { - cfprintf(opt->ctx->cerr, "%pe (~${ylw}%g${rs} --> ~${ylw}%g${rs})\n", expr, expr->cost, swapped_cost); + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + + if (child->always_true) { + while ((child = SLIST_POP(&children))) { + opt_delete(opt, "%pe [short-circuit]\n", child); + } } - expr->cost = swapped_cost; - return true; - } else { - return false; } -} -/** - * Recursively reorder sub-expressions to reduce the overall cost. - * - * @param expr - * The expression to optimize. - * @return - * Whether any subexpression was reordered. - */ -static bool reorder_expr_recursive(const struct bfs_opt *opt, struct bfs_expr *expr) { - if (!bfs_expr_is_parent(expr)) { - return false; + struct bfs_expr *child = bfs_expr_children(expr); + if (!child) { + opt_debug(opt, "nullary identity\n"); + return opt_const(opt, false); + } else if (!child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); } - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; + return lift_andor_not(opt, expr); +} - bool ret = false; - if (lhs) { - ret |= reorder_expr_recursive(opt, lhs); - } - if (rhs) { - ret |= reorder_expr_recursive(opt, rhs); - } +/** Simplify a comma expression. */ +static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); - if (expr->eval_fn == eval_and || expr->eval_fn == eval_or) { - if (lhs->pure && rhs->pure) { - float rhs_prob = expr->eval_fn == eval_and ? rhs->probability : 1.0 - rhs->probability; - float swapped_cost = rhs->cost + rhs_prob * lhs->cost; - ret |= reorder_expr(opt, expr, swapped_cost); + if (opt->level >= 2 && child->pure && !SLIST_EMPTY(&children)) { + opt_delete(opt, "%pe [ignored result]\n", child); + opt_warning(opt, child, "The result of this expression is ignored.\n\n"); + continue; } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); } - return ret; + struct bfs_expr *child = bfs_expr_children(expr); + if (child && !child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return expr; } /** - * Optimize a top-level expression. + * Logical simplification visitor. */ -static struct bfs_expr *optimize_expr(struct bfs_opt *opt, struct bfs_expr *expr) { - struct df_domain saved_impure = *opt->impure; +static const struct visitor simplify = { + .name = "simplify", + .table = { + {eval_not, simplify_not}, + {eval_and, simplify_and}, + {eval_or, simplify_or}, + {eval_comma, simplify_comma}, + {NULL, NULL}, + }, +}; - expr = optimize_expr_recursive(opt, expr); - if (!expr) { - return NULL; - } +/** Optimize an expression. */ +static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) { + opt_enter(opt, "pass 0:\n"); + expr = visit(opt, expr, &annotate); + opt_leave(opt, NULL); + + /** Table of optimization passes. */ + static const struct { + /** Minimum optlevel for this pass. */ + int level; + /** The visitor for this pass. */ + const struct visitor *visitor; + } passes[] = { + {1, &canonicalize}, + {3, &reorder}, + {2, &data_flow}, + {1, &simplify}, + }; - if (opt->ctx->optlevel >= 3 && reorder_expr_recursive(opt, expr)) { - // Re-do optimizations to account for the new ordering - *opt->impure = saved_impure; - expr = optimize_expr_recursive(opt, expr); - if (!expr) { - return NULL; + struct df_domain impure; + + for (int i = 0; i < 3; ++i) { + struct bfs_opt nested = *opt; + nested.impure = &impure; + impure = *opt->impure; + + opt_enter(&nested, "pass %d:\n", i + 1); + + for (size_t j = 0; j < countof(passes); ++j) { + if (opt->level < passes[j].level) { + continue; + } + + // Skip reordering the first time through the passes, to + // make warnings more understandable + if (passes[j].visitor == &reorder) { + if (i == 0) { + continue; + } else { + nested.warn = false; + } + } + + expr = visit(&nested, expr, passes[j].visitor); + if (!expr) { + return NULL; + } + } + + opt_leave(&nested, NULL); + + if (!bfs_expr_is_parent(expr)) { + break; } } + *opt->impure = impure; return expr; } @@ -1332,30 +2137,37 @@ int bfs_optimize(struct bfs_ctx *ctx) { struct bfs_opt opt = { .ctx = ctx, + .level = ctx->optlevel, + .depth = 0, + .warn = ctx->warn, + .ignore_result = false, .impure = &impure, }; df_init_top(&opt.before); - ctx->exclude = optimize_expr(&opt, ctx->exclude); + ctx->exclude = optimize(&opt, ctx->exclude); if (!ctx->exclude) { return -1; } // Only non-excluded files are evaluated opt.before = opt.after_false; + opt.ignore_result = true; struct df_range *depth = &opt.before.ranges[DEPTH_RANGE]; - constrain_min(depth, ctx->mindepth); - constrain_max(depth, ctx->maxdepth); + if (ctx->mindepth > 0) { + constrain_min(depth, ctx->mindepth); + } + if (ctx->maxdepth < INT_MAX) { + constrain_max(depth, ctx->maxdepth); + } - ctx->expr = optimize_expr(&opt, ctx->expr); + ctx->expr = optimize(&opt, ctx->expr); if (!ctx->expr) { return -1; } - ctx->expr = ignore_result(&opt, ctx->expr); - - if (df_is_bottom(&impure)) { + if (opt.level >= 2 && df_is_bottom(&impure)) { bfs_warning(ctx, "This command won't do anything.\n\n"); } @@ -1363,23 +2175,27 @@ int bfs_optimize(struct bfs_ctx *ctx) { long long mindepth = impure_depth->min; long long maxdepth = impure_depth->max; - int optlevel = ctx->optlevel; + opt_enter(&opt, "post-process:\n"); - if (optlevel >= 2 && mindepth > ctx->mindepth) { + if (opt.level >= 2 && mindepth > ctx->mindepth) { if (mindepth > INT_MAX) { mindepth = INT_MAX; } + opt_enter(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); ctx->mindepth = mindepth; - opt_debug(&opt, 2, "data flow: mindepth --> %d\n", ctx->mindepth); + opt_leave(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); } - if (optlevel >= 4 && maxdepth < ctx->maxdepth) { + if (opt.level >= 4 && maxdepth < ctx->maxdepth) { if (maxdepth < INT_MIN) { maxdepth = INT_MIN; } + opt_enter(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); ctx->maxdepth = maxdepth; - opt_debug(&opt, 4, "data flow: maxdepth --> %d\n", ctx->maxdepth); + opt_leave(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); } + opt_leave(&opt, NULL); + return 0; } -- cgit v1.2.3