// Copyright © Tavian Barnes // SPDX-License-Identifier: 0BSD /** * The expression optimizer. Different optimization levels are supported: * * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo. * * -O2: dead code elimination and data flow analysis. struct df_domain is used * to record data flow facts that are true at various points of evaluation. * Specifically, struct df_domain records the state before an expression is * evaluated (opt->before), and after an expression returns true * (opt->after_true) or false (opt->after_false). Additionally, opt->impure * records the possible state before any expression with side effects is * evaluated. * * -O3: expression re-ordering to reduce expected cost. In an expression like * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can * be re-ordered to (-bar -and -foo). This is profitable if the expected cost * is lower for the re-ordered expression, for example if -foo is very slow or * -bar is likely to return false. * * -O4/-Ofast: aggressive optimizations that may affect correctness in corner * cases. The main effect is to use opt->impure to determine if any side- * effects are reachable at all, skipping the traversal if not. */ #include "opt.h" #include "bfs.h" #include "bftw.h" #include "bit.h" #include "color.h" #include "ctx.h" #include "diag.h" #include "dir.h" #include "eval.h" #include "exec.h" #include "expr.h" #include "list.h" #include "pwcache.h" #include #include #include #include #include static char *fake_and_arg = "-and"; static char *fake_or_arg = "-or"; static char *fake_not_arg = "-not"; /** * The data flow domain for predicates. */ enum df_pred { /** The bottom state (unreachable). */ PRED_BOTTOM = 0, /** The predicate is known to be false. */ PRED_FALSE = 1 << false, /** The predicate is known to be true. */ PRED_TRUE = 1 << true, /** The top state (unknown). */ PRED_TOP = PRED_FALSE | PRED_TRUE, }; /** Make a predicate known. */ static void constrain_pred(enum df_pred *pred, bool value) { *pred &= 1 << value; } /** Compute the join (union) of two predicates. */ static void pred_join(enum df_pred *dest, enum df_pred src) { *dest |= src; } /** * Types of predicates we track. */ enum pred_type { /** -readable */ READABLE_PRED, /** -writable */ WRITABLE_PRED, /** -executable */ EXECUTABLE_PRED, /** -acl */ ACL_PRED, /** -capable */ CAPABLE_PRED, /** -empty */ EMPTY_PRED, /** -hidden */ HIDDEN_PRED, /** -nogroup */ NOGROUP_PRED, /** -nouser */ NOUSER_PRED, /** -sparse */ SPARSE_PRED, /** -xattr */ XATTR_PRED, /** The number of pred_types. */ PRED_TYPES, }; /** Predicate type names. */ static const char *const pred_names[] = { [READABLE_PRED] = "-readable", [WRITABLE_PRED] = "-writable", [EXECUTABLE_PRED] = "-executable", [ACL_PRED] = "-acl", [CAPABLE_PRED] = "-capable", [EMPTY_PRED] = "-empty", [HIDDEN_PRED] = "-hidden", [NOGROUP_PRED] = "-nogroup", [NOUSER_PRED] = "-nouser", [SPARSE_PRED] = "-sparse", [XATTR_PRED] = "-xattr", }; /** * A contrained integer range. */ struct df_range { /** The (inclusive) minimum value. */ long long min; /** The (inclusive) maximum value. */ long long max; }; /** Initialize an empty range. */ static void range_init_bottom(struct df_range *range) { range->min = LLONG_MAX; range->max = LLONG_MIN; } /** Check if a range is empty. */ static bool range_is_bottom(const struct df_range *range) { return range->min > range->max; } /** Initialize a full range. */ static void range_init_top(struct df_range *range) { // All ranges we currently track are non-negative range->min = 0; range->max = LLONG_MAX; } /** Check for an infinite range. */ static bool range_is_top(const struct df_range *range) { return range->min == 0 && range->max == LLONG_MAX; } /** Compute the minimum of two values. */ static long long min_value(long long a, long long b) { if (a < b) { return a; } else { return b; } } /** Compute the maximum of two values. */ static long long max_value(long long a, long long b) { if (a > b) { return a; } else { return b; } } /** Constrain the minimum of a range. */ static void constrain_min(struct df_range *range, long long value) { range->min = max_value(range->min, value); } /** Constrain the maximum of a range. */ static void constrain_max(struct df_range *range, long long value) { range->max = min_value(range->max, value); } /** Constrain a range to a single value. */ static void constrain_range(struct df_range *range, long long value) { constrain_min(range, value); constrain_max(range, value); } /** Remove a single value from a range. */ static void range_remove(struct df_range *range, long long value) { if (range->min == value) { if (range->min == LLONG_MAX) { range->max = LLONG_MIN; } else { ++range->min; } } if (range->max == value) { if (range->max == LLONG_MIN) { range->min = LLONG_MAX; } else { --range->max; } } } /** Compute the union of two ranges. */ static void range_join(struct df_range *dest, const struct df_range *src) { dest->min = min_value(dest->min, src->min); dest->max = max_value(dest->max, src->max); } /** * Types of ranges we track. */ enum range_type { /** Search tree depth. */ DEPTH_RANGE, /** Group ID. */ GID_RANGE, /** Inode number. */ INUM_RANGE, /** Hard link count. */ LINKS_RANGE, /** File size. */ SIZE_RANGE, /** User ID. */ UID_RANGE, /** The number of range_types. */ RANGE_TYPES, }; /** Range type names. */ static const char *const range_names[] = { [DEPTH_RANGE] = "-depth", [GID_RANGE] = "-gid", [INUM_RANGE] = "-inum", [LINKS_RANGE] = "-links", [SIZE_RANGE] = "-size", [UID_RANGE] = "-uid", }; /** * The data flow analysis domain. */ struct df_domain { /** The predicates we track. */ enum df_pred preds[PRED_TYPES]; /** The value ranges we track. */ struct df_range ranges[RANGE_TYPES]; /** Bitmask of possible -types. */ unsigned int types; /** Bitmask of possible -xtypes. */ unsigned int xtypes; }; /** Set a data flow value to bottom. */ static void df_init_bottom(struct df_domain *value) { for (int i = 0; i < PRED_TYPES; ++i) { value->preds[i] = PRED_BOTTOM; } for (int i = 0; i < RANGE_TYPES; ++i) { range_init_bottom(&value->ranges[i]); } value->types = 0; value->xtypes = 0; } /** Determine whether a fact set is impossible. */ static bool df_is_bottom(const struct df_domain *value) { for (int i = 0; i < RANGE_TYPES; ++i) { if (range_is_bottom(&value->ranges[i])) { return true; } } for (int i = 0; i < PRED_TYPES; ++i) { if (value->preds[i] == PRED_BOTTOM) { return true; } } if (!value->types || !value->xtypes) { return true; } return false; } /** Initialize some data flow value. */ static void df_init_top(struct df_domain *value) { for (int i = 0; i < PRED_TYPES; ++i) { value->preds[i] = PRED_TOP; } for (int i = 0; i < RANGE_TYPES; ++i) { range_init_top(&value->ranges[i]); } value->types = ~0; value->xtypes = ~0; } /** Check for the top element. */ static bool df_is_top(const struct df_domain *value) { for (int i = 0; i < PRED_TYPES; ++i) { if (value->preds[i] != PRED_TOP) { return false; } } for (int i = 0; i < RANGE_TYPES; ++i) { if (!range_is_top(&value->ranges[i])) { return false; } } if (value->types != ~0U) { return false; } if (value->xtypes != ~0U) { return false; } return true; } /** Compute the union of two fact sets. */ static void df_join(struct df_domain *dest, const struct df_domain *src) { for (int i = 0; i < PRED_TYPES; ++i) { pred_join(&dest->preds[i], src->preds[i]); } for (int i = 0; i < RANGE_TYPES; ++i) { range_join(&dest->ranges[i], &src->ranges[i]); } dest->types |= src->types; dest->xtypes |= src->xtypes; } /** * Optimizer state. */ struct bfs_opt { /** The context we're optimizing. */ struct bfs_ctx *ctx; /** Optimization level (ctx->optlevel). */ int level; /** Recursion depth. */ int depth; /** Whether to produce warnings. */ bool warn; /** Whether the result of this expression is ignored. */ bool ignore_result; /** Data flow state before this expression is evaluated. */ struct df_domain before; /** Data flow state after this expression returns true. */ struct df_domain after_true; /** Data flow state after this expression returns false. */ struct df_domain after_false; /** Data flow state before any side-effecting expressions are evaluated. */ struct df_domain *impure; }; /** Log an optimization. */ _printf(2, 3) static bool opt_debug(struct bfs_opt *opt, const char *format, ...) { if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) { for (int i = 0; i < opt->depth; ++i) { cfprintf(opt->ctx->cerr, "│ "); } va_list args; va_start(args, format); cvfprintf(opt->ctx->cerr, format, args); va_end(args); return true; } else { return false; } } /** Log a recursive call. */ _printf(2, 3) static bool opt_enter(struct bfs_opt *opt, const char *format, ...) { int depth = opt->depth; if (depth > 0) { --opt->depth; } bool debug = opt_debug(opt, "%s", depth > 0 ? "├─╮ " : ""); if (debug) { va_list args; va_start(args, format); cvfprintf(opt->ctx->cerr, format, args); va_end(args); } opt->depth = depth + 1; return debug; } /** Log a recursive return. */ _printf(2, 3) static bool opt_leave(struct bfs_opt *opt, const char *format, ...) { bool debug = false; int depth = opt->depth; if (format) { if (depth > 1) { opt->depth -= 2; } debug = opt_debug(opt, "%s", depth > 1 ? "├─╯ " : ""); if (debug) { va_list args; va_start(args, format); cvfprintf(opt->ctx->cerr, format, args); va_end(args); } } opt->depth = depth - 1; return debug; } /** Log a shallow visit. */ _printf(2, 3) static bool opt_visit(struct bfs_opt *opt, const char *format, ...) { int depth = opt->depth; if (depth > 0) { --opt->depth; } bool debug = opt_debug(opt, "%s", depth > 0 ? "├─◯ " : ""); if (debug) { va_list args; va_start(args, format); cvfprintf(opt->ctx->cerr, format, args); va_end(args); } opt->depth = depth; return debug; } /** Log the deletion of an expression. */ _printf(2, 3) static bool opt_delete(struct bfs_opt *opt, const char *format, ...) { int depth = opt->depth; if (depth > 0) { --opt->depth; } bool debug = opt_debug(opt, "%s", depth > 0 ? "├─✘ " : ""); if (debug) { va_list args; va_start(args, format); cvfprintf(opt->ctx->cerr, format, args); va_end(args); } opt->depth = depth; return debug; } typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...); /** Print a df_pred. */ static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) { dump(opt, "${blu}%s${rs}: ", pred_names[type]); FILE *file = opt->ctx->cerr->file; switch (value->preds[type]) { case PRED_BOTTOM: fprintf(file, "⊥\n"); break; case PRED_TOP: fprintf(file, "⊤\n"); break; case PRED_TRUE: fprintf(file, "true\n"); break; case PRED_FALSE: fprintf(file, "false\n"); break; } } /** Print a df_range. */ static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) { dump(opt, "${blu}%s${rs}: ", range_names[type]); FILE *file = opt->ctx->cerr->file; const struct df_range *range = &value->ranges[type]; if (range_is_bottom(range)) { fprintf(file, "⊥\n"); } else if (range_is_top(range)) { fprintf(file, "⊤\n"); } else if (range->min == range->max) { fprintf(file, "%lld\n", range->min); } else { if (range->min == LLONG_MIN) { fprintf(file, "(-∞, "); } else { fprintf(file, "[%lld, ", range->min); } if (range->max == LLONG_MAX) { fprintf(file, "∞)\n"); } else { fprintf(file, "%lld]\n", range->max); } } } /** Print a set of types. */ static void types_dump(dump_fn *dump, struct bfs_opt *opt, const char *name, unsigned int types) { dump(opt, "${blu}%s${rs}: ", name); FILE *file = opt->ctx->cerr->file; if (types == 0) { fprintf(file, " ⊥\n"); } else if (types == ~0U) { fprintf(file, " ⊤\n"); } else if (count_ones(types) < count_ones(~types)) { fprintf(file, " 0x%X\n", types); } else { fprintf(file, "~0x%X\n", ~types); } } /** Calculate the number of lines of df_dump() output. */ static int df_dump_lines(const struct df_domain *value) { int lines = 0; for (int i = 0; i < PRED_TYPES; ++i) { lines += value->preds[i] != PRED_TOP; } for (int i = 0; i < RANGE_TYPES; ++i) { lines += !range_is_top(&value->ranges[i]); } lines += value->types != ~0U; lines += value->xtypes != ~0U; return lines; } /** Get the right debugging function for a df_dump() line. */ static dump_fn *df_dump_line(int lines, int *line) { ++*line; if (lines == 1) { return opt_visit; } else if (*line == 1) { return opt_enter; } else if (*line == lines) { return opt_leave; } else { return opt_debug; } } /** Print a data flow value. */ static void df_dump(struct bfs_opt *opt, const char *str, const struct df_domain *value) { if (df_is_bottom(value)) { opt_debug(opt, "%s: ⊥\n", str); return; } else if (df_is_top(value)) { opt_debug(opt, "%s: ⊤\n", str); return; } if (!opt_debug(opt, "%s:\n", str)) { return; } int lines = df_dump_lines(value); int line = 0; for (int i = 0; i < PRED_TYPES; ++i) { if (value->preds[i] != PRED_TOP) { pred_dump(df_dump_line(lines, &line), opt, value, i); } } for (int i = 0; i < RANGE_TYPES; ++i) { if (!range_is_top(&value->ranges[i])) { range_dump(df_dump_line(lines, &line), opt, value, i); } } if (value->types != ~0U) { types_dump(df_dump_line(lines, &line), opt, "-type", value->types); } if (value->xtypes != ~0U) { types_dump(df_dump_line(lines, &line), opt, "-xtype", value->xtypes); } } /** Check if an expression is constant. */ static bool is_const(const struct bfs_expr *expr) { return expr->eval_fn == eval_true || expr->eval_fn == eval_false; } /** Warn about an expression. */ _printf(3, 4) static bool opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) { if (!opt->warn) { return false; } if (bfs_expr_is_parent(expr) || is_const(expr)) { return false; } if (!bfs_expr_warning(opt->ctx, expr)) { return false; } va_list args; va_start(args, format); bfs_vwarning(opt->ctx, format, args); va_end(args); return true; } /** Remove and return an expression's children. */ static void foster_children(struct bfs_expr *expr, struct bfs_exprs *children) { bfs_assert(bfs_expr_is_parent(expr)); SLIST_INIT(children); SLIST_EXTEND(children, &expr->children); expr->persistent_fds = 0; expr->ephemeral_fds = 0; expr->pure = true; } /** Return an expression's only child. */ static struct bfs_expr *only_child(struct bfs_expr *expr) { bfs_assert(bfs_expr_is_parent(expr)); struct bfs_expr *child = bfs_expr_children(expr); bfs_assert(child && !child->next); return child; } /** Foster an expression's only child. */ static struct bfs_expr *foster_only_child(struct bfs_expr *expr) { struct bfs_expr *child = only_child(expr); struct bfs_exprs children; foster_children(expr, &children); return child; } /** An expression visitor. */ struct visitor; /** An expression-visiting function. */ typedef struct bfs_expr *visit_fn(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); /** An entry in a visitor lookup table. */ struct visitor_table { /** The evaluation function to match on. */ bfs_eval_fn *eval_fn; /** The visitor function. */ visit_fn *visit; }; /** Look up a visitor in a table. */ static visit_fn *look_up_visitor(const struct bfs_expr *expr, const struct visitor_table table[]) { for (size_t i = 0; table[i].eval_fn; ++i) { if (expr->eval_fn == table[i].eval_fn) { return table[i].visit; } } return NULL; } struct visitor { /** The name of this visitor. */ const char *name; /** A function to call before visiting children. */ visit_fn *enter; /** The default visitor. */ visit_fn *visit; /** A function to call after visiting children. */ visit_fn *leave; /** A visitor lookup table. */ const struct visitor_table *table; }; /** Recursive visitor implementation. */ static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); /** Visit a negation. */ static struct bfs_expr *visit_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_expr *rhs = foster_only_child(expr); struct bfs_opt nested = *opt; rhs = visit_deep(&nested, rhs, visitor); if (!rhs) { return NULL; } opt->after_true = nested.after_false; opt->after_false = nested.after_true; bfs_expr_append(expr, rhs); return expr; } /** Visit a conjunction. */ static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_exprs children; foster_children(expr, &children); // Base case (-and) == (-true) df_init_bottom(&opt->after_false); struct bfs_opt nested = *opt; while (!SLIST_EMPTY(&children)) { struct bfs_expr *child = SLIST_POP(&children); if (SLIST_EMPTY(&children)) { nested.ignore_result = opt->ignore_result; } else { nested.ignore_result = false; } child = visit_deep(&nested, child, visitor); if (!child) { return NULL; } df_join(&opt->after_false, &nested.after_false); nested.before = nested.after_true; bfs_expr_append(expr, child); } opt->after_true = nested.after_true; return expr; } /** Visit a disjunction. */ static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_exprs children; foster_children(expr, &children); // Base case (-or) == (-false) df_init_bottom(&opt->after_true); struct bfs_opt nested = *opt; while (!SLIST_EMPTY(&children)) { struct bfs_expr *child = SLIST_POP(&children); if (SLIST_EMPTY(&children)) { nested.ignore_result = opt->ignore_result; } else { nested.ignore_result = false; } child = visit_deep(&nested, child, visitor); if (!child) { return NULL; } df_join(&opt->after_true, &nested.after_true); nested.before = nested.after_false; bfs_expr_append(expr, child); } opt->after_false = nested.after_false; return expr; } /** Visit a comma expression. */ static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_exprs children; foster_children(expr, &children); struct bfs_opt nested = *opt; while (!SLIST_EMPTY(&children)) { struct bfs_expr *child = SLIST_POP(&children); if (SLIST_EMPTY(&children)) { nested.ignore_result = opt->ignore_result; } else { nested.ignore_result = true; } child = visit_deep(&nested, child, visitor); if (!child) { return NULL; } nested.before = nested.after_true; df_join(&nested.before, &nested.after_false); bfs_expr_append(expr, child); } opt->after_true = nested.after_true; opt->after_false = nested.after_false; return expr; } /** Default enter() function. */ static struct bfs_expr *visit_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt_enter(opt, "%pe\n", expr); opt->after_true = opt->before; opt->after_false = opt->before; return expr; } /** Default leave() function. */ static struct bfs_expr *visit_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt_leave(opt, "%pe\n", expr); return expr; } static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { bool entered = false; visit_fn *enter = visitor->enter ? visitor->enter : visit_enter; visit_fn *leave = visitor->leave ? visitor->leave : visit_leave; static const struct visitor_table table[] = { {eval_not, visit_not}, {eval_and, visit_and}, {eval_or, visit_or}, {eval_comma, visit_comma}, {NULL, NULL}, }; visit_fn *recursive = look_up_visitor(expr, table); if (recursive) { if (!entered) { expr = enter(opt, expr, visitor); if (!expr) { return NULL; } entered = true; } expr = recursive(opt, expr, visitor); if (!expr) { return NULL; } } visit_fn *general = visitor->visit; if (general) { if (!entered) { expr = enter(opt, expr, visitor); if (!expr) { return NULL; } entered = true; } expr = general(opt, expr, visitor); if (!expr) { return NULL; } } visit_fn *specific = look_up_visitor(expr, visitor->table); if (specific) { if (!entered) { expr = enter(opt, expr, visitor); if (!expr) { return NULL; } entered = true; } expr = specific(opt, expr, visitor); if (!expr) { return NULL; } } if (entered) { expr = leave(opt, expr, visitor); } else { opt_visit(opt, "%pe\n", expr); } return expr; } /** Visit an expression recursively. */ static struct bfs_expr *visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt_enter(opt, "%s()\n", visitor->name); expr = visit_deep(opt, expr, visitor); opt_leave(opt, "\n"); return expr; } /** Visit an expression non-recursively. */ static struct bfs_expr *visit_shallow(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { visit_fn *general = visitor->visit; if (expr && general) { expr = general(opt, expr, visitor); } if (!expr) { return NULL; } visit_fn *specific = look_up_visitor(expr, visitor->table); if (specific) { expr = specific(opt, expr, visitor); } return expr; } /** Annotate -{execut,read,writ}able. */ static struct bfs_expr *annotate_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { expr->probability = 1.0; if (expr->num & R_OK) { expr->probability *= 0.99; } if (expr->num & W_OK) { expr->probability *= 0.8; } if (expr->num & X_OK) { expr->probability *= 0.2; } return expr; } /** Annotate -empty. */ static struct bfs_expr *annotate_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (opt->level >= 4) { // Since -empty attempts to open and read directories, it may // have side effects such as reporting permission errors, and // thus shouldn't be re-ordered without aggressive optimizations expr->pure = true; } return expr; } /** Annotate -exec. */ static struct bfs_expr *annotate_exec(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->exec->flags & BFS_EXEC_MULTI) { expr->always_true = true; } else { expr->cost = 1000000.0; } return expr; } /** Annotate -name/-lname/-path. */ static struct bfs_expr *annotate_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->literal) { expr->probability = 0.1; } else { expr->probability = 0.5; } return expr; } /** Annotate -f?print. */ static struct bfs_expr *annotate_fprint(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { const struct colors *colors = expr->cfile->colors; expr->calls_stat = colors && colors_need_stat(colors); return expr; } /** Estimate probability for -x?type. */ static void estimate_type_probability(struct bfs_expr *expr) { unsigned int types = expr->num; expr->probability = 0.0; if (types & (1 << BFS_BLK)) { expr->probability += 0.00000721183; } if (types & (1 << BFS_CHR)) { expr->probability += 0.0000499855; } if (types & (1 << BFS_DIR)) { expr->probability += 0.114475; } if (types & (1 << BFS_DOOR)) { expr->probability += 0.000001; } if (types & (1 << BFS_FIFO)) { expr->probability += 0.00000248684; } if (types & (1 << BFS_REG)) { expr->probability += 0.859772; } if (types & (1 << BFS_LNK)) { expr->probability += 0.0256816; } if (types & (1 << BFS_SOCK)) { expr->probability += 0.0000116881; } if (types & (1 << BFS_WHT)) { expr->probability += 0.000001; } } /** Annotate -type. */ static struct bfs_expr *annotate_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { estimate_type_probability(expr); return expr; } /** Annotate -xtype. */ static struct bfs_expr *annotate_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (opt->level >= 4) { // Since -xtype dereferences symbolic links, it may have side // effects such as reporting permission errors, and thus // shouldn't be re-ordered without aggressive optimizations expr->pure = true; } estimate_type_probability(expr); return expr; } /** Annotate a negation. */ static struct bfs_expr *annotate_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_expr *rhs = only_child(expr); expr->pure = rhs->pure; expr->always_true = rhs->always_false; expr->always_false = rhs->always_true; expr->cost = rhs->cost; expr->probability = 1.0 - rhs->probability; return expr; } /** Annotate a conjunction. */ static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { expr->pure = true; expr->always_true = true; expr->always_false = false; expr->cost = 0.0; expr->probability = 1.0; for_expr (child, expr) { expr->pure &= child->pure; expr->always_true &= child->always_true; expr->always_false |= child->always_false; expr->cost += expr->probability * child->cost; expr->probability *= child->probability; } return expr; } /** Annotate a disjunction. */ static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { expr->pure = true; expr->always_true = false; expr->always_false = true; expr->cost = 0.0; float false_prob = 1.0; for_expr (child, expr) { expr->pure &= child->pure; expr->always_true |= child->always_true; expr->always_false &= child->always_false; expr->cost += false_prob * child->cost; false_prob *= (1.0 - child->probability); } expr->probability = 1.0 - false_prob; return expr; } /** Annotate a comma expression. */ static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { expr->pure = true; expr->cost = 0.0; for_expr (child, expr) { expr->pure &= child->pure; expr->always_true = child->always_true; expr->always_false = child->always_false; expr->cost += child->cost; expr->probability = child->probability; } return expr; } /** Annotate an arbitrary expression. */ static struct bfs_expr *annotate_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { /** Table of pure expressions. */ static bfs_eval_fn *const pure[] = { eval_access, eval_acl, eval_capable, eval_depth, eval_false, eval_flags, eval_fstype, eval_gid, eval_hidden, eval_inum, eval_links, eval_lname, eval_name, eval_newer, eval_nogroup, eval_nouser, eval_path, eval_perm, eval_regex, eval_samefile, eval_size, eval_sparse, eval_time, eval_true, eval_type, eval_uid, eval_used, eval_xattr, eval_xattrname, }; expr->pure = false; for (size_t i = 0; i < countof(pure); ++i) { if (expr->eval_fn == pure[i]) { expr->pure = true; break; } } /** Table of always-true expressions. */ static bfs_eval_fn *const always_true[] = { eval_fls, eval_fprint, eval_fprint0, eval_fprintf, eval_fprintx, eval_limit, eval_prune, eval_true, // Non-returning eval_exit, eval_quit, }; expr->always_true = false; for (size_t i = 0; i < countof(always_true); ++i) { if (expr->eval_fn == always_true[i]) { expr->always_true = true; break; } } /** Table of always-false expressions. */ static bfs_eval_fn *const always_false[] = { eval_false, // Non-returning eval_exit, eval_quit, }; expr->always_false = false; for (size_t i = 0; i < countof(always_false); ++i) { if (expr->eval_fn == always_false[i]) { expr->always_false = true; break; } } /** Table of stat-calling primaries. */ static bfs_eval_fn *const calls_stat[] = { eval_empty, eval_flags, eval_fls, eval_fprintf, eval_fstype, eval_gid, eval_inum, eval_links, eval_newer, eval_nogroup, eval_nouser, eval_perm, eval_samefile, eval_size, eval_sparse, eval_time, eval_uid, eval_used, eval_xattr, eval_xattrname, }; expr->calls_stat = false; for (size_t i = 0; i < countof(calls_stat); ++i) { if (expr->eval_fn == calls_stat[i]) { expr->calls_stat = true; break; } } #define FAST_COST 40.0 #define FNMATCH_COST 400.0 #define STAT_COST 1000.0 #define PRINT_COST 20000.0 /** Table of expression costs. */ static const struct { bfs_eval_fn *eval_fn; float cost; } costs[] = { {eval_access, STAT_COST}, {eval_acl, STAT_COST}, {eval_capable, STAT_COST}, {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat() {eval_flags, STAT_COST}, {eval_fls, PRINT_COST}, {eval_fprint, PRINT_COST}, {eval_fprint0, PRINT_COST}, {eval_fprintf, PRINT_COST}, {eval_fprintx, PRINT_COST}, {eval_fstype, STAT_COST}, {eval_gid, STAT_COST}, {eval_inum, STAT_COST}, {eval_links, STAT_COST}, {eval_lname, FNMATCH_COST}, {eval_name, FNMATCH_COST}, {eval_newer, STAT_COST}, {eval_nogroup, STAT_COST}, {eval_nouser, STAT_COST}, {eval_path, FNMATCH_COST}, {eval_perm, STAT_COST}, {eval_samefile, STAT_COST}, {eval_size, STAT_COST}, {eval_sparse, STAT_COST}, {eval_time, STAT_COST}, {eval_uid, STAT_COST}, {eval_used, STAT_COST}, {eval_xattr, STAT_COST}, {eval_xattrname, STAT_COST}, }; expr->cost = FAST_COST; for (size_t i = 0; i < countof(costs); ++i) { if (expr->eval_fn == costs[i].eval_fn) { expr->cost = costs[i].cost; break; } } /** Table of expression probabilities. */ static const struct { /** The evaluation function with this cost. */ bfs_eval_fn *eval_fn; /** The matching probability. */ float probability; } probs[] = { {eval_acl, 0.00002}, {eval_capable, 0.000002}, {eval_empty, 0.01}, {eval_false, 0.0}, {eval_hidden, 0.01}, {eval_nogroup, 0.01}, {eval_nouser, 0.01}, {eval_samefile, 0.01}, {eval_true, 1.0}, {eval_xattr, 0.01}, {eval_xattrname, 0.01}, }; expr->probability = 0.5; for (size_t i = 0; i < countof(probs); ++i) { if (expr->eval_fn == probs[i].eval_fn) { expr->probability = probs[i].probability; break; } } return expr; } /** * Annotating visitor. */ static const struct visitor annotate = { .name = "annotate", .visit = annotate_visit, .table = (const struct visitor_table[]) { {eval_access, annotate_access}, {eval_empty, annotate_empty}, {eval_exec, annotate_exec}, {eval_fprint, annotate_fprint}, {eval_lname, annotate_fnmatch}, {eval_name, annotate_fnmatch}, {eval_path, annotate_fnmatch}, {eval_type, annotate_type}, {eval_xtype, annotate_xtype}, {eval_not, annotate_not}, {eval_and, annotate_and}, {eval_or, annotate_or}, {eval_comma, annotate_comma}, {NULL, NULL}, }, }; /** Create a constant expression. */ static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) { static bfs_eval_fn *const fns[] = {eval_false, eval_true}; static char *fake_args[] = {"-false", "-true"}; struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value], BFS_TEST); return visit_shallow(opt, expr, &annotate); } /** Negate an expression, keeping it canonical. */ static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr, char **argv) { if (expr->eval_fn == eval_not) { return only_child(expr); } else if (expr->eval_fn == eval_true) { return opt_const(opt, false); } else if (expr->eval_fn == eval_false) { return opt_const(opt, true); } struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv, BFS_OPERATOR); if (!ret) { return NULL; } bfs_expr_append(ret, expr); return visit_shallow(opt, ret, &annotate); } /** Sink negations into a conjunction/disjunction using De Morgan's laws. */ static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *expr) { opt_debug(opt, "De Morgan's laws\n"); char **argv = expr->argv; expr = only_child(expr); opt_enter(opt, "%pe\n", expr); if (expr->eval_fn == eval_and) { expr->eval_fn = eval_or; expr->argv = &fake_or_arg; } else { bfs_assert(expr->eval_fn == eval_or); expr->eval_fn = eval_and; expr->argv = &fake_and_arg; } struct bfs_exprs children; foster_children(expr, &children); struct bfs_expr *child; while ((child = SLIST_POP(&children))) { opt_enter(opt, "%pe\n", child); child = negate_expr(opt, child, argv); if (!child) { return NULL; } opt_leave(opt, "%pe\n", child); bfs_expr_append(expr, child); } opt_leave(opt, "%pe\n", expr); return visit_shallow(opt, expr, &annotate); } /** Sink a negation into a comma expression. */ static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) { bfs_assert(expr->eval_fn == eval_comma); opt_enter(opt, "%pe\n", expr); char **argv = expr->argv; expr = only_child(expr); struct bfs_exprs children; foster_children(expr, &children); struct bfs_expr *child; while ((child = SLIST_POP(&children))) { if (SLIST_EMPTY(&children)) { opt_enter(opt, "%pe\n", child); opt_debug(opt, "sink\n"); child = negate_expr(opt, child, argv); if (!child) { return NULL; } opt_leave(opt, "%pe\n", child); } else { opt_visit(opt, "%pe\n", child); } bfs_expr_append(expr, child); } opt_leave(opt, "%pe\n", expr); return visit_shallow(opt, expr, &annotate); } /** Canonicalize a negation. */ static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_expr *rhs = only_child(expr); if (rhs->eval_fn == eval_not) { opt_debug(opt, "double negation\n"); rhs = only_child(expr); return only_child(rhs); } else if (rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) { return sink_not_andor(opt, expr); } else if (rhs->eval_fn == eval_comma) { return sink_not_comma(opt, expr); } else if (is_const(rhs)) { opt_debug(opt, "constant propagation\n"); return opt_const(opt, rhs->eval_fn == eval_false); } else { return expr; } } /** Canonicalize an associative operator. */ static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_exprs children; foster_children(expr, &children); struct bfs_exprs flat; SLIST_INIT(&flat); struct bfs_expr *child; while ((child = SLIST_POP(&children))) { if (child->eval_fn == expr->eval_fn) { struct bfs_expr *head = SLIST_HEAD(&child->children); struct bfs_expr *tail = SLIST_TAIL(&child->children); if (!head) { opt_delete(opt, "%pe [empty]\n", child); } else { opt_enter(opt, "%pe\n", child); opt_debug(opt, "associativity\n"); if (head == tail) { opt_leave(opt, "%pe\n", head); } else if (head->next == tail) { opt_leave(opt, "%pe %pe\n", head, tail); } else { opt_leave(opt, "%pe ... %pe\n", head, tail); } } SLIST_EXTEND(&flat, &child->children); } else { opt_visit(opt, "%pe\n", child); SLIST_APPEND(&flat, child); } } bfs_expr_extend(expr, &flat); return visit_shallow(opt, expr, &annotate); } /** * Canonicalizing visitor. */ static const struct visitor canonicalize = { .name = "canonicalize", .table = (const struct visitor_table[]) { {eval_not, canonicalize_not}, {eval_and, canonicalize_assoc}, {eval_or, canonicalize_assoc}, {eval_comma, canonicalize_assoc}, {NULL, NULL}, }, }; /** Calculate the cost of an ordered pair of expressions. */ static float expr_cost(const struct bfs_expr *parent, const struct bfs_expr *lhs, const struct bfs_expr *rhs) { // https://cs.stackexchange.com/a/66921/21004 float prob = lhs->probability; if (parent->eval_fn == eval_or) { prob = 1.0 - prob; } return lhs->cost + prob * rhs->cost; } /** Sort a block of expressions. */ static void sort_exprs(struct bfs_opt *opt, struct bfs_expr *parent, struct bfs_exprs *exprs) { if (!exprs->head || !exprs->head->next) { return; } struct bfs_exprs left, right; SLIST_INIT(&left); SLIST_INIT(&right); // Split for (struct bfs_expr *hare = exprs->head; hare && (hare = hare->next); hare = hare->next) { struct bfs_expr *tortoise = SLIST_POP(exprs); SLIST_APPEND(&left, tortoise); } SLIST_EXTEND(&right, exprs); // Recurse sort_exprs(opt, parent, &left); sort_exprs(opt, parent, &right); // Merge while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { struct bfs_expr *lhs = left.head; struct bfs_expr *rhs = right.head; float cost = expr_cost(parent, lhs, rhs); float swapped = expr_cost(parent, rhs, lhs); if (cost <= swapped) { SLIST_POP(&left); SLIST_APPEND(exprs, lhs); } else { opt_enter(opt, "%pe %pe [${ylw}%g${rs}]\n", lhs, rhs, cost); SLIST_POP(&right); SLIST_APPEND(exprs, rhs); opt_leave(opt, "%pe %pe [${ylw}%g${rs}]\n", rhs, lhs, swapped); } } SLIST_EXTEND(exprs, &left); SLIST_EXTEND(exprs, &right); } /** Reorder children to reduce cost. */ static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_exprs children; foster_children(expr, &children); // Split into blocks of consecutive pure/impure expressions, and sort // the pure blocks struct bfs_exprs pure; SLIST_INIT(&pure); struct bfs_expr *child; while ((child = SLIST_POP(&children))) { if (child->pure) { SLIST_APPEND(&pure, child); } else { sort_exprs(opt, expr, &pure); bfs_expr_extend(expr, &pure); bfs_expr_append(expr, child); } } sort_exprs(opt, expr, &pure); bfs_expr_extend(expr, &pure); return visit_shallow(opt, expr, &annotate); } /** * Reordering visitor. */ static const struct visitor reorder = { .name = "reorder", .table = (const struct visitor_table[]) { {eval_and, reorder_andor}, {eval_or, reorder_andor}, {NULL, NULL}, }, }; /** Transfer function for simple predicates. */ static void data_flow_pred(struct bfs_opt *opt, enum pred_type pred, bool value) { constrain_pred(&opt->after_true.preds[pred], value); constrain_pred(&opt->after_false.preds[pred], !value); } /** Transfer function for icmp-style ([+-]N) expressions. */ static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) { struct df_range *true_range = &opt->after_true.ranges[type]; struct df_range *false_range = &opt->after_false.ranges[type]; long long value = expr->num; switch (expr->int_cmp) { case BFS_INT_EQUAL: constrain_range(true_range, value); range_remove(false_range, value); break; case BFS_INT_LESS: constrain_min(false_range, value); constrain_max(true_range, value); range_remove(true_range, value); break; case BFS_INT_GREATER: constrain_max(false_range, value); constrain_min(true_range, value); range_remove(true_range, value); break; } } /** Transfer function for -{execut,read,writ}able. */ static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->num & R_OK) { data_flow_pred(opt, READABLE_PRED, true); } if (expr->num & W_OK) { data_flow_pred(opt, WRITABLE_PRED, true); } if (expr->num & X_OK) { data_flow_pred(opt, EXECUTABLE_PRED, true); } return expr; } /** Transfer function for -empty. */ static struct bfs_expr *data_flow_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt->after_true.types &= (1 << BFS_REG) | (1 << BFS_DIR); if (opt->before.types == (1 << BFS_REG)) { constrain_range(&opt->after_true.ranges[SIZE_RANGE], 0); range_remove(&opt->after_false.ranges[SIZE_RANGE], 0); } return expr; } /** Transfer function for -gid. */ static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[GID_RANGE]; if (range->min == range->max) { gid_t gid = range->min; bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid); if (errno == 0) { data_flow_pred(opt, NOGROUP_PRED, nogroup); } } return expr; } /** Transfer function for -inum. */ static struct bfs_expr *data_flow_inum(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[INUM_RANGE]; if (range->min == range->max) { expr->probability = 0.01; } else { expr->probability = 0.5; } return expr; } /** Transfer function for -links. */ static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[LINKS_RANGE]; if (1 >= range->min && 1 <= range->max) { expr->probability = 0.99; } else { expr->probability = 0.5; } return expr; } /** Transfer function for -lname. */ static struct bfs_expr *data_flow_lname(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt->after_true.types &= 1 << BFS_LNK; return expr; } /** Transfer function for -samefile. */ static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *true_range = &opt->after_true.ranges[INUM_RANGE]; constrain_range(true_range, expr->ino); struct df_range *false_range = &opt->after_false.ranges[INUM_RANGE]; range_remove(false_range, expr->ino); return expr; } /** Transfer function for -size. */ static struct bfs_expr *data_flow_size(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[SIZE_RANGE]; if (range->min == range->max) { expr->probability = 0.01; } else { expr->probability = 0.5; } return expr; } /** Transfer function for -type. */ static struct bfs_expr *data_flow_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt->after_true.types &= expr->num; opt->after_false.types &= ~expr->num; return expr; } /** Transfer function for -uid. */ static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct df_range *range = &opt->after_true.ranges[UID_RANGE]; if (range->min == range->max) { uid_t uid = range->min; bool nouser = !bfs_getpwuid(opt->ctx->users, uid); if (errno == 0) { data_flow_pred(opt, NOUSER_PRED, nouser); } } return expr; } /** Transfer function for -xtype. */ static struct bfs_expr *data_flow_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { opt->after_true.xtypes &= expr->num; opt->after_false.xtypes &= ~expr->num; return expr; } /** Data flow visitor entry. */ static struct bfs_expr *data_flow_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { visit_enter(opt, expr, visitor); df_dump(opt, "before", &opt->before); if (!bfs_expr_is_parent(expr) && !expr->pure) { df_join(opt->impure, &opt->before); df_dump(opt, "impure", opt->impure); } return expr; } /** Data flow visitor exit. */ static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->always_true) { expr->probability = 1.0; df_init_bottom(&opt->after_false); } if (expr->always_false) { expr->probability = 0.0; df_init_bottom(&opt->after_true); } df_dump(opt, "after true", &opt->after_true); df_dump(opt, "after false", &opt->after_false); if (df_is_bottom(&opt->after_false)) { if (!expr->pure) { expr->always_true = true; expr->probability = 1.0; } else if (expr->eval_fn != eval_true) { opt_warning(opt, expr, "This expression is always true.\n\n"); opt_debug(opt, "pure, always true\n"); expr = opt_const(opt, true); if (!expr) { return NULL; } } } if (df_is_bottom(&opt->after_true)) { if (!expr->pure) { expr->always_false = true; expr->probability = 0.0; } else if (expr->eval_fn != eval_false) { opt_warning(opt, expr, "This expression is always false.\n\n"); opt_debug(opt, "pure, always false\n"); expr = opt_const(opt, false); if (!expr) { return NULL; } } } return visit_leave(opt, expr, visitor); } /** Ignore an expression (and possibly warn/prompt). */ static struct bfs_expr *opt_ignore(struct bfs_opt *opt, struct bfs_expr *expr, bool delete) { if (delete) { opt_delete(opt, "%pe [ignored result]\n", expr); } else { opt_debug(opt, "ignored result"); } if (expr->kind != BFS_TEST) { goto done; } if (!opt_warning(opt, expr, "The result of this expression is ignored.\n")) { goto done; } struct bfs_ctx *ctx = opt->ctx; if (ctx->interactive && ctx->dangerous) { bfs_warning(ctx, "Do you want to continue? "); if (ynprompt() <= 0) { errno = 0; return NULL; } } fprintf(stderr, "\n"); done: if (!delete && expr->pure) { // If we're not deleting the expression entirely, replace it with -false expr = opt_const(opt, false); } return expr; } /** Data flow visitor function. */ static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (opt->ignore_result) { expr = opt_ignore(opt, expr, false); if (!expr) { return NULL; } } if (df_is_bottom(&opt->before)) { opt_debug(opt, "unreachable\n"); opt_warning(opt, expr, "This expression is unreachable.\n\n"); expr = opt_const(opt, false); if (!expr) { return NULL; } } /** Table of simple predicates. */ static const struct { bfs_eval_fn *eval_fn; enum pred_type pred; } preds[] = { {eval_acl, ACL_PRED}, {eval_capable, CAPABLE_PRED}, {eval_empty, EMPTY_PRED}, {eval_hidden, HIDDEN_PRED}, {eval_nogroup, NOGROUP_PRED}, {eval_nouser, NOUSER_PRED}, {eval_sparse, SPARSE_PRED}, {eval_xattr, XATTR_PRED}, }; for (size_t i = 0; i < countof(preds); ++i) { if (preds[i].eval_fn == expr->eval_fn) { data_flow_pred(opt, preds[i].pred, true); break; } } /** Table of simple range comparisons. */ static const struct { bfs_eval_fn *eval_fn; enum range_type range; } ranges[] = { {eval_depth, DEPTH_RANGE}, {eval_gid, GID_RANGE}, {eval_inum, INUM_RANGE}, {eval_links, LINKS_RANGE}, {eval_size, SIZE_RANGE}, {eval_uid, UID_RANGE}, }; for (size_t i = 0; i < countof(ranges); ++i) { if (ranges[i].eval_fn == expr->eval_fn) { data_flow_icmp(opt, expr, ranges[i].range); break; } } return expr; } /** * Data flow visitor. */ static const struct visitor data_flow = { .name = "data_flow", .enter = data_flow_enter, .visit = data_flow_visit, .leave = data_flow_leave, .table = (const struct visitor_table[]) { {eval_access, data_flow_access}, {eval_empty, data_flow_empty}, {eval_gid, data_flow_gid}, {eval_inum, data_flow_inum}, {eval_links, data_flow_links}, {eval_lname, data_flow_lname}, {eval_samefile, data_flow_samefile}, {eval_size, data_flow_size}, {eval_type, data_flow_type}, {eval_uid, data_flow_uid}, {eval_xtype, data_flow_xtype}, {NULL, NULL}, }, }; /** Simplify a negation. */ static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (opt->ignore_result) { opt_debug(opt, "ignored result\n"); expr = only_child(expr); } return expr; } /** Lift negations out of a conjunction/disjunction using De Morgan's laws. */ static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) { // Only lift negations if it would reduce the number of (-not) expressions size_t added = 0, removed = 0; for_expr (child, expr) { if (child->eval_fn == eval_not) { ++removed; } else { ++added; } } if (added >= removed) { return visit_shallow(opt, expr, &annotate); } opt_debug(opt, "De Morgan's laws\n"); if (expr->eval_fn == eval_and) { expr->eval_fn = eval_or; expr->argv = &fake_or_arg; } else { bfs_assert(expr->eval_fn == eval_or); expr->eval_fn = eval_and; expr->argv = &fake_and_arg; } struct bfs_exprs children; foster_children(expr, &children); struct bfs_expr *child; while ((child = SLIST_POP(&children))) { opt_enter(opt, "%pe\n", child); child = negate_expr(opt, child, &fake_not_arg); if (!child) { return NULL; } opt_leave(opt, "%pe\n", child); bfs_expr_append(expr, child); } expr = visit_shallow(opt, expr, &annotate); if (!expr) { return NULL; } return negate_expr(opt, expr, &fake_not_arg); } /** Get the first ignorable expression in a conjunction/disjunction. */ static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *expr) { if (opt->level < 2 || !opt->ignore_result) { return NULL; } struct bfs_expr *ret = NULL; for_expr (child, expr) { if (!child->pure) { ret = NULL; } else if (!ret) { ret = child; } } return ret; } /** Simplify a conjunction. */ static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_expr *ignorable = first_ignorable(opt, expr); bool ignore = false; struct bfs_exprs children; foster_children(expr, &children); while (!SLIST_EMPTY(&children)) { struct bfs_expr *child = SLIST_POP(&children); if (child == ignorable) { ignore = true; } if (ignore) { if (!opt_ignore(opt, child, true)) { return NULL; } continue; } if (child->eval_fn == eval_true) { opt_delete(opt, "%pe [conjunction elimination]\n", child); continue; } opt_visit(opt, "%pe\n", child); bfs_expr_append(expr, child); if (child->always_false) { while ((child = SLIST_POP(&children))) { opt_delete(opt, "%pe [short-circuit]\n", child); } } } struct bfs_expr *child = bfs_expr_children(expr); if (!child) { opt_debug(opt, "nullary identity\n"); return opt_const(opt, true); } else if (!child->next) { opt_debug(opt, "unary identity\n"); return only_child(expr); } return lift_andor_not(opt, expr); } /** Simplify a disjunction. */ static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_expr *ignorable = first_ignorable(opt, expr); bool ignore = false; struct bfs_exprs children; foster_children(expr, &children); while (!SLIST_EMPTY(&children)) { struct bfs_expr *child = SLIST_POP(&children); if (child == ignorable) { ignore = true; } if (ignore) { if (!opt_ignore(opt, child, true)) { return NULL; } continue; } if (child->eval_fn == eval_false) { opt_delete(opt, "%pe [disjunctive syllogism]\n", child); continue; } opt_visit(opt, "%pe\n", child); bfs_expr_append(expr, child); if (child->always_true) { while ((child = SLIST_POP(&children))) { opt_delete(opt, "%pe [short-circuit]\n", child); } } } struct bfs_expr *child = bfs_expr_children(expr); if (!child) { opt_debug(opt, "nullary identity\n"); return opt_const(opt, false); } else if (!child->next) { opt_debug(opt, "unary identity\n"); return only_child(expr); } return lift_andor_not(opt, expr); } /** Simplify a comma expression. */ static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { struct bfs_exprs children; foster_children(expr, &children); while (!SLIST_EMPTY(&children)) { struct bfs_expr *child = SLIST_POP(&children); if (opt->level >= 2 && child->pure && !SLIST_EMPTY(&children)) { if (!opt_ignore(opt, child, true)) { return NULL; } continue; } opt_visit(opt, "%pe\n", child); bfs_expr_append(expr, child); } struct bfs_expr *child = bfs_expr_children(expr); if (child && !child->next) { opt_debug(opt, "unary identity\n"); return only_child(expr); } return expr; } /** * Logical simplification visitor. */ static const struct visitor simplify = { .name = "simplify", .table = (const struct visitor_table[]) { {eval_not, simplify_not}, {eval_and, simplify_and}, {eval_or, simplify_or}, {eval_comma, simplify_comma}, {NULL, NULL}, }, }; /** Optimize an expression. */ static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) { opt_enter(opt, "pass 0:\n"); expr = visit(opt, expr, &annotate); opt_leave(opt, NULL); /** Table of optimization passes. */ static const struct { /** Minimum optlevel for this pass. */ int level; /** The visitor for this pass. */ const struct visitor *visitor; } passes[] = { {1, &canonicalize}, {3, &reorder}, {2, &data_flow}, {1, &simplify}, }; struct df_domain impure; df_init_top(&opt->after_true); df_init_top(&opt->after_false); for (int i = 0; i < 3; ++i) { struct bfs_opt nested = *opt; nested.impure = &impure; impure = *opt->impure; opt_enter(&nested, "pass %d:\n", i + 1); for (size_t j = 0; j < countof(passes); ++j) { if (opt->level < passes[j].level) { continue; } const struct visitor *visitor = passes[j].visitor; // Skip reordering the first time through the passes, to // make warnings more understandable if (visitor == &reorder) { if (i == 0) { continue; } else { nested.warn = false; } } expr = visit(&nested, expr, visitor); if (!expr) { return NULL; } if (visitor == &data_flow) { opt->after_true = nested.after_true; opt->after_false = nested.after_false; } } opt_leave(&nested, NULL); if (!bfs_expr_is_parent(expr)) { break; } } *opt->impure = impure; return expr; } /** An expression predicate. */ typedef bool expr_pred(const struct bfs_expr *expr); /** Estimate the odds that a matching expression will be evaluated. */ static float estimate_odds(const struct bfs_expr *expr, expr_pred *pred) { if (pred(expr)) { return 1.0; } float nonmatch_odds = 1.0; float reached_odds = 1.0; for_expr (child, expr) { float child_odds = estimate_odds(child, pred); nonmatch_odds *= 1.0 - reached_odds * child_odds; if (expr->eval_fn == eval_and) { reached_odds *= child->probability; } else if (expr->eval_fn == eval_or) { reached_odds *= 1.0 - child->probability; } } return 1.0 - nonmatch_odds; } /** Whether an expression calls stat(). */ static bool calls_stat(const struct bfs_expr *expr) { return expr->calls_stat; } /** Estimate the odds of calling stat(). */ static float estimate_stat_odds(struct bfs_ctx *ctx) { if (ctx->unique) { return 1.0; } float nostat_odds = 1.0 - estimate_odds(ctx->exclude, calls_stat); float reached_odds = 1.0 - ctx->exclude->probability; float expr_odds = estimate_odds(ctx->expr, calls_stat); nostat_odds *= 1.0 - reached_odds * expr_odds; return 1.0 - nostat_odds; } /** Matches -(exec|ok) ... \; */ static bool single_exec(const struct bfs_expr *expr) { return expr->eval_fn == eval_exec && !(expr->exec->flags & BFS_EXEC_MULTI); } int bfs_optimize(struct bfs_ctx *ctx) { bfs_ctx_dump(ctx, DEBUG_OPT); struct df_domain impure; df_init_bottom(&impure); struct bfs_opt opt = { .ctx = ctx, .level = ctx->optlevel, .depth = 0, .warn = ctx->warn, .ignore_result = false, .impure = &impure, }; df_init_top(&opt.before); ctx->exclude = optimize(&opt, ctx->exclude); if (!ctx->exclude) { return -1; } // Only non-excluded files are evaluated opt.before = opt.after_false; opt.ignore_result = true; struct df_range *depth = &opt.before.ranges[DEPTH_RANGE]; if (ctx->mindepth > 0) { constrain_min(depth, ctx->mindepth); } if (ctx->maxdepth < INT_MAX) { constrain_max(depth, ctx->maxdepth); } ctx->expr = optimize(&opt, ctx->expr); if (!ctx->expr) { return -1; } if (opt.level >= 2 && df_is_bottom(&impure)) { bfs_warning(ctx, "This command won't do anything.\n\n"); } const struct df_range *impure_depth = &impure.ranges[DEPTH_RANGE]; long long mindepth = impure_depth->min; long long maxdepth = impure_depth->max; opt_enter(&opt, "post-process:\n"); if (opt.level >= 2 && mindepth > ctx->mindepth) { if (mindepth > INT_MAX) { mindepth = INT_MAX; } opt_enter(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); ctx->mindepth = mindepth; opt_leave(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); } if (opt.level >= 4 && maxdepth < ctx->maxdepth) { if (maxdepth < INT_MIN) { maxdepth = INT_MIN; } opt_enter(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); ctx->maxdepth = maxdepth; opt_leave(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); } if (opt.level >= 3) { // bfs_eval() can do lazy stat() calls, but only on one thread. float lazy_cost = estimate_stat_odds(ctx); // bftw() can do eager stat() calls in parallel float eager_cost = 1.0 / ctx->threads; if (eager_cost <= lazy_cost) { opt_enter(&opt, "lazy stat cost: ${ylw}%g${rs}\n", lazy_cost); ctx->flags |= BFTW_STAT; opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost); } #ifndef POSIX_SPAWN_SETRLIMIT // If bfs_spawn_setrlimit() would force us to use fork() over // posix_spawn(), the extra cost may outweigh the benefit of a // higher RLIMIT_NOFILE float single_exec_odds = estimate_odds(ctx->expr, single_exec); if (single_exec_odds >= 0.5) { opt_enter(&opt, "single ${blu}-exec${rs} odds: ${ylw}%g${rs}\n", single_exec_odds); ctx->raise_nofile = false; opt_leave(&opt, "not raising RLIMIT_NOFILE\n"); } #endif } opt_leave(&opt, NULL); return 0; }