From 220fb782da63fd648dedb02a7c43c837792b3d4a Mon Sep 17 00:00:00 2001
From: Tavian Barnes <tavianator@tavianator.com>
Date: Tue, 24 Jan 2023 17:23:18 -0500
Subject: opt: Move probabilities out of the parser

---
 src/opt.c   | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parse.c | 149 ++++++++++++------------------------------------------------
 2 files changed, 161 insertions(+), 120 deletions(-)

(limited to 'src')

diff --git a/src/opt.c b/src/opt.c
index 1daf390..e76e216 100644
--- a/src/opt.c
+++ b/src/opt.c
@@ -54,6 +54,7 @@
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
+#include <string.h>
 #include <unistd.h>
 
 static char *fake_and_arg = "-a";
@@ -776,15 +777,23 @@ static void infer_icmp_facts(struct opt_state *state, const struct bfs_expr *exp
 
 /** Optimize -{execut,read,writ}able. */
 static struct bfs_expr *optimize_access(struct opt_state *state, struct bfs_expr *expr) {
+	expr->probability = 1.0;
+
 	if (expr->num & R_OK) {
 		infer_pred_facts(state, READABLE_PRED);
+		expr->probability *= 0.99;
 	}
+
 	if (expr->num & W_OK) {
 		infer_pred_facts(state, WRITABLE_PRED);
+		expr->probability *= 0.8;
 	}
+
 	if (expr->num & X_OK) {
 		infer_pred_facts(state, EXECUTABLE_PRED);
+		expr->probability *= 0.2;
 	}
+
 	return expr;
 }
 
@@ -811,6 +820,17 @@ static struct bfs_expr *optimize_exec(struct opt_state *state, struct bfs_expr *
 	return expr;
 }
 
+/** Optimize -name/-lname/-path. */
+static struct bfs_expr *optimize_fnmatch(struct opt_state *state, struct bfs_expr *expr) {
+	if (strchr(expr->argv[1], '*')) {
+		expr->probability = 0.5;
+	} else {
+		expr->probability = 0.1;
+	}
+
+	return expr;
+}
+
 /** Optimize -gid. */
 static struct bfs_expr *optimize_gid(struct opt_state *state, struct bfs_expr *expr) {
 	struct range *range = &state->facts_when_true.ranges[GID_RANGE];
@@ -825,6 +845,30 @@ static struct bfs_expr *optimize_gid(struct opt_state *state, struct bfs_expr *e
 	return expr;
 }
 
+/** Optimize -inum. */
+static struct bfs_expr *optimize_inum(struct opt_state *state, struct bfs_expr *expr) {
+	struct range *range = &state->facts_when_true.ranges[INUM_RANGE];
+	if (range->min == range->max) {
+		expr->probability = 0.01;
+	} else {
+		expr->probability = 0.5;
+	}
+
+	return expr;
+}
+
+/** Optimize -links. */
+static struct bfs_expr *optimize_links(struct opt_state *state, struct bfs_expr *expr) {
+	struct range *range = &state->facts_when_true.ranges[LINKS_RANGE];
+	if (1 >= range->min && 1 <= range->max) {
+		expr->probability = 0.99;
+	} else {
+		expr->probability = 0.5;
+	}
+
+	return expr;
+}
+
 /** Optimize -uid. */
 static struct bfs_expr *optimize_uid(struct opt_state *state, struct bfs_expr *expr) {
 	struct range *range = &state->facts_when_true.ranges[UID_RANGE];
@@ -847,10 +891,59 @@ static struct bfs_expr *optimize_samefile(struct opt_state *state, struct bfs_ex
 	return expr;
 }
 
+/** Optimize -size. */
+static struct bfs_expr *optimize_size(struct opt_state *state, struct bfs_expr *expr) {
+	struct range *range = &state->facts_when_true.ranges[SIZE_RANGE];
+	if (range->min == range->max) {
+		expr->probability = 0.01;
+	} else {
+		expr->probability = 0.5;
+	}
+
+	return expr;
+}
+
+/** Estimate probability for -x?type. */
+static void estimate_type_probability(struct bfs_expr *expr) {
+	unsigned int types = expr->num;
+
+	expr->probability = 0.0;
+	if (types & (1 << BFS_BLK)) {
+		expr->probability += 0.00000721183;
+	}
+	if (types & (1 << BFS_CHR)) {
+		expr->probability += 0.0000499855;
+	}
+	if (types & (1 << BFS_DIR)) {
+		expr->probability += 0.114475;
+	}
+	if (types & (1 << BFS_DOOR)) {
+		expr->probability += 0.000001;
+	}
+	if (types & (1 << BFS_FIFO)) {
+		expr->probability += 0.00000248684;
+	}
+	if (types & (1 << BFS_REG)) {
+		expr->probability += 0.859772;
+	}
+	if (types & (1 << BFS_LNK)) {
+		expr->probability += 0.0256816;
+	}
+	if (types & (1 << BFS_SOCK)) {
+		expr->probability += 0.0000116881;
+	}
+	if (types & (1 << BFS_WHT)) {
+		expr->probability += 0.000001;
+	}
+}
+
 /** Optimize -type. */
 static struct bfs_expr *optimize_type(struct opt_state *state, struct bfs_expr *expr) {
 	state->facts_when_true.types &= expr->num;
 	state->facts_when_false.types &= ~expr->num;
+
+	estimate_type_probability(expr);
+
 	return expr;
 }
 
@@ -865,6 +958,9 @@ static struct bfs_expr *optimize_xtype(struct opt_state *state, struct bfs_expr
 
 	state->facts_when_true.xtypes &= expr->num;
 	state->facts_when_false.xtypes &= ~expr->num;
+
+	estimate_type_probability(expr);
+
 	return expr;
 }
 
@@ -975,6 +1071,29 @@ static const struct {
 	{eval_xattrname, STAT_COST},
 };
 
+/**
+ * Table of expression probabilities.
+ */
+static const struct {
+	/** The evaluation function with this cost. */
+	bfs_eval_fn *eval_fn;
+	/** The matching probability. */
+	float probability;
+} opt_probs[] = {
+	{eval_acl,       0.00002},
+	{eval_capable,   0.000002},
+	{eval_empty,     0.01},
+	{eval_false,     0.0},
+	{eval_hidden,    0.01},
+	{eval_nogroup,   0.01},
+	{eval_nouser,    0.01},
+	{eval_samefile,  0.01},
+	{eval_true,      1.0},
+	{eval_xattr,     0.01},
+	{eval_xattrname, 0.01},
+};
+
+
 /**
  * Table of simple predicates.
  */
@@ -1026,7 +1145,13 @@ static const struct {
 	{eval_empty,    optimize_empty},
 	{eval_exec,     optimize_exec},
 	{eval_gid,      optimize_gid},
+	{eval_inum,     optimize_inum},
+	{eval_links,    optimize_links},
+	{eval_lname,    optimize_fnmatch},
+	{eval_name,     optimize_fnmatch},
+	{eval_path,     optimize_fnmatch},
 	{eval_samefile, optimize_samefile},
+	{eval_size,     optimize_size},
 	{eval_type,     optimize_type},
 	{eval_uid,      optimize_uid},
 	{eval_xtype,    optimize_xtype},
@@ -1071,6 +1196,13 @@ static struct bfs_expr *optimize_expr_lookup(struct opt_state *state, struct bfs
 		}
 	}
 
+	for (size_t i = 0; i < BFS_COUNTOF(opt_probs); ++i) {
+		if (opt_probs[i].eval_fn == expr->eval_fn) {
+			expr->probability = opt_probs[i].probability;
+			break;
+		}
+	}
+
 	for (size_t i = 0; i < BFS_COUNTOF(opt_preds); ++i) {
 		if (opt_preds[i].eval_fn == expr->eval_fn) {
 			infer_pred_facts(state, opt_preds[i].pred);
diff --git a/src/parse.c b/src/parse.c
index eee185b..f2582e0 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -1010,24 +1010,9 @@ static struct bfs_expr *parse_xargs_safe(struct parser_state *state, int arg1, i
  */
 static struct bfs_expr *parse_access(struct parser_state *state, int flag, int arg2) {
 	struct bfs_expr *expr = parse_nullary_test(state, eval_access);
-	if (!expr) {
-		return NULL;
-	}
-
-	expr->num = flag;
-
-	switch (flag) {
-	case R_OK:
-		expr->probability = 0.99;
-		break;
-	case W_OK:
-		expr->probability = 0.8;
-		break;
-	case X_OK:
-		expr->probability = 0.2;
-		break;
+	if (expr) {
+		expr->num = flag;
 	}
-
 	return expr;
 }
 
@@ -1036,11 +1021,7 @@ static struct bfs_expr *parse_access(struct parser_state *state, int flag, int a
  */
 static struct bfs_expr *parse_acl(struct parser_state *state, int flag, int arg2) {
 #if BFS_CAN_CHECK_ACL
-	struct bfs_expr *expr = parse_nullary_test(state, eval_acl);
-	if (expr) {
-		expr->probability = 0.00002;
-	}
-	return expr;
+	return parse_nullary_test(state, eval_acl);
 #else
 	parse_error(state, "Missing platform support.\n");
 	return NULL;
@@ -1160,11 +1141,7 @@ fail:
  */
 static struct bfs_expr *parse_capable(struct parser_state *state, int flag, int arg2) {
 #if BFS_CAN_CHECK_CAPABILITIES
-	struct bfs_expr *expr = parse_nullary_test(state, eval_capable);
-	if (expr) {
-		expr->probability = 0.000002;
-	}
-	return expr;
+	return parse_nullary_test(state, eval_capable);
 #else
 	parse_error(state, "Missing platform support.\n");
 	return NULL;
@@ -1293,12 +1270,9 @@ static struct bfs_expr *parse_depth_limit(struct parser_state *state, int is_min
  */
 static struct bfs_expr *parse_empty(struct parser_state *state, int arg1, int arg2) {
 	struct bfs_expr *expr = parse_nullary_test(state, eval_empty);
-	if (!expr) {
-		return NULL;
+	if (expr) {
+		expr->ephemeral_fds = 1;
 	}
-
-	expr->probability = 0.01;
-	expr->ephemeral_fds = 1;
 	return expr;
 }
 
@@ -1658,11 +1632,7 @@ fail:
  * Parse -hidden.
  */
 static struct bfs_expr *parse_hidden(struct parser_state *state, int arg1, int arg2) {
-	struct bfs_expr *expr = parse_nullary_test(state, eval_hidden);
-	if (expr) {
-		expr->probability = 0.01;
-	}
-	return expr;
+	return parse_nullary_test(state, eval_hidden);
 }
 
 /**
@@ -1677,22 +1647,14 @@ static struct bfs_expr *parse_ignore_races(struct parser_state *state, int ignor
  * Parse -inum N.
  */
 static struct bfs_expr *parse_inum(struct parser_state *state, int arg1, int arg2) {
-	struct bfs_expr *expr = parse_test_icmp(state, eval_inum);
-	if (expr) {
-		expr->probability = expr->int_cmp == BFS_INT_EQUAL ? 0.01 : 0.50;
-	}
-	return expr;
+	return parse_test_icmp(state, eval_inum);
 }
 
 /**
  * Parse -links N.
  */
 static struct bfs_expr *parse_links(struct parser_state *state, int arg1, int arg2) {
-	struct bfs_expr *expr = parse_test_icmp(state, eval_links);
-	if (expr) {
-		expr->probability = bfs_expr_cmp(expr, 1) ? 0.99 : 0.01;
-	}
-	return expr;
+	return parse_test_icmp(state, eval_links);
 }
 
 /**
@@ -1764,12 +1726,6 @@ static struct bfs_expr *parse_fnmatch(const struct parser_state *state, struct b
 		return expr;
 	}
 
-	if (strchr(pattern, '*')) {
-		expr->probability = 0.5;
-	} else {
-		expr->probability = 0.1;
-	}
-
 	return expr;
 }
 
@@ -1921,16 +1877,11 @@ fail:
  */
 static struct bfs_expr *parse_nogroup(struct parser_state *state, int arg1, int arg2) {
 	struct bfs_expr *expr = parse_nullary_test(state, eval_nogroup);
-	if (!expr) {
-		return NULL;
+	if (expr) {
+		// Who knows how many FDs getgrgid_r() needs?  Probably at least
+		// one for /etc/group
+		expr->ephemeral_fds = 1;
 	}
-
-	expr->probability = 0.01;
-
-	// Who knows how many FDs getgrgid_r() needs?  Probably at least one for
-	// /etc/group
-	expr->ephemeral_fds = 1;
-
 	return expr;
 }
 
@@ -1943,8 +1894,6 @@ static struct bfs_expr *parse_nohidden(struct parser_state *state, int arg1, int
 		return NULL;
 	}
 
-	hidden->probability = 0.01;
-
 	if (parse_exclude(state, hidden) != 0) {
 		return NULL;
 	}
@@ -1966,16 +1915,11 @@ static struct bfs_expr *parse_noleaf(struct parser_state *state, int arg1, int a
  */
 static struct bfs_expr *parse_nouser(struct parser_state *state, int arg1, int arg2) {
 	struct bfs_expr *expr = parse_nullary_test(state, eval_nouser);
-	if (!expr) {
-		return NULL;
+	if (expr) {
+		// Who knows how many FDs getpwuid_r() needs?  Probably at least
+		// one for /etc/passwd
+		expr->ephemeral_fds = 1;
 	}
-
-	expr->probability = 0.01;
-
-	// Who knows how many FDs getpwuid_r() needs?  Probably at least one for
-	// /etc/passwd
-	expr->ephemeral_fds = 1;
-
 	return expr;
 }
 
@@ -2428,9 +2372,6 @@ static struct bfs_expr *parse_samefile(struct parser_state *state, int arg1, int
 
 	expr->dev = sb.dev;
 	expr->ino = sb.ino;
-
-	expr->probability = 0.01;
-
 	return expr;
 }
 
@@ -2548,8 +2489,6 @@ static struct bfs_expr *parse_size(struct parser_state *state, int arg1, int arg
 		goto bad_unit;
 	}
 
-	expr->probability = expr->int_cmp == BFS_INT_EQUAL ? 0.01 : 0.50;
-
 	return expr;
 
 bad_unit:
@@ -2584,50 +2523,37 @@ static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2)
 		return NULL;
 	}
 
-	unsigned int types = 0;
-	float probability = 0.0;
+	expr->num = 0;
 
 	const char *c = expr->argv[1];
 	while (true) {
-		enum bfs_type type;
-		float type_prob;
-
 		switch (*c) {
 		case 'b':
-			type = BFS_BLK;
-			type_prob = 0.00000721183;
+			expr->num |= 1 << BFS_BLK;
 			break;
 		case 'c':
-			type = BFS_CHR;
-			type_prob = 0.0000499855;
+			expr->num |= 1 << BFS_CHR;
 			break;
 		case 'd':
-			type = BFS_DIR;
-			type_prob = 0.114475;
+			expr->num |= 1 << BFS_DIR;
 			break;
 		case 'D':
-			type = BFS_DOOR;
-			type_prob = 0.000001;
+			expr->num |= 1 << BFS_DOOR;
 			break;
 		case 'p':
-			type = BFS_FIFO;
-			type_prob = 0.00000248684;
+			expr->num |= 1 << BFS_FIFO;
 			break;
 		case 'f':
-			type = BFS_REG;
-			type_prob = 0.859772;
+			expr->num |= 1 << BFS_REG;
 			break;
 		case 'l':
-			type = BFS_LNK;
-			type_prob = 0.0256816;
+			expr->num |= 1 << BFS_LNK;
 			break;
 		case 's':
-			type = BFS_SOCK;
-			type_prob = 0.0000116881;
+			expr->num |= 1 << BFS_SOCK;
 			break;
 		case 'w':
-			type = BFS_WHT;
-			type_prob = 0.000001;
+			expr->num |= 1 << BFS_WHT;
 			break;
 
 		case '\0':
@@ -2639,12 +2565,6 @@ static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2)
 			goto fail;
 		}
 
-		unsigned int flag = 1 << type;
-		if (!(types & flag)) {
-			types |= flag;
-			probability += type_prob;
-		}
-
 		++c;
 		if (*c == '\0') {
 			break;
@@ -2657,9 +2577,6 @@ static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2)
 		}
 	}
 
-	expr->num = types;
-	expr->probability = probability;
-
 	return expr;
 
 fail:
@@ -2680,11 +2597,7 @@ static struct bfs_expr *parse_warn(struct parser_state *state, int warn, int arg
  */
 static struct bfs_expr *parse_xattr(struct parser_state *state, int arg1, int arg2) {
 #if BFS_CAN_CHECK_XATTRS
-	struct bfs_expr *expr = parse_nullary_test(state, eval_xattr);
-	if (expr) {
-		expr->probability = 0.01;
-	}
-	return expr;
+	return parse_nullary_test(state, eval_xattr);
 #else
 	parse_error(state, "Missing platform support.\n");
 	return NULL;
@@ -2696,11 +2609,7 @@ static struct bfs_expr *parse_xattr(struct parser_state *state, int arg1, int ar
  */
 static struct bfs_expr *parse_xattrname(struct parser_state *state, int arg1, int arg2) {
 #if BFS_CAN_CHECK_XATTRS
-	struct bfs_expr *expr = parse_unary_test(state, eval_xattrname);
-	if (expr) {
-		expr->probability = 0.01;
-	}
-	return expr;
+	return parse_unary_test(state, eval_xattrname);
 #else
 	parse_error(state, "Missing platform support.\n");
 	return NULL;
-- 
cgit v1.2.3