From 65a7814b2dbc10ea86610092f03d0c1df95d08ad Mon Sep 17 00:00:00 2001
From: Tavian Barnes <tavianator@tavianator.com>
Date: Thu, 23 May 2024 14:28:02 -0400
Subject: opt: Don't raise RLIMIT_NOFILE if it would prevent using
 posix_spawn()

If we raise RLIMIT_NOFILE, we have to lower it before calling exec() for
compatibility with select().  If posix_spawn() doesn't support that, we
fall back to fork(), which is quite a bit slower.

Therefore, if we're going to exec() on most files, it's better to keep
RLIMIT_NOFILE the same to avoid the fork() cost, even though it makes
bftw() somewhat slower.
---
 src/ctx.c  |  1 +
 src/ctx.h  |  2 ++
 src/eval.c |  3 +++
 src/opt.c  | 16 ++++++++++++++++
 4 files changed, 22 insertions(+)

diff --git a/src/ctx.c b/src/ctx.c
index 71fef98..fac501a 100644
--- a/src/ctx.c
+++ b/src/ctx.c
@@ -56,6 +56,7 @@ struct bfs_ctx *bfs_ctx_new(void) {
 		goto fail;
 	}
 	ctx->cur_nofile = ctx->orig_nofile;
+	ctx->raise_nofile = true;
 
 	ctx->users = bfs_users_new();
 	if (!ctx->users) {
diff --git a/src/ctx.h b/src/ctx.h
index fc3020c..b28a63c 100644
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -102,6 +102,8 @@ struct bfs_ctx {
 	struct rlimit orig_nofile;
 	/** The current RLIMIT_NOFILE limits. */
 	struct rlimit cur_nofile;
+	/** Whether the fd limit should be raised. */
+	bool raise_nofile;
 
 	/** The current time. */
 	struct timespec now;
diff --git a/src/eval.c b/src/eval.c
index 2ca4a1f..8863e34 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -1466,6 +1466,9 @@ done:
 static int raise_fdlimit(struct bfs_ctx *ctx) {
 	rlim_t cur = ctx->orig_nofile.rlim_cur;
 	rlim_t max = ctx->orig_nofile.rlim_max;
+	if (!ctx->raise_nofile) {
+		max = cur;
+	}
 
 	rlim_t target = 64 << 10;
 	if (rlim_cmp(target, max) > 0) {
diff --git a/src/opt.c b/src/opt.c
index d1dee11..6704a46 100644
--- a/src/opt.c
+++ b/src/opt.c
@@ -2221,6 +2221,11 @@ static float estimate_stat_odds(struct bfs_ctx *ctx) {
 	return 1.0 - nostat_odds;
 }
 
+/** Matches -(exec|ok) ... \; (evaluated by eval_exec, BFS_EXEC_MULTI flag not set) */
+static bool single_exec(const struct bfs_expr *expr) {
+	return expr->eval_fn == eval_exec && !(expr->exec->flags & BFS_EXEC_MULTI);
+}
+
 int bfs_optimize(struct bfs_ctx *ctx) {
 	bfs_ctx_dump(ctx, DEBUG_OPT);
 
@@ -2299,6 +2304,17 @@ int bfs_optimize(struct bfs_ctx *ctx) {
 			opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost);
 		}
 
+#ifndef POSIX_SPAWN_SETRLIMIT
+		// If bfs_spawn_setrlimit() would force us to use fork() over
+		// posix_spawn(), the extra cost may outweigh the benefit of a
+		// higher RLIMIT_NOFILE
+		float single_exec_odds = estimate_odds(ctx->expr, single_exec);
+		if (single_exec_odds >= 0.5) {
+			opt_enter(&opt, "single ${blu}-exec${rs} odds: ${ylw}%g${rs}\n", single_exec_odds);
+			ctx->raise_nofile = false;
+			opt_leave(&opt, "not raising RLIMIT_NOFILE\n");
+		}
+#endif
 	}
 
 	opt_leave(&opt, NULL);
-- 
cgit v1.2.3