summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2021-11-17 14:33:01 -0500
committerTavian Barnes <tavianator@tavianator.com>2021-11-17 14:49:04 -0500
commitac1d28ea6bf9c0bd7b41725095a5b41ac8a08121 (patch)
treefc546039470ce0c9f7daa2188be6bee915c7c6c1
parent9b50adaaaa4fedc8bda6fcf32595ecf7a682fa8b (diff)
downloadbfs-ac1d28ea6bf9c0bd7b41725095a5b41ac8a08121.tar.xz
exec: Find ARG_MAX with binary search after E2BIG
Previously we would shrink the command by one argument at a time until a successful execution. This is okay if the ARG_MAX estimate is just a little bit off, but is terribly slow when it's off by a lot. One situation where it's very far off is when a 32-bit build of bfs launches a 64-bit binary. In this case, bfs thinks the argv pointers are 4 bytes, while they actually take up 8 bytes. The performance is quite bad: $ time ./bfs-64 ~/code/linux -exec echo {} + >/dev/null ./bfs-64 ~/code/linux -exec echo {} + > /dev/null 0.03s user 0.07s system 99% cpu 0.095 total $ time ./bfs-32 ~/code/linux -exec echo {} + >/dev/null ./bfs-32 ~/code/linux -exec echo {} + > /dev/null 0.08s user 10.33s system 100% cpu 10.390 total After this change, performance is much better: $ time ./bfs-32 ~/code/linux -exec echo {} + >/dev/null ./bfs-32 ~/code/linux -exec echo {} + > /dev/null 0.03s user 0.08s system 99% cpu 0.110 total
-rw-r--r--exec.c79
-rw-r--r--exec.h2
2 files changed, 67 insertions, 14 deletions
diff --git a/exec.c b/exec.c
index 431fcbe..786a709 100644
--- a/exec.c
+++ b/exec.c
@@ -59,8 +59,6 @@ static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, .
va_end(args);
}
-extern char **environ;
-
/** Determine the size of a single argument, for comparison to arg_max. */
static size_t bfs_exec_arg_size(const char *arg) {
return sizeof(arg) + strlen(arg) + 1;
@@ -79,6 +77,7 @@ static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) {
}
// We have to share space with the environment variables
+ extern char **environ;
for (char **envp = environ; *envp; ++envp) {
arg_max -= bfs_exec_arg_size(*envp);
}
@@ -131,6 +130,7 @@ struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs
execbuf->argv_cap = 0;
execbuf->arg_size = 0;
execbuf->arg_max = 0;
+ execbuf->arg_min = 0;
execbuf->wd_fd = -1;
execbuf->wd_path = NULL;
execbuf->wd_len = 0;
@@ -183,6 +183,7 @@ struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs
execbuf->argc = execbuf->tmpl_argc - 1;
execbuf->arg_max = bfs_exec_arg_max(execbuf);
+ execbuf->arg_min = execbuf->arg_max;
}
return execbuf;
@@ -461,6 +462,54 @@ static bool bfs_exec_args_remain(const struct bfs_exec *execbuf) {
return execbuf->argc >= execbuf->tmpl_argc;
}
+/** Compute the current ARG_MAX estimate for binary search. */
+static size_t bfs_exec_estimate_max(const struct bfs_exec *execbuf) {
+ size_t min = execbuf->arg_min;
+ size_t max = execbuf->arg_max;
+ return min + (max - min)/2;
+}
+
+/** Update the ARG_MAX lower bound from a successful execution. */
+static void bfs_exec_update_min(struct bfs_exec *execbuf) {
+ if (execbuf->arg_size > execbuf->arg_min) {
+ execbuf->arg_min = execbuf->arg_size;
+
+ // Don't let min exceed max
+ if (execbuf->arg_min > execbuf->arg_max) {
+ execbuf->arg_min = execbuf->arg_max;
+ }
+
+ size_t estimate = bfs_exec_estimate_max(execbuf);
+ bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n",
+ execbuf->arg_min, execbuf->arg_max, estimate);
+ }
+}
+
+/** Update the ARG_MAX upper bound from a failed execution. */
+static size_t bfs_exec_update_max(struct bfs_exec *execbuf) {
+ bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n");
+
+ if (execbuf->arg_size < execbuf->arg_max) {
+ execbuf->arg_max = execbuf->arg_size;
+
+ // Don't let min exceed max
+ if (execbuf->arg_min > execbuf->arg_max) {
+ execbuf->arg_min = execbuf->arg_max;
+ }
+ }
+
+ if (execbuf->arg_size <= execbuf->arg_min) {
+ // Lower bound was wrong, restart binary search.
+ execbuf->arg_min = 0;
+ }
+
+ // Binary search for a more precise bound
+ size_t estimate = bfs_exec_estimate_max(execbuf);
+ bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n",
+ execbuf->arg_min, execbuf->arg_max, estimate);
+ return estimate;
+}
+
/** Execute the pending command from a BFS_EXEC_MULTI execbuf. */
static int bfs_exec_flush(struct bfs_exec *execbuf) {
int ret = 0, error = 0;
@@ -470,20 +519,24 @@ static int bfs_exec_flush(struct bfs_exec *execbuf) {
execbuf->argv[execbuf->argc] = NULL;
ret = bfs_exec_spawn(execbuf);
error = errno;
- if (ret == 0 || error != E2BIG) {
+ if (ret == 0) {
+ bfs_exec_update_min(execbuf);
+ break;
+ } else if (error != E2BIG) {
break;
}
// Try to recover from E2BIG by trying fewer and fewer arguments
// until they fit
- bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n");
- execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1];
- execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]);
- --execbuf->argc;
+ size_t new_max = bfs_exec_update_max(execbuf);
+ while (execbuf->arg_size > new_max) {
+ execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1];
+ execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]);
+ --execbuf->argc;
+ }
}
- size_t new_argc = execbuf->argc;
- size_t new_size = execbuf->arg_size;
+ size_t new_argc = execbuf->argc;
for (size_t i = execbuf->tmpl_argc - 1; i < new_argc; ++i) {
free(execbuf->argv[i]);
}
@@ -491,9 +544,6 @@ static int bfs_exec_flush(struct bfs_exec *execbuf) {
execbuf->arg_size = 0;
if (new_argc < orig_argc) {
- execbuf->arg_max = new_size;
- bfs_exec_debug(execbuf, "ARG_MAX: %zu\n", execbuf->arg_max);
-
// If we recovered from E2BIG, there are unused arguments at the
// end of the list
for (size_t i = new_argc + 1; i <= orig_argc; ++i) {
@@ -526,10 +576,11 @@ static bool bfs_exec_changed_dirs(const struct bfs_exec *execbuf, const struct B
/** Check if we need to flush the execbuf because we're too big. */
static bool bfs_exec_would_overflow(const struct bfs_exec *execbuf, const char *arg) {
+ size_t arg_max = bfs_exec_estimate_max(execbuf);
size_t next_size = execbuf->arg_size + bfs_exec_arg_size(arg);
- if (next_size > execbuf->arg_max) {
+ if (next_size > arg_max) {
bfs_exec_debug(execbuf, "Command size (%zu) would exceed maximum (%zu), executing buffered command\n",
- next_size, execbuf->arg_max);
+ next_size, arg_max);
return true;
}
diff --git a/exec.h b/exec.h
index 1ba409f..a3e3c71 100644
--- a/exec.h
+++ b/exec.h
@@ -63,6 +63,8 @@ struct bfs_exec {
size_t arg_size;
/** Maximum arg_size before E2BIG. */
size_t arg_max;
+ /** Lower bound for arg_max. */
+ size_t arg_min;
/** A file descriptor for the working directory, for BFS_EXEC_CHDIR. */
int wd_fd;