From 2c396fce53100cad4e472f29851f07030a80ee50 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 13 Jul 2023 13:30:16 -0400 Subject: bfstd: Support wordesc() without allocating --- src/bfstd.c | 92 +++++++++++++++++++++++++++++++++++------------------------ src/bfstd.h | 47 +++++++++++++++++++++++++++--- src/color.c | 20 +++++-------- src/diag.c | 29 ++++++++++--------- src/diag.h | 4 +-- src/dstring.c | 22 ++++++++++++++ src/dstring.h | 31 ++++++++++++++++++++ 7 files changed, 175 insertions(+), 70 deletions(-) diff --git a/src/bfstd.c b/src/bfstd.c index 6f39f54..97fa3b3 100644 --- a/src/bfstd.c +++ b/src/bfstd.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -575,7 +576,7 @@ size_t xstrwidth(const char *str) { } /** Get the length of the longest printable prefix of a string. */ -static size_t printable_len(const char *str, size_t len) { +static size_t printable_len(const char *str, size_t len, enum wesc_flags flags) { mbstate_t mb; memset(&mb, 0, sizeof(mb)); @@ -583,9 +584,23 @@ static size_t printable_len(const char *str, size_t len) { while (len > 0) { wchar_t wc; size_t mblen = mbrtowc(&wc, cur, len, &mb); - if (mblen == (size_t)-1 || mblen == (size_t)-2 || !iswprint(wc)) { + if (mblen == (size_t)-1 || mblen == (size_t)-2) { break; } + + bool safe = iswprint(wc); + + // Technically a literal newline is safe inside single quotes, + // but $'\n' is much nicer than ' + // ' + if (!(flags & WESC_SHELL) && iswspace(wc)) { + safe = true; + } + + if (!safe) { + break; + } + cur += mblen; len -= mblen; } @@ -623,13 +638,13 @@ static const char *dollar_esc(char c) { } /** $'Quote' a string for the shell. */ -static char *dollar_quote(char *dest, char *end, const char *str, size_t len) { +static char *dollar_quote(char *dest, char *end, const char *str, size_t len, enum wesc_flags flags) { static const char *hex[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"}; dest = xstpecpy(dest, end, "$'"); while (len > 0) { - size_t plen = printable_len(str, len); + size_t plen = printable_len(str, len, flags); size_t elen = strcspn(str, "'\\"); size_t min = plen < elen ? plen : elen; dest = xstpencpy(dest, end, str, min); @@ -657,85 +672,86 @@ static char *dollar_quote(char *dest, char *end, const char *str, size_t len) { } /** How much of this string is safe as a bare word? */ -static size_t bare_len(const char *str) { +static size_t bare_len(const char *str, size_t len) { // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 - return strcspn(str, "|&;<>()$`\\\"' *?[#˜=%!"); + size_t ret = strcspn(str, "|&;<>()$`\\\"' *?[#˜=%!"); + return ret < len ? ret : len; } /** How much of this string is safe to double-quote? */ -static size_t quotable_len(const char *str) { +static size_t quotable_len(const char *str, size_t len) { // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03 - return strcspn(str, "`$\\\"!"); + size_t ret = strcspn(str, "`$\\\"!"); + return ret < len ? ret : len; } /** "Quote" a string for the shell. */ -static char *double_quote(char *dest, char *end, const char *str) { +static char *double_quote(char *dest, char *end, const char *str, size_t len) { dest = xstpecpy(dest, end, "\""); - dest = xstpecpy(dest, end, str); + dest = xstpencpy(dest, end, str, len); return xstpecpy(dest, end, "\""); } /** 'Quote' a string for the shell. */ -static char *single_quote(char *dest, char *end, const char *str) { +static char *single_quote(char *dest, char *end, const char *str, size_t len) { bool open = false; - while (*str) { - size_t len = strcspn(str, "'"); - if (len > 0) { + while (len > 0) { + size_t chunk = strcspn(str, "'"); + chunk = chunk < len ? chunk : len; + if (chunk > 0) { if (!open) { dest = xstpecpy(dest, end, "'"); open = true; } - dest = xstpencpy(dest, end, str, len); - str += len; + dest = xstpencpy(dest, end, str, chunk); + str += chunk; + len -= chunk; } - while (*str == '\'') { + while (len > 0 && *str == '\'') { if (open) { dest = xstpecpy(dest, end, "'"); open = false; } dest = xstpecpy(dest, end, "\\'"); ++str; + --len; } } if (open) { dest = xstpecpy(dest, end, "'"); } + return dest; } -char *wordesc(const char *str) { - size_t len = strlen(str); +char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags) { + return wordnesc(dest, end, str, SIZE_MAX, flags); +} - // Worst case: every char is replaced with $'\xXX', so at most a 7x growth - size_t max_size = 7 * len + 3; - char *ret = malloc(max_size); - if (!ret) { - return NULL; - } - char *cur = ret; - char *end = ret + max_size; +char *wordnesc(char *dest, char *end, const char *str, size_t n, enum wesc_flags flags) { + size_t len = strnlen(str, n); + char *start = dest; - if (printable_len(str, len) < len) { + if (printable_len(str, len, flags) < len) { // String contains unprintable chars, use $'this\x7Fsyntax' - cur = dollar_quote(cur, end, str, len); - } else if (bare_len(str) == len) { + dest = dollar_quote(dest, end, str, len, flags); + } else if (!(flags & WESC_SHELL) || bare_len(str, len) == len) { // Whole string is safe as a bare word - cur = xstpecpy(cur, end, str); - } else if (quotable_len(str) == len) { + dest = xstpencpy(dest, end, str, len); + } else if (quotable_len(str, len) == len) { // Whole string is safe to double-quote - cur = double_quote(cur, end, str); + dest = double_quote(dest, end, str, len); } else { // Single-quote the whole string - cur = single_quote(cur, end, str); + dest = single_quote(dest, end, str, len); } - if (cur == ret) { - cur = xstpecpy(cur, end, "\"\""); + if (dest == start) { + dest = xstpecpy(dest, end, "\"\""); } - bfs_assert(cur != end, "Result truncated!"); - return ret; + return dest; } diff --git a/src/bfstd.h b/src/bfstd.h index 832db66..fb77399 100644 --- a/src/bfstd.h +++ b/src/bfstd.h @@ -319,15 +319,54 @@ size_t xstrwidth(const char *str); // #include +/** + * Flags for wordesc(). + */ +enum wesc_flags { + /** + * Escape special characters so that the shell will treat the escaped + * string as a single word. + */ + WESC_SHELL = 1 << 0, + /** + * Escape special characters so that the escaped string is safe to print + * to a TTY. + */ + WESC_TTY = 1 << 1, +}; + /** * Escape a string as a single shell word. * - * @param str + * @param dest + * The destination string to fill. + * @param end + * The end of the destination buffer. + * @param src + * The string to escape. + * @param flags + * Controls which characters to escape. + * @return + * The new NUL terminator of the destination, or `end` on truncation. + */ +char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags); + +/** + * Escape a string as a single shell word. + * + * @param dest + * The destination string to fill. + * @param end + * The end of the destination buffer. + * @param src * The string to escape. + * @param n + * The maximum length of the string. + * @param flags + * Controls which characters to escape. * @return - * A string that a shell would evaluate to str, dynamically allocated, - * or NULL on failure. + * The new NUL terminator of the destination, or `end` on truncation. */ -char *wordesc(const char *str); +char *wordnesc(char *dest, char *end, const char *str, size_t n, enum wesc_flags flags); #endif // BFS_BFSTD_H diff --git a/src/color.c b/src/color.c index 2e039f4..0f5829f 100644 --- a/src/color.c +++ b/src/color.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -891,6 +892,11 @@ static int print_reset(CFILE *cfile) { } } +/** Print a shell-escaped string. */ +static int print_wordesc(CFILE *cfile, const char *str, size_t n, enum wesc_flags flags) { + return dstrnescat(&cfile->buffer, str, n, flags); +} + /** Print a string with an optional color. */ static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *str, size_t len) { if (print_esc(cfile, esc) != 0) { @@ -1069,18 +1075,6 @@ static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) { return ret; } -/** Print an shell-escaped string. */ -static int print_wordesc(CFILE *cfile, const char *str) { - char *esc = wordesc(str); - if (!esc) { - return -1; - } - - int ret = dstrcat(&cfile->buffer, esc); - free(esc); - return ret; -} - /** Format some colored output to the buffer. */ BFS_FORMATTER(2, 3) static int cbuff(CFILE *cfile, const char *format, ...); @@ -1224,7 +1218,7 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) { case 'p': switch (*++i) { case 'q': - if (print_wordesc(cfile, va_arg(args, const char *)) != 0) { + if (print_wordesc(cfile, va_arg(args, const char *), SIZE_MAX, WESC_SHELL | WESC_TTY) != 0) { return -1; } break; diff --git a/src/diag.c b/src/diag.c index acea9ad..0590847 100644 --- a/src/diag.c +++ b/src/diag.c @@ -2,6 +2,7 @@ // SPDX-License-Identifier: 0BSD #include "diag.h" +#include "alloc.h" #include "bfstd.h" #include "ctx.h" #include "color.h" @@ -114,7 +115,7 @@ bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag) { } /** Recursive part of highlight_expr(). */ -static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool *args) { +static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool args[]) { if (!expr) { return false; } @@ -141,7 +142,7 @@ static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs } /** Highlight an expression in the command line. */ -static bool highlight_expr(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool *args) { +static bool highlight_expr(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool args[]) { for (size_t i = 0; i < ctx->argc; ++i) { args[i] = false; } @@ -150,21 +151,21 @@ static bool highlight_expr(const struct bfs_ctx *ctx, const struct bfs_expr *exp } /** Print a highlighted portion of the command line. */ -static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool *args, bool warning) { +static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool args[], bool warning) { if (warning) { bfs_warning_prefix(ctx); } else { bfs_error_prefix(ctx); } - char *argv[ctx->argc]; + char **argv = ZALLOC_ARRAY(char *, ctx->argc); + if (!argv) { + return; + } + for (size_t i = 0; i < ctx->argc; ++i) { - argv[i] = wordesc(ctx->argv[i]); - if (!argv[i]) { - for (size_t j = 0; j < i; ++j) { - free(argv[j]); - } - return; + if (dstrescat(&argv[i], ctx->argv[i], WESC_SHELL | WESC_TTY) != 0) { + goto done; } } @@ -223,12 +224,14 @@ static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool *args, bool warn cfprintf(ctx->cerr, "\n"); +done: for (size_t i = 0; i < ctx->argc; ++i) { - free(argv[i]); + dstrfree(argv[i]); } + free(argv); } -void bfs_argv_error(const struct bfs_ctx *ctx, const bool *args) { +void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]) { bfs_argv_diag(ctx, args, false); } @@ -239,7 +242,7 @@ void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr) { } } -bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool *args) { +bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]) { if (!ctx->warn) { return false; } diff --git a/src/diag.h b/src/diag.h index c909da5..e019db0 100644 --- a/src/diag.h +++ b/src/diag.h @@ -149,7 +149,7 @@ bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag); /** * Highlight parts of the command line in an error message. */ -void bfs_argv_error(const struct bfs_ctx *ctx, const bool *args); +void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]); /** * Highlight parts of an expression in an error message. @@ -159,7 +159,7 @@ void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr); /** * Highlight parts of the command line in a warning message. */ -bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool *args); +bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]); /** * Highlight parts of an expression in a warning message. diff --git a/src/dstring.c b/src/dstring.c index dada70b..60a7df9 100644 --- a/src/dstring.c +++ b/src/dstring.c @@ -232,6 +232,28 @@ fail: return -1; } +int dstrescat(char **dest, const char *str, enum wesc_flags flags) { + return dstrnescat(dest, str, SIZE_MAX, flags); +} + +int dstrnescat(char **dest, const char *str, size_t n, enum wesc_flags flags) { + size_t len = *dest ? dstrlen(*dest) : 0; + + // Worst case growth is `ccc...` => $'\xCC\xCC\xCC...' + n = strnlen(str, n); + size_t cap = len + 4 * n + 3; + if (dstreserve(dest, cap) != 0) { + return -1; + } + + char *cur = *dest + len; + char *end = *dest + cap + 1; + cur = wordnesc(cur, end, str, n, flags); + bfs_assert(cur != end, "wordesc() result truncated"); + + return dstresize(dest, cur - *dest); +} + void dstrfree(char *dstr) { if (dstr) { free(dstrheader(dstr)); diff --git a/src/dstring.h b/src/dstring.h index 2673f1b..88ca79f 100644 --- a/src/dstring.h +++ b/src/dstring.h @@ -8,6 +8,7 @@ #ifndef BFS_DSTRING_H #define BFS_DSTRING_H +#include "bfstd.h" #include "config.h" #include #include @@ -261,6 +262,36 @@ int dstrcatf(char **str, const char *format, ...); BFS_FORMATTER(2, 0) int dstrvcatf(char **str, const char *format, va_list args); +/** + * Concatenate while shell-escaping. + * + * @param dest + * The destination dynamic string. + * @param str + * The string to escape. + * @param flags + * Flags for wordesc(). + * @return + * 0 on success, -1 on failure. + */ +int dstrescat(char **dest, const char *str, enum wesc_flags flags); + +/** + * Concatenate while shell-escaping. + * + * @param dest + * The destination dynamic string. + * @param str + * The string to escape. + * @param n + * The maximum length of the string. + * @param flags + * Flags for wordesc(). + * @return + * 0 on success, -1 on failure. + */ +int dstrnescat(char **dest, const char *str, size_t n, enum wesc_flags flags); + /** * Free a dynamic string. * -- cgit v1.2.3