From 82f7f9ee1849947ed6de227279e623d8fc3a1ee1 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 24 Feb 2022 15:56:08 -0500 Subject: regex: Rework error handling --- eval.c | 9 +++-- parse.c | 25 +++++++------- regex.c | 118 +++++++++++++++++++++++++++++++++++++--------------------------- regex.h | 30 +++++++---------- util.c | 11 +++--- 5 files changed, 102 insertions(+), 91 deletions(-) diff --git a/eval.c b/eval.c index e812dc6..321a8d9 100644 --- a/eval.c +++ b/eval.c @@ -828,10 +828,9 @@ bool eval_quit(const struct expr *expr, struct eval_state *state) { bool eval_regex(const struct expr *expr, struct eval_state *state) { const char *path = state->ftwbuf->path; - int err; - bool ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR, &err); - if (err) { - char *str = bfs_regerror(err, expr->regex); + int ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR); + if (ret < 0) { + char *str = bfs_regerror(expr->regex); if (str) { eval_error(state, "%s.\n", str); free(str); @@ -840,7 +839,7 @@ bool eval_regex(const struct expr *expr, struct eval_state *state) { } } - return ret; + return ret > 0; } /** diff --git a/parse.c b/parse.c index 066784c..826d325 100644 --- a/parse.c +++ b/parse.c @@ -2269,24 +2269,25 @@ static struct expr *parse_regex(struct parser_state *state, int flags, int arg2) goto fail; } - int err; - expr->regex = bfs_regcomp(expr->sdata, state->regex_type, flags, &err); - if (!expr->regex) { - char *str = bfs_regerror(err, NULL); - if (str) { - parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str); - free(str); - } else { + if (bfs_regcomp(&expr->regex, expr->sdata, state->regex_type, flags) != 0) { + if (!expr->regex) { + parse_perror(state, "bfs_regcomp()"); + goto fail; + } + + char *str = bfs_regerror(expr->regex); + if (!str) { parse_perror(state, "bfs_regerror()"); + goto fail; } - goto fail_regex; + + parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str); + 
free(str); + goto fail; } return expr; -fail_regex: - free(expr->regex); - expr->regex = NULL; fail: free_expr(expr); return NULL; diff --git a/regex.c b/regex.c index 0852792..9785bf8 100644 --- a/regex.c +++ b/regex.c @@ -15,8 +15,9 @@ ****************************************************************************/ #include "regex.h" +#include "util.h" #include -#include +#include #include #include @@ -29,18 +30,23 @@ struct bfs_regex { #if BFS_WITH_ONIGURUMA + unsigned char *pattern; OnigRegex impl; + int err; + OnigErrorInfo einfo; #else regex_t impl; + int err; #endif }; #if BFS_WITH_ONIGURUMA /** Get (and initialize) the appropriate encoding for the current locale. */ -static OnigEncoding bfs_onig_encoding(int *err) { +static int bfs_onig_encoding(OnigEncoding *penc) { static OnigEncoding enc = NULL; if (enc) { - return enc; + *penc = enc; + return ONIG_NORMAL; } // Fall back to ASCII by default @@ -103,27 +109,35 @@ static OnigEncoding bfs_onig_encoding(int *err) { BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5); } - *err = onig_initialize(&enc, 1); - if (*err != ONIG_NORMAL) { + int ret = onig_initialize(&enc, 1); + if (ret != ONIG_NORMAL) { enc = NULL; } - - return enc; + *penc = enc; + return ret; } #endif -struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) { - struct bfs_regex *regex = malloc(sizeof(*regex)); +int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) { + struct bfs_regex *regex = *preg = malloc(sizeof(*regex)); if (!regex) { -#if BFS_WITH_ONIGURUMA - *err = ONIGERR_MEMORY; -#else - *err = REG_ESPACE; -#endif - return NULL; + return -1; } #if BFS_WITH_ONIGURUMA + // onig_error_code_to_str() says + // + // don't call this after the pattern argument of onig_new() is freed + // + // so make a defensive copy. 
+ regex->pattern = (unsigned char *)strdup(pattern); + if (!regex->pattern) { + goto fail; + } + + regex->impl = NULL; + regex->err = ONIG_NORMAL; + OnigSyntaxType *syntax = NULL; switch (type) { case BFS_REGEX_POSIX_BASIC: @@ -146,16 +160,16 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b options |= ONIG_OPTION_IGNORECASE; } - OnigEncoding enc = bfs_onig_encoding(err); - if (!enc) { - goto fail; + OnigEncoding enc; + regex->err = bfs_onig_encoding(&enc); + if (regex->err != ONIG_NORMAL) { + return -1; } - const unsigned char *uexpr = (const unsigned char *)expr; - const unsigned char *end = uexpr + strlen(expr); - *err = onig_new(&regex->impl, uexpr, end, options, enc, syntax, NULL); - if (*err != ONIG_NORMAL) { - goto fail; + const unsigned char *end = regex->pattern + strlen(pattern); + regex->err = onig_new(&regex->impl, regex->pattern, end, options, enc, syntax, &regex->einfo); + if (regex->err != ONIG_NORMAL) { + return -1; } #else int cflags = 0; @@ -166,7 +180,7 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b cflags |= REG_EXTENDED; break; default: - *err = REG_BADPAT; + errno = EINVAL; goto fail; } @@ -174,20 +188,26 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b cflags |= REG_ICASE; } - *err = regcomp(&regex->impl, expr, cflags); - if (*err != 0) { - goto fail; +#if BFS_HAS_FEATURE(memory_sanitizer, false) + // https://github.com/google/sanitizers/issues/1496 + memset(&regex->impl, 0, sizeof(regex->impl)); +#endif + + regex->err = regcomp(&regex->impl, pattern, cflags); + if (regex->err != 0) { + return -1; } #endif - return regex; + return 0; fail: free(regex); - return NULL; + *preg = NULL; + return -1; } -bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) { +int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { size_t len = strlen(str); #if BFS_WITH_ONIGURUMA @@ -198,8 +218,7 @@ 
bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag // // Do not pass invalid byte string in the regex character encoding. if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) { - *err = 0; - return false; + return 0; } int ret; @@ -210,19 +229,17 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag } if (ret >= 0) { - *err = 0; if (flags & BFS_REGEX_ANCHOR) { return (size_t)ret == len; } else { - return true; + return 1; } } else if (ret == ONIG_MISMATCH) { - *err = 0; + return 0; } else { - *err = ret; + regex->err = ret; + return -1; } - - return false; #else regmatch_t match = { .rm_so = 0, @@ -236,19 +253,17 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag int ret = regexec(&regex->impl, str, 1, &match, eflags); if (ret == 0) { - *err = 0; if (flags & BFS_REGEX_ANCHOR) { return match.rm_so == 0 && (size_t)match.rm_eo == len; } else { - return true; + return 1; } } else if (ret == REG_NOMATCH) { - *err = 0; + return 0; } else { - *err = ret; + regex->err = ret; + return -1; } - - return false; #endif } @@ -256,6 +271,7 @@ void bfs_regfree(struct bfs_regex *regex) { if (regex) { #if BFS_WITH_ONIGURUMA onig_free(regex->impl); + free(regex->pattern); #else regfree(&regex->impl); #endif @@ -263,20 +279,22 @@ void bfs_regfree(struct bfs_regex *regex) { } } -char *bfs_regerror(int err, const struct bfs_regex *regex) { +char *bfs_regerror(const struct bfs_regex *regex) { + if (!regex) { + return strdup(strerror(ENOMEM)); + } + #if BFS_WITH_ONIGURUMA unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); if (str) { - onig_error_code_to_str(str, err); + onig_error_code_to_str(str, regex->err, &regex->einfo); } return (char *)str; #else - const regex_t *impl = regex ? 
&regex->impl : NULL; - - size_t len = regerror(err, impl, NULL, 0); + size_t len = regerror(regex->err, &regex->impl, NULL, 0); char *str = malloc(len); if (str) { - regerror(err, impl, str, len); + regerror(regex->err, &regex->impl, str, len); } return str; #endif diff --git a/regex.h b/regex.h index ed509e4..63cd120 100644 --- a/regex.h +++ b/regex.h @@ -18,8 +18,6 @@ #ifndef BFS_REGEX_H #define BFS_REGEX_H -#include - /** * A compiled regular expression. */ @@ -54,34 +52,32 @@ enum bfs_regexec_flags { /** * Wrapper for regcomp() that supports additional regex types. * - * @param expr + * @param[out] preg + * Will hold the compiled regex. + * @param pattern * The regular expression to compile. * @param type * The regular expression syntax to use. * @param flags * Regex compilation flags. - * @param[out] err - * Will hold the error code if compilation fails. * @return - * The compiled regular expression, or NULL on error. + * 0 on success, -1 on failure. */ -struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err); +int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags); /** * Wrapper for regexec(). * - * @param expr + * @param regex * The regular expression to execute. * @param str * The string to match against. * @param flags * Regex execution flags. - * @param[out] err - * Will hold the error code if execution fails. * @return - * Whether the regex matched. + * 1 for a match, 0 for no match, -1 on failure. */ -bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err); +int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags); /** * Free a compiled regex. @@ -89,15 +85,13 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag void bfs_regfree(struct bfs_regex *regex); /** - * Dynamically allocate a regex error message. 
+ * Get a human-readable regex error message. * - * @param err - * The error code to stringify. * @param regex - * The compiled regex, or NULL if compilation failed. + * The compiled regex. * @return - * A human-readable description of the error, allocated with malloc(). + * A human-readable description of the error, which should be free()'d. */ -char *bfs_regerror(int err, const struct bfs_regex *regex); +char *bfs_regerror(const struct bfs_regex *regex); #endif // BFS_REGEX_H diff --git a/util.c b/util.c index 1b363cd..21921e8 100644 --- a/util.c +++ b/util.c @@ -263,15 +263,14 @@ static int xrpregex(nl_item item, const char *response) { return -1; } - int err; - struct bfs_regex *regex = bfs_regcomp(pattern, BFS_REGEX_POSIX_EXTENDED, 0, &err); - if (!regex) { - return -1; + struct bfs_regex *regex; + int ret = bfs_regcomp(®ex, pattern, BFS_REGEX_POSIX_EXTENDED, 0); + if (ret == 0) { + ret = bfs_regexec(regex, response, 0); } - int ret = bfs_regexec(regex, response, 0, &err); bfs_regfree(regex); - return err ? -1 : ret; + return ret; } /** Check if a response is affirmative or negative. */ -- cgit v1.2.3