summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2022-02-24 15:56:08 -0500
committerTavian Barnes <tavianator@tavianator.com>2022-02-24 16:28:58 -0500
commit82f7f9ee1849947ed6de227279e623d8fc3a1ee1 (patch)
tree479c64bf8d92cfdd1ccba4bd679fb534a943c6e0
parentb490dc534eedcc9878d4962e6d02baf4217a712f (diff)
downloadbfs-82f7f9ee1849947ed6de227279e623d8fc3a1ee1.tar.xz
regex: Rework error handling
-rw-r--r--eval.c9
-rw-r--r--parse.c25
-rw-r--r--regex.c118
-rw-r--r--regex.h30
-rw-r--r--util.c11
5 files changed, 102 insertions, 91 deletions
diff --git a/eval.c b/eval.c
index e812dc6..321a8d9 100644
--- a/eval.c
+++ b/eval.c
@@ -828,10 +828,9 @@ bool eval_quit(const struct expr *expr, struct eval_state *state) {
bool eval_regex(const struct expr *expr, struct eval_state *state) {
const char *path = state->ftwbuf->path;
- int err;
- bool ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR, &err);
- if (err) {
- char *str = bfs_regerror(err, expr->regex);
+ int ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR);
+ if (ret < 0) {
+ char *str = bfs_regerror(expr->regex);
if (str) {
eval_error(state, "%s.\n", str);
free(str);
@@ -840,7 +839,7 @@ bool eval_regex(const struct expr *expr, struct eval_state *state) {
}
}
- return ret;
+ return ret > 0;
}
/**
diff --git a/parse.c b/parse.c
index 066784c..826d325 100644
--- a/parse.c
+++ b/parse.c
@@ -2269,24 +2269,25 @@ static struct expr *parse_regex(struct parser_state *state, int flags, int arg2)
goto fail;
}
- int err;
- expr->regex = bfs_regcomp(expr->sdata, state->regex_type, flags, &err);
- if (!expr->regex) {
- char *str = bfs_regerror(err, NULL);
- if (str) {
- parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str);
- free(str);
- } else {
+ if (bfs_regcomp(&expr->regex, expr->sdata, state->regex_type, flags) != 0) {
+ if (!expr->regex) {
+ parse_perror(state, "bfs_regcomp()");
+ goto fail;
+ }
+
+ char *str = bfs_regerror(expr->regex);
+ if (!str) {
parse_perror(state, "bfs_regerror()");
+ goto fail;
}
- goto fail_regex;
+
+ parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str);
+ free(str);
+ goto fail;
}
return expr;
-fail_regex:
- free(expr->regex);
- expr->regex = NULL;
fail:
free_expr(expr);
return NULL;
diff --git a/regex.c b/regex.c
index 0852792..9785bf8 100644
--- a/regex.c
+++ b/regex.c
@@ -15,8 +15,9 @@
****************************************************************************/
#include "regex.h"
+#include "util.h"
#include <assert.h>
-#include <stdbool.h>
+#include <errno.h>
#include <stdlib.h>
#include <string.h>
@@ -29,18 +30,23 @@
struct bfs_regex {
#if BFS_WITH_ONIGURUMA
+ unsigned char *pattern;
OnigRegex impl;
+ int err;
+ OnigErrorInfo einfo;
#else
regex_t impl;
+ int err;
#endif
};
#if BFS_WITH_ONIGURUMA
/** Get (and initialize) the appropriate encoding for the current locale. */
-static OnigEncoding bfs_onig_encoding(int *err) {
+static int bfs_onig_encoding(OnigEncoding *penc) {
static OnigEncoding enc = NULL;
if (enc) {
- return enc;
+ *penc = enc;
+ return ONIG_NORMAL;
}
// Fall back to ASCII by default
@@ -103,27 +109,35 @@ static OnigEncoding bfs_onig_encoding(int *err) {
BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5);
}
- *err = onig_initialize(&enc, 1);
- if (*err != ONIG_NORMAL) {
+ int ret = onig_initialize(&enc, 1);
+ if (ret != ONIG_NORMAL) {
enc = NULL;
}
-
- return enc;
+ *penc = enc;
+ return ret;
}
#endif
-struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) {
- struct bfs_regex *regex = malloc(sizeof(*regex));
+int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) {
+ struct bfs_regex *regex = *preg = malloc(sizeof(*regex));
if (!regex) {
-#if BFS_WITH_ONIGURUMA
- *err = ONIGERR_MEMORY;
-#else
- *err = REG_ESPACE;
-#endif
- return NULL;
+ return -1;
}
#if BFS_WITH_ONIGURUMA
+ // onig_error_code_to_str() says
+ //
+ // don't call this after the pattern argument of onig_new() is freed
+ //
+ // so make a defensive copy.
+ regex->pattern = (unsigned char *)strdup(pattern);
+ if (!regex->pattern) {
+ goto fail;
+ }
+
+ regex->impl = NULL;
+ regex->err = ONIG_NORMAL;
+
OnigSyntaxType *syntax = NULL;
switch (type) {
case BFS_REGEX_POSIX_BASIC:
@@ -146,16 +160,16 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
options |= ONIG_OPTION_IGNORECASE;
}
- OnigEncoding enc = bfs_onig_encoding(err);
- if (!enc) {
- goto fail;
+ OnigEncoding enc;
+ regex->err = bfs_onig_encoding(&enc);
+ if (regex->err != ONIG_NORMAL) {
+ return -1;
}
- const unsigned char *uexpr = (const unsigned char *)expr;
- const unsigned char *end = uexpr + strlen(expr);
- *err = onig_new(&regex->impl, uexpr, end, options, enc, syntax, NULL);
- if (*err != ONIG_NORMAL) {
- goto fail;
+ const unsigned char *end = regex->pattern + strlen(pattern);
+ regex->err = onig_new(&regex->impl, regex->pattern, end, options, enc, syntax, &regex->einfo);
+ if (regex->err != ONIG_NORMAL) {
+ return -1;
}
#else
int cflags = 0;
@@ -166,7 +180,7 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
cflags |= REG_EXTENDED;
break;
default:
- *err = REG_BADPAT;
+ errno = EINVAL;
goto fail;
}
@@ -174,20 +188,26 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
cflags |= REG_ICASE;
}
- *err = regcomp(&regex->impl, expr, cflags);
- if (*err != 0) {
- goto fail;
+#if BFS_HAS_FEATURE(memory_sanitizer, false)
+ // https://github.com/google/sanitizers/issues/1496
+ memset(&regex->impl, 0, sizeof(regex->impl));
+#endif
+
+ regex->err = regcomp(&regex->impl, pattern, cflags);
+ if (regex->err != 0) {
+ return -1;
}
#endif
- return regex;
+ return 0;
fail:
free(regex);
- return NULL;
+ *preg = NULL;
+ return -1;
}
-bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) {
+int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) {
size_t len = strlen(str);
#if BFS_WITH_ONIGURUMA
@@ -198,8 +218,7 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag
//
// Do not pass invalid byte string in the regex character encoding.
if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) {
- *err = 0;
- return false;
+ return 0;
}
int ret;
@@ -210,19 +229,17 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag
}
if (ret >= 0) {
- *err = 0;
if (flags & BFS_REGEX_ANCHOR) {
return (size_t)ret == len;
} else {
- return true;
+ return 1;
}
} else if (ret == ONIG_MISMATCH) {
- *err = 0;
+ return 0;
} else {
- *err = ret;
+ regex->err = ret;
+ return -1;
}
-
- return false;
#else
regmatch_t match = {
.rm_so = 0,
@@ -236,19 +253,17 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag
int ret = regexec(&regex->impl, str, 1, &match, eflags);
if (ret == 0) {
- *err = 0;
if (flags & BFS_REGEX_ANCHOR) {
return match.rm_so == 0 && (size_t)match.rm_eo == len;
} else {
- return true;
+ return 1;
}
} else if (ret == REG_NOMATCH) {
- *err = 0;
+ return 0;
} else {
- *err = ret;
+ regex->err = ret;
+ return -1;
}
-
- return false;
#endif
}
@@ -256,6 +271,7 @@ void bfs_regfree(struct bfs_regex *regex) {
if (regex) {
#if BFS_WITH_ONIGURUMA
onig_free(regex->impl);
+ free(regex->pattern);
#else
regfree(&regex->impl);
#endif
@@ -263,20 +279,22 @@ void bfs_regfree(struct bfs_regex *regex) {
}
}
-char *bfs_regerror(int err, const struct bfs_regex *regex) {
+char *bfs_regerror(const struct bfs_regex *regex) {
+ if (!regex) {
+ return strdup(strerror(ENOMEM));
+ }
+
#if BFS_WITH_ONIGURUMA
unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
if (str) {
- onig_error_code_to_str(str, err);
+ onig_error_code_to_str(str, regex->err, &regex->einfo);
}
return (char *)str;
#else
- const regex_t *impl = regex ? &regex->impl : NULL;
-
- size_t len = regerror(err, impl, NULL, 0);
+ size_t len = regerror(regex->err, &regex->impl, NULL, 0);
char *str = malloc(len);
if (str) {
- regerror(err, impl, str, len);
+ regerror(regex->err, &regex->impl, str, len);
}
return str;
#endif
diff --git a/regex.h b/regex.h
index ed509e4..63cd120 100644
--- a/regex.h
+++ b/regex.h
@@ -18,8 +18,6 @@
#ifndef BFS_REGEX_H
#define BFS_REGEX_H
-#include <stdbool.h>
-
/**
* A compiled regular expression.
*/
@@ -54,34 +52,32 @@ enum bfs_regexec_flags {
/**
* Wrapper for regcomp() that supports additional regex types.
*
- * @param expr
+ * @param[out] preg
+ * Will hold the compiled regex.
+ * @param pattern
* The regular expression to compile.
* @param type
* The regular expression syntax to use.
* @param flags
* Regex compilation flags.
- * @param[out] err
- * Will hold the error code if compilation fails.
* @return
- * The compiled regular expression, or NULL on error.
+ * 0 on success, -1 on failure.
*/
-struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err);
+int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags);
/**
* Wrapper for regexec().
*
- * @param expr
+ * @param regex
* The regular expression to execute.
* @param str
* The string to match against.
* @param flags
* Regex execution flags.
- * @param[out] err
- * Will hold the error code if execution fails.
* @return
- * Whether the regex matched.
+ * 1 for a match, 0 for no match, -1 on failure.
*/
-bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err);
+int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags);
/**
* Free a compiled regex.
@@ -89,15 +85,13 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag
void bfs_regfree(struct bfs_regex *regex);
/**
- * Dynamically allocate a regex error message.
+ * Get a human-readable regex error message.
*
- * @param err
- * The error code to stringify.
* @param regex
- * The compiled regex, or NULL if compilation failed.
+ * The compiled regex.
* @return
- * A human-readable description of the error, allocated with malloc().
+ * A human-readable description of the error, which should be free()'d.
*/
-char *bfs_regerror(int err, const struct bfs_regex *regex);
+char *bfs_regerror(const struct bfs_regex *regex);
#endif // BFS_REGEX_H
diff --git a/util.c b/util.c
index 1b363cd..21921e8 100644
--- a/util.c
+++ b/util.c
@@ -263,15 +263,14 @@ static int xrpregex(nl_item item, const char *response) {
return -1;
}
- int err;
- struct bfs_regex *regex = bfs_regcomp(pattern, BFS_REGEX_POSIX_EXTENDED, 0, &err);
- if (!regex) {
- return -1;
+ struct bfs_regex *regex;
+ int ret = bfs_regcomp(&regex, pattern, BFS_REGEX_POSIX_EXTENDED, 0);
+ if (ret == 0) {
+ ret = bfs_regexec(regex, response, 0);
}
- int ret = bfs_regexec(regex, response, 0, &err);
bfs_regfree(regex);
- return err ? -1 : ret;
+ return ret;
}
/** Check if a response is affirmative or negative. */