From 9754c1ab7ceebd41ffda5f8004e562f18006dc6c Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Mon, 21 Feb 2022 15:25:27 -0500 Subject: regex: Wrap the POSIX API in a facade --- eval.c | 20 +++------------ expr.h | 2 +- parse.c | 17 +++---------- regex.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ regex.h | 64 ++++++++++++++++++++++++++++++++++++++---------- util.c | 24 +++++++++--------- util.h | 1 - 7 files changed, 152 insertions(+), 63 deletions(-) diff --git a/eval.c b/eval.c index f7de940..e812dc6 100644 --- a/eval.c +++ b/eval.c @@ -827,20 +827,10 @@ bool eval_quit(const struct expr *expr, struct eval_state *state) { */ bool eval_regex(const struct expr *expr, struct eval_state *state) { const char *path = state->ftwbuf->path; - size_t len = strlen(path); - regmatch_t match = { - .rm_so = 0, - .rm_eo = len, - }; - int flags = 0; -#ifdef REG_STARTEND - flags |= REG_STARTEND; -#endif - int err = regexec(expr->regex, path, 1, &match, flags); - if (err == 0) { - return match.rm_so == 0 && (size_t)match.rm_eo == len; - } else if (err != REG_NOMATCH) { + int err; + bool ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR, &err); + if (err) { char *str = bfs_regerror(err, expr->regex); if (str) { eval_error(state, "%s.\n", str); @@ -848,11 +838,9 @@ bool eval_regex(const struct expr *expr, struct eval_state *state) { } else { eval_error(state, "bfs_regerror(): %m.\n"); } - - *state->ret = EXIT_FAILURE; } - return false; + return ret; } /** diff --git a/expr.h b/expr.h index 6f0ebec..a660a6d 100644 --- a/expr.h +++ b/expr.h @@ -159,7 +159,7 @@ struct expr { CFILE *cfile; /** Optional compiled regex. */ - regex_t *regex; + struct bfs_regex *regex; /** Optional exec command. 
*/ struct bfs_exec *execbuf; diff --git a/parse.c b/parse.c index 38f63ce..066784c 100644 --- a/parse.c +++ b/parse.c @@ -105,11 +105,7 @@ void free_expr(struct expr *expr) { return; } - if (expr->regex) { - regfree(expr->regex); - free(expr->regex); - } - + bfs_regfree(expr->regex); bfs_printf_free(expr->printf); bfs_exec_free(expr->execbuf); @@ -2273,14 +2269,9 @@ static struct expr *parse_regex(struct parser_state *state, int flags, int arg2) goto fail; } - expr->regex = malloc(sizeof(regex_t)); + int err; + expr->regex = bfs_regcomp(expr->sdata, state->regex_type, flags, &err); if (!expr->regex) { - parse_perror(state, "malloc()"); - goto fail; - } - - int err = bfs_regcomp(expr->regex, expr->sdata, flags, state->regex_type); - if (err != 0) { char *str = bfs_regerror(err, NULL); if (str) { parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str); @@ -3134,7 +3125,7 @@ static const struct table_entry parse_table[] = { {"-iname", T_TEST, parse_name, true}, {"-inum", T_TEST, parse_inum}, {"-ipath", T_TEST, parse_path, true}, - {"-iregex", T_TEST, parse_regex, REG_ICASE}, + {"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE}, {"-iwholename", T_TEST, parse_path, true}, {"-links", T_TEST, parse_links}, {"-lname", T_TEST, parse_lname, false}, diff --git a/regex.c b/regex.c index f8bd833..d5c8346 100644 --- a/regex.c +++ b/regex.c @@ -15,9 +15,29 @@ ****************************************************************************/ #include "regex.h" +#include #include +#include + +#if BFS_WITH_ONIGURUMA +# include +#else +# include +#endif + +struct bfs_regex { + regex_t impl; +}; + +struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) { + struct bfs_regex *regex = malloc(sizeof(*regex)); + if (!regex) { + *err = REG_ESPACE; + return NULL; + } + + int cflags = 0; -int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_type type) { #if BFS_WITH_ONIGURUMA // 
Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set cflags |= REG_EXTENDED; @@ -39,24 +59,77 @@ int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_typ #else switch (type) { case BFS_REGEX_POSIX_BASIC: - cflags &= ~REG_EXTENDED; break; case BFS_REGEX_POSIX_EXTENDED: cflags |= REG_EXTENDED; break; default: - return REG_BADPAT; + *err = REG_BADPAT; + goto fail; + } +#endif + + if (flags & BFS_REGEX_ICASE) { + cflags |= REG_ICASE; + } + + *err = regcomp(&regex->impl, expr, cflags); + if (*err != 0) { + goto fail; + } + + return regex; + +fail: + free(regex); + return NULL; +} + +bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) { + size_t len = strlen(str); + regmatch_t match = { + .rm_so = 0, + .rm_eo = len, + }; + + int eflags = 0; +#ifdef REG_STARTEND + if (flags & BFS_REGEX_ANCHOR) { + eflags |= REG_STARTEND; } #endif - return regcomp(preg, regex, cflags); + int ret = regexec(&regex->impl, str, 1, &match, eflags); + if (ret == 0) { + *err = 0; + if (flags & BFS_REGEX_ANCHOR) { + return match.rm_so == 0 && (size_t)match.rm_eo == len; + } else { + return true; + } + } else if (ret == REG_NOMATCH) { + *err = 0; + return false; + } else { + *err = ret; + return false; + } +} + +void bfs_regfree(struct bfs_regex *regex) { + if (regex) { + regfree(&regex->impl); + free(regex); + } } -char *bfs_regerror(int err, const regex_t *regex) { - size_t len = regerror(err, regex, NULL, 0); +char *bfs_regerror(int err, const struct bfs_regex *regex) { + const regex_t *impl = regex ? 
&regex->impl : NULL; + + size_t len = regerror(err, impl, NULL, 0); char *str = malloc(len); if (str) { - regerror(err, regex, str, len); + regerror(err, impl, str, len); } return str; } diff --git a/regex.h b/regex.h index b466ba8..ed509e4 100644 --- a/regex.h +++ b/regex.h @@ -18,11 +18,12 @@ #ifndef BFS_REGEX_H #define BFS_REGEX_H -#if BFS_WITH_ONIGURUMA -# include -#else -# include -#endif +#include + +/** + * A compiled regular expression. + */ +struct bfs_regex; /** * Regex syntax flavors. @@ -34,21 +35,58 @@ enum bfs_regex_type { BFS_REGEX_GREP, }; +/** + * Regex compilation flags. + */ +enum bfs_regcomp_flags { + /** Treat the regex case-insensitively. */ + BFS_REGEX_ICASE = 1 << 0, +}; + +/** + * Regex execution flags. + */ +enum bfs_regexec_flags { + /** Only treat matches of the entire string as successful. */ + BFS_REGEX_ANCHOR = 1 << 0, +}; + /** * Wrapper for regcomp() that supports additional regex types. * - * @param preg - * The compiled regex. - * @param regex + * @param expr * The regular expression to compile. - * @param cflags - * Regex compilation flags. * @param type * The regular expression syntax to use. + * @param flags + * Regex compilation flags. + * @param[out] err + * Will hold the error code if compilation fails. * @return - * 0 on success, or an error code on failure. + * The compiled regular expression, or NULL on error. + */ +struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err); + +/** + * Wrapper for regexec(). + * + * @param regex + * The regular expression to execute. + * @param str + * The string to match against. + * @param flags + * Regex execution flags. + * @param[out] err + * Will hold the error code if execution fails. + * @return + * Whether the regex matched. + */ +bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err); + +/** + * Free a compiled regex. 
*/ -int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_type type); +void bfs_regfree(struct bfs_regex *regex); /** * Dynamically allocate a regex error message. @@ -60,6 +98,6 @@ int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_typ * @return * A human-readable description of the error, allocated with malloc(). */ -char *bfs_regerror(int err, const regex_t *regex); +char *bfs_regerror(int err, const struct bfs_regex *regex); #endif // BFS_REGEX_H diff --git a/util.c b/util.c index 9f74e5a..1b363cd 100644 --- a/util.c +++ b/util.c @@ -260,33 +260,33 @@ bool is_nonexistence_error(int error) { static int xrpregex(nl_item item, const char *response) { const char *pattern = nl_langinfo(item); if (!pattern) { - return REG_BADPAT; + return -1; } - regex_t regex; - int ret = bfs_regcomp(&regex, pattern, 0, BFS_REGEX_POSIX_EXTENDED); - if (ret != 0) { - return ret; + int err; + struct bfs_regex *regex = bfs_regcomp(pattern, BFS_REGEX_POSIX_EXTENDED, 0, &err); + if (!regex) { + return -1; } - ret = regexec(&regex, response, 0, NULL, 0); - regfree(&regex); - return ret; + int ret = bfs_regexec(regex, response, 0, &err); + bfs_regfree(regex); + return err ? -1 : ret; } /** Check if a response is affirmative or negative. */ static int xrpmatch(const char *response) { int ret = xrpregex(NOEXPR, response); - if (ret == 0) { + if (ret > 0) { return 0; - } else if (ret != REG_NOMATCH) { + } else if (ret < 0) { return -1; } ret = xrpregex(YESEXPR, response); - if (ret == 0) { + if (ret > 0) { return 1; - } else if (ret != REG_NOMATCH) { + } else if (ret < 0) { return -1; } diff --git a/util.h b/util.h index 0a1bf69..b780624 100644 --- a/util.h +++ b/util.h @@ -21,7 +21,6 @@ #ifndef BFS_UTIL_H #define BFS_UTIL_H -#include "regex.h" #include #include #include -- cgit v1.2.3