diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2022-02-21 16:32:23 -0500 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2022-02-21 16:32:23 -0500 |
commit | e5d5659884af4e2ebf9a788dd379825a470bd01d (patch) | |
tree | b607b565079dc5020562ee9762ca3235e35ecb0f | |
parent | 9754c1ab7ceebd41ffda5f8004e562f18006dc6c (diff) | |
download | bfs-e5d5659884af4e2ebf9a788dd379825a470bd01d.tar.xz |
regex: Use the real Oniguruma API, not the POSIX wrapper
Not every Oniguruma installation enables the POSIX wrapper, so we need
our own wrapper for portability. As well, older versions of Oniguruma
have symbol clashes with libc for the POSIX regex API, so using it can
be unsafe.
-rw-r--r-- | regex.c | 95 |
1 files changed, 84 insertions, 11 deletions
@@ -15,48 +15,79 @@ ****************************************************************************/ #include "regex.h" +#include <assert.h> #include <stdbool.h> #include <stdlib.h> #include <string.h> #if BFS_WITH_ONIGURUMA -# include <onigposix.h> +# include <oniguruma.h> #else # include <regex.h> #endif struct bfs_regex { +#if BFS_WITH_ONIGURUMA + OnigRegex impl; +#else regex_t impl; +#endif }; struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) { +#if BFS_WITH_ONIGURUMA + static bool onig_initialized = false; + if (!onig_initialized) { + OnigEncoding encs[] = {ONIG_ENCODING_UTF8}; + *err = onig_initialize(encs, sizeof(encs)/sizeof(encs[0])); + if (*err != ONIG_NORMAL) { + return NULL; + } + onig_initialized = true; + } +#endif + struct bfs_regex *regex = malloc(sizeof(*regex)); if (!regex) { +#if BFS_WITH_ONIGURUMA + *err = ONIGERR_MEMORY; +#else *err = REG_ESPACE; +#endif return NULL; } - int cflags = 0; - #if BFS_WITH_ONIGURUMA - // Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set - cflags |= REG_EXTENDED; - + OnigSyntaxType *syntax = NULL; switch (type) { case BFS_REGEX_POSIX_BASIC: - onig_set_default_syntax(ONIG_SYNTAX_POSIX_BASIC); + syntax = ONIG_SYNTAX_POSIX_BASIC; break; case BFS_REGEX_POSIX_EXTENDED: - onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED); + syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; case BFS_REGEX_EMACS: - onig_set_default_syntax(ONIG_SYNTAX_EMACS); + syntax = ONIG_SYNTAX_EMACS; break; case BFS_REGEX_GREP: - onig_set_default_syntax(ONIG_SYNTAX_GREP); + syntax = ONIG_SYNTAX_GREP; break; } + assert(syntax); + + OnigOptionType options = syntax->options; + if (flags & BFS_REGEX_ICASE) { + options |= ONIG_OPTION_IGNORECASE; + } + + const unsigned char *uexpr = (const unsigned char *)expr; + const unsigned char *end = uexpr + strlen(expr); + *err = onig_new(&regex->impl, uexpr, end, options, ONIG_ENCODING_UTF8, syntax, NULL); + if (*err != ONIG_NORMAL) 
{ + goto fail; + } #else + int cflags = 0; switch (type) { case BFS_REGEX_POSIX_BASIC: break; @@ -67,7 +98,6 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b *err = REG_BADPAT; goto fail; } -#endif if (flags & BFS_REGEX_ICASE) { cflags |= REG_ICASE; @@ -77,6 +107,7 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b if (*err != 0) { goto fail; } +#endif return regex; @@ -87,6 +118,35 @@ fail: bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) { size_t len = strlen(str); + +#if BFS_WITH_ONIGURUMA + const unsigned char *ustr = (const unsigned char *)str; + const unsigned char *end = ustr + len; + + OnigRegion *region = onig_region_new(); + if (!region) { + *err = ONIGERR_MEMORY; + return false; + } + + bool match = false; + int ret = onig_search(regex->impl, ustr, end, ustr, end, region, ONIG_OPTION_DEFAULT); + if (ret >= 0) { + *err = 0; + if (flags & BFS_REGEX_ANCHOR) { + match = region->beg[0] == 0 && (size_t)region->end[0] == len; + } else { + match = true; + } + } else if (ret == ONIG_MISMATCH) { + *err = 0; + } else { + *err = ret; + } + + onig_region_free(region, 1); + return match; +#else regmatch_t match = { .rm_so = 0, .rm_eo = len, @@ -114,16 +174,28 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag *err = ret; return false; } +#endif } void bfs_regfree(struct bfs_regex *regex) { if (regex) { +#if BFS_WITH_ONIGURUMA + onig_free(regex->impl); +#else regfree(&regex->impl); +#endif free(regex); } } char *bfs_regerror(int err, const struct bfs_regex *regex) { +#if BFS_WITH_ONIGURUMA + unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); + if (str) { + onig_error_code_to_str(str, err); + } + return (char *)str; +#else const regex_t *impl = regex ? 
&regex->impl : NULL; size_t len = regerror(err, impl, NULL, 0); @@ -132,4 +204,5 @@ char *bfs_regerror(int err, const struct bfs_regex *regex) { regerror(err, impl, str, len); } return str; +#endif } |