diff options
Diffstat (limited to 'src/xregex.c')
-rw-r--r-- | src/xregex.c | 67 |
1 files changed, 58 insertions, 9 deletions
diff --git a/src/xregex.c b/src/xregex.c index c2711bc..796544e 100644 --- a/src/xregex.c +++ b/src/xregex.c @@ -1,19 +1,21 @@ // Copyright © Tavian Barnes <tavianator@tavianator.com> // SPDX-License-Identifier: 0BSD -#include "prelude.h" #include "xregex.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" #include "diag.h" #include "sanity.h" #include "thread.h" + #include <errno.h> #include <pthread.h> #include <stdlib.h> #include <string.h> -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA # include <langinfo.h> # include <oniguruma.h> #else @@ -21,7 +23,7 @@ #endif struct bfs_regex { -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA unsigned char *pattern; OnigRegex impl; int err; @@ -32,11 +34,17 @@ struct bfs_regex { #endif }; -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA static int bfs_onig_status; static OnigEncoding bfs_onig_enc; +static OnigSyntaxType bfs_onig_syntax_awk; +static OnigSyntaxType bfs_onig_syntax_gnu_awk; +static OnigSyntaxType bfs_onig_syntax_emacs; +static OnigSyntaxType bfs_onig_syntax_egrep; +static OnigSyntaxType bfs_onig_syntax_gnu_find; + /** pthread_once() callback. */ static void bfs_onig_once(void) { // Fall back to ASCII by default @@ -103,6 +111,35 @@ static void bfs_onig_once(void) { if (bfs_onig_status != ONIG_NORMAL) { bfs_onig_enc = NULL; } + + // Compute the GNU extensions + OnigSyntaxType *ere = ONIG_SYNTAX_POSIX_EXTENDED; + OnigSyntaxType *gnu = ONIG_SYNTAX_GNU_REGEX; + unsigned int gnu_op = gnu->op & ~ere->op; + unsigned int gnu_op2 = gnu->op2 & ~ere->op2; + unsigned int gnu_behavior = gnu->behavior & ~ere->behavior; + + onig_copy_syntax(&bfs_onig_syntax_awk, ONIG_SYNTAX_POSIX_EXTENDED); + bfs_onig_syntax_awk.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL; + bfs_onig_syntax_awk.behavior |= ONIG_SYN_BACKSLASH_ESCAPE_IN_CC; + + onig_copy_syntax(&bfs_onig_syntax_gnu_awk, &bfs_onig_syntax_awk); + bfs_onig_syntax_gnu_awk.op |= gnu_op; + bfs_onig_syntax_gnu_awk.op2 |= gnu_op2; + bfs_onig_syntax_gnu_awk.behavior |= gnu_behavior; + bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS; + bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS; + + // https://github.com/kkos/oniguruma/issues/296 + onig_copy_syntax(&bfs_onig_syntax_emacs, ONIG_SYNTAX_EMACS); + bfs_onig_syntax_emacs.op2 |= ONIG_SYN_OP2_QMARK_GROUP_EFFECT; + + onig_copy_syntax(&bfs_onig_syntax_egrep, ONIG_SYNTAX_POSIX_EXTENDED); + bfs_onig_syntax_egrep.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL; + bfs_onig_syntax_egrep.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS; + + onig_copy_syntax(&bfs_onig_syntax_gnu_find, &bfs_onig_syntax_emacs); + bfs_onig_syntax_gnu_find.options |= ONIG_OPTION_MULTILINE; } /** Initialize Oniguruma. */ @@ -121,7 +158,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ return -1; } -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA // onig_error_code_to_str() says // // don't call this after the pattern argument of onig_new() is freed @@ -143,12 +180,24 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ case BFS_REGEX_POSIX_EXTENDED: syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; + case BFS_REGEX_AWK: + syntax = &bfs_onig_syntax_awk; + break; + case BFS_REGEX_GNU_AWK: + syntax = &bfs_onig_syntax_gnu_awk; + break; case BFS_REGEX_EMACS: - syntax = ONIG_SYNTAX_EMACS; + syntax = &bfs_onig_syntax_emacs; break; case BFS_REGEX_GREP: syntax = ONIG_SYNTAX_GREP; break; + case BFS_REGEX_EGREP: + syntax = &bfs_onig_syntax_egrep; + break; + case BFS_REGEX_GNU_FIND: + syntax = &bfs_onig_syntax_gnu_find; + break; } bfs_assert(syntax, "Invalid regex type"); @@ -204,7 +253,7 @@ fail: int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { size_t len = strlen(str); -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA const unsigned char *ustr = (const unsigned char *)str; const unsigned char *end = ustr + len; @@ -263,7 +312,7 @@ int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags void bfs_regfree(struct bfs_regex *regex) { if (regex) { -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA onig_free(regex->impl); free(regex->pattern); #else @@ -278,7 +327,7 @@ char *bfs_regerror(const struct bfs_regex *regex) { return strdup(xstrerror(ENOMEM)); } -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); if (str) { onig_error_code_to_str(str, regex->err, ®ex->einfo); |