summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tavian Barnes <tavianator@tavianator.com> 2022-02-21 16:32:23 -0500
committer: Tavian Barnes <tavianator@tavianator.com> 2022-02-21 16:32:23 -0500
commit: e5d5659884af4e2ebf9a788dd379825a470bd01d (patch)
tree: b607b565079dc5020562ee9762ca3235e35ecb0f
parent: 9754c1ab7ceebd41ffda5f8004e562f18006dc6c (diff)
download: bfs-e5d5659884af4e2ebf9a788dd379825a470bd01d.tar.xz
regex: Use the real Oniguruma API, not the POSIX wrapper
Not every Oniguruma installation enables the POSIX wrapper, so we need our own wrapper for portability. As well, older versions of Oniguruma have symbol clashes with libc for the POSIX regex API, so using it can be unsafe.
-rw-r--r-- regex.c 95
1 files changed, 84 insertions, 11 deletions
diff --git a/regex.c b/regex.c
index d5c8346..a06d172 100644
--- a/regex.c
+++ b/regex.c
@@ -15,48 +15,79 @@
****************************************************************************/
#include "regex.h"
+#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#if BFS_WITH_ONIGURUMA
-# include <onigposix.h>
+# include <oniguruma.h>
#else
# include <regex.h>
#endif
struct bfs_regex {
+#if BFS_WITH_ONIGURUMA
+ OnigRegex impl;
+#else
regex_t impl;
+#endif
};
struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) {
+#if BFS_WITH_ONIGURUMA
+ static bool onig_initialized = false;
+ if (!onig_initialized) {
+ OnigEncoding encs[] = {ONIG_ENCODING_UTF8};
+ *err = onig_initialize(encs, sizeof(encs)/sizeof(encs[0]));
+ if (*err != ONIG_NORMAL) {
+ return NULL;
+ }
+ onig_initialized = true;
+ }
+#endif
+
struct bfs_regex *regex = malloc(sizeof(*regex));
if (!regex) {
+#if BFS_WITH_ONIGURUMA
+ *err = ONIGERR_MEMORY;
+#else
*err = REG_ESPACE;
+#endif
return NULL;
}
- int cflags = 0;
-
#if BFS_WITH_ONIGURUMA
- // Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set
- cflags |= REG_EXTENDED;
-
+ OnigSyntaxType *syntax = NULL;
switch (type) {
case BFS_REGEX_POSIX_BASIC:
- onig_set_default_syntax(ONIG_SYNTAX_POSIX_BASIC);
+ syntax = ONIG_SYNTAX_POSIX_BASIC;
break;
case BFS_REGEX_POSIX_EXTENDED:
- onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED);
+ syntax = ONIG_SYNTAX_POSIX_EXTENDED;
break;
case BFS_REGEX_EMACS:
- onig_set_default_syntax(ONIG_SYNTAX_EMACS);
+ syntax = ONIG_SYNTAX_EMACS;
break;
case BFS_REGEX_GREP:
- onig_set_default_syntax(ONIG_SYNTAX_GREP);
+ syntax = ONIG_SYNTAX_GREP;
break;
}
+ assert(syntax);
+
+ OnigOptionType options = syntax->options;
+ if (flags & BFS_REGEX_ICASE) {
+ options |= ONIG_OPTION_IGNORECASE;
+ }
+
+ const unsigned char *uexpr = (const unsigned char *)expr;
+ const unsigned char *end = uexpr + strlen(expr);
+ *err = onig_new(&regex->impl, uexpr, end, options, ONIG_ENCODING_UTF8, syntax, NULL);
+ if (*err != ONIG_NORMAL) {
+ goto fail;
+ }
#else
+ int cflags = 0;
switch (type) {
case BFS_REGEX_POSIX_BASIC:
break;
@@ -67,7 +98,6 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
*err = REG_BADPAT;
goto fail;
}
-#endif
if (flags & BFS_REGEX_ICASE) {
cflags |= REG_ICASE;
@@ -77,6 +107,7 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
if (*err != 0) {
goto fail;
}
+#endif
return regex;
@@ -87,6 +118,35 @@ fail:
bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) {
size_t len = strlen(str);
+
+#if BFS_WITH_ONIGURUMA
+ const unsigned char *ustr = (const unsigned char *)str;
+ const unsigned char *end = ustr + len;
+
+ OnigRegion *region = onig_region_new();
+ if (!region) {
+ *err = ONIGERR_MEMORY;
+ return false;
+ }
+
+ bool match = false;
+ int ret = onig_search(regex->impl, ustr, end, ustr, end, region, ONIG_OPTION_DEFAULT);
+ if (ret >= 0) {
+ *err = 0;
+ if (flags & BFS_REGEX_ANCHOR) {
+ match = region->beg[0] == 0 && (size_t)region->end[0] == len;
+ } else {
+ match = true;
+ }
+ } else if (ret == ONIG_MISMATCH) {
+ *err = 0;
+ } else {
+ *err = ret;
+ }
+
+ onig_region_free(region, 1);
+ return match;
+#else
regmatch_t match = {
.rm_so = 0,
.rm_eo = len,
@@ -114,16 +174,28 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag
*err = ret;
return false;
}
+#endif
}
void bfs_regfree(struct bfs_regex *regex) {
if (regex) {
+#if BFS_WITH_ONIGURUMA
+ onig_free(regex->impl);
+#else
regfree(&regex->impl);
+#endif
free(regex);
}
}
char *bfs_regerror(int err, const struct bfs_regex *regex) {
+#if BFS_WITH_ONIGURUMA
+ unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
+ if (str) {
+ onig_error_code_to_str(str, err);
+ }
+ return (char *)str;
+#else
const regex_t *impl = regex ? &regex->impl : NULL;
size_t len = regerror(err, impl, NULL, 0);
@@ -132,4 +204,5 @@ char *bfs_regerror(int err, const struct bfs_regex *regex) {
regerror(err, impl, str, len);
}
return str;
+#endif
}