summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c301
1 files changed, 0 insertions, 301 deletions
diff --git a/regex.c b/regex.c
deleted file mode 100644
index 9785bf8..0000000
--- a/regex.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/****************************************************************************
- * bfs *
- * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> *
- * *
- * Permission to use, copy, modify, and/or distribute this software for any *
- * purpose with or without fee is hereby granted. *
- * *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES *
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF *
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR *
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES *
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN *
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *
- ****************************************************************************/
-
-#include "regex.h"
-#include "util.h"
-#include <assert.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if BFS_WITH_ONIGURUMA
-# include <langinfo.h>
-# include <oniguruma.h>
-#else
-# include <regex.h>
-#endif
-
-struct bfs_regex {
-#if BFS_WITH_ONIGURUMA
- unsigned char *pattern;
- OnigRegex impl;
- int err;
- OnigErrorInfo einfo;
-#else
- regex_t impl;
- int err;
-#endif
-};
-
-#if BFS_WITH_ONIGURUMA
-/** Get (and initialize) the appropriate encoding for the current locale. */
-static int bfs_onig_encoding(OnigEncoding *penc) {
- static OnigEncoding enc = NULL;
- if (enc) {
- *penc = enc;
- return ONIG_NORMAL;
- }
-
- // Fall back to ASCII by default
- enc = ONIG_ENCODING_ASCII;
-
- // Oniguruma has no locale support, so try to guess the right encoding
- // from the current locale.
- const char *charmap = nl_langinfo(CODESET);
- if (charmap) {
-#define BFS_MAP_ENCODING(name, value) \
- do { \
- if (strcmp(charmap, name) == 0) { \
- enc = value; \
- } \
- } while (0)
-#define BFS_MAP_ENCODING2(name1, name2, value) \
- do { \
- BFS_MAP_ENCODING(name1, value); \
- BFS_MAP_ENCODING(name2, value); \
- } while (0)
-
- // These names were found with locale -m on Linux and FreeBSD
-#define BFS_MAP_ISO_8859(n) \
- BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n)
-
- BFS_MAP_ISO_8859(1);
- BFS_MAP_ISO_8859(2);
- BFS_MAP_ISO_8859(3);
- BFS_MAP_ISO_8859(4);
- BFS_MAP_ISO_8859(5);
- BFS_MAP_ISO_8859(6);
- BFS_MAP_ISO_8859(7);
- BFS_MAP_ISO_8859(8);
- BFS_MAP_ISO_8859(9);
- BFS_MAP_ISO_8859(10);
- BFS_MAP_ISO_8859(11);
- // BFS_MAP_ISO_8859(12);
- BFS_MAP_ISO_8859(13);
- BFS_MAP_ISO_8859(14);
- BFS_MAP_ISO_8859(15);
- BFS_MAP_ISO_8859(16);
-
- BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8);
-
-#define BFS_MAP_EUC(name) \
- BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name)
-
- BFS_MAP_EUC(JP);
- BFS_MAP_EUC(TW);
- BFS_MAP_EUC(KR);
- BFS_MAP_EUC(CN);
-
- BFS_MAP_ENCODING2("SHIFT_JIS", "SJIS", ONIG_ENCODING_SJIS);
-
- // BFS_MAP_ENCODING("KOI-8", ONIG_ENCODING_KOI8);
- BFS_MAP_ENCODING("KOI8-R", ONIG_ENCODING_KOI8_R);
-
- BFS_MAP_ENCODING("CP1251", ONIG_ENCODING_CP1251);
-
- BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5);
- }
-
- int ret = onig_initialize(&enc, 1);
- if (ret != ONIG_NORMAL) {
- enc = NULL;
- }
- *penc = enc;
- return ret;
-}
-#endif
-
-int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) {
- struct bfs_regex *regex = *preg = malloc(sizeof(*regex));
- if (!regex) {
- return -1;
- }
-
-#if BFS_WITH_ONIGURUMA
- // onig_error_code_to_str() says
- //
- // don't call this after the pattern argument of onig_new() is freed
- //
- // so make a defensive copy.
- regex->pattern = (unsigned char *)strdup(pattern);
- if (!regex->pattern) {
- goto fail;
- }
-
- regex->impl = NULL;
- regex->err = ONIG_NORMAL;
-
- OnigSyntaxType *syntax = NULL;
- switch (type) {
- case BFS_REGEX_POSIX_BASIC:
- syntax = ONIG_SYNTAX_POSIX_BASIC;
- break;
- case BFS_REGEX_POSIX_EXTENDED:
- syntax = ONIG_SYNTAX_POSIX_EXTENDED;
- break;
- case BFS_REGEX_EMACS:
- syntax = ONIG_SYNTAX_EMACS;
- break;
- case BFS_REGEX_GREP:
- syntax = ONIG_SYNTAX_GREP;
- break;
- }
- assert(syntax);
-
- OnigOptionType options = syntax->options;
- if (flags & BFS_REGEX_ICASE) {
- options |= ONIG_OPTION_IGNORECASE;
- }
-
- OnigEncoding enc;
- regex->err = bfs_onig_encoding(&enc);
- if (regex->err != ONIG_NORMAL) {
- return -1;
- }
-
- const unsigned char *end = regex->pattern + strlen(pattern);
- regex->err = onig_new(&regex->impl, regex->pattern, end, options, enc, syntax, &regex->einfo);
- if (regex->err != ONIG_NORMAL) {
- return -1;
- }
-#else
- int cflags = 0;
- switch (type) {
- case BFS_REGEX_POSIX_BASIC:
- break;
- case BFS_REGEX_POSIX_EXTENDED:
- cflags |= REG_EXTENDED;
- break;
- default:
- errno = EINVAL;
- goto fail;
- }
-
- if (flags & BFS_REGEX_ICASE) {
- cflags |= REG_ICASE;
- }
-
-#if BFS_HAS_FEATURE(memory_sanitizer, false)
- // https://github.com/google/sanitizers/issues/1496
- memset(&regex->impl, 0, sizeof(regex->impl));
-#endif
-
- regex->err = regcomp(&regex->impl, pattern, cflags);
- if (regex->err != 0) {
- return -1;
- }
-#endif
-
- return 0;
-
-fail:
- free(regex);
- *preg = NULL;
- return -1;
-}
-
-int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) {
- size_t len = strlen(str);
-
-#if BFS_WITH_ONIGURUMA
- const unsigned char *ustr = (const unsigned char *)str;
- const unsigned char *end = ustr + len;
-
- // The docs for onig_{match,search}() say
- //
- // Do not pass invalid byte string in the regex character encoding.
- if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) {
- return 0;
- }
-
- int ret;
- if (flags & BFS_REGEX_ANCHOR) {
- ret = onig_match(regex->impl, ustr, end, ustr, NULL, ONIG_OPTION_DEFAULT);
- } else {
- ret = onig_search(regex->impl, ustr, end, ustr, end, NULL, ONIG_OPTION_DEFAULT);
- }
-
- if (ret >= 0) {
- if (flags & BFS_REGEX_ANCHOR) {
- return (size_t)ret == len;
- } else {
- return 1;
- }
- } else if (ret == ONIG_MISMATCH) {
- return 0;
- } else {
- regex->err = ret;
- return -1;
- }
-#else
- regmatch_t match = {
- .rm_so = 0,
- .rm_eo = len,
- };
-
- int eflags = 0;
-#ifdef REG_STARTEND
- eflags |= REG_STARTEND;
-#endif
-
- int ret = regexec(&regex->impl, str, 1, &match, eflags);
- if (ret == 0) {
- if (flags & BFS_REGEX_ANCHOR) {
- return match.rm_so == 0 && (size_t)match.rm_eo == len;
- } else {
- return 1;
- }
- } else if (ret == REG_NOMATCH) {
- return 0;
- } else {
- regex->err = ret;
- return -1;
- }
-#endif
-}
-
-void bfs_regfree(struct bfs_regex *regex) {
- if (regex) {
-#if BFS_WITH_ONIGURUMA
- onig_free(regex->impl);
- free(regex->pattern);
-#else
- regfree(&regex->impl);
-#endif
- free(regex);
- }
-}
-
-char *bfs_regerror(const struct bfs_regex *regex) {
- if (!regex) {
- return strdup(strerror(ENOMEM));
- }
-
-#if BFS_WITH_ONIGURUMA
- unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
- if (str) {
- onig_error_code_to_str(str, regex->err, &regex->einfo);
- }
- return (char *)str;
-#else
- size_t len = regerror(regex->err, &regex->impl, NULL, 0);
- char *str = malloc(len);
- if (str) {
- regerror(regex->err, &regex->impl, str, len);
- }
- return str;
-#endif
-}