From 2d4d6787c4bc62042be4a58a4791074ab2c6a89e Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Fri, 4 Feb 2022 11:23:27 -0500 Subject: regex: Add support for emacs and grep types --- Makefile | 1 + eval.c | 4 +-- main.c | 1 + parse.c | 26 ++++++++++++------ regex.c | 62 ++++++++++++++++++++++++++++++++++++++++++ regex.h | 38 ++++++++++++++++++++++++++ tests.sh | 18 ++++++++++++ tests/test_regextype_emacs.out | 6 ++++ tests/test_regextype_grep.out | 4 +++ util.c | 11 +------- util.h | 11 -------- 11 files changed, 151 insertions(+), 31 deletions(-) create mode 100644 regex.c create mode 100644 tests/test_regextype_emacs.out create mode 100644 tests/test_regextype_grep.out diff --git a/Makefile b/Makefile index 497bc70..79a74ee 100644 --- a/Makefile +++ b/Makefile @@ -190,6 +190,7 @@ bfs: \ parse.o \ printf.o \ pwcache.o \ + regex.o \ spawn.o \ stat.o \ time.o \ diff --git a/eval.c b/eval.c index 5babf27..f7de940 100644 --- a/eval.c +++ b/eval.c @@ -841,12 +841,12 @@ bool eval_regex(const struct expr *expr, struct eval_state *state) { if (err == 0) { return match.rm_so == 0 && (size_t)match.rm_eo == len; } else if (err != REG_NOMATCH) { - char *str = xregerror(err, expr->regex); + char *str = bfs_regerror(err, expr->regex); if (str) { eval_error(state, "%s.\n", str); free(str); } else { - eval_error(state, "xregerror(): %m.\n"); + eval_error(state, "bfs_regerror(): %m.\n"); } *state->ret = EXIT_FAILURE; diff --git a/main.c b/main.c index ade5358..2ad24a4 100644 --- a/main.c +++ b/main.c @@ -43,6 +43,7 @@ * - fsade.[ch] (a facade over non-standard filesystem features) * - mtab.[ch] (parses the system's mount table) * - pwcache.[ch] (a cache for the user/group tables) + * - regex.[ch] (regular expression support) * - spawn.[ch] (spawns processes) * - stat.[ch] (wraps stat(), or statx() on Linux) * - time.[ch] (date/time handling utilities) diff --git a/parse.c b/parse.c index ce2ea21..38f63ce 100644 --- a/parse.c +++ b/parse.c @@ -231,7 +231,7 @@ struct parser_state 
{ const char *command; /** The current regex flags to use. */ - int regex_flags; + enum bfs_regex_type regex_type; /** Whether stdout is a terminal. */ bool stdout_tty; @@ -2279,14 +2279,14 @@ static struct expr *parse_regex(struct parser_state *state, int flags, int arg2) goto fail; } - int err = regcomp(expr->regex, expr->sdata, state->regex_flags | flags); + int err = bfs_regcomp(expr->regex, expr->sdata, flags, state->regex_type); if (err != 0) { - char *str = xregerror(err, NULL); + char *str = bfs_regerror(err, NULL); if (str) { parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str); free(str); } else { - parse_perror(state, "xregerror()"); + parse_perror(state, "bfs_regerror()"); } goto fail_regex; } @@ -2305,7 +2305,7 @@ fail: * Parse -E. */ static struct expr *parse_regex_extended(struct parser_state *state, int arg1, int arg2) { - state->regex_flags = REG_EXTENDED; + state->regex_type = BFS_REGEX_POSIX_EXTENDED; return parse_nullary_flag(state); } @@ -2327,9 +2327,15 @@ static struct expr *parse_regextype(struct parser_state *state, int arg1, int ar if (strcmp(type, "posix-basic") == 0 || strcmp(type, "ed") == 0 || strcmp(type, "sed") == 0) { - state->regex_flags = 0; + state->regex_type = BFS_REGEX_POSIX_BASIC; } else if (strcmp(type, "posix-extended") == 0) { - state->regex_flags = REG_EXTENDED; + state->regex_type = BFS_REGEX_POSIX_EXTENDED; +#if BFS_WITH_ONIGURUMA + } else if (strcmp(type, "emacs") == 0) { + state->regex_type = BFS_REGEX_EMACS; + } else if (strcmp(type, "grep") == 0) { + state->regex_type = BFS_REGEX_GREP; +#endif } else if (strcmp(type, "help") == 0) { state->just_info = true; cfile = ctx->cout; @@ -2346,6 +2352,10 @@ list_types: cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n"); cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n"); cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n"); +#if 
BFS_WITH_ONIGURUMA + cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n"); + cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n"); +#endif cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n"); return NULL; } @@ -3754,7 +3764,7 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) { .ctx = ctx, .argv = ctx->argv + 1, .command = ctx->argv[0], - .regex_flags = 0, + .regex_type = BFS_REGEX_POSIX_BASIC, .stdout_tty = stdout_tty, .interactive = stdin_tty && stderr_tty, .stdin_consumed = false, diff --git a/regex.c b/regex.c new file mode 100644 index 0000000..f8bd833 --- /dev/null +++ b/regex.c @@ -0,0 +1,62 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2022 Tavian Barnes * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* + ****************************************************************************/ + +#include "regex.h" +#include <stdlib.h> + +int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_type type) { +#if BFS_WITH_ONIGURUMA + // Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set + cflags |= REG_EXTENDED; + + switch (type) { + case BFS_REGEX_POSIX_BASIC: + onig_set_default_syntax(ONIG_SYNTAX_POSIX_BASIC); + break; + case BFS_REGEX_POSIX_EXTENDED: + onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED); + break; + case BFS_REGEX_EMACS: + onig_set_default_syntax(ONIG_SYNTAX_EMACS); + break; + case BFS_REGEX_GREP: + onig_set_default_syntax(ONIG_SYNTAX_GREP); + break; + } +#else + switch (type) { + case BFS_REGEX_POSIX_BASIC: + cflags &= ~REG_EXTENDED; + break; + case BFS_REGEX_POSIX_EXTENDED: + cflags |= REG_EXTENDED; + break; + default: + return REG_BADPAT; + } +#endif + + return regcomp(preg, regex, cflags); +} + +char *bfs_regerror(int err, const regex_t *regex) { + size_t len = regerror(err, regex, NULL, 0); + char *str = malloc(len); + if (str) { + regerror(err, regex, str, len); + } + return str; +} diff --git a/regex.h b/regex.h index 1a95239..b466ba8 100644 --- a/regex.h +++ b/regex.h @@ -24,4 +24,42 @@ # include <regex.h> #endif +/** + * Regex syntax flavors. + */ +enum bfs_regex_type { + BFS_REGEX_POSIX_BASIC, + BFS_REGEX_POSIX_EXTENDED, + BFS_REGEX_EMACS, + BFS_REGEX_GREP, +}; + +/** + * Wrapper for regcomp() that supports additional regex types. + * + * @param preg + * The compiled regex. + * @param regex + * The regular expression to compile. + * @param cflags + * Regex compilation flags. + * @param type + * The regular expression syntax to use. + * @return + * 0 on success, or an error code on failure. + */ +int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_type type); + +/** + * Dynamically allocate a regex error message. + * + * @param err + * The error code to stringify. 
+ * @param regex + * The compiled regex, or NULL if compilation failed. + * @return + * A human-readable description of the error, allocated with malloc(). + */ +char *bfs_regerror(int err, const regex_t *regex); + #endif // BFS_REGEX_H diff --git a/tests.sh b/tests.sh index 940187e..c83b6c6 100755 --- a/tests.sh +++ b/tests.sh @@ -624,6 +624,8 @@ gnu_tests=( test_regextype_posix_basic test_regextype_posix_extended test_regextype_ed + test_regextype_emacs + test_regextype_grep test_regextype_sed test_samefile @@ -2107,6 +2109,22 @@ function test_regextype_ed() { bfs_diff -regextype ed -regex '\./\((\)' } +function test_regextype_emacs() { + if fail quiet invoke_bfs -regextype emacs -quit; then + return 0 + fi + + bfs_diff basic -regextype emacs -regex '.*/\(f+o?o?\|bar\)' +} + +function test_regextype_grep() { + if fail quiet invoke_bfs -regextype grep -quit; then + return 0 + fi + + bfs_diff basic -regextype grep -regex '.*/f\+o\?o\?' +} + function test_regextype_sed() { cd weirdnames bfs_diff -regextype sed -regex '\./\((\)' diff --git a/tests/test_regextype_emacs.out b/tests/test_regextype_emacs.out new file mode 100644 index 0000000..8cd18de --- /dev/null +++ b/tests/test_regextype_emacs.out @@ -0,0 +1,6 @@ +basic/e/f +basic/j/foo +basic/k/foo +basic/l/foo +basic/k/foo/bar +basic/l/foo/bar diff --git a/tests/test_regextype_grep.out b/tests/test_regextype_grep.out new file mode 100644 index 0000000..a9e5d42 --- /dev/null +++ b/tests/test_regextype_grep.out @@ -0,0 +1,4 @@ +basic/e/f +basic/j/foo +basic/k/foo +basic/l/foo diff --git a/util.c b/util.c index e737c09..9f74e5a 100644 --- a/util.c +++ b/util.c @@ -115,15 +115,6 @@ int pipe_cloexec(int pipefd[2]) { #endif } -char *xregerror(int err, const regex_t *regex) { - size_t len = regerror(err, regex, NULL, 0); - char *str = malloc(len); - if (str) { - regerror(err, regex, str, len); - } - return str; -} - /** Get the single character describing the given file type. 
*/ static char type_char(mode_t mode) { switch (mode & S_IFMT) { @@ -273,7 +264,7 @@ static int xrpregex(nl_item item, const char *response) { } regex_t regex; - int ret = regcomp(&regex, pattern, REG_EXTENDED); + int ret = bfs_regcomp(&regex, pattern, 0, BFS_REGEX_POSIX_EXTENDED); if (ret != 0) { return ret; } diff --git a/util.h b/util.h index c0551a2..0a1bf69 100644 --- a/util.h +++ b/util.h @@ -169,17 +169,6 @@ int dup_cloexec(int fd); */ int pipe_cloexec(int pipefd[2]); -/** - * Dynamically allocate a regex error message. - * - * @param err - * The error code to stringify. - * @param regex - * The (partially) compiled regex. - * @return A human-readable description of the error, allocated with malloc(). - */ -char *xregerror(int err, const regex_t *regex); - /** * Format a mode like ls -l (e.g. -rw-r--r--). * -- cgit v1.2.3