summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tavian Barnes <tavianator@tavianator.com> 2022-02-04 11:23:27 -0500
committer Tavian Barnes <tavianator@tavianator.com> 2022-02-04 11:28:22 -0500
commit 2d4d6787c4bc62042be4a58a4791074ab2c6a89e (patch)
tree 0753c783533afc2bc3362f73ba3e8c2e45f1e65b
parent d83ffb41c7f5d42bfb43a19e38ab3d1331f0d86e (diff)
download bfs-2d4d6787c4bc62042be4a58a4791074ab2c6a89e.tar.xz
regex: Add support for emacs and grep types
-rw-r--r-- Makefile 1
-rw-r--r-- eval.c 4
-rw-r--r-- main.c 1
-rw-r--r-- parse.c 26
-rw-r--r-- regex.c 62
-rw-r--r-- regex.h 38
-rwxr-xr-x tests.sh 18
-rw-r--r-- tests/test_regextype_emacs.out 6
-rw-r--r-- tests/test_regextype_grep.out 4
-rw-r--r-- util.c 11
-rw-r--r-- util.h 11
11 files changed, 151 insertions, 31 deletions
diff --git a/Makefile b/Makefile
index 497bc70..79a74ee 100644
--- a/Makefile
+++ b/Makefile
@@ -190,6 +190,7 @@ bfs: \
parse.o \
printf.o \
pwcache.o \
+ regex.o \
spawn.o \
stat.o \
time.o \
diff --git a/eval.c b/eval.c
index 5babf27..f7de940 100644
--- a/eval.c
+++ b/eval.c
@@ -841,12 +841,12 @@ bool eval_regex(const struct expr *expr, struct eval_state *state) {
if (err == 0) {
return match.rm_so == 0 && (size_t)match.rm_eo == len;
} else if (err != REG_NOMATCH) {
- char *str = xregerror(err, expr->regex);
+ char *str = bfs_regerror(err, expr->regex);
if (str) {
eval_error(state, "%s.\n", str);
free(str);
} else {
- eval_error(state, "xregerror(): %m.\n");
+ eval_error(state, "bfs_regerror(): %m.\n");
}
*state->ret = EXIT_FAILURE;
diff --git a/main.c b/main.c
index ade5358..2ad24a4 100644
--- a/main.c
+++ b/main.c
@@ -43,6 +43,7 @@
* - fsade.[ch] (a facade over non-standard filesystem features)
* - mtab.[ch] (parses the system's mount table)
* - pwcache.[ch] (a cache for the user/group tables)
+ * - regex.[ch] (regular expression support)
* - spawn.[ch] (spawns processes)
* - stat.[ch] (wraps stat(), or statx() on Linux)
* - time.[ch] (date/time handling utilities)
diff --git a/parse.c b/parse.c
index ce2ea21..38f63ce 100644
--- a/parse.c
+++ b/parse.c
@@ -231,7 +231,7 @@ struct parser_state {
const char *command;
/** The current regex flags to use. */
- int regex_flags;
+ enum bfs_regex_type regex_type;
/** Whether stdout is a terminal. */
bool stdout_tty;
@@ -2279,14 +2279,14 @@ static struct expr *parse_regex(struct parser_state *state, int flags, int arg2)
goto fail;
}
- int err = regcomp(expr->regex, expr->sdata, state->regex_flags | flags);
+ int err = bfs_regcomp(expr->regex, expr->sdata, flags, state->regex_type);
if (err != 0) {
- char *str = xregerror(err, NULL);
+ char *str = bfs_regerror(err, NULL);
if (str) {
parse_error(state, "${blu}%s${rs} ${bld}%s${rs}: %s.\n", expr->argv[0], expr->argv[1], str);
free(str);
} else {
- parse_perror(state, "xregerror()");
+ parse_perror(state, "bfs_regerror()");
}
goto fail_regex;
}
@@ -2305,7 +2305,7 @@ fail:
* Parse -E.
*/
static struct expr *parse_regex_extended(struct parser_state *state, int arg1, int arg2) {
- state->regex_flags = REG_EXTENDED;
+ state->regex_type = BFS_REGEX_POSIX_EXTENDED;
return parse_nullary_flag(state);
}
@@ -2327,9 +2327,15 @@ static struct expr *parse_regextype(struct parser_state *state, int arg1, int ar
if (strcmp(type, "posix-basic") == 0
|| strcmp(type, "ed") == 0
|| strcmp(type, "sed") == 0) {
- state->regex_flags = 0;
+ state->regex_type = BFS_REGEX_POSIX_BASIC;
} else if (strcmp(type, "posix-extended") == 0) {
- state->regex_flags = REG_EXTENDED;
+ state->regex_type = BFS_REGEX_POSIX_EXTENDED;
+#if BFS_WITH_ONIGURUMA
+ } else if (strcmp(type, "emacs") == 0) {
+ state->regex_type = BFS_REGEX_EMACS;
+ } else if (strcmp(type, "grep") == 0) {
+ state->regex_type = BFS_REGEX_GREP;
+#endif
} else if (strcmp(type, "help") == 0) {
state->just_info = true;
cfile = ctx->cout;
@@ -2346,6 +2352,10 @@ list_types:
cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n");
cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n");
cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n");
+#if BFS_WITH_ONIGURUMA
+ cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n");
+ cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n");
+#endif
cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n");
return NULL;
}
@@ -3754,7 +3764,7 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
.ctx = ctx,
.argv = ctx->argv + 1,
.command = ctx->argv[0],
- .regex_flags = 0,
+ .regex_type = BFS_REGEX_POSIX_BASIC,
.stdout_tty = stdout_tty,
.interactive = stdin_tty && stderr_tty,
.stdin_consumed = false,
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..f8bd833
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,62 @@
+/****************************************************************************
+ * bfs *
+ * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> *
+ * *
+ * Permission to use, copy, modify, and/or distribute this software for any *
+ * purpose with or without fee is hereby granted. *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES *
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF *
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR *
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES *
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN *
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *
+ ****************************************************************************/
+
+#include "regex.h"
+#include <stdlib.h>
+
+int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_type type) { // compile regex into preg using the syntax flavor named by type
+#if BFS_WITH_ONIGURUMA
+ // Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set
+ cflags |= REG_EXTENDED;
+
+ switch (type) { // NOTE(review): onig_set_default_syntax() appears to change a library-wide default; confirm this is fine if regexes are ever compiled concurrently
+ case BFS_REGEX_POSIX_BASIC:
+ onig_set_default_syntax(ONIG_SYNTAX_POSIX_BASIC);
+ break;
+ case BFS_REGEX_POSIX_EXTENDED:
+ onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED);
+ break;
+ case BFS_REGEX_EMACS:
+ onig_set_default_syntax(ONIG_SYNTAX_EMACS);
+ break;
+ case BFS_REGEX_GREP:
+ onig_set_default_syntax(ONIG_SYNTAX_GREP);
+ break;
+ } // no default case: an unrecognized type leaves the previously selected syntax in place
+#else
+ switch (type) { // without Oniguruma, the flavor can only be expressed through the REG_EXTENDED flag
+ case BFS_REGEX_POSIX_BASIC:
+ cflags &= ~REG_EXTENDED; // force BRE even if the caller passed REG_EXTENDED in cflags
+ break;
+ case BFS_REGEX_POSIX_EXTENDED:
+ cflags |= REG_EXTENDED;
+ break;
+ default: // emacs, grep, or any other type has no equivalent regcomp() flag
+ return REG_BADPAT; // reported as a pattern error; preg is not touched on this path
+ }
+#endif
+
+ return regcomp(preg, regex, cflags); // 0 on success, otherwise the regcomp() error code
+}
+
+char *bfs_regerror(int err, const regex_t *regex) { // returns a malloc()ed message for err, or NULL if the allocation fails
+ size_t len = regerror(err, regex, NULL, 0); // first call passes no buffer; its return value is used as the buffer size
+ char *str = malloc(len);
+ if (str) {
+ regerror(err, regex, str, len); // second call writes the message into the buffer
+ }
+ return str; // ownership passes to the caller (the visible callers in eval.c and parse.c free() it)
+}
diff --git a/regex.h b/regex.h
index 1a95239..b466ba8 100644
--- a/regex.h
+++ b/regex.h
@@ -24,4 +24,42 @@
# include <regex.h>
#endif
+/**
+ * Regex syntax flavors.
+ */
+enum bfs_regex_type {
+ BFS_REGEX_POSIX_BASIC, // POSIX basic regular expressions (BRE)
+ BFS_REGEX_POSIX_EXTENDED, // POSIX extended regular expressions (ERE)
+ BFS_REGEX_EMACS, // emacs syntax; bfs_regcomp() returns REG_BADPAT for it unless built with BFS_WITH_ONIGURUMA
+ BFS_REGEX_GREP, // grep syntax; bfs_regcomp() returns REG_BADPAT for it unless built with BFS_WITH_ONIGURUMA
+};
+
+/**
+ * Wrapper for regcomp() that supports additional regex types.
+ *
+ * @param preg
+ * The compiled regex.
+ * @param regex
+ * The regular expression to compile.
+ * @param cflags
+ * Regex compilation flags.
+ * @param type
+ * The regular expression syntax to use.
+ * @return
+ * 0 on success, or an error code on failure.
+ */
+int bfs_regcomp(regex_t *preg, const char *regex, int cflags, enum bfs_regex_type type);
+
+/**
+ * Dynamically allocate a regex error message.
+ *
+ * @param err
+ * The error code to stringify.
+ * @param regex
+ * The compiled regex, or NULL if compilation failed.
+ * @return
+ * A human-readable description of the error, allocated with malloc().
+ */
+char *bfs_regerror(int err, const regex_t *regex);
+
#endif // BFS_REGEX_H
diff --git a/tests.sh b/tests.sh
index 940187e..c83b6c6 100755
--- a/tests.sh
+++ b/tests.sh
@@ -624,6 +624,8 @@ gnu_tests=(
test_regextype_posix_basic
test_regextype_posix_extended
test_regextype_ed
+ test_regextype_emacs
+ test_regextype_grep
test_regextype_sed
test_samefile
@@ -2107,6 +2109,22 @@ function test_regextype_ed() {
bfs_diff -regextype ed -regex '\./\((\)'
}
+function test_regextype_emacs() {
+ if fail quiet invoke_bfs -regextype emacs -quit; then # presumably true when this bfs build rejects the emacs type; confirm against the fail helper
+ return 0 # an unsupported regex type counts as a pass, not a failure
+ fi
+
+ bfs_diff basic -regextype emacs -regex '.*/\(f+o?o?\|bar\)' # pattern uses \( \| \) for grouping/alternation and bare + and ? as operators
+}
+
+function test_regextype_grep() {
+ if fail quiet invoke_bfs -regextype grep -quit; then # presumably true when this bfs build rejects the grep type; confirm against the fail helper
+ return 0 # an unsupported regex type counts as a pass, not a failure
+ fi
+
+ bfs_diff basic -regextype grep -regex '.*/f\+o\?o\?' # pattern uses backslash-escaped \+ and \? as the repetition operators
+}
+
function test_regextype_sed() {
cd weirdnames
bfs_diff -regextype sed -regex '\./\((\)'
diff --git a/tests/test_regextype_emacs.out b/tests/test_regextype_emacs.out
new file mode 100644
index 0000000..8cd18de
--- /dev/null
+++ b/tests/test_regextype_emacs.out
@@ -0,0 +1,6 @@
+basic/e/f
+basic/j/foo
+basic/k/foo
+basic/l/foo
+basic/k/foo/bar
+basic/l/foo/bar
diff --git a/tests/test_regextype_grep.out b/tests/test_regextype_grep.out
new file mode 100644
index 0000000..a9e5d42
--- /dev/null
+++ b/tests/test_regextype_grep.out
@@ -0,0 +1,4 @@
+basic/e/f
+basic/j/foo
+basic/k/foo
+basic/l/foo
diff --git a/util.c b/util.c
index e737c09..9f74e5a 100644
--- a/util.c
+++ b/util.c
@@ -115,15 +115,6 @@ int pipe_cloexec(int pipefd[2]) {
#endif
}
-char *xregerror(int err, const regex_t *regex) {
- size_t len = regerror(err, regex, NULL, 0);
- char *str = malloc(len);
- if (str) {
- regerror(err, regex, str, len);
- }
- return str;
-}
-
/** Get the single character describing the given file type. */
static char type_char(mode_t mode) {
switch (mode & S_IFMT) {
@@ -273,7 +264,7 @@ static int xrpregex(nl_item item, const char *response) {
}
regex_t regex;
- int ret = regcomp(&regex, pattern, REG_EXTENDED);
+ int ret = bfs_regcomp(&regex, pattern, 0, BFS_REGEX_POSIX_EXTENDED);
if (ret != 0) {
return ret;
}
diff --git a/util.h b/util.h
index c0551a2..0a1bf69 100644
--- a/util.h
+++ b/util.h
@@ -170,17 +170,6 @@ int dup_cloexec(int fd);
int pipe_cloexec(int pipefd[2]);
/**
- * Dynamically allocate a regex error message.
- *
- * @param err
- * The error code to stringify.
- * @param regex
- * The (partially) compiled regex.
- * @return A human-readable description of the error, allocated with malloc().
- */
-char *xregerror(int err, const regex_t *regex);
-
-/**
* Format a mode like ls -l (e.g. -rw-r--r--).
*
* @param mode