summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2024-05-27 16:56:43 -0400
committerTavian Barnes <tavianator@tavianator.com>2024-05-28 12:54:50 -0400
commitfa24499735018e2cb81a6a76aa8b5e72695fb8ad (patch)
treef71f584533af2e2c5a3065b5fba058bc8d1ebc91
parent3043922abacf5d9e7bdfc7cfc48d9d1652927c29 (diff)
downloadbfs-fa24499735018e2cb81a6a76aa8b5e72695fb8ad.tar.xz
Implement the remaining regex types
Closes: https://github.com/tavianator/bfs/issues/21
-rw-r--r--src/parse.c32
-rw-r--r--src/xregex.c41
-rw-r--r--src/xregex.h4
-rw-r--r--tests/gnu/regextype_awk.out2
-rw-r--r--tests/gnu/regextype_awk.sh3
-rw-r--r--tests/gnu/regextype_egrep.out0
-rw-r--r--tests/gnu/regextype_egrep.sh3
-rw-r--r--tests/gnu/regextype_findutils_default.out3
-rw-r--r--tests/gnu/regextype_findutils_default.sh3
-rw-r--r--tests/gnu/regextype_gnu_awk.out2
-rw-r--r--tests/gnu/regextype_gnu_awk.sh3
-rw-r--r--tests/gnu/regextype_posix_awk.out2
-rw-r--r--tests/gnu/regextype_posix_awk.sh3
-rw-r--r--tests/gnu/regextype_posix_minimal_basic.out1
-rw-r--r--tests/gnu/regextype_posix_minimal_basic.sh2
15 files changed, 98 insertions, 6 deletions
diff --git a/src/parse.c b/src/parse.c
index 539aa05..58a0209 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -2253,16 +2253,27 @@ static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int
// See https://www.gnu.org/software/gnulib/manual/html_node/Predefined-Syntaxes.html
const char *type = expr->argv[1];
if (strcmp(type, "posix-basic") == 0
+ || strcmp(type, "posix-minimal-basic") == 0
|| strcmp(type, "ed") == 0
|| strcmp(type, "sed") == 0) {
parser->regex_type = BFS_REGEX_POSIX_BASIC;
} else if (strcmp(type, "posix-extended") == 0) {
parser->regex_type = BFS_REGEX_POSIX_EXTENDED;
#if BFS_WITH_ONIGURUMA
+ } else if (strcmp(type, "awk") == 0
+ || strcmp(type, "posix-awk") == 0) {
+ parser->regex_type = BFS_REGEX_AWK;
+ } else if (strcmp(type, "gnu-awk") == 0) {
+ parser->regex_type = BFS_REGEX_GNU_AWK;
} else if (strcmp(type, "emacs") == 0) {
parser->regex_type = BFS_REGEX_EMACS;
} else if (strcmp(type, "grep") == 0) {
parser->regex_type = BFS_REGEX_GREP;
+ } else if (strcmp(type, "egrep") == 0
+ || strcmp(type, "posix-egrep") == 0) {
+ parser->regex_type = BFS_REGEX_EGREP;
+ } else if (strcmp(type, "findutils-default") == 0) {
+ parser->regex_type = BFS_REGEX_GNU_FIND;
#endif
} else if (strcmp(type, "help") == 0) {
parser->just_info = true;
@@ -2277,14 +2288,23 @@ static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int
list_types:
cfprintf(cfile, "Supported types are:\n\n");
- cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n");
- cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n");
- cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n");
+ cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n");
+ cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n");
+ cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n\n");
+
+ cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n\n");
+
#if BFS_WITH_ONIGURUMA
- cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n");
- cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n");
+ cfprintf(cfile, " [${bld}posix-${rs}]${bld}awk${rs}: Like ${grn}awk${rs}\n");
+ cfprintf(cfile, " ${bld}gnu-awk${rs}: Like GNU ${grn}awk${rs}\n\n");
+
+ cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n\n");
+
+ cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n");
+ cfprintf(cfile, " [${bld}posix-${rs}]${bld}egrep${rs}: Like ${grn}grep${rs} ${cyn}-E${rs}\n\n");
+
+ cfprintf(cfile, " ${bld}findutils-default${rs}: Like GNU ${grn}find${rs}\n");
#endif
- cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n");
return NULL;
}
diff --git a/src/xregex.c b/src/xregex.c
index 414c366..2d089b2 100644
--- a/src/xregex.c
+++ b/src/xregex.c
@@ -37,7 +37,11 @@ struct bfs_regex {
static int bfs_onig_status;
static OnigEncoding bfs_onig_enc;
+static OnigSyntaxType bfs_onig_syntax_awk;
+static OnigSyntaxType bfs_onig_syntax_gnu_awk;
static OnigSyntaxType bfs_onig_syntax_emacs;
+static OnigSyntaxType bfs_onig_syntax_egrep;
+static OnigSyntaxType bfs_onig_syntax_gnu_find;
/** pthread_once() callback. */
static void bfs_onig_once(void) {
@@ -106,9 +110,34 @@ static void bfs_onig_once(void) {
bfs_onig_enc = NULL;
}
+ // Compute the GNU extensions
+ OnigSyntaxType *ere = ONIG_SYNTAX_POSIX_EXTENDED;
+ OnigSyntaxType *gnu = ONIG_SYNTAX_GNU_REGEX;
+ unsigned int gnu_op = gnu->op & ~ere->op;
+ unsigned int gnu_op2 = gnu->op2 & ~ere->op2;
+ unsigned int gnu_behavior = gnu->behavior & ~ere->behavior;
+
+ onig_copy_syntax(&bfs_onig_syntax_awk, ONIG_SYNTAX_POSIX_EXTENDED);
+ bfs_onig_syntax_awk.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL;
+ bfs_onig_syntax_awk.behavior |= ONIG_SYN_BACKSLASH_ESCAPE_IN_CC;
+
+ onig_copy_syntax(&bfs_onig_syntax_gnu_awk, &bfs_onig_syntax_awk);
+ bfs_onig_syntax_gnu_awk.op |= gnu_op;
+ bfs_onig_syntax_gnu_awk.op2 |= gnu_op2;
+ bfs_onig_syntax_gnu_awk.behavior |= gnu_behavior;
+ bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS;
+ bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS;
+
// https://github.com/kkos/oniguruma/issues/296
onig_copy_syntax(&bfs_onig_syntax_emacs, ONIG_SYNTAX_EMACS);
bfs_onig_syntax_emacs.op2 |= ONIG_SYN_OP2_QMARK_GROUP_EFFECT;
+
+ onig_copy_syntax(&bfs_onig_syntax_egrep, ONIG_SYNTAX_POSIX_EXTENDED);
+ bfs_onig_syntax_egrep.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL;
+ bfs_onig_syntax_egrep.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS;
+
+ onig_copy_syntax(&bfs_onig_syntax_gnu_find, &bfs_onig_syntax_emacs);
+ bfs_onig_syntax_gnu_find.options |= ONIG_OPTION_MULTILINE;
}
/** Initialize Oniguruma. */
@@ -149,12 +178,24 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
case BFS_REGEX_POSIX_EXTENDED:
syntax = ONIG_SYNTAX_POSIX_EXTENDED;
break;
+ case BFS_REGEX_AWK:
+ syntax = &bfs_onig_syntax_awk;
+ break;
+ case BFS_REGEX_GNU_AWK:
+ syntax = &bfs_onig_syntax_gnu_awk;
+ break;
case BFS_REGEX_EMACS:
syntax = &bfs_onig_syntax_emacs;
break;
case BFS_REGEX_GREP:
syntax = ONIG_SYNTAX_GREP;
break;
+ case BFS_REGEX_EGREP:
+ syntax = &bfs_onig_syntax_egrep;
+ break;
+ case BFS_REGEX_GNU_FIND:
+ syntax = &bfs_onig_syntax_gnu_find;
+ break;
}
bfs_assert(syntax, "Invalid regex type");
diff --git a/src/xregex.h b/src/xregex.h
index 998a2b0..750db24 100644
--- a/src/xregex.h
+++ b/src/xregex.h
@@ -15,8 +15,12 @@ struct bfs_regex;
enum bfs_regex_type {
BFS_REGEX_POSIX_BASIC,
BFS_REGEX_POSIX_EXTENDED,
+ BFS_REGEX_AWK,
+ BFS_REGEX_GNU_AWK,
BFS_REGEX_EMACS,
BFS_REGEX_GREP,
+ BFS_REGEX_EGREP,
+ BFS_REGEX_GNU_FIND,
};
/**
diff --git a/tests/gnu/regextype_awk.out b/tests/gnu/regextype_awk.out
new file mode 100644
index 0000000..0f32fc4
--- /dev/null
+++ b/tests/gnu/regextype_awk.out
@@ -0,0 +1,2 @@
+weirdnames/*/m
+weirdnames/[/k
diff --git a/tests/gnu/regextype_awk.sh b/tests/gnu/regextype_awk.sh
new file mode 100644
index 0000000..3718473
--- /dev/null
+++ b/tests/gnu/regextype_awk.sh
@@ -0,0 +1,3 @@
+invoke_bfs -regextype awk -quit || skip
+
+bfs_diff weirdnames -regextype awk -regex '.*/[\[\*]/.*'
diff --git a/tests/gnu/regextype_egrep.out b/tests/gnu/regextype_egrep.out
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/gnu/regextype_egrep.out
diff --git a/tests/gnu/regextype_egrep.sh b/tests/gnu/regextype_egrep.sh
new file mode 100644
index 0000000..281d9c0
--- /dev/null
+++ b/tests/gnu/regextype_egrep.sh
@@ -0,0 +1,3 @@
+invoke_bfs -regextype egrep -quit || skip
+
+bfs_diff weirdnames -regextype egrep -regex '*.*/{l'
diff --git a/tests/gnu/regextype_findutils_default.out b/tests/gnu/regextype_findutils_default.out
new file mode 100644
index 0000000..709a7ba
--- /dev/null
+++ b/tests/gnu/regextype_findutils_default.out
@@ -0,0 +1,3 @@
+/n
+weirdnames/
+weirdnames/*/m
diff --git a/tests/gnu/regextype_findutils_default.sh b/tests/gnu/regextype_findutils_default.sh
new file mode 100644
index 0000000..c870312
--- /dev/null
+++ b/tests/gnu/regextype_findutils_default.sh
@@ -0,0 +1,3 @@
+invoke_bfs -regextype findutils-default -quit || skip
+
+bfs_diff weirdnames -regextype findutils-default -regex '.*/./\(m\|n\)'
diff --git a/tests/gnu/regextype_gnu_awk.out b/tests/gnu/regextype_gnu_awk.out
new file mode 100644
index 0000000..0f32fc4
--- /dev/null
+++ b/tests/gnu/regextype_gnu_awk.out
@@ -0,0 +1,2 @@
+weirdnames/*/m
+weirdnames/[/k
diff --git a/tests/gnu/regextype_gnu_awk.sh b/tests/gnu/regextype_gnu_awk.sh
new file mode 100644
index 0000000..6b66496
--- /dev/null
+++ b/tests/gnu/regextype_gnu_awk.sh
@@ -0,0 +1,3 @@
+invoke_bfs -regextype gnu-awk -quit || skip
+
+bfs_diff weirdnames -regextype gnu-awk -regex '.*/[\[\*]/(\<.\>)'
diff --git a/tests/gnu/regextype_posix_awk.out b/tests/gnu/regextype_posix_awk.out
new file mode 100644
index 0000000..0f32fc4
--- /dev/null
+++ b/tests/gnu/regextype_posix_awk.out
@@ -0,0 +1,2 @@
+weirdnames/*/m
+weirdnames/[/k
diff --git a/tests/gnu/regextype_posix_awk.sh b/tests/gnu/regextype_posix_awk.sh
new file mode 100644
index 0000000..86377d7
--- /dev/null
+++ b/tests/gnu/regextype_posix_awk.sh
@@ -0,0 +1,3 @@
+invoke_bfs -regextype posix-awk -quit || skip
+
+bfs_diff weirdnames -regextype posix-awk -regex '.*/[\[\*]/.*'
diff --git a/tests/gnu/regextype_posix_minimal_basic.out b/tests/gnu/regextype_posix_minimal_basic.out
new file mode 100644
index 0000000..0f0971e
--- /dev/null
+++ b/tests/gnu/regextype_posix_minimal_basic.out
@@ -0,0 +1 @@
+./(
diff --git a/tests/gnu/regextype_posix_minimal_basic.sh b/tests/gnu/regextype_posix_minimal_basic.sh
new file mode 100644
index 0000000..ee324f3
--- /dev/null
+++ b/tests/gnu/regextype_posix_minimal_basic.sh
@@ -0,0 +1,2 @@
+cd weirdnames
+bfs_diff -regextype posix-minimal-basic -regex '\./\((\)'