summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tavian Barnes <tavianator@tavianator.com> 2024-03-21 11:55:38 -0400
committer: Tavian Barnes <tavianator@tavianator.com> 2024-03-21 11:55:38 -0400
commit: a2a6ac8edd5d85398f6edb6afd02e683d9da9e7b (patch)
tree: 96402084c7a9184cb483aaee0fe8224a7f8871f1
parent: ef0aef16961508075249a6206dc0e9a9ac27d81c (diff)
download: bfs-a2a6ac8edd5d85398f6edb6afd02e683d9da9e7b.tar.xz
bfstd: New asciilen() function
-rw-r--r-- src/bfstd.c 68
-rw-r--r-- src/bfstd.h 15
-rw-r--r-- tests/bfstd.c 15
3 files changed, 73 insertions, 25 deletions
diff --git a/src/bfstd.c b/src/bfstd.c
index 5b64452..43513b8 100644
--- a/src/bfstd.c
+++ b/src/bfstd.c
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: 0BSD
#include "bfstd.h"
+#include "bit.h"
#include "config.h"
#include "diag.h"
#include "sanity.h"
@@ -670,6 +671,44 @@ int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *
#endif
}
+size_t asciilen(const char *str) { // length of the pure-ASCII prefix of a NUL-terminated string
+ return asciinlen(str, strlen(str)); // delegate, using strlen(str) as the byte bound
+}
+ // Return the length of the pure-ASCII prefix of str, examining at most n bytes.
+size_t asciinlen(const char *str, size_t n) {
+ size_t i = 0;
+ // The word-at-a-time fast path is compiled only when SIZE_WIDTH is a multiple of 8.
+#if SIZE_WIDTH % 8 == 0
+ // Word-at-a-time isascii(): test sizeof(size_t) bytes per iteration while a full word still fits in n
+ for (size_t word; i + sizeof(word) <= n; i += sizeof(word)) {
+ memcpy(&word, str + i, sizeof(word));
+ // memcpy() loads the bytes into word without casting str to a size_t pointer.
+ const size_t mask = (SIZE_MAX / 0xFF) << 7; // 0x808080...: the high bit of every byte
+ word &= mask;
+ if (!word) {
+ continue;
+ }
+ // Some byte in this word has its high bit set; locate the first such byte in memory order.
+#if ENDIAN_NATIVE == ENDIAN_BIG
+ word = bswap(word); // presumably reverses the byte order (bit.h) -- TODO confirm
+#elif ENDIAN_NATIVE != ENDIAN_LITTLE
+ break; // neither big- nor little-endian: let the byte loop below find the position
+#endif
+ // Bit index / 8 = byte offset in the word; presumably trailing_zeros() counts low-order zero bits -- TODO confirm.
+ size_t first = trailing_zeros(word) / 8;
+ return i + first;
+ }
+#endif
+ // Byte-at-a-time scan of whatever the word loop above did not cover.
+ for (; i < n; ++i) {
+ if (!xisascii(str[i])) {
+ break;
+ }
+ }
+ // i is the index where xisascii() first failed, or n if it never did.
+ return i;
+}
+
wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb) {
wchar_t wc;
size_t mblen = mbrtowc(&wc, str + *i, len - *i, mb);
@@ -765,36 +804,15 @@ static size_t printable_len(const char *str, size_t len, enum wesc_flags flags)
invoke_once(&once, char_cache_init);
// Fast path: avoid multibyte checks
- size_t i, word;
- for (i = 0; i + sizeof(word) <= len;) {
- // Word-at-a-time isascii()
- memcpy(&word, str + i, sizeof(word));
- // 0xFFFF... / 0xFF == 0x10101...
- size_t mask = (SIZE_MAX / 0xFF) << 7;
- if (word & mask) {
- goto multibyte;
- }
-
- for (size_t j = 0; j < sizeof(word); ++i, ++j) {
- if (!wesc_isprint(str[i], flags)) {
- return i;
- }
- }
- }
-
- for (; i < len; ++i) {
- char c = str[i];
- if (!xisascii(c)) {
- goto multibyte;
- }
- if (!wesc_isprint(c, flags)) {
+ size_t asclen = asciinlen(str, len);
+ size_t i;
+ for (i = 0; i < asclen; ++i) {
+ if (!wesc_isprint(str[i], flags)) {
return i;
}
}
-multibyte:;
mbstate_t mb = {0};
-
for (size_t j = i; i < len; i = j) {
wint_t wc = xmbrtowc(str, &j, len, &mb);
if (wc == WEOF) {
diff --git a/src/bfstd.h b/src/bfstd.h
index d160c88..fc22971 100644
--- a/src/bfstd.h
+++ b/src/bfstd.h
@@ -178,6 +178,21 @@ int ynprompt(void);
// #include <string.h>
/**
+ * Get the length of the pure-ASCII prefix of a NUL-terminated string.
+ */
+size_t asciilen(const char *str);
+
+/**
+ * Get the length of the pure-ASCII prefix of the first n bytes of a string.
+ *
+ * @param str
+ * The string to check.
+ * @param n
+ * The maximum prefix length, in bytes; no more than n bytes are examined.
+ */
+size_t asciinlen(const char *str, size_t n);
+
+/**
* Allocate a copy of a region of memory.
*
* @param src
diff --git a/tests/bfstd.c b/tests/bfstd.c
index 1351e11..3ffab41 100644
--- a/tests/bfstd.c
+++ b/tests/bfstd.c
@@ -40,6 +40,21 @@ static bool check_wordesc(const char *str, const char *exp, enum wesc_flags flag
bool check_bfstd(void) {
bool ret = true;
+ ret &= bfs_check(asciilen("") == 0);
+ ret &= bfs_check(asciilen("@") == 1);
+ ret &= bfs_check(asciilen("@@") == 2);
+ ret &= bfs_check(asciilen("\xFF@") == 0);
+ ret &= bfs_check(asciilen("@\xFF") == 1);
+ ret &= bfs_check(asciilen("@@@@@@@@") == 8);
+ ret &= bfs_check(asciilen("@@@@@@@@@@@@@@@@") == 16);
+ ret &= bfs_check(asciilen("@@@@@@@@@@@@@@@@@@@@@@@@") == 24);
+ ret &= bfs_check(asciilen("@@@@@@@@@@@@@@a\xFF@@@@@@@") == 15);
+ ret &= bfs_check(asciilen("@@@@@@@@@@@@@@@@\xFF@@@@@@@") == 16);
+ ret &= bfs_check(asciilen("@@@@@@@@@@@@@@@@a\xFF@@@@@@") == 17);
+ ret &= bfs_check(asciilen("@@@@@@@\xFF@@@@@@a\xFF@@@@@@@") == 7);
+ ret &= bfs_check(asciilen("@@@@@@@@\xFF@@@@@a\xFF@@@@@@@") == 8);
+ ret &= bfs_check(asciilen("@@@@@@@@@\xFF@@@@a\xFF@@@@@@@") == 9);
+
// From man 3p basename
ret &= check_base_dir("usr", ".", "usr");
ret &= check_base_dir("usr/", ".", "usr");