summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/alloc.c31
-rw-r--r--src/bfs.h11
-rw-r--r--src/bfstd.c114
-rw-r--r--src/bfstd.h27
-rw-r--r--src/bftw.c9
-rw-r--r--src/bit.h54
-rw-r--r--src/color.c21
-rw-r--r--src/diag.c19
-rw-r--r--src/diag.h2
-rw-r--r--src/eval.c78
-rw-r--r--src/ioq.c538
-rw-r--r--src/ioq.h39
-rw-r--r--src/list.h12
-rw-r--r--src/mtab.c11
-rw-r--r--src/prelude.h5
-rw-r--r--src/sanity.h31
-rw-r--r--src/sighook.c264
-rw-r--r--src/sighook.h10
-rw-r--r--src/stat.c31
-rw-r--r--src/stat.h9
-rw-r--r--src/thread.c12
-rw-r--r--src/thread.h5
-rw-r--r--src/trie.c121
-rw-r--r--src/trie.h56
-rw-r--r--src/xspawn.c17
-rw-r--r--src/xtime.c53
-rw-r--r--src/xtime.h36
27 files changed, 1186 insertions, 430 deletions
diff --git a/src/alloc.c b/src/alloc.c
index ef9f6ab..f505eda 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -106,7 +106,7 @@ void *reserve(void *ptr, size_t align, size_t size, size_t count) {
// If we stayed within the same size class, reuse ptr.
if (count & (count - 1)) {
// Tell sanitizers about the new array element
- sanitize_alloc((char *)ptr + old_size, size);
+ sanitize_resize(ptr, old_size, old_size + size, bit_ceil(count) * size);
errno = 0;
return ptr;
}
@@ -121,7 +121,7 @@ void *reserve(void *ptr, size_t align, size_t size, size_t count) {
}
// Pretend we only allocated one more element
- sanitize_free((char *)ret + old_size + size, new_size - old_size - size);
+ sanitize_resize(ret, new_size, old_size + size, new_size);
errno = 0;
return ret;
}
@@ -304,8 +304,7 @@ void *varena_alloc(struct varena *varena, size_t count) {
}
// Tell the sanitizers the exact size of the allocated struct
- sanitize_free(ret, arena->size);
- sanitize_alloc(ret, varena_exact_size(varena, count));
+ sanitize_resize(ret, arena->size, varena_exact_size(varena, count), arena->size);
return ret;
}
@@ -317,15 +316,14 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t
return NULL;
}
- size_t new_exact_size = varena_exact_size(varena, new_count);
- size_t old_exact_size = varena_exact_size(varena, old_count);
+ size_t old_size = old_arena->size;
+ size_t new_size = new_arena->size;
if (new_arena == old_arena) {
- if (new_count < old_count) {
- sanitize_free((char *)ptr + new_exact_size, old_exact_size - new_exact_size);
- } else if (new_count > old_count) {
- sanitize_alloc((char *)ptr + old_exact_size, new_exact_size - old_exact_size);
- }
+ sanitize_resize(ptr,
+ varena_exact_size(varena, old_count),
+ varena_exact_size(varena, new_count),
+ new_size);
return ptr;
}
@@ -334,17 +332,18 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t
return NULL;
}
- size_t old_size = old_arena->size;
- sanitize_alloc(ptr, old_size);
+ // Non-sanitized builds don't bother computing exact sizes, and just use
+ // the potentially-larger arena size for each size class instead. To
+ // allow the below memcpy() to work with the less-precise sizes, expand
+ // the old allocation to its full capacity.
+ sanitize_resize(ptr, varena_exact_size(varena, old_count), old_size, old_size);
- size_t new_size = new_arena->size;
size_t min_size = new_size < old_size ? new_size : old_size;
memcpy(ret, ptr, min_size);
arena_free(old_arena, ptr);
- sanitize_free(ret, new_size);
- sanitize_alloc(ret, new_exact_size);
+ sanitize_resize(ret, new_size, varena_exact_size(varena, new_count), new_size);
return ret;
}
diff --git a/src/bfs.h b/src/bfs.h
index af4cf9f..32dbbae 100644
--- a/src/bfs.h
+++ b/src/bfs.h
@@ -218,4 +218,15 @@ extern const char bfs_ldlibs[];
# define _target_clones(...)
#endif
+/**
+ * Optimization hint to not unroll a loop.
+ */
+#if BFS_HAS_PRAGMA_NOUNROLL
+# define _nounroll _Pragma("nounroll")
+#elif __GNUC__ && !__clang__
+# define _nounroll _Pragma("GCC unroll 0")
+#else
+# define _nounroll
+#endif
+
#endif // BFS_H
diff --git a/src/bfstd.c b/src/bfstd.c
index b29fb7b..f2938ad 100644
--- a/src/bfstd.c
+++ b/src/bfstd.c
@@ -211,35 +211,77 @@ const char *xgetprogname(void) {
return cmd;
}
-int xstrtoll(const char *str, char **end, int base, long long *value) {
- // strtoll() skips leading spaces, but we want to reject them
+/** Common prologue for xstrto*() wrappers. */
+static int xstrtox_prologue(const char *str) {
+ // strto*() skips leading spaces, but we want to reject them
if (xisspace(str[0])) {
errno = EINVAL;
return -1;
}
- // If end is NULL, make sure the entire string is valid
- bool entire = !end;
- char *endp;
- if (!end) {
- end = &endp;
- }
-
errno = 0;
- long long result = strtoll(str, end, base);
+ return 0;
+}
+
+/** Common epilogue for xstrto*() wrappers. */
+static int xstrtox_epilogue(const char *str, char **end, char *endp) {
if (errno != 0) {
return -1;
}
- if (*end == str || (entire && **end != '\0')) {
+ if (end) {
+ *end = endp;
+ }
+
+ // If end is NULL, make sure the entire string is valid
+ if (endp == str || (!end && *endp != '\0')) {
errno = EINVAL;
return -1;
}
- *value = result;
return 0;
}
+int xstrtol(const char *str, char **end, int base, long *value) {
+ if (xstrtox_prologue(str) != 0) {
+ return -1;
+ }
+
+ char *endp;
+ *value = strtol(str, &endp, base);
+ return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtoll(const char *str, char **end, int base, long long *value) {
+ if (xstrtox_prologue(str) != 0) {
+ return -1;
+ }
+
+ char *endp;
+ *value = strtoll(str, &endp, base);
+ return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtof(const char *str, char **end, float *value) {
+ if (xstrtox_prologue(str) != 0) {
+ return -1;
+ }
+
+ char *endp;
+ *value = strtof(str, &endp);
+ return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtod(const char *str, char **end, double *value) {
+ if (xstrtox_prologue(str) != 0) {
+ return -1;
+ }
+
+ char *endp;
+ *value = strtod(str, &endp);
+ return xstrtox_epilogue(str, end, endp);
+}
+
/** Compile and execute a regular expression for xrpmatch(). */
static int xrpregex(nl_item item, const char *response) {
const char *pattern = nl_langinfo(item);
@@ -482,7 +524,9 @@ int rlim_cmp(rlim_t a, rlim_t b) {
}
dev_t xmakedev(int ma, int mi) {
-#ifdef makedev
+#if __QNX__
+ return makedev(0, ma, mi);
+#elif defined(makedev)
return makedev(ma, mi);
#else
return (ma << 8) | mi;
@@ -736,35 +780,31 @@ size_t asciilen(const char *str) {
}
size_t asciinlen(const char *str, size_t n) {
+ const unsigned char *ustr = (const unsigned char *)str;
size_t i = 0;
-#if SIZE_WIDTH % 8 == 0
// Word-at-a-time isascii()
- for (size_t word; i + sizeof(word) <= n; i += sizeof(word)) {
- memcpy(&word, str + i, sizeof(word));
-
- const size_t mask = (SIZE_MAX / 0xFF) << 7; // 0x808080...
- word &= mask;
- if (!word) {
- continue;
- }
-
-#if ENDIAN_NATIVE == ENDIAN_BIG
- word = bswap(word);
-#elif ENDIAN_NATIVE != ENDIAN_LITTLE
- break;
+#define CHUNK(n) CHUNK_(uint##n##_t, load8_leu##n)
+#define CHUNK_(type, load8) \
+ while (n - i >= sizeof(type)) { \
+ type word = load8(ustr + i); \
+ type mask = (((type)-1) / 0xFF) << 7; /* 0x808080.. */ \
+ word &= mask; \
+ i += trailing_zeros(word) / 8; \
+ if (word) { \
+ return i; \
+ } \
+ }
+
+#if SIZE_WIDTH >= 64
+ CHUNK(64);
#endif
+ CHUNK(32);
+ CHUNK(16);
+ CHUNK(8);
- size_t first = trailing_zeros(word) / 8;
- return i + first;
- }
-#endif
-
- for (; i < n; ++i) {
- if (!xisascii(str[i])) {
- break;
- }
- }
+#undef CHUNK_
+#undef CHUNK
return i;
}
diff --git a/src/bfstd.h b/src/bfstd.h
index 97867fd..84f92ec 100644
--- a/src/bfstd.h
+++ b/src/bfstd.h
@@ -179,23 +179,26 @@ int open_cterm(int flags);
const char *xgetprogname(void);
/**
+ * Wrapper for strtol() that forbids leading spaces.
+ */
+int xstrtol(const char *str, char **end, int base, long *value);
+
+/**
* Wrapper for strtoll() that forbids leading spaces.
- *
- * @str
- * The string to parse.
- * @end
- * If non-NULL, will hold a pointer to the first invalid character.
- * If NULL, the entire string must be valid.
- * @base
- * The base for the conversion, or 0 to auto-detect.
- * @value
- * Will hold the parsed integer value, on success.
- * @return
- * 0 on success, -1 on failure.
*/
int xstrtoll(const char *str, char **end, int base, long long *value);
/**
+ * Wrapper for strtof() that forbids leading spaces.
+ */
+int xstrtof(const char *str, char **end, float *value);
+
+/**
+ * Wrapper for strtod() that forbids leading spaces.
+ */
+int xstrtod(const char *str, char **end, double *value);
+
+/**
* Process a yes/no prompt.
*
* @return 1 for yes, 0 for no, and -1 for unknown.
diff --git a/src/bftw.c b/src/bftw.c
index 61193d5..f822456 100644
--- a/src/bftw.c
+++ b/src/bftw.c
@@ -1008,6 +1008,7 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) {
return -1;
}
+ ioq_submit(ioq);
struct ioq_ent *ent = ioq_pop(ioq, block);
if (!ent) {
return -1;
@@ -1051,6 +1052,10 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) {
bftw_queue_attach(&state->fileq, file, true);
break;
+
+ default:
+ bfs_bug("Unexpected ioq op %d", (int)op);
+ break;
}
ioq_free(ioq, ent);
@@ -1953,6 +1958,10 @@ static void bftw_flush(struct bftw_state *state) {
bftw_queue_flush(&state->dirq);
bftw_ioq_opendirs(state);
+
+ if (state->ioq) {
+ ioq_submit(state->ioq);
+ }
}
/** Close the current directory. */
diff --git a/src/bit.h b/src/bit.h
index 73a80dc..5d6fb9d 100644
--- a/src/bit.h
+++ b/src/bit.h
@@ -148,7 +148,7 @@
# define INTMAX_WIDTH UINTMAX_WIDTH
#endif
-// C23 polyfill: byte order
+// N3022 polyfill: byte order
#ifdef __STDC_ENDIAN_LITTLE__
# define ENDIAN_LITTLE __STDC_ENDIAN_LITTLE__
@@ -250,6 +250,58 @@ static inline uint8_t bswap_u8(uint8_t n) {
*/
#define bswap(n) UINT_SELECT(n, bswap)(n)
+#define LOAD8_LEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (i))
+#define LOAD8_BEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (n - (i) - 8))
+
+/** Load a little-endian 8-bit word. */
+static inline uint8_t load8_leu8(const void *ptr) {
+ return LOAD8_LEU8(ptr, 0, 8);
+}
+
+/** Load a big-endian 8-bit word. */
+static inline uint8_t load8_beu8(const void *ptr) {
+ return LOAD8_BEU8(ptr, 0, 8);
+}
+
+#define LOAD8_LEU16(ptr, i, n) (LOAD8_LEU8(ptr, i, n) | LOAD8_LEU8(ptr, i + 8, n))
+#define LOAD8_BEU16(ptr, i, n) (LOAD8_BEU8(ptr, i, n) | LOAD8_BEU8(ptr, i + 8, n))
+
+/** Load a little-endian 16-bit word. */
+static inline uint16_t load8_leu16(const void *ptr) {
+ return LOAD8_LEU16(ptr, 0, 16);
+}
+
+/** Load a big-endian 16-bit word. */
+static inline uint16_t load8_beu16(const void *ptr) {
+ return LOAD8_BEU16(ptr, 0, 16);
+}
+
+#define LOAD8_LEU32(ptr, i, n) (LOAD8_LEU16(ptr, i, n) | LOAD8_LEU16(ptr, i + 16, n))
+#define LOAD8_BEU32(ptr, i, n) (LOAD8_BEU16(ptr, i, n) | LOAD8_BEU16(ptr, i + 16, n))
+
+/** Load a little-endian 32-bit word. */
+static inline uint32_t load8_leu32(const void *ptr) {
+ return LOAD8_LEU32(ptr, 0, 32);
+}
+
+/** Load a big-endian 32-bit word. */
+static inline uint32_t load8_beu32(const void *ptr) {
+ return LOAD8_BEU32(ptr, 0, 32);
+}
+
+#define LOAD8_LEU64(ptr, i, n) (LOAD8_LEU32(ptr, i, n) | LOAD8_LEU32(ptr, i + 32, n))
+#define LOAD8_BEU64(ptr, i, n) (LOAD8_BEU32(ptr, i, n) | LOAD8_BEU32(ptr, i + 32, n))
+
+/** Load a little-endian 64-bit word. */
+static inline uint64_t load8_leu64(const void *ptr) {
+ return LOAD8_LEU64(ptr, 0, 64);
+}
+
+/** Load a big-endian 64-bit word. */
+static inline uint64_t load8_beu64(const void *ptr) {
+ return LOAD8_BEU64(ptr, 0, 64);
+}
+
// C23 polyfill: bit utilities
#if __STDC_VERSION_STDBIT_H__ >= C23
diff --git a/src/color.c b/src/color.c
index e7f0973..0cc950b 100644
--- a/src/color.c
+++ b/src/color.c
@@ -143,13 +143,7 @@ static int init_esc(struct colors *colors, const char *name, const char *value,
*field = esc;
- struct trie_leaf *leaf = trie_insert_str(&colors->names, name);
- if (!leaf) {
- return -1;
- }
-
- leaf->value = field;
- return 0;
+ return trie_set_str(&colors->names, name, field);
}
/** Check if an escape sequence is equal to a string. */
@@ -159,8 +153,7 @@ static bool esc_eq(const struct esc_seq *esc, const char *str, size_t len) {
/** Get an escape sequence from the table. */
static struct esc_seq **get_esc(const struct colors *colors, const char *name) {
- const struct trie_leaf *leaf = trie_find_str(&colors->names, name);
- return leaf ? leaf->value : NULL;
+ return trie_get_str(&colors->names, name);
}
/** Append an escape sequence to a string. */
@@ -225,13 +218,7 @@ static int insert_ext(struct trie *trie, struct ext_color *ext) {
}
size_t len = ext->len + 1;
- leaf = trie_insert_mem(trie, ext->ext, len);
- if (!leaf) {
- return -1;
- }
-
- leaf->value = ext;
- return 0;
+ return trie_set_mem(trie, ext->ext, len, ext);
}
/** Set the color for an extension. */
@@ -975,7 +962,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const struc
goto error;
}
- const struct bfs_stat *statbuf;
+ const struct bfs_stat *statbuf = NULL;
const struct esc_seq *color = NULL;
switch (type) {
diff --git a/src/diag.c b/src/diag.c
index 4909cf5..4f1c84c 100644
--- a/src/diag.c
+++ b/src/diag.c
@@ -14,13 +14,26 @@
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
+
+/**
+ * Print an error using dprintf() if possible, because it's more likely to be
+ * async-signal-safe in practice.
+ */
+#if BFS_HAS_DPRINTF
+# define eprintf(...) dprintf(STDERR_FILENO, __VA_ARGS__)
+# define veprintf(...) vdprintf(STDERR_FILENO, __VA_ARGS__)
+#else
+# define eprintf(...) fprintf(stderr, __VA_ARGS__)
+# define veprintf(...) vfprintf(stderr, __VA_ARGS__)
+#endif
/** bfs_diagf() implementation. */
_printf(2, 0)
static void bfs_vdiagf(const struct bfs_loc *loc, const char *format, va_list args) {
- fprintf(stderr, "%s: %s@%s:%d: ", xgetprogname(), loc->func, loc->file, loc->line);
- vfprintf(stderr, format, args);
- fprintf(stderr, "\n");
+ eprintf("%s: %s@%s:%d: ", xgetprogname(), loc->func, loc->file, loc->line);
+ veprintf(format, args);
+ eprintf("\n");
}
void bfs_diagf(const struct bfs_loc *loc, const char *format, ...) {
diff --git a/src/diag.h b/src/diag.h
index 3bea9b2..7b3e8a5 100644
--- a/src/diag.h
+++ b/src/diag.h
@@ -27,7 +27,7 @@ struct bfs_loc {
/**
* Get the current source code location.
*/
-#if __STDC_VERSION__ >= C23
+#if BFS_HAS_COMPOUND_LITERAL_STORAGE
# define bfs_location() (&(static const struct bfs_loc)BFS_LOC_INIT)
#else
# define bfs_location() (&(const struct bfs_loc)BFS_LOC_INIT)
diff --git a/src/eval.c b/src/eval.c
index 6e9fffd..7c9da97 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -137,11 +137,9 @@ static const struct bfs_stat *eval_stat(struct bfs_eval *state) {
* Get the difference (in seconds) between two struct timespecs.
*/
static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) {
- time_t ret = lhs->tv_sec - rhs->tv_sec;
- if (lhs->tv_nsec < rhs->tv_nsec) {
- --ret;
- }
- return ret;
+ struct timespec diff = *lhs;
+ timespec_sub(&diff, rhs);
+ return diff.tv_sec;
}
bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) {
@@ -260,8 +258,7 @@ bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) {
return false;
}
- return time->tv_sec > expr->reftime.tv_sec
- || (time->tv_sec == expr->reftime.tv_sec && time->tv_nsec > expr->reftime.tv_nsec);
+ return timespec_cmp(time, &expr->reftime) > 0;
}
/**
@@ -703,6 +700,34 @@ static int print_owner(FILE *file, const char *name, uintmax_t id, int *width) {
}
}
+/** Print a file's modification time. */
+static int print_time(FILE *file, time_t time, time_t now) {
+ struct tm tm;
+ if (!localtime_r(&time, &tm)) {
+ goto error;
+ }
+
+ char time_str[256];
+ size_t time_ret;
+
+ time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
+ time_t tomorrow = now + 24 * 60 * 60;
+ if (time <= six_months_ago || time >= tomorrow) {
+ time_ret = strftime(time_str, sizeof(time_str), "%b %e %Y", &tm);
+ } else {
+ time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
+ }
+
+ if (time_ret == 0) {
+ goto error;
+ }
+
+ return fprintf(file, " %s", time_str);
+
+error:
+ return fprintf(file, " %jd", (intmax_t)time);
+}
+
/**
* -f?ls action.
*/
@@ -759,28 +784,11 @@ bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) {
time_t time = statbuf->mtime.tv_sec;
time_t now = ctx->now.tv_sec;
- time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
- time_t tomorrow = now + 24 * 60 * 60;
- struct tm tm;
- if (!localtime_r(&time, &tm)) {
- goto error;
- }
- char time_str[256];
- size_t time_ret;
- if (time <= six_months_ago || time >= tomorrow) {
- time_ret = strftime(time_str, sizeof(time_str), "%b %e %Y", &tm);
- } else {
- time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
- }
- if (time_ret == 0) {
- errno = EOVERFLOW;
- goto error;
- }
- if (cfprintf(cfile, " %s${rs}", time_str) < 0) {
+ if (print_time(file, time, now) < 0) {
goto error;
}
- if (cfprintf(cfile, " %pP", ftwbuf) < 0) {
+ if (cfprintf(cfile, "${rs} %pP", ftwbuf) < 0) {
goto error;
}
@@ -1047,21 +1055,6 @@ static int eval_gettime(struct bfs_eval *state, struct timespec *ts) {
}
/**
- * Record an elapsed time.
- */
-static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) {
- elapsed->tv_sec += end->tv_sec - start->tv_sec;
- elapsed->tv_nsec += end->tv_nsec - start->tv_nsec;
- if (elapsed->tv_nsec < 0) {
- elapsed->tv_nsec += 1000000000L;
- --elapsed->tv_sec;
- } else if (elapsed->tv_nsec >= 1000000000L) {
- elapsed->tv_nsec -= 1000000000L;
- ++elapsed->tv_sec;
- }
-}
-
-/**
* Evaluate an expression.
*/
static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) {
@@ -1079,7 +1072,8 @@ static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) {
if (time) {
if (eval_gettime(state, &end) == 0) {
- timespec_elapsed(&expr->elapsed, &start, &end);
+ timespec_sub(&end, &start);
+ timespec_add(&expr->elapsed, &end);
}
}
diff --git a/src/ioq.c b/src/ioq.c
index be5b758..064e0e8 100644
--- a/src/ioq.c
+++ b/src/ioq.c
@@ -136,6 +136,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <sys/stat.h>
+#include <unistd.h>
#if BFS_WITH_LIBURING
# include <liburing.h>
@@ -259,17 +260,45 @@ static struct ioqq *ioqq_create(size_t size) {
/** Get the monitor associated with a slot. */
static struct ioq_monitor *ioq_slot_monitor(struct ioqq *ioqq, ioq_slot *slot) {
- size_t i = slot - ioqq->slots;
+ uint32_t i = slot - ioqq->slots;
+
+ // Hash the index to de-correlate waiters
+ // https://nullprogram.com/blog/2018/07/31/
+ // https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785
+ i ^= i >> 16;
+ i *= UINT32_C(0x21f0aaad);
+ i ^= i >> 15;
+ i *= UINT32_C(0x735a2d97);
+ i ^= i >> 15;
+
return &ioqq->monitors[i & ioqq->monitor_mask];
}
/** Atomically wait for a slot to change. */
_noinline
static uintptr_t ioq_slot_wait(struct ioqq *ioqq, ioq_slot *slot, uintptr_t value) {
+ uintptr_t ret;
+
+ // Try spinning a few times (with exponential backoff) before blocking
+ _nounroll
+ for (int i = 1; i < 1024; i *= 2) {
+ _nounroll
+ for (int j = 0; j < i; ++j) {
+ spin_loop();
+ }
+
+ // Check if the slot changed
+ ret = load(slot, relaxed);
+ if (ret != value) {
+ return ret;
+ }
+ }
+
+ // Nothing changed, start blocking
struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot);
mutex_lock(&monitor->mutex);
- uintptr_t ret = load(slot, relaxed);
+ ret = load(slot, relaxed);
if (ret != value) {
goto done;
}
@@ -355,6 +384,14 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent
static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) {
uintptr_t prev = load(slot, relaxed);
while (true) {
+#if __has_builtin(__builtin_prefetch)
+ // Optimistically prefetch the pointer in this slot. If this
+ // slot is not full, this will prefetch an invalid address, but
+ // experimentally this is worth it on both Intel (Alder Lake)
+ // and AMD (Zen 2).
+ __builtin_prefetch((void *)(prev << 1), 1 /* write */);
+#endif
+
// empty → skip(1)
// skip(n) → skip(n + 1)
// full(ptr) → full(ptr - 1)
@@ -409,13 +446,6 @@ static void ioqq_push_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t s
} while (size > 0);
}
-/** Pop an entry from the queue. */
-static struct ioq_ent *ioqq_pop(struct ioqq *ioqq, bool block) {
- size_t i = fetch_add(&ioqq->tail, 1, relaxed);
- ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask];
- return ioq_slot_pop(ioqq, slot, block);
-}
-
/** Pop a batch of entries from the queue. */
static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size, bool block) {
size_t mask = ioqq->slot_mask;
@@ -431,30 +461,77 @@ static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t si
#define IOQ_BATCH (FALSE_SHARING_SIZE / sizeof(ioq_slot))
/**
- * A batch of entries to send all at once.
+ * A batch of I/O queue entries.
*/
struct ioq_batch {
- /** The current batch size. */
- size_t size;
+ /** The start of the batch. */
+ size_t head;
+ /** The end of the batch. */
+ size_t tail;
/** The array of entries. */
struct ioq_ent *entries[IOQ_BATCH];
};
-/** Send the batch to a queue. */
+/** Reset a batch. */
+static void ioq_batch_reset(struct ioq_batch *batch) {
+ batch->head = batch->tail = 0;
+}
+
+/** Check if a batch is empty. */
+static bool ioq_batch_empty(const struct ioq_batch *batch) {
+ return batch->head >= batch->tail;
+}
+
+/** Send a batch to a queue. */
static void ioq_batch_flush(struct ioqq *ioqq, struct ioq_batch *batch) {
- if (batch->size > 0) {
- ioqq_push_batch(ioqq, batch->entries, batch->size);
- batch->size = 0;
+ if (batch->tail > 0) {
+ ioqq_push_batch(ioqq, batch->entries, batch->tail);
+ ioq_batch_reset(batch);
}
}
-/** An an entry to a batch, flushing if necessary. */
+/** Push an entry to a batch, flushing if necessary. */
static void ioq_batch_push(struct ioqq *ioqq, struct ioq_batch *batch, struct ioq_ent *ent) {
- if (batch->size >= IOQ_BATCH) {
+ batch->entries[batch->tail++] = ent;
+
+ if (batch->tail >= IOQ_BATCH) {
ioq_batch_flush(ioqq, batch);
}
+}
+
+/** Fill a batch from a queue. */
+static bool ioq_batch_fill(struct ioqq *ioqq, struct ioq_batch *batch, bool block) {
+ ioqq_pop_batch(ioqq, batch->entries, IOQ_BATCH, block);
+
+ ioq_batch_reset(batch);
+ for (size_t i = 0; i < IOQ_BATCH; ++i) {
+ struct ioq_ent *ent = batch->entries[i];
+ if (ent) {
+ batch->entries[batch->tail++] = ent;
+ }
+ }
+
+ return batch->tail > 0;
+}
+
+/** Pop an entry from a batch, filling it first if necessary. */
+static struct ioq_ent *ioq_batch_pop(struct ioqq *ioqq, struct ioq_batch *batch, bool block) {
+ if (ioq_batch_empty(batch)) {
+ // For non-blocking pops, make sure that each ioq_batch_pop()
+ // corresponds to a single (amortized) increment of ioqq->head.
+ // Otherwise, we start skipping many slots and batching ends up
+ // degrading performance.
+ if (!block && batch->head < IOQ_BATCH) {
+ ++batch->head;
+ return NULL;
+ }
+
+ if (!ioq_batch_fill(ioqq, batch, block)) {
+ return NULL;
+ }
+ }
- batch->entries[batch->size++] = ent;
+ return batch->entries[batch->head++];
}
/** Sentinel stop command. */
@@ -503,11 +580,16 @@ struct ioq {
struct arena xbufs;
#endif
- /** Pending I/O requests. */
+ /** Pending I/O request queue. */
struct ioqq *pending;
- /** Ready I/O responses. */
+ /** Ready I/O response queue. */
struct ioqq *ready;
+ /** Pending request batch. */
+ struct ioq_batch pending_batch;
+ /** Ready request batch. */
+ struct ioq_batch ready_batch;
+
/** The number of background threads. */
size_t nthreads;
/** The background threads themselves. */
@@ -532,6 +614,14 @@ static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) {
/** Dispatch a single request synchronously. */
static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) {
switch (ent->op) {
+ case IOQ_NOP:
+ if (ent->nop.type == IOQ_NOP_HEAVY) {
+ // A fast, no-op syscall
+ getppid();
+ }
+ ent->result = 0;
+ return;
+
case IOQ_CLOSE:
ent->result = try(xclose(ent->close.fd));
return;
@@ -580,23 +670,161 @@ struct ioq_ring_state {
struct ioq_batch ready;
};
+/** Reap a single CQE. */
+static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
+ struct ioq *ioq = state->ioq;
+
+ struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
+ ent->result = cqe->res;
+
+ if (ent->result < 0) {
+ goto push;
+ }
+
+ switch (ent->op) {
+ case IOQ_OPENDIR: {
+ int fd = ent->result;
+ if (ioq_check_cancel(ioq, ent)) {
+ xclose(fd);
+ goto push;
+ }
+
+ struct ioq_opendir *args = &ent->opendir;
+ ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
+ if (ent->result >= 0) {
+ // TODO: io_uring_prep_getdents()
+ bfs_polldir(args->dir);
+ } else {
+ xclose(fd);
+ }
+
+ break;
+ }
+
+#if BFS_USE_STATX
+ case IOQ_STAT: {
+ struct ioq_stat *args = &ent->stat;
+ ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+ break;
+ }
+#endif
+
+ default:
+ break;
+ }
+
+push:
+ ioq_batch_push(ioq->ready, &state->ready, ent);
+}
+
+/** Wait for submitted requests to complete. */
+static void ioq_ring_drain(struct ioq_ring_state *state, size_t wait_nr) {
+ struct ioq *ioq = state->ioq;
+ struct io_uring *ring = state->ring;
+
+ bfs_assert(wait_nr <= state->submitted);
+
+ while (state->submitted > 0) {
+ struct io_uring_cqe *cqe;
+ if (wait_nr > 0) {
+ io_uring_wait_cqes(ring, &cqe, wait_nr, NULL, NULL);
+ }
+
+ unsigned int head;
+ size_t seen = 0;
+ io_uring_for_each_cqe (ring, head, cqe) {
+ ioq_reap_cqe(state, cqe);
+ ++seen;
+ }
+
+ io_uring_cq_advance(ring, seen);
+ state->submitted -= seen;
+
+ if (seen >= wait_nr) {
+ break;
+ }
+ wait_nr -= seen;
+ }
+
+ ioq_batch_flush(ioq->ready, &state->ready);
+}
+
+/** Submit prepped SQEs, and wait for some to complete. */
+static void ioq_ring_submit(struct ioq_ring_state *state) {
+ struct io_uring *ring = state->ring;
+
+ size_t unreaped = state->prepped + state->submitted;
+ size_t wait_nr = 0;
+
+ if (state->prepped == 0 && unreaped > 0) {
+ // If we have no new SQEs, wait for at least one old one to
+ // complete, to avoid livelock
+ wait_nr = 1;
+ }
+
+ if (unreaped > ring->sq.ring_entries) {
+ // Keep the completion queue below half full
+ wait_nr = unreaped - ring->sq.ring_entries;
+ }
+
+ // Submit all prepped SQEs
+ while (state->prepped > 0) {
+ int ret = io_uring_submit_and_wait(state->ring, wait_nr);
+ if (ret <= 0) {
+ continue;
+ }
+
+ state->submitted += ret;
+ state->prepped -= ret;
+ if (state->prepped > 0) {
+ // In the unlikely event of a short submission, any SQE
+ // links will be broken. Wait for all SQEs to complete
+ // to preserve any ordering requirements.
+ ioq_ring_drain(state, state->submitted);
+ wait_nr = 0;
+ }
+ }
+
+ // Drain all the CQEs we waited for (and any others that are ready)
+ ioq_ring_drain(state, wait_nr);
+}
+
+/** Reserve space for a number of SQEs, submitting if necessary. */
+static void ioq_reserve_sqes(struct ioq_ring_state *state, unsigned int count) {
+ while (io_uring_sq_space_left(state->ring) < count) {
+ ioq_ring_submit(state);
+ }
+}
+
+/** Get an SQE, submitting if necessary. */
+static struct io_uring_sqe *ioq_get_sqe(struct ioq_ring_state *state) {
+ ioq_reserve_sqes(state, 1);
+ return io_uring_get_sqe(state->ring);
+}
+
/** Dispatch a single request asynchronously. */
static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, struct ioq_ent *ent) {
- struct io_uring *ring = state->ring;
enum ioq_ring_ops ops = state->ops;
struct io_uring_sqe *sqe = NULL;
switch (ent->op) {
+ case IOQ_NOP:
+ if (ent->nop.type == IOQ_NOP_HEAVY) {
+ sqe = ioq_get_sqe(state);
+ io_uring_prep_nop(sqe);
+ }
+ return sqe;
+
case IOQ_CLOSE:
if (ops & IOQ_RING_CLOSE) {
- sqe = io_uring_get_sqe(ring);
+ sqe = ioq_get_sqe(state);
io_uring_prep_close(sqe, ent->close.fd);
}
return sqe;
case IOQ_OPENDIR:
if (ops & IOQ_RING_OPENAT) {
- sqe = io_uring_get_sqe(ring);
+ sqe = ioq_get_sqe(state);
struct ioq_opendir *args = &ent->opendir;
int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY;
io_uring_prep_openat(sqe, args->dfd, args->path, flags, 0);
@@ -606,7 +834,7 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
case IOQ_CLOSEDIR:
#if BFS_USE_UNWRAPDIR
if (ops & IOQ_RING_CLOSE) {
- sqe = io_uring_get_sqe(ring);
+ sqe = ioq_get_sqe(state);
io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir));
}
#endif
@@ -615,10 +843,10 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
case IOQ_STAT:
#if BFS_USE_STATX
if (ops & IOQ_RING_STATX) {
- sqe = io_uring_get_sqe(ring);
+ sqe = ioq_get_sqe(state);
struct ioq_stat *args = &ent->stat;
int flags = bfs_statx_flags(args->flags);
- unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+ unsigned int mask = bfs_statx_mask();
io_uring_prep_statx(sqe, args->dfd, args->path, flags, mask, args->xbuf);
}
#endif
@@ -631,7 +859,7 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
/** Check if ioq_ring_reap() has work to do. */
static bool ioq_ring_empty(struct ioq_ring_state *state) {
- return !state->prepped && !state->submitted && !state->ready.size;
+ return !state->prepped && !state->submitted && ioq_batch_empty(&state->ready);
}
/** Prep a single SQE. */
@@ -659,121 +887,52 @@ static bool ioq_ring_prep(struct ioq_ring_state *state) {
}
struct ioq *ioq = state->ioq;
- struct io_uring *ring = state->ring;
- struct ioq_ent *pending[IOQ_BATCH];
-
- while (io_uring_sq_space_left(ring) >= IOQ_BATCH) {
- bool block = ioq_ring_empty(state);
- ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, block);
-
- bool any = false;
- for (size_t i = 0; i < IOQ_BATCH; ++i) {
- struct ioq_ent *ent = pending[i];
- if (ent == &IOQ_STOP) {
- ioqq_push(ioq->pending, &IOQ_STOP);
- state->stop = true;
- goto done;
- } else if (ent) {
- ioq_prep_sqe(state, ent);
- any = true;
- }
- }
-
- if (!any) {
- break;
- }
- }
-
-done:
- return !ioq_ring_empty(state);
-}
-
-/** Reap a single CQE. */
-static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
- struct ioq *ioq = state->ioq;
- struct io_uring *ring = state->ring;
-
- struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
- ent->result = cqe->res;
- io_uring_cqe_seen(ring, cqe);
- --state->submitted;
-
- if (ent->result < 0) {
- goto push;
- }
- switch (ent->op) {
- case IOQ_OPENDIR: {
- int fd = ent->result;
- if (ioq_check_cancel(ioq, ent)) {
- xclose(fd);
- goto push;
- }
-
- struct ioq_opendir *args = &ent->opendir;
- ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
- if (ent->result >= 0) {
- // TODO: io_uring_prep_getdents()
- bfs_polldir(args->dir);
- } else {
- xclose(fd);
- }
+ struct ioq_batch pending;
+ ioq_batch_reset(&pending);
+ while (true) {
+ bool block = ioq_ring_empty(state);
+ struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, block);
+ if (ent == &IOQ_STOP) {
+ ioqq_push(ioq->pending, ent);
+ state->stop = true;
break;
- }
-
-#if BFS_USE_STATX
- case IOQ_STAT: {
- struct ioq_stat *args = &ent->stat;
- ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+ } else if (ent) {
+ ioq_prep_sqe(state, ent);
+ } else {
break;
}
-#endif
-
- default:
- break;
}
-push:
- ioq_batch_push(ioq->ready, &state->ready, ent);
+ bfs_assert(ioq_batch_empty(&pending));
+ return !ioq_ring_empty(state);
}
-/** Reap a batch of CQEs. */
-static void ioq_ring_reap(struct ioq_ring_state *state) {
- struct ioq *ioq = state->ioq;
- struct io_uring *ring = state->ring;
+/** io_uring worker loop. */
+static int ioq_ring_work(struct ioq_thread *thread) {
+ struct io_uring *ring = &thread->ring;
- while (state->prepped) {
- int ret = io_uring_submit_and_wait(ring, 1);
- if (ret > 0) {
- state->prepped -= ret;
- state->submitted += ret;
+#ifdef IORING_SETUP_R_DISABLED
+ if (ring->flags & IORING_SETUP_R_DISABLED) {
+ if (io_uring_enable_rings(ring) != 0) {
+ return -1;
}
}
+#endif
- while (state->submitted) {
- struct io_uring_cqe *cqe;
- if (io_uring_wait_cqe(ring, &cqe) < 0) {
- continue;
- }
-
- ioq_reap_cqe(state, cqe);
- }
-
- ioq_batch_flush(ioq->ready, &state->ready);
-}
-
-/** io_uring worker loop. */
-static void ioq_ring_work(struct ioq_thread *thread) {
struct ioq_ring_state state = {
.ioq = thread->parent,
- .ring = &thread->ring,
+ .ring = ring,
.ops = thread->ring_ops,
};
while (ioq_ring_prep(&state)) {
- ioq_ring_reap(&state);
+ ioq_ring_submit(&state);
}
+
+ ioq_ring_drain(&state, state.submitted);
+ return 0;
}
#endif // BFS_WITH_LIBURING
@@ -782,30 +941,29 @@ static void ioq_ring_work(struct ioq_thread *thread) {
static void ioq_sync_work(struct ioq_thread *thread) {
struct ioq *ioq = thread->parent;
- bool stop = false;
- while (!stop) {
- struct ioq_ent *pending[IOQ_BATCH];
- ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, true);
-
- struct ioq_batch ready;
- ready.size = 0;
-
- for (size_t i = 0; i < IOQ_BATCH; ++i) {
- struct ioq_ent *ent = pending[i];
- if (ent == &IOQ_STOP) {
- ioqq_push(ioq->pending, &IOQ_STOP);
- stop = true;
- break;
- } else if (ent) {
- if (!ioq_check_cancel(ioq, ent)) {
- ioq_dispatch_sync(ioq, ent);
- }
- ioq_batch_push(ioq->ready, &ready, ent);
- }
+ struct ioq_batch pending, ready;
+ ioq_batch_reset(&pending);
+ ioq_batch_reset(&ready);
+
+ while (true) {
+ if (ioq_batch_empty(&pending)) {
+ ioq_batch_flush(ioq->ready, &ready);
+ }
+
+ struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, true);
+ if (ent == &IOQ_STOP) {
+ ioqq_push(ioq->pending, ent);
+ break;
}
- ioq_batch_flush(ioq->ready, &ready);
+ if (!ioq_check_cancel(ioq, ent)) {
+ ioq_dispatch_sync(ioq, ent);
+ }
+ ioq_batch_push(ioq->ready, &ready, ent);
}
+
+ bfs_assert(ioq_batch_empty(&pending));
+ ioq_batch_flush(ioq->ready, &ready);
}
/** Background thread entry point. */
@@ -814,8 +972,9 @@ static void *ioq_work(void *ptr) {
#if BFS_WITH_LIBURING
if (thread->ring_err == 0) {
- ioq_ring_work(thread);
- return NULL;
+ if (ioq_ring_work(thread) == 0) {
+ return NULL;
+ }
}
#endif
@@ -823,6 +982,27 @@ static void *ioq_work(void *ptr) {
return NULL;
}
+#if BFS_WITH_LIBURING
+/** Test whether some io_uring setup flags are supported. */
+static bool ioq_ring_probe_flags(struct io_uring_params *params, unsigned int flags) {
+ unsigned int saved = params->flags;
+ params->flags |= flags;
+
+ struct io_uring ring;
+ int ret = io_uring_queue_init_params(2, &ring, params);
+ if (ret == 0) {
+ io_uring_queue_exit(&ring);
+ }
+
+ if (ret == -EINVAL) {
+ params->flags = saved;
+ return false;
+ }
+
+ return true;
+}
+#endif
+
/** Initialize io_uring thread state. */
static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
#if BFS_WITH_LIBURING
@@ -836,11 +1016,31 @@ static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
return -1;
}
- // Share io-wq workers between rings
struct io_uring_params params = {0};
+
if (prev) {
- params.flags |= IORING_SETUP_ATTACH_WQ;
+ // Share io-wq workers between rings
+ params.flags = prev->ring.flags | IORING_SETUP_ATTACH_WQ;
params.wq_fd = prev->ring.ring_fd;
+ } else {
+#ifdef IORING_SETUP_SUBMIT_ALL
+ // Don't abort submission just because an inline request fails
+ ioq_ring_probe_flags(&params, IORING_SETUP_SUBMIT_ALL);
+#endif
+
+#ifdef IORING_SETUP_R_DISABLED
+ // Don't enable the ring yet (needed for SINGLE_ISSUER)
+ if (ioq_ring_probe_flags(&params, IORING_SETUP_R_DISABLED)) {
+# ifdef IORING_SETUP_SINGLE_ISSUER
+ // Allow optimizations assuming only one task submits SQEs
+ ioq_ring_probe_flags(&params, IORING_SETUP_SINGLE_ISSUER);
+# endif
+# ifdef IORING_SETUP_DEFER_TASKRUN
+ // Don't interrupt us aggresively with completion events
+ ioq_ring_probe_flags(&params, IORING_SETUP_DEFER_TASKRUN);
+# endif
+ }
+#endif
}
// Use a page for each SQE ring
@@ -902,7 +1102,8 @@ static void ioq_ring_exit(struct ioq_thread *thread) {
}
/** Create an I/O queue thread. */
-static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
+static int ioq_thread_create(struct ioq *ioq, size_t i) {
+ struct ioq_thread *thread = &ioq->threads[i];
thread->parent = ioq;
ioq_ring_init(ioq, thread);
@@ -912,6 +1113,11 @@ static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
return -1;
}
+ char name[16];
+ if (snprintf(name, sizeof(name), "ioq-%zu", i) >= 0) {
+ thread_setname(thread->id, name);
+ }
+
return 0;
}
@@ -946,7 +1152,7 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) {
ioq->nthreads = nthreads;
for (size_t i = 0; i < nthreads; ++i) {
- if (ioq_thread_create(ioq, &ioq->threads[i]) != 0) {
+ if (ioq_thread_create(ioq, i) != 0) {
ioq->nthreads = i;
goto fail;
}
@@ -988,6 +1194,18 @@ static struct ioq_ent *ioq_request(struct ioq *ioq, enum ioq_op op, void *ptr) {
return ent;
}
+int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr) {
+ struct ioq_ent *ent = ioq_request(ioq, IOQ_NOP, ptr);
+ if (!ent) {
+ return -1;
+ }
+
+ ent->nop.type = type;
+
+ ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
+ return 0;
+}
+
int ioq_close(struct ioq *ioq, int fd, void *ptr) {
struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSE, ptr);
if (!ent) {
@@ -996,7 +1214,7 @@ int ioq_close(struct ioq *ioq, int fd, void *ptr) {
ent->close.fd = fd;
- ioqq_push(ioq->pending, ent);
+ ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
return 0;
}
@@ -1012,7 +1230,7 @@ int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path,
args->path = path;
args->flags = flags;
- ioqq_push(ioq->pending, ent);
+ ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
return 0;
}
@@ -1024,7 +1242,7 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr) {
ent->closedir.dir = dir;
- ioqq_push(ioq->pending, ent);
+ ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
return 0;
}
@@ -1048,16 +1266,23 @@ int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags fla
}
#endif
- ioqq_push(ioq->pending, ent);
+ ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
return 0;
}
+void ioq_submit(struct ioq *ioq) {
+ ioq_batch_flush(ioq->pending, &ioq->pending_batch);
+}
+
struct ioq_ent *ioq_pop(struct ioq *ioq, bool block) {
+ // Don't forget to submit before popping
+ bfs_assert(ioq_batch_empty(&ioq->pending_batch));
+
if (ioq->size == 0) {
return NULL;
}
- return ioqq_pop(ioq->ready, block);
+ return ioq_batch_pop(ioq->ready, &ioq->ready_batch, block);
}
void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
@@ -1075,7 +1300,8 @@ void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
void ioq_cancel(struct ioq *ioq) {
if (!exchange(&ioq->cancel, true, relaxed)) {
- ioqq_push(ioq->pending, &IOQ_STOP);
+ ioq_batch_push(ioq->pending, &ioq->pending_batch, &IOQ_STOP);
+ ioq_submit(ioq);
}
}
diff --git a/src/ioq.h b/src/ioq.h
index cb14be4..5eaa066 100644
--- a/src/ioq.h
+++ b/src/ioq.h
@@ -8,6 +8,7 @@
#ifndef BFS_IOQ_H
#define BFS_IOQ_H
+#include "bfs.h"
#include "dir.h"
#include "stat.h"
@@ -22,6 +23,8 @@ struct ioq;
* I/O queue operations.
*/
enum ioq_op {
+ /** ioq_nop(). */
+ IOQ_NOP,
/** ioq_close(). */
IOQ_CLOSE,
/** ioq_opendir(). */
@@ -33,18 +36,21 @@ enum ioq_op {
};
/**
- * The I/O queue implementation needs two tag bits in each pointer to a struct
- * ioq_ent, so we need to ensure at least 4-byte alignment. The natural
- * alignment is enough on most architectures, but not m68k, so over-align it.
+ * ioq_nop() types.
*/
-#define IOQ_ENT_ALIGN alignas(4)
+enum ioq_nop_type {
+ /** A lightweight nop that avoids syscalls. */
+ IOQ_NOP_LIGHT,
+ /** A heavyweight nop that involves a syscall. */
+ IOQ_NOP_HEAVY,
+};
/**
* An I/O queue entry.
*/
struct ioq_ent {
/** The I/O operation. */
- IOQ_ENT_ALIGN enum ioq_op op;
+ cache_align enum ioq_op op;
/** The return value (on success) or negative error code (on failure). */
int result;
@@ -54,6 +60,10 @@ struct ioq_ent {
/** Operation-specific arguments. */
union {
+ /** ioq_nop() args. */
+ struct ioq_nop {
+ enum ioq_nop_type type;
+ } nop;
/** ioq_close() args. */
struct ioq_close {
int fd;
@@ -98,6 +108,20 @@ struct ioq *ioq_create(size_t depth, size_t nthreads);
size_t ioq_capacity(const struct ioq *ioq);
/**
+ * A no-op, for benchmarking.
+ *
+ * @ioq
+ * The I/O queue.
+ * @type
+ * The type of operation to perform.
+ * @ptr
+ * An arbitrary pointer to associate with the request.
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr);
+
+/**
* Asynchronous close().
*
* @ioq
@@ -166,6 +190,11 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr);
int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr);
/**
+ * Submit any buffered requests.
+ */
+void ioq_submit(struct ioq *ioq);
+
+/**
* Pop a response from the queue.
*
* @ioq
diff --git a/src/list.h b/src/list.h
index 48f0d06..15c37a8 100644
--- a/src/list.h
+++ b/src/list.h
@@ -377,21 +377,21 @@
// Scratch variables for the type-safe SLIST_REMOVE() implementation.
// Not a pointer type due to https://github.com/llvm/llvm-project/issues/109718.
_maybe_unused
-static thread_local uintptr_t _slist_prev, _slist_next;
+static thread_local uintptr_t slist_prev_, slist_next_;
/** Suppress -Wunused-value. */
_maybe_unused
-static inline void *_slist_cast(uintptr_t ptr) {
+static inline void *slist_cast_(uintptr_t ptr) {
return (void *)ptr;
}
#define SLIST_REMOVE__(list, cursor, next) \
- (_slist_prev = (uintptr_t)(void *)*cursor, \
- _slist_next = (uintptr_t)(void *)(*cursor)->next, \
+ (slist_prev_ = (uintptr_t)(void *)*cursor, \
+ slist_next_ = (uintptr_t)(void *)(*cursor)->next, \
(*cursor)->next = NULL, \
- *cursor = (void *)_slist_next, \
+ *cursor = (void *)slist_next_, \
list->tail = *cursor ? list->tail : cursor, \
- _slist_cast(_slist_prev))
+ slist_cast_(slist_prev_))
/**
* Pop the head off a singly-linked list.
diff --git a/src/mtab.c b/src/mtab.c
index bf9fc53..40a9885 100644
--- a/src/mtab.c
+++ b/src/mtab.c
@@ -256,10 +256,7 @@ static int bfs_mtab_fill_types(struct bfs_mtab *mtab) {
continue;
}
- struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev));
- if (leaf) {
- leaf->value = mount->type;
- } else {
+ if (trie_set_mem(&mtab->types, &sb.mnt_id, sizeof(sb.mnt_id), mount->type) != 0) {
goto fail;
}
}
@@ -282,9 +279,9 @@ const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statb
}
}
- const struct trie_leaf *leaf = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev));
- if (leaf) {
- return leaf->value;
+ const char *type = trie_get_mem(&mtab->types, &statbuf->mnt_id, sizeof(statbuf->mnt_id));
+ if (type) {
+ return type;
} else {
return "unknown";
}
diff --git a/src/prelude.h b/src/prelude.h
index a0cc2a1..de89a6c 100644
--- a/src/prelude.h
+++ b/src/prelude.h
@@ -62,6 +62,11 @@
# define _POSIX_PTHREAD_SEMANTICS 1
#endif
+/** QNX extensions. */
+#if __QNX__
+# define _QNX_SOURCE 1
+#endif
+
// Get the convenience macros that became standard spellings in C23
#if __STDC_VERSION__ < 202311L
diff --git a/src/sanity.h b/src/sanity.h
index 3f6020b..be77eef 100644
--- a/src/sanity.h
+++ b/src/sanity.h
@@ -20,11 +20,6 @@
#define SANITIZE_CALL__(macro, ptr, size, ...) \
macro(ptr, size)
-/**
- * Squelch unused variable warnings when not sanitizing.
- */
-#define sanitize_ignore(ptr, size) ((void)(ptr), (void)(size))
-
#if __SANITIZE_ADDRESS__
# include <sanitizer/asan_interface.h>
@@ -42,9 +37,27 @@
*/
#define sanitize_free(...) SANITIZE_CALL(__asan_poison_memory_region, __VA_ARGS__)
+/**
+ * Adjust the size of an allocated region, for things like dynamic arrays.
+ *
+ * @ptr
+ * The memory region.
+ * @old
+ * The previous usable size of the region.
+ * @new
+ * The new usable size of the region.
+ * @cap
+ * The total allocated capacity of the region.
+ */
+static inline void sanitize_resize(const void *ptr, size_t old, size_t new, size_t cap) {
+ const char *beg = ptr;
+ __sanitizer_annotate_contiguous_container(beg, beg + cap, beg + old, beg + new);
+}
+
#else
-# define sanitize_alloc(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
-# define sanitize_free(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+# define sanitize_alloc(...) ((void)0)
+# define sanitize_free(...) ((void)0)
+# define sanitize_resize(ptr, old, new, cap) ((void)0)
#endif
#if __SANITIZE_MEMORY__
@@ -65,8 +78,8 @@
#define sanitize_uninit(...) SANITIZE_CALL(__msan_allocated_memory, __VA_ARGS__)
#else
-# define sanitize_init(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
-# define sanitize_uninit(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+# define sanitize_init(...) ((void)0)
+# define sanitize_uninit(...) ((void)0)
#endif
/**
diff --git a/src/sighook.c b/src/sighook.c
index 0cc81fa..a87bed5 100644
--- a/src/sighook.c
+++ b/src/sighook.c
@@ -32,6 +32,10 @@
#include <stdlib.h>
#include <unistd.h>
+#if __linux__
+# include <sys/syscall.h>
+#endif
+
// NetBSD opens a file descriptor for each sem_init()
#if defined(_POSIX_SEMAPHORES) && !__NetBSD__
# define BFS_POSIX_SEMAPHORES _POSIX_SEMAPHORES
@@ -245,62 +249,90 @@ static void *rcu_update(struct rcu *rcu, void *ptr) {
return rcu_decode(arc_wait(prev));
}
-struct sighook {
- /** The signal being hooked, or 0 for atsigexit(). */
- int sig;
- /** Signal hook flags. */
- enum sigflags flags;
- /** The function to call. */
- sighook_fn *fn;
- /** An argument to pass to the function. */
- void *arg;
-
- /** The RCU pointer to this hook. */
- struct rcu *self;
- /** The next hook in the list. */
- struct rcu next;
-};
-
/**
- * An RCU-protected linked list of signal hooks.
+ * An RCU-protected linked list.
*/
-struct siglist {
- /** The first hook in the list. */
+struct rcu_list {
+ /** The first node in the list. */
struct rcu head;
/** &last->next */
struct rcu *tail;
};
-/** Initialize a siglist. */
-static void siglist_init(struct siglist *list) {
+/**
+ * An rcu_list node.
+ */
+struct rcu_node {
+ /** The RCU pointer to this node. */
+ struct rcu *self;
+ /** The next node in the list. */
+ struct rcu next;
+};
+
+/** Initialize an rcu_list. */
+static void rcu_list_init(struct rcu_list *list) {
rcu_init(&list->head, NULL);
list->tail = &list->head;
}
-/** Append a hook to a linked list. */
-static void sigpush(struct siglist *list, struct sighook *hook) {
- hook->self = list->tail;
- list->tail = &hook->next;
- rcu_init(&hook->next, NULL);
- rcu_update(hook->self, hook);
+/** Append to an rcu_list. */
+static void rcu_list_append(struct rcu_list *list, struct rcu_node *node) {
+ node->self = list->tail;
+ list->tail = &node->next;
+ rcu_init(&node->next, NULL);
+ rcu_update(node->self, node);
}
-/** Remove a hook from the linked list. */
-static void sigpop(struct siglist *list, struct sighook *hook) {
- struct sighook *next = rcu_peek(&hook->next);
- rcu_update(hook->self, next);
+/** Remove from an rcu_list. */
+static void rcu_list_remove(struct rcu_list *list, struct rcu_node *node) {
+ struct rcu_node *next = rcu_peek(&node->next);
+ rcu_update(node->self, next);
if (next) {
- next->self = hook->self;
+ next->self = node->self;
} else {
list->tail = &list->head;
}
+ rcu_destroy(&node->next);
}
+/**
+ * Iterate over an rcu_list.
+ *
+ * It is save to `break` out of this loop, but `return` or `goto` will lead to
+ * a missed arc_put().
+ */
+#define for_rcu(type, node, list) \
+ for_rcu_(type, node, (list), node##_slot_, node##_prev_, node##_done_)
+
+#define for_rcu_(type, node, list, slot, prev, done) \
+ for (struct arc *slot, *prev, **done = NULL; !done; arc_put(slot), done = &slot) \
+ for (type *node = rcu_read(&list->head, &slot); \
+ node; \
+ prev = slot, \
+ node = rcu_read(&((struct rcu_node *)node)->next, &slot), \
+ arc_put(prev))
+
+struct sighook {
+ /** The RCU list node (must be the first field). */
+ struct rcu_node node;
+
+ /** The signal being hooked, or 0 for atsigexit(). */
+ int sig;
+ /** Signal hook flags. */
+ enum sigflags flags;
+ /** The function to call. */
+ sighook_fn *fn;
+ /** An argument to pass to the function. */
+ void *arg;
+ /** Flag for SH_ONESHOT. */
+ atomic bool armed;
+};
+
/** The lists of signal hooks. */
-static struct siglist sighooks[64];
+static struct rcu_list sighooks[64];
/** Get the hook list for a particular signal. */
-static struct siglist *siglist(int sig) {
+static struct rcu_list *siglist(int sig) {
return &sighooks[sig % countof(sighooks)];
}
@@ -336,22 +368,31 @@ static const int FATAL_SIGNALS[] = {
SIGHUP,
SIGILL,
SIGINT,
+#ifdef SIGIO
+ SIGIO,
+#endif
SIGPIPE,
- SIGQUIT,
- SIGSEGV,
- SIGTERM,
- SIGUSR1,
- SIGUSR2,
#ifdef SIGPOLL
SIGPOLL,
#endif
#ifdef SIGPROF
SIGPROF,
#endif
+#ifdef SIGPWR
+ SIGPWR,
+#endif
+ SIGQUIT,
+ SIGSEGV,
+#ifdef SIGSTKFLT
+ SIGSTKFLT,
+#endif
#ifdef SIGSYS
SIGSYS,
#endif
+ SIGTERM,
SIGTRAP,
+ SIGUSR1,
+ SIGUSR2,
#ifdef SIGVTALRM
SIGVTALRM,
#endif
@@ -383,7 +424,9 @@ static bool is_fatal(int sig) {
/** Reraise a fatal signal. */
_noreturn
-static void reraise(int sig) {
+static void reraise(siginfo_t *info) {
+ int sig = info->si_signo;
+
// Restore the default signal action
if (signal(sig, SIG_DFL) == SIG_ERR) {
goto fail;
@@ -397,42 +440,70 @@ static void reraise(int sig) {
goto fail;
}
+#if __linux__
+ // On Linux, try to re-raise the exact siginfo_t (since 3.9, a process can
+ // signal itself with any siginfo_t)
+ pid_t tid = syscall(SYS_gettid);
+ syscall(SYS_rt_tgsigqueueinfo, getpid(), tid, sig, info);
+#endif
+
raise(sig);
fail:
abort();
}
+/** Check whether we should run a hook. */
+static bool should_run(int sig, struct sighook *hook) {
+ if (hook->sig != sig && hook->sig != 0) {
+ return false;
+ }
+
+ if (hook->flags & SH_ONESHOT) {
+ if (!exchange(&hook->armed, false, relaxed)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
/** Find any matching hooks and run them. */
-static enum sigflags run_hooks(struct siglist *list, int sig, siginfo_t *info) {
+static enum sigflags run_hooks(struct rcu_list *list, int sig, siginfo_t *info) {
enum sigflags ret = 0;
- struct arc *slot = NULL;
- struct sighook *hook = rcu_read(&list->head, &slot);
- while (hook) {
- if (hook->sig == sig || hook->sig == 0) {
+ for_rcu (struct sighook, hook, list) {
+ if (should_run(sig, hook)) {
hook->fn(sig, info, hook->arg);
ret |= hook->flags;
}
-
- struct arc *prev = slot;
- hook = rcu_read(&hook->next, &slot);
- arc_put(prev);
}
- arc_put(slot);
return ret;
}
/** Dispatches a signal to the registered handlers. */
static void sigdispatch(int sig, siginfo_t *info, void *context) {
- // https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_03_03
+ // If we get a fault (e.g. a "real" SIGSEGV, not something like
+ // kill(..., SIGSEGV)), don't try to run signal hooks, since we could be
+ // in an arbitrarily corrupted state.
//
- // The behavior of a process is undefined after it returns normally
- // from a signal-catching function for a SIGBUS, SIGFPE, SIGILL, or
- // SIGSEGV signal that was not generated by kill(), sigqueue(), or
- // raise().
+ // POSIX says that returning normally from a signal handler for a fault
+ // is undefined. But in practice, it's better to uninstall the handler
+ // and return, which will re-run the faulting instruction and cause us
+ // to die "correctly" (e.g. with a core dump pointing at the faulting
+ // instruction, not reraise()).
if (is_fault(info)) {
- reraise(sig);
+ // On macOS, we cannot reliably distinguish between faults and
+ // asynchronous signals. For example, pkill -SEGV bfs will
+ // result in si_code == SEGV_ACCERR. So we always re-raise the
+ // signal, because just returning would cause us to ignore
+ // asynchronous SIG{BUS,ILL,SEGV}.
+#if !__APPLE__
+ if (signal(sig, SIG_DFL) != SIG_ERR) {
+ return;
+ }
+#endif
+ reraise(info);
}
// https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_04
@@ -445,40 +516,58 @@ static void sigdispatch(int sig, siginfo_t *info, void *context) {
int error = errno;
// Run the normal hooks
- struct siglist *list = siglist(sig);
+ struct rcu_list *list = siglist(sig);
enum sigflags flags = run_hooks(list, sig, info);
// Run the atsigexit() hooks, if we're exiting
if (!(flags & SH_CONTINUE) && is_fatal(sig)) {
list = siglist(0);
run_hooks(list, sig, info);
- reraise(sig);
+ reraise(info);
}
errno = error;
}
+/** A saved signal handler, for sigreset() to restore. */
+struct sigsave {
+ struct rcu_node node;
+ int sig;
+ struct sigaction action;
+};
+
+/** The list of saved signal handlers. */
+static struct rcu_list saved;
+/** `saved` initialization status (since rcu_list_init() isn't atomic). */
+static atomic bool initialized = false;
+
/** Make sure our signal handler is installed for a given signal. */
static int siginit(int sig) {
+#ifdef SA_RESTART
+# define BFS_SA_RESTART SA_RESTART
+#else
+# define BFS_SA_RESTART 0
+#endif
+
static struct sigaction action = {
.sa_sigaction = sigdispatch,
- .sa_flags = SA_RESTART | SA_SIGINFO,
+ .sa_flags = BFS_SA_RESTART | SA_SIGINFO,
};
static sigset_t signals;
- static bool initialized = false;
- if (!initialized) {
+ if (!load(&initialized, relaxed)) {
if (sigemptyset(&signals) != 0
|| sigemptyset(&action.sa_mask) != 0) {
return -1;
}
for (size_t i = 0; i < countof(sighooks); ++i) {
- siglist_init(&sighooks[i]);
+ rcu_list_init(&sighooks[i]);
}
- initialized = true;
+ rcu_list_init(&saved);
+ store(&initialized, true, release);
}
int installed = sigismember(&signals, sig);
@@ -488,14 +577,32 @@ static int siginit(int sig) {
return 0;
}
- if (sigaction(sig, &action, NULL) != 0) {
+ sigset_t updated = signals;
+ if (sigaddset(&updated, sig) != 0) {
+ return -1;
+ }
+
+ struct sigaction original;
+ if (sigaction(sig, NULL, &original) != 0) {
return -1;
}
- if (sigaddset(&signals, sig) != 0) {
+ struct sigsave *save = ALLOC(struct sigsave);
+ if (!save) {
return -1;
}
+ save->sig = sig;
+ save->action = original;
+ rcu_list_append(&saved, &save->node);
+
+ if (sigaction(sig, &action, NULL) != 0) {
+ rcu_list_remove(&saved, &save->node);
+ free(save);
+ return -1;
+ }
+
+ signals = updated;
return 0;
}
@@ -510,9 +617,10 @@ static struct sighook *sighook_impl(int sig, sighook_fn *fn, void *arg, enum sig
hook->flags = flags;
hook->fn = fn;
hook->arg = arg;
+ atomic_init(&hook->armed, true);
- struct siglist *list = siglist(sig);
- sigpush(list, hook);
+ struct rcu_list *list = siglist(sig);
+ rcu_list_append(list, &hook->node);
return hook;
}
@@ -558,11 +666,27 @@ void sigunhook(struct sighook *hook) {
mutex_lock(&sigmutex);
- struct siglist *list = siglist(hook->sig);
- sigpop(list, hook);
+ struct rcu_list *list = siglist(hook->sig);
+ rcu_list_remove(list, &hook->node);
mutex_unlock(&sigmutex);
- rcu_destroy(&hook->next);
free(hook);
}
+
+int sigreset(void) {
+ if (!load(&initialized, acquire)) {
+ return 0;
+ }
+
+ int ret = 0;
+
+ for_rcu (struct sigsave, save, &saved) {
+ if (sigaction(save->sig, &save->action, NULL) != 0) {
+ ret = -1;
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/src/sighook.h b/src/sighook.h
index 3bece21..7149229 100644
--- a/src/sighook.h
+++ b/src/sighook.h
@@ -21,6 +21,8 @@ struct sighook;
enum sigflags {
/** Suppress the default action for this signal. */
SH_CONTINUE = 1 << 0,
+ /** Only run this hook once. */
+ SH_ONESHOT = 1 << 1,
};
/**
@@ -70,4 +72,12 @@ struct sighook *atsigexit(sighook_fn *fn, void *arg);
*/
void sigunhook(struct sighook *hook);
+/**
+ * Restore all signal handlers to their original dispositions (e.g. after fork()).
+ *
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int sigreset(void);
+
#endif // BFS_SIGHOOK_H
diff --git a/src/stat.c b/src/stat.c
index e99e711..1fcfde3 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -51,6 +51,8 @@ const char *bfs_stat_field_name(enum bfs_stat_field field) {
return "change time";
case BFS_STAT_MTIME:
return "modification time";
+ case BFS_STAT_MNT_ID:
+ return "mount ID";
}
bfs_bug("Unrecognized stat field %d", (int)field);
@@ -101,6 +103,10 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) {
dest->rdev = src->st_rdev;
dest->mask |= BFS_STAT_RDEV;
+ // No mount IDs in regular stat(), so use the dev_t as an approximation
+ dest->mnt_id = dest->dev;
+ dest->mask |= BFS_STAT_MNT_ID;
+
#if BFS_HAS_ST_FLAGS
dest->attrs = src->st_flags;
dest->mask |= BFS_STAT_ATTRS;
@@ -169,6 +175,17 @@ int bfs_statx_flags(enum bfs_stat_flags flags) {
return ret;
}
+unsigned int bfs_statx_mask(void) {
+ unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+#ifdef STATX_MNT_ID
+ mask |= STATX_MNT_ID;
+#endif
+#ifdef STATX_MNT_ID_UNIQUE
+ mask |= STATX_MNT_ID_UNIQUE;
+#endif
+ return mask;
+}
+
int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
// Callers shouldn't have to check anything except the times
const unsigned int guaranteed = STATX_BASIC_STATS & ~(STATX_ATIME | STATX_CTIME | STATX_MTIME);
@@ -209,6 +226,18 @@ int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
dest->attrs = src->stx_attributes;
dest->mask |= BFS_STAT_ATTRS;
+ dest->mnt_id = dest->dev;
+#ifdef STATX_MNT_ID
+ unsigned int mnt_mask = STATX_MNT_ID;
+# ifdef STATX_MNT_ID_UNIQUE
+ mnt_mask |= STATX_MNT_ID_UNIQUE;
+# endif
+ if (src->stx_mask & mnt_mask) {
+ dest->mnt_id = src->stx_mnt_id;
+ }
+#endif
+ dest->mask |= BFS_STAT_MNT_ID;
+
if (src->stx_mask & STATX_ATIME) {
dest->atime.tv_sec = src->stx_atime.tv_sec;
dest->atime.tv_nsec = src->stx_atime.tv_nsec;
@@ -240,7 +269,7 @@ int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
* bfs_stat() implementation backed by statx().
*/
static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) {
- unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+ unsigned int mask = bfs_statx_mask();
struct statx xbuf;
int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf);
if (ret != 0) {
diff --git a/src/stat.h b/src/stat.h
index 50c91de..c4a63d3 100644
--- a/src/stat.h
+++ b/src/stat.h
@@ -14,6 +14,7 @@
#include "bfs.h"
+#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
@@ -56,6 +57,7 @@ enum bfs_stat_field {
BFS_STAT_BTIME = 1 << 11,
BFS_STAT_CTIME = 1 << 12,
BFS_STAT_MTIME = 1 << 13,
+ BFS_STAT_MNT_ID = 1 << 14,
};
/**
@@ -102,6 +104,8 @@ struct bfs_stat {
blkcnt_t blocks;
/** The device ID represented by this file. */
dev_t rdev;
+ /** The ID of the mount point containing this file. */
+ uint64_t mnt_id;
/** Attributes/flags set on the file. */
unsigned long long attrs;
@@ -150,6 +154,11 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src);
int bfs_statx_flags(enum bfs_stat_flags flags);
/**
+ * Get the default statx() mask.
+ */
+unsigned int bfs_statx_mask(void);
+
+/**
* Convert struct statx to struct bfs_stat.
*/
int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src);
diff --git a/src/thread.c b/src/thread.c
index d61ff8c..b3604f8 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -9,6 +9,10 @@
#include <errno.h>
#include <pthread.h>
+#if __has_include(<pthread_np.h>)
+# include <pthread_np.h>
+#endif
+
#define THREAD_FALLIBLE(expr) \
do { \
int err = expr; \
@@ -32,6 +36,14 @@ int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn,
THREAD_FALLIBLE(pthread_create(thread, attr, fn, arg));
}
+void thread_setname(pthread_t thread, const char *name) {
+#if BFS_HAS_PTHREAD_SETNAME_NP
+ pthread_setname_np(thread, name);
+#elif BFS_HAS_PTHREAD_SET_NAME_NP
+ pthread_set_name_np(thread, name);
+#endif
+}
+
void thread_join(pthread_t thread, void **ret) {
THREAD_INFALLIBLE(pthread_join(thread, ret));
}
diff --git a/src/thread.h b/src/thread.h
index 7d12468..3dd8422 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -22,6 +22,11 @@ typedef void *thread_fn(void *arg);
int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg);
/**
+ * Set the name of a thread.
+ */
+void thread_setname(pthread_t thread, const char *name);
+
+/**
* Wrapper for pthread_join().
*/
void thread_join(pthread_t thread, void **ret);
diff --git a/src/trie.c b/src/trie.c
index a7498d4..c4bf4ba 100644
--- a/src/trie.c
+++ b/src/trie.c
@@ -173,16 +173,16 @@ void trie_init(struct trie *trie) {
/** Extract the nibble at a certain offset from a byte sequence. */
static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) {
const unsigned char *bytes = key;
- size_t byte = offset >> 1;
+ size_t byte = offset / 2;
bfs_assert(byte < length);
// A branchless version of
// if (offset & 1) {
- // return bytes[byte] >> 4;
- // } else {
// return bytes[byte] & 0xF;
+ // } else {
+ // return bytes[byte] >> 4;
// }
- unsigned int shift = (offset & 1) << 2;
+ unsigned int shift = 4 * ((offset + 1) % 2);
return (bytes[byte] >> shift) & 0xF;
}
@@ -191,6 +191,12 @@ static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offse
return trie_key_nibble(leaf->key, leaf->length, offset);
}
+/** Get the number of children of an internal node. */
+_trie_clones
+static unsigned int trie_node_size(const struct trie_node *node) {
+ return count_ones((unsigned int)node->bitmap);
+}
+
/**
* Finds a leaf in the trie that matches the key at every branch. If the key
* exists in the trie, the representative will match the searched key. But
@@ -202,19 +208,20 @@ _trie_clones
static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
uintptr_t ptr = trie->root;
- size_t offset = 0;
+ size_t offset = 0, limit = 2 * length;
while (trie_is_node(ptr)) {
struct trie_node *node = trie_decode_node(ptr);
offset += node->offset;
unsigned int index = 0;
- if ((offset >> 1) < length) {
+ if (offset < limit) {
unsigned char nibble = trie_key_nibble(key, length, offset);
unsigned int bit = 1U << nibble;
- // bits = bitmap & bit ? bitmap & (bit - 1) : 0
- unsigned int mask = -!!(node->bitmap & bit);
- unsigned int bits = node->bitmap & (bit - 1) & mask;
- index = count_ones(bits);
+ unsigned int map = node->bitmap;
+ unsigned int bits = map & (bit - 1);
+ unsigned int mask = -!!(map & bit);
+ // index = (map & bit) ? count_ones(bits) : 0;
+ index = count_ones(bits) & mask;
}
ptr = node->children[index];
}
@@ -240,6 +247,16 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t
return trie_find_mem_impl(trie, key, length);
}
+void *trie_get_str(const struct trie *trie, const char *key) {
+ const struct trie_leaf *leaf = trie_find_str(trie, key);
+ return leaf ? leaf->value : NULL;
+}
+
+void *trie_get_mem(const struct trie *trie, const void *key, size_t length) {
+ const struct trie_leaf *leaf = trie_find_mem(trie, key, length);
+ return leaf ? leaf->value : NULL;
+}
+
_trie_clones
static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) {
size_t length = strlen(key);
@@ -296,18 +313,18 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch
size_t skip = 0;
size_t length = strlen(key) + 1;
- size_t offset = 0;
+ size_t offset = 0, limit = 2 * length;
while (trie_is_node(ptr)) {
struct trie_node *node = trie_decode_node(ptr);
offset += node->offset;
- if ((offset >> 1) >= length) {
+ if (offset >= limit) {
return best;
}
struct trie_leaf *leaf = trie_terminal_leaf(node);
if (trie_check_prefix(leaf, skip, key, length)) {
best = leaf;
- skip = offset >> 1;
+ skip = offset / 2;
}
unsigned char nibble = trie_key_nibble(key, length, offset);
@@ -370,16 +387,10 @@ static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *
/** Free a node. */
static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) {
- bfs_assert(size == (size_t)count_ones(node->bitmap));
+ bfs_assert(size == trie_node_size(node));
varena_free(&trie->nodes, node, size);
}
-#if ENDIAN_NATIVE == ENDIAN_LITTLE
-# define TRIE_BSWAP(n) (n)
-#elif ENDIAN_NATIVE == ENDIAN_BIG
-# define TRIE_BSWAP(n) bswap(n)
-#endif
-
/** Find the offset of the first nibble that differs between two keys. */
static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) {
if (!rep) {
@@ -393,32 +404,32 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
const char *rep_bytes = rep->key;
const char *key_bytes = key;
- size_t i = 0;
- for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) {
- size_t rep_chunk, key_chunk;
- memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk));
- memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk));
-
- if (rep_chunk != key_chunk) {
-#ifdef TRIE_BSWAP
- size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk);
- i *= 2;
- i += trailing_zeros(diff) / 4;
- return i;
-#else
- break;
-#endif
- }
+ size_t ret = 0, i = 0;
+
+#define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n)
+#define CHUNK_(type, load8) \
+ while (length - i >= sizeof(type)) { \
+ type rep_chunk = load8(rep_bytes + i); \
+ type key_chunk = load8(key_bytes + i); \
+ type diff = rep_chunk ^ key_chunk; \
+ ret += leading_zeros(diff) / 4; \
+ if (diff) { \
+ return ret; \
+ } \
+ i += sizeof(type); \
}
- for (; i < length; ++i) {
- unsigned char diff = rep_bytes[i] ^ key_bytes[i];
- if (diff) {
- return 2 * i + !(diff & 0xF);
- }
- }
+#if SIZE_WIDTH >= 64
+ CHUNK(64);
+#endif
+ CHUNK(32);
+ CHUNK(16);
+ CHUNK(8);
+
+#undef CHUNK_
+#undef CHUNK
- return 2 * i;
+ return ret;
}
/**
@@ -446,7 +457,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
_trie_clones
static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) {
struct trie_node *node = trie_decode_node(*ptr);
- unsigned int size = count_ones(node->bitmap);
+ unsigned int size = trie_node_size(node);
// Double the capacity every power of two
if (has_single_bit(size)) {
@@ -626,6 +637,26 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len
return trie_insert_mem_impl(trie, key, length);
}
+int trie_set_str(struct trie *trie, const char *key, const void *value) {
+ struct trie_leaf *leaf = trie_insert_str(trie, key);
+ if (leaf) {
+ leaf->value = (void *)value;
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
+int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value) {
+ struct trie_leaf *leaf = trie_insert_mem(trie, key, length);
+ if (leaf) {
+ leaf->value = (void *)value;
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
/** Free a chain of singleton nodes. */
static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
while (trie_is_node(ptr)) {
@@ -711,7 +742,7 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
struct trie_node *node = trie_decode_node(*parent);
trie_free_singletons(trie, node->children[child_index]);
- unsigned int parent_size = count_ones(node->bitmap);
+ unsigned int parent_size = trie_node_size(node);
bfs_assert(parent_size > 1);
if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) {
return;
diff --git a/src/trie.h b/src/trie.h
index 318a23b..d8cecab 100644
--- a/src/trie.h
+++ b/src/trie.h
@@ -70,6 +70,32 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length);
/**
+ * Get the value associated with a string key.
+ *
+ * @trie
+ * The trie to search.
+ * @key
+ * The key to look up.
+ * @return
+ * The found value, or NULL if the key is not present.
+ */
+void *trie_get_str(const struct trie *trie, const char *key);
+
+/**
+ * Get the value associated with a fixed-size key.
+ *
+ * @trie
+ * The trie to search.
+ * @key
+ * The key to look up.
+ * @length
+ * The length of the key in bytes.
+ * @return
+ * The found value, or NULL if the key is not present.
+ */
+void *trie_get_mem(const struct trie *trie, const void *key, size_t length);
+
+/**
* Find the shortest leaf that starts with a given key.
*
* @trie
@@ -120,6 +146,36 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key);
struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length);
/**
+ * Set the value for a string key.
+ *
+ * @trie
+ * The trie to modify.
+ * @key
+ * The key to insert.
+ * @value
+ * The value to set.
+ * @return
+ * 0 on success, -1 on error.
+ */
+int trie_set_str(struct trie *trie, const char *key, const void *value);
+
+/**
+ * Set the value for a fixed-size key.
+ *
+ * @trie
+ * The trie to modify.
+ * @key
+ * The key to insert.
+ * @length
+ * The length of the key in bytes.
+ * @value
+ * The value to set.
+ * @return
+ * 0 on success, -1 on error.
+ */
+int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value);
+
+/**
* Remove a leaf from a trie.
*
* @trie
diff --git a/src/xspawn.c b/src/xspawn.c
index b3eed79..3fa4e60 100644
--- a/src/xspawn.c
+++ b/src/xspawn.c
@@ -8,6 +8,7 @@
#include "bfstd.h"
#include "diag.h"
#include "list.h"
+#include "sighook.h"
#include <errno.h>
#include <fcntl.h>
@@ -535,7 +536,7 @@ static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs
/** Actually exec() the new process. */
_noreturn
-static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, int pipefd[2]) {
+static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, const sigset_t *mask, int pipefd[2]) {
xclose(pipefd[0]);
for_slist (const struct bfs_spawn_action, action, ctx) {
@@ -596,6 +597,18 @@ static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx
goto fail;
}
+ // Reset signal handlers to their original values before we unblock
+ // signals, so that handlers don't run in both the parent and the child
+ if (sigreset() != 0) {
+ goto fail;
+ }
+
+ // Restore the original signal mask for the child process
+ errno = pthread_sigmask(SIG_SETMASK, mask, NULL);
+ if (errno != 0) {
+ goto fail;
+ }
+
execve(res->exe, argv, envp);
fail:;
@@ -635,7 +648,7 @@ static pid_t bfs_fork_spawn(struct bfs_resolver *res, const struct bfs_spawn *ct
#endif
if (pid == 0) {
// Child
- bfs_spawn_exec(res, ctx, argv, envp, pipefd);
+ bfs_spawn_exec(res, ctx, argv, envp, &old_mask, pipefd);
}
// Restore the original signal mask
diff --git a/src/xtime.c b/src/xtime.c
index 49d7c36..6b8a141 100644
--- a/src/xtime.c
+++ b/src/xtime.c
@@ -354,6 +354,59 @@ error:
return -1;
}
+/** One nanosecond. */
+static const long NS = 1000L * 1000 * 1000;
+
+void timespec_add(struct timespec *lhs, const struct timespec *rhs) {
+ lhs->tv_sec += rhs->tv_sec;
+ lhs->tv_nsec += rhs->tv_nsec;
+ if (lhs->tv_nsec >= NS) {
+ lhs->tv_nsec -= NS;
+ lhs->tv_sec += 1;
+ }
+}
+
+void timespec_sub(struct timespec *lhs, const struct timespec *rhs) {
+ lhs->tv_sec -= rhs->tv_sec;
+ lhs->tv_nsec -= rhs->tv_nsec;
+ if (lhs->tv_nsec < 0) {
+ lhs->tv_nsec += NS;
+ lhs->tv_sec -= 1;
+ }
+}
+
+int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs) {
+ if (lhs->tv_sec < rhs->tv_sec) {
+ return -1;
+ } else if (lhs->tv_sec > rhs->tv_sec) {
+ return 1;
+ }
+
+ if (lhs->tv_nsec < rhs->tv_nsec) {
+ return -1;
+ } else if (lhs->tv_nsec > rhs->tv_nsec) {
+ return 1;
+ }
+
+ return 0;
+}
+
+void timespec_min(struct timespec *dest, const struct timespec *src) {
+ if (timespec_cmp(src, dest) < 0) {
+ *dest = *src;
+ }
+}
+
+void timespec_max(struct timespec *dest, const struct timespec *src) {
+ if (timespec_cmp(src, dest) > 0) {
+ *dest = *src;
+ }
+}
+
+double timespec_ns(const struct timespec *ts) {
+ return 1.0e9 * ts->tv_sec + ts->tv_nsec;
+}
+
#if defined(_POSIX_TIMERS) && BFS_HAS_TIMER_CREATE
# define BFS_POSIX_TIMERS _POSIX_TIMERS
#else
diff --git a/src/xtime.h b/src/xtime.h
index 2927a2e..b76fef2 100644
--- a/src/xtime.h
+++ b/src/xtime.h
@@ -47,6 +47,42 @@ int xtimegm(struct tm *tm, time_t *timep);
int xgetdate(const char *str, struct timespec *result);
/**
+ * Add to a timespec.
+ */
+void timespec_add(struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Subtract from a timespec.
+ */
+void timespec_sub(struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Compare two timespecs.
+ *
+ * @return
+ * An integer with the sign of (*lhs - *rhs).
+ */
+int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Update a minimum timespec.
+ */
+void timespec_min(struct timespec *dest, const struct timespec *src);
+
+/**
+ * Update a maximum timespec.
+ */
+void timespec_max(struct timespec *dest, const struct timespec *src);
+
+/**
+ * Convert a timespec to floating point.
+ *
+ * @return
+ * The value in nanoseconds.
+ */
+double timespec_ns(const struct timespec *ts);
+
+/**
* A timer.
*/
struct timer;