diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2024-11-25 15:24:16 -0500 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2024-12-03 14:42:05 -0500 |
commit | cf197cada461d1d442458cbebdd2bb8ba314692e (patch) | |
tree | 6f7f2a94231abdc6b9944de9d737b256293b5c5b | |
parent | 3678c2ee7c11d67f4ea97c85d8564cd386a32bd1 (diff) | |
download | bfs-cf197cada461d1d442458cbebdd2bb8ba314692e.tar.xz |
ioq: Prefetch pointers before popping them
Also, cache-align struct ioq_ent to avoid false sharing when two workers
are handling neighbouring requests.
-rw-r--r-- | src/ioq.c | 8 | ||||
-rw-r--r-- | src/ioq.h | 10 |
2 files changed, 10 insertions, 8 deletions
--- a/src/ioq.c
+++ b/src/ioq.c
@@ -356,6 +356,14 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent
 static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) {
 	uintptr_t prev = load(slot, relaxed);
 	while (true) {
+#if __has_builtin(__builtin_prefetch)
+		// Optimistically prefetch the pointer in this slot.  If this
+		// slot is not full, this will prefetch an invalid address, but
+		// experimentally this is worth it on both Intel (Alder Lake)
+		// and AMD (Zen 2).
+		__builtin_prefetch((void *)(prev << 1));
+#endif
+
 		// empty     → skip(1)
 		// skip(n)   → skip(n + 1)
 		// full(ptr) → full(ptr - 1)
--- a/src/ioq.h
+++ b/src/ioq.h
@@ -8,6 +8,7 @@
 #ifndef BFS_IOQ_H
 #define BFS_IOQ_H

+#include "bfs.h"
 #include "dir.h"
 #include "stat.h"

@@ -45,18 +46,11 @@ enum ioq_nop_type {
 };

 /**
- * The I/O queue implementation needs two tag bits in each pointer to a struct
- * ioq_ent, so we need to ensure at least 4-byte alignment.  The natural
- * alignment is enough on most architectures, but not m68k, so over-align it.
- */
-#define IOQ_ENT_ALIGN alignas(4)
-
-/**
  * An I/O queue entry.
  */
 struct ioq_ent {
 	/** The I/O operation. */
-	IOQ_ENT_ALIGN enum ioq_op op;
+	cache_align enum ioq_op op;

 	/** The return value (on success) or negative error code (on failure). */
 	int result;