From cf197cada461d1d442458cbebdd2bb8ba314692e Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Mon, 25 Nov 2024 15:24:16 -0500 Subject: ioq: Prefetch pointers before popping them Also, cache-align struct ioq_ent to avoid false sharing when two workers are handling neighbouring requests. --- src/ioq.c | 8 ++++++++ src/ioq.h | 10 ++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/ioq.c b/src/ioq.c index 5668a83..017b6c1 100644 --- a/src/ioq.c +++ b/src/ioq.c @@ -356,6 +356,14 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) { uintptr_t prev = load(slot, relaxed); while (true) { +#if __has_builtin(__builtin_prefetch) + // Optimistically prefetch the pointer in this slot. If this + // slot is not full, this will prefetch an invalid address, but + // experimentally this is worth it on both Intel (Alder Lake) + // and AMD (Zen 2). + __builtin_prefetch((void *)(prev << 1)); +#endif + // empty → skip(1) // skip(n) → skip(n + 1) // full(ptr) → full(ptr - 1) diff --git a/src/ioq.h b/src/ioq.h index fce1d7f..da0a525 100644 --- a/src/ioq.h +++ b/src/ioq.h @@ -8,6 +8,7 @@ #ifndef BFS_IOQ_H #define BFS_IOQ_H +#include "bfs.h" #include "dir.h" #include "stat.h" @@ -44,19 +45,12 @@ enum ioq_nop_type { IOQ_NOP_HEAVY, }; -/** - * The I/O queue implementation needs two tag bits in each pointer to a struct - * ioq_ent, so we need to ensure at least 4-byte alignment. The natural - * alignment is enough on most architectures, but not m68k, so over-align it. - */ -#define IOQ_ENT_ALIGN alignas(4) - /** * An I/O queue entry. */ struct ioq_ent { /** The I/O operation. */ - IOQ_ENT_ALIGN enum ioq_op op; + cache_align enum ioq_op op; /** The return value (on success) or negative error code (on failure). */ int result; -- cgit v1.2.3