diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2023-10-02 13:09:41 -0400 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2023-10-02 13:09:41 -0400 |
commit | 1c775d0128a797370bbc0bbd527b4bbbc9d0b83d (patch) | |
tree | dba15bad6c144786f462c0fed7ba727d4823b93e | |
parent | fed31013ebfa6b0c5165f015da5b96ce524224eb (diff) | |
parent | 1afa241472709b32baf5e3e1fd3ba6ebd5fd1bf6 (diff) | |
download | bfs-1c775d0128a797370bbc0bbd527b4bbbc9d0b83d.tar.xz |
Merge branch 'io-uring'
-rw-r--r-- | .github/workflows/ci.yml | 5 | ||||
-rw-r--r-- | .github/workflows/codecov.yml | 3 | ||||
-rw-r--r-- | .github/workflows/codeql.yml | 3 | ||||
-rw-r--r-- | GNUmakefile | 8 | ||||
-rw-r--r-- | README.md | 12 | ||||
-rw-r--r-- | docs/BUILDING.md | 2 | ||||
-rw-r--r-- | src/bftw.c | 23 | ||||
-rw-r--r-- | src/ioq.c | 307 |
8 files changed, 310 insertions, 53 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca1737c..971a4df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,9 +28,10 @@ jobs: libcap-dev \ libcap2:i386 \ libonig-dev \ - libonig5:i386 + libonig5:i386 \ + liburing-dev # Ubuntu doesn't let you install the -dev packages for both amd64 and - # i386 at once, so we make our own symlinks to fix -m32 -lacl -lattr -lcap + # i386 at once, so we make our own symlinks to fix -m32 -lacl -l... sudo ln -s libacl.so.1 /lib/i386-linux-gnu/libacl.so sudo ln -s libattr.so.1 /lib/i386-linux-gnu/libattr.so sudo ln -s libcap.so.2 /lib/i386-linux-gnu/libcap.so diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 783cc43..b06ea62 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -20,7 +20,8 @@ jobs: libattr1-dev \ libcap2-bin \ libcap-dev \ - libonig-dev + libonig-dev \ + liburing-dev - name: Generate coverage run: | diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 88e9f3f..3a2f81f 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -37,7 +37,8 @@ jobs: libattr1-dev \ libcap2-bin \ libcap-dev \ - libonig-dev + libonig-dev \ + liburing-dev - name: Initialize CodeQL uses: github/codeql-action/init@v2 diff --git a/GNUmakefile b/GNUmakefile index 6902979..8154240 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -126,6 +126,7 @@ ifndef NOLIBS USE_ACL := y USE_ATTR := y USE_LIBCAP := y +USE_LIBURING := y endif ifdef USE_ACL @@ -146,6 +147,11 @@ else LOCAL_CPPFLAGS += -DBFS_USE_SYS_CAPABILITY_H=0 endif +ifdef USE_LIBURING +LOCAL_CPPFLAGS += -DBFS_USE_LIBURING=1 +LOCAL_LDLIBS += -luring +endif + LOCAL_LDFLAGS += -Wl,--as-needed LOCAL_LDLIBS += -lrt endif # Linux @@ -287,7 +293,7 @@ ifneq ($(OS),Darwin) endif +$(MAKE) -B tsan ubsan check CC=clang $(DISTCHECK_FLAGS) ifeq ($(OS) $(ARCH),Linux x86_64) - +$(MAKE) -B check EXTRA_CFLAGS="-m32" ONIG_CONFIG= $(DISTCHECK_FLAGS) + +$(MAKE) -B check EXTRA_CFLAGS="-m32" ONIG_CONFIG= USE_LIBURING= $(DISTCHECK_FLAGS) endif +$(MAKE) -B release check $(DISTCHECK_FLAGS) +$(MAKE) -B check $(DISTCHECK_FLAGS) @@ -290,22 +290,22 @@ Here's how to install them on some common platforms: <pre> <strong>Alpine Linux</strong> -# apk add acl{,-dev} attr{,-dev} libcap{,-dev} oniguruma-dev +# apk add acl{,-dev} attr{,-dev} libcap{,-dev} liburing-dev oniguruma-dev <strong>Arch Linux</strong> -# pacman -S acl attr libcap oniguruma +# pacman -S acl attr libcap liburing oniguruma <strong>Debian/Ubuntu</strong> -# apt install acl libacl1-dev attr libattr1-dev libcap2-bin libcap-dev libonig-dev +# apt install acl libacl1-dev attr libattr1-dev libcap2-bin libcap-dev liburing-dev libonig-dev <strong>Fedora</strong> -# dnf install acl libacl-devel libattr-devel libcap-devel oniguruma-devel +# dnf install acl libacl-devel libattr-devel libcap-devel liburing-devel oniguruma-devel <strong>NixOS</strong> -# nix-env -i acl attr libcap oniguruma +# nix-env -i acl attr libcap liburing oniguruma <strong>Void Linux</strong> -# xbps-install -S acl-{devel,progs} attr-{devel,progs} libcap-{devel,progs} oniguruma-devel +# xbps-install -S acl-{devel,progs} attr-{devel,progs} libcap-{devel,progs} liburing-devel oniguruma-devel <strong>Homebrew</strong> $ brew install oniguruma diff --git a/docs/BUILDING.md b/docs/BUILDING.md index b19ef00..02f9756 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -75,11 +75,13 @@ These dependencies are optional, and can be turned off at build time if necessar | [acl] | Linux only | `USE_ACL` | | [attr] | Linux only | `USE_ATTR` | | [libcap] | Linux only | `USE_LIBCAP` | +| [liburing] | Linux only | `USE_LIBURING` | | [Oniguruma] | All | `USE_ONIGURUMA` | [acl]: https://savannah.nongnu.org/projects/acl [attr]: https://savannah.nongnu.org/projects/attr [libcap]: https://sites.google.com/site/fullycapable/ +[liburing]: https://github.com/axboe/liburing [Oniguruma]: https://github.com/kkos/oniguruma ### Dependency tracking @@ -470,21 +470,34 @@ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *arg state->error = 0; - if (args->nopenfd < 1) { + if (args->nopenfd < 2) { errno = EMFILE; return -1; } - bftw_cache_init(&state->cache, args->nopenfd); - state->nthreads = args->nthreads; - if (state->nthreads > 0) { - state->ioq = ioq_create(4096, state->nthreads); + size_t nopenfd = args->nopenfd; + size_t qdepth = 4096; + size_t nthreads = args->nthreads; + +#if BFS_USE_LIBURING + // io_uring uses one fd per ring, ioq uses one ring per thread + if (nthreads >= nopenfd - 1) { + nthreads = nopenfd - 2; + } + nopenfd -= nthreads; +#endif + + bftw_cache_init(&state->cache, nopenfd); + + if (nthreads > 0) { + state->ioq = ioq_create(qdepth, nthreads); if (!state->ioq) { return -1; } } else { state->ioq = NULL; } + state->nthreads = nthreads; SLIST_INIT(&state->to_open); SLIST_INIT(&state->to_read); @@ -16,6 +16,10 @@ #include <pthread.h> #include <stdlib.h> +#if BFS_USE_LIBURING +# include <liburing.h> +#endif + /** * A monitor for an I/O queue slot. */ @@ -280,6 +284,21 @@ static struct ioq_ent *ioqq_trypop(struct ioqq *ioqq) { /** Sentinel stop command. */ static struct ioq_ent IOQ_STOP; +/** I/O queue thread-specific data. */ +struct ioq_thread { + /** The thread handle. */ + pthread_t id; + /** Pointer back to the I/O queue. */ + struct ioq *parent; + +#if BFS_USE_LIBURING + /** io_uring instance. */ + struct io_uring ring; + /** Any error that occurred initializing the ring. */ + int ring_err; +#endif +}; + struct ioq { /** The depth of the queue. */ size_t depth; @@ -299,60 +318,247 @@ struct ioq { /** The number of background threads. */ size_t nthreads; /** The background threads themselves. */ - pthread_t threads[]; + struct ioq_thread threads[]; }; -/** Background thread entry point. */ -static void *ioq_work(void *ptr) { - struct ioq *ioq = ptr; +/** Cancel a request if we need to. */ +static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) { + if (!load(&ioq->cancel, relaxed)) { + return false; + } - while (true) { - struct ioq_ent *ent = ioqq_pop(ioq->pending); - if (ent == &IOQ_STOP) { - break; + // Always close(), even if we're cancelled, just like a real EINTR + if (ent->op == IOQ_CLOSE || ent->op == IOQ_CLOSEDIR) { + return false; + } + + ent->ret = -1; + ent->error = EINTR; + ioqq_push(ioq->ready, ent); + return true; +} + +/** Handle a single request synchronously. */ +static void ioq_handle(struct ioq *ioq, struct ioq_ent *ent) { + int ret; + + switch (ent->op) { + case IOQ_CLOSE: + ret = xclose(ent->close.fd); + break; + + case IOQ_OPENDIR: + ret = bfs_opendir(ent->opendir.dir, ent->opendir.dfd, ent->opendir.path); + if (ret == 0) { + bfs_polldir(ent->opendir.dir); } + break; + + case IOQ_CLOSEDIR: + ret = bfs_closedir(ent->closedir.dir); + break; + + default: + bfs_bug("Unknown ioq_op %d", (int)ent->op); + ret = -1; + errno = ENOSYS; + break; + } + + ent->ret = ret; + ent->error = ret == 0 ? 0 : errno; + + ioqq_push(ioq->ready, ent); +} - bool cancel = load(&ioq->cancel, relaxed); +#if BFS_USE_LIBURING +/** io_uring worker state. */ +struct ioq_ring_state { + /** The I/O queue. */ + struct ioq *ioq; + /** The io_uring. */ + struct io_uring *ring; + /** The current ioq->pending slot. */ + ioq_slot *slot; + /** Number of prepped, unsubmitted SQEs. */ + size_t prepped; + /** Number of submitted, unreaped SQEs. */ + size_t submitted; + /** Whether to stop the loop. */ + bool stop; +}; + +/** Pop a request for ioq_ring_prep(). */ +static struct ioq_ent *ioq_ring_pop(struct ioq_ring_state *state) { + if (state->stop) { + return NULL; + } + + // Advance to the next slot if necessary + struct ioq *ioq = state->ioq; + if (!state->slot) { + state->slot = ioqq_read(ioq->pending); + } + + // Block if we have nothing else to do + bool block = !state->prepped && !state->submitted; + struct ioq_ent *ret = ioq_slot_pop(ioq->pending, state->slot, block); + + if (ret) { + // Got an entry, move to the next slot next time + state->slot = NULL; + } + + if (ret == &IOQ_STOP) { + state->stop = true; + ret = NULL; + } + + return ret; +} - ent->ret = -1; +/** Prep a single SQE. */ +static void ioq_prep_sqe(struct io_uring_sqe *sqe, struct ioq_ent *ent) { + switch (ent->op) { + case IOQ_CLOSE: + io_uring_prep_close(sqe, ent->close.fd); + break; + + case IOQ_OPENDIR: + io_uring_prep_openat(sqe, ent->opendir.dfd, ent->opendir.path, O_RDONLY | O_CLOEXEC | O_DIRECTORY, 0); + break; + +#if BFS_USE_UNWRAPDIR + case IOQ_CLOSEDIR: + io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir)); + break; +#endif + + default: + bfs_bug("Unknown ioq_op %d", (int)ent->op); + io_uring_prep_nop(sqe); + break; + } - switch (ent->op) { - case IOQ_CLOSE: - // Always close(), even if we're cancelled, just like a real EINTR - ent->ret = xclose(ent->close.fd); + io_uring_sqe_set_data(sqe, ent); +} + +/** Prep a batch of SQEs. */ +static bool ioq_ring_prep(struct ioq_ring_state *state) { + struct ioq *ioq = state->ioq; + struct io_uring *ring = state->ring; + + while (io_uring_sq_space_left(ring)) { + struct ioq_ent *ent = ioq_ring_pop(state); + if (!ent) { break; + } + + if (ioq_check_cancel(ioq, ent)) { + continue; + } - case IOQ_OPENDIR: - if (!cancel) { - struct ioq_opendir *args = &ent->opendir; - ent->ret = bfs_opendir(args->dir, args->dfd, args->path); - if (ent->ret == 0) { - bfs_polldir(args->dir); - } +#if !BFS_USE_UNWRAPDIR + if (ent->op == IOQ_CLOSEDIR) { + ioq_handle(ioq, ent); + continue; + } +#endif + + struct io_uring_sqe *sqe = io_uring_get_sqe(ring); + ioq_prep_sqe(sqe, ent); + ++state->prepped; + } + + return state->prepped || state->submitted; +} + +/** Reap a batch of SQEs. */ +static void ioq_ring_reap(struct ioq_ring_state *state) { + struct ioq *ioq = state->ioq; + struct io_uring *ring = state->ring; + + while (state->prepped) { + int ret = io_uring_submit_and_wait(ring, 1); + if (ret > 0) { + state->prepped -= ret; + state->submitted += ret; + } + } + + while (state->submitted) { + struct io_uring_cqe *cqe; + if (io_uring_wait_cqe(ring, &cqe) < 0) { + continue; + } + + struct ioq_ent *ent = io_uring_cqe_get_data(cqe); + ent->ret = cqe->res >= 0 ? cqe->res : -1; + ent->error = cqe->res < 0 ? -cqe->res : 0; + io_uring_cqe_seen(ring, cqe); + --state->submitted; + + if (ent->op == IOQ_OPENDIR && ent->ret >= 0) { + int fd = ent->ret; + if (ioq_check_cancel(ioq, ent)) { + xclose(fd); + continue; } - break; - case IOQ_CLOSEDIR: - ent->ret = bfs_closedir(ent->closedir.dir); - break; + ent->ret = bfs_opendir(ent->opendir.dir, fd, NULL); + if (ent->ret == 0) { + // TODO: io_uring_prep_getdents() + bfs_polldir(ent->opendir.dir); + } else { + ent->error = errno; + } + } + + ioqq_push(ioq->ready, ent); + } +} + +/** io_uring worker loop. */ +static void ioq_ring_work(struct ioq_thread *thread) { + struct ioq_ring_state state = { + .ioq = thread->parent, + .ring = &thread->ring, + }; - default: - bfs_bug("Unknown ioq_op %d", (int)ent->op); - errno = ENOSYS; + while (ioq_ring_prep(&state)) { + ioq_ring_reap(&state); + } +} +#endif + +/** Synchronous syscall loop. */ +static void ioq_sync_work(struct ioq_thread *thread) { + struct ioq *ioq = thread->parent; + + while (true) { + struct ioq_ent *ent = ioqq_pop(ioq->pending); + if (ent == &IOQ_STOP) { break; } - if (cancel) { - ent->error = EINTR; - } else if (ent->ret < 0) { - ent->error = errno; - } else { - ent->error = 0; + if (!ioq_check_cancel(ioq, ent)) { + ioq_handle(ioq, ent); } + } +} - ioqq_push(ioq->ready, ent); +/** Background thread entry point. */ +static void *ioq_work(void *ptr) { + struct ioq_thread *thread = ptr; + +#if BFS_USE_LIBURING + if (thread->ring_err == 0) { + ioq_ring_work(thread); + return NULL; } +#endif + ioq_sync_work(thread); return NULL; } @@ -376,7 +582,30 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) { } for (size_t i = 0; i < nthreads; ++i) { - if (thread_create(&ioq->threads[i], NULL, ioq_work, ioq) != 0) { + struct ioq_thread *thread = &ioq->threads[i]; + thread->parent = ioq; + +#if BFS_USE_LIBURING + struct ioq_thread *prev = i ? &ioq->threads[i - 1] : NULL; + if (prev && prev->ring_err) { + thread->ring_err = prev->ring_err; + } else { + // Share io-wq workers between rings + struct io_uring_params params = {0}; + if (prev) { + params.flags |= IORING_SETUP_ATTACH_WQ; + params.wq_fd = prev->ring.ring_fd; + } + + size_t entries = depth / nthreads; + if (entries < 16) { + entries = 16; + } + thread->ring_err = -io_uring_queue_init_params(entries, &thread->ring, ¶ms); + } +#endif + + if (thread_create(&thread->id, NULL, ioq_work, thread) != 0) { goto fail; } ++ioq->nthreads; @@ -496,7 +725,11 @@ void ioq_destroy(struct ioq *ioq) { ioq_cancel(ioq); for (size_t i = 0; i < ioq->nthreads; ++i) { - thread_join(ioq->threads[i], NULL); + struct ioq_thread *thread = &ioq->threads[i]; + thread_join(thread->id, NULL); +#if BFS_USE_LIBURING + io_uring_queue_exit(&thread->ring); +#endif } ioqq_destroy(ioq->ready); |