diff options
Diffstat (limited to 'src/thread_pool.cpp')
| -rw-r--r-- | src/thread_pool.cpp | 129 |
1 files changed, 88 insertions, 41 deletions
diff --git a/src/thread_pool.cpp b/src/thread_pool.cpp index 5dbbe37c4..62cca6de6 100644 --- a/src/thread_pool.cpp +++ b/src/thread_pool.cpp @@ -10,13 +10,18 @@ gb_internal void thread_pool_destroy(ThreadPool *pool); gb_internal bool thread_pool_add_task(ThreadPool *pool, WorkerTaskProc *proc, void *data); gb_internal void thread_pool_wait(ThreadPool *pool); +enum GrabState { + Grab_Success = 0, + Grab_Empty = 1, + Grab_Failed = 2, +}; + struct ThreadPool { - gbAllocator threads_allocator; - Slice<Thread> threads; + gbAllocator threads_allocator; + Slice<Thread> threads; std::atomic<bool> running; Futex tasks_available; - Futex tasks_left; }; @@ -46,7 +51,7 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) { for_array_off(i, 1, pool->threads) { Thread *t = &pool->threads[i]; - pool->tasks_available.fetch_add(1, std::memory_order_relaxed); + pool->tasks_available.fetch_add(1, std::memory_order_acquire); futex_broadcast(&pool->tasks_available); thread_join_and_destroy(t); } @@ -54,51 +59,86 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) { gb_free(pool->threads_allocator, pool->threads.data); } -void thread_pool_queue_push(Thread *thread, WorkerTask task) { - u64 capture; - u64 new_capture; - do { - capture = thread->head_and_tail.load(); - - u64 mask = thread->capacity - 1; - u64 head = (capture >> 32) & mask; - u64 tail = ((u32)capture) & mask; +TaskRingBuffer *task_ring_grow(TaskRingBuffer *ring, isize bottom, isize top) { + TaskRingBuffer *new_ring = task_ring_init(ring->size * 2); + for (isize i = top; i < bottom; i++) { + new_ring->buffer[i % new_ring->size] = ring->buffer[i % ring->size]; + } + return new_ring; +} - u64 new_head = (head + 1) & mask; - GB_ASSERT_MSG(new_head != tail, "Thread Queue Full!"); +void thread_pool_queue_push(Thread *thread, WorkerTask task) { + isize bot = thread->queue.bottom.load(std::memory_order_relaxed); + isize top = thread->queue.top.load(std::memory_order_acquire); + TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_relaxed); + + isize size = bot - top; + if (size > (cur_ring->size - 1)) { + // Queue is full + thread->queue.ring = task_ring_grow(thread->queue.ring, bot, top); + cur_ring = thread->queue.ring.load(std::memory_order_relaxed); + } - // This *must* be done in here, to avoid a potential race condition where we no longer own the slot by the time we're assigning - thread->queue[head] = task; - new_capture = (new_head << 32) | tail; - } while (!thread->head_and_tail.compare_exchange_weak(capture, new_capture)); + cur_ring->buffer[bot % cur_ring->size] = task; + std::atomic_thread_fence(std::memory_order_release); + thread->queue.bottom.store(bot + 1, std::memory_order_relaxed); thread->pool->tasks_left.fetch_add(1, std::memory_order_release); thread->pool->tasks_available.fetch_add(1, std::memory_order_relaxed); futex_broadcast(&thread->pool->tasks_available); } -bool thread_pool_queue_pop(Thread *thread, WorkerTask *task) { - u64 capture; - u64 new_capture; - do { - capture = thread->head_and_tail.load(std::memory_order_acquire); - - u64 mask = thread->capacity - 1; - u64 head = (capture >> 32) & mask; - u64 tail = ((u32)capture) & mask; +GrabState thread_pool_queue_take(Thread *thread, WorkerTask *task) { + isize bot = thread->queue.bottom.load(std::memory_order_relaxed) - 1; + TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_relaxed); + thread->queue.bottom.store(bot, std::memory_order_relaxed); + std::atomic_thread_fence(std::memory_order_seq_cst); + + isize top = thread->queue.top.load(std::memory_order_relaxed); + if (top <= bot) { + + // Queue is not empty + *task = cur_ring->buffer[bot % cur_ring->size]; + if (top == bot) { + // Only one entry left in queue + if (!thread->queue.top.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) { + // Race failed + thread->queue.bottom.store(bot + 1, std::memory_order_relaxed); + return Grab_Empty; + } - u64 new_tail = (tail + 1) & mask; - if (tail == head) { - return false; + thread->queue.bottom.store(bot + 1, std::memory_order_relaxed); + return Grab_Success; } - // Making a copy of the task before we increment the tail, avoiding the same potential race condition as above - *task = thread->queue[tail]; - - new_capture = (head << 32) | new_tail; - } while (!thread->head_and_tail.compare_exchange_weak(capture, new_capture, std::memory_order_release)); + // We got a task without hitting a race + return Grab_Success; + } else { + // Queue is empty + thread->queue.bottom.store(bot + 1, std::memory_order_relaxed); + return Grab_Empty; + } +} - return true; +GrabState thread_pool_queue_steal(Thread *thread, WorkerTask *task) { + isize top = thread->queue.top.load(std::memory_order_acquire); + std::atomic_thread_fence(std::memory_order_seq_cst); + isize bot = thread->queue.bottom.load(std::memory_order_acquire); + + GrabState ret = Grab_Empty; + if (top < bot) { + // Queue is not empty + TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_consume); + *task = cur_ring->buffer[top % cur_ring->size]; + + if (!thread->queue.top.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) { + // Race failed + ret = Grab_Failed; + } else { + ret = Grab_Success; + } + } + return ret; } gb_internal bool thread_pool_add_task(ThreadPool *pool, WorkerTaskProc *proc, void *data) { @@ -115,12 +155,11 @@ gb_internal void thread_pool_wait(ThreadPool *pool) { while (pool->tasks_left.load(std::memory_order_acquire)) { // if we've got tasks on our queue, run them - while (thread_pool_queue_pop(current_thread, &task)) { + while (!thread_pool_queue_take(current_thread, &task)) { task.do_work(task.data); pool->tasks_left.fetch_sub(1, std::memory_order_release); } - // is this mem-barriered enough? // This *must* be executed in this order, so the futex wakes immediately // if rem_tasks has changed since we checked last, otherwise the program @@ -145,7 +184,7 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) { usize finished_tasks = 0; i32 state; - while (thread_pool_queue_pop(current_thread, &task)) { + while (!thread_pool_queue_take(current_thread, &task)) { task.do_work(task.data); pool->tasks_left.fetch_sub(1, std::memory_order_release); @@ -167,7 +206,12 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) { Thread *thread = &pool->threads.data[idx]; WorkerTask task; - if (thread_pool_queue_pop(thread, &task)) { + + GrabState ret = thread_pool_queue_steal(thread, &task); + switch (ret) { + case Grab_Empty: + continue; + case Grab_Success: task.do_work(task.data); pool->tasks_left.fetch_sub(1, std::memory_order_release); @@ -175,6 +219,8 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) { futex_signal(&pool->tasks_left); } + /*fallthrough*/ + case Grab_Failed: goto main_loop_continue; } } @@ -182,6 +228,7 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) { // if we've done all our work, and there's nothing to steal, go to sleep state = pool->tasks_available.load(std::memory_order_acquire); + if (!pool->running) { break; } futex_wait(&pool->tasks_available, state); main_loop_continue:; |