| field     | value                                            | date                      |
|-----------|--------------------------------------------------|---------------------------|
| author    | gingerBill <gingerBill@users.noreply.github.com> | 2025-09-24 08:20:26 +0100 |
| committer | GitHub <noreply@github.com>                      | 2025-09-24 08:20:26 +0100 |
| commit    | b9e90194d89995aa8ed1b1ee9b50c2f616240824 (patch) |                           |
| tree      | eb922461d61820e7b0229cb331c2b10cdee88fde         |                           |
| parent    | e9d20a9b4a069815f76a23ce5f429862b155b2d6 (diff)  |                           |
| parent    | 15b4b9277a58e0c10e4da698701fbf806d0c45b9 (diff)  |                           |
Merge pull request #5707 from janga-perlind/lucas/timing-stuff
Improve OSX threading performance
| -rw-r--r-- | src/thread_pool.cpp | 20 |
| -rw-r--r-- | src/threading.cpp   | 44 |
2 files changed, 32 insertions, 32 deletions
diff --git a/src/thread_pool.cpp b/src/thread_pool.cpp
index 8363a4553..ca6483fd9 100644
--- a/src/thread_pool.cpp
+++ b/src/thread_pool.cpp
@@ -19,6 +19,11 @@ enum GrabState {
 	Grab_Failed  = 2,
 };
 
+enum BroadcastWaitState {
+	Nobody_Waiting  = 0,
+	Someone_Waiting = 1,
+};
+
 struct ThreadPool {
 	gbAllocator   threads_allocator;
 	Slice<Thread> threads;
@@ -54,8 +59,8 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) {
 	for_array_off(i, 1, pool->threads) {
 		Thread *t = &pool->threads[i];
-		pool->tasks_available.fetch_add(1, std::memory_order_acquire);
-		futex_broadcast(&pool->tasks_available);
+		pool->tasks_available.store(Nobody_Waiting);
+		futex_broadcast(&t->pool->tasks_available);
 		thread_join_and_destroy(t);
 	}
@@ -87,8 +92,10 @@ void thread_pool_queue_push(Thread *thread, WorkerTask task) {
 	thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
 
 	thread->pool->tasks_left.fetch_add(1, std::memory_order_release);
-	thread->pool->tasks_available.fetch_add(1, std::memory_order_relaxed);
-	futex_broadcast(&thread->pool->tasks_available);
+	i32 state = Someone_Waiting;
+	if (thread->pool->tasks_available.compare_exchange_strong(state, Nobody_Waiting)) {
+		futex_broadcast(&thread->pool->tasks_available);
+	}
 }
 
 GrabState thread_pool_queue_take(Thread *thread, WorkerTask *task) {
@@ -230,12 +237,13 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
 		}
 
 		// if we've done all our work, and there's nothing to steal, go to sleep
-		state = pool->tasks_available.load(std::memory_order_acquire);
+		pool->tasks_available.store(Someone_Waiting);
 		if (!pool->running) {
 			break;
 		}
-		futex_wait(&pool->tasks_available, state);
+		futex_wait(&pool->tasks_available, Someone_Waiting);
 		main_loop_continue:;
 	}
 	return 0;
 }
+
diff --git a/src/threading.cpp b/src/threading.cpp
index b1a0af2e4..84f09912d 100644
--- a/src/threading.cpp
+++ b/src/threading.cpp
@@ -195,7 +195,13 @@ gb_internal void mutex_lock(RecursiveMutex *m) {
 			// inside the lock
 			return;
 		}
-		futex_wait(&m->owner, prev_owner);
+
+		// NOTE(lucas): we are doing spin lock since futex signal is expensive on OSX. The recursive locks are
+		// very short lived so we don't hit this mega often and I see no perform regression on windows (with
+		// a performance uplift on OSX).
+
+		//futex_wait(&m->owner, prev_owner);
+		yield_thread();
 	}
 }
 
 gb_internal bool mutex_try_lock(RecursiveMutex *m) {
@@ -216,7 +222,9 @@ gb_internal void mutex_unlock(RecursiveMutex *m) {
 		return;
 	}
 	m->owner.exchange(0, std::memory_order_release);
-	futex_signal(&m->owner);
+	// NOTE(lucas): see comment about spin lock in mutex_lock above
+
+	// futex_signal(&m->owner);
 	// outside the lock
 }
@@ -423,44 +431,28 @@ gb_internal void semaphore_wait(Semaphore *s) {
 }
 
 struct RwMutex {
-	BlockingMutex lock;
-	Condition     cond;
-	int32_t       readers;
+	// TODO(bill): make this a proper RW mutex
+	BlockingMutex mutex;
 };
 
 gb_internal void rw_mutex_lock(RwMutex *m) {
-	mutex_lock(&m->lock);
-	while (m->readers != 0) {
-		condition_wait(&m->cond, &m->lock);
-	}
+	mutex_lock(&m->mutex);
 }
 gb_internal bool rw_mutex_try_lock(RwMutex *m) {
-	// TODO(bill): rw_mutex_try_lock
-	rw_mutex_lock(m);
-	return true;
+	return mutex_try_lock(&m->mutex);
 }
 gb_internal void rw_mutex_unlock(RwMutex *m) {
-	condition_signal(&m->cond);
-	mutex_unlock(&m->lock);
+	mutex_unlock(&m->mutex);
 }
 
 gb_internal void rw_mutex_shared_lock(RwMutex *m) {
-	mutex_lock(&m->lock);
-	m->readers += 1;
-	mutex_unlock(&m->lock);
+	mutex_lock(&m->mutex);
 }
 gb_internal bool rw_mutex_try_shared_lock(RwMutex *m) {
-	// TODO(bill): rw_mutex_try_shared_lock
-	rw_mutex_shared_lock(m);
-	return true;
+	return mutex_try_lock(&m->mutex);
 }
 gb_internal void rw_mutex_shared_unlock(RwMutex *m) {
-	mutex_lock(&m->lock);
-	m->readers -= 1;
-	if (m->readers == 0) {
-		condition_signal(&m->cond);
-	}
-	mutex_unlock(&m->lock);
+	mutex_unlock(&m->mutex);
 }
 
 #endif
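The thread_pool.cpp hunks gate the futex wake behind a waiter flag: a worker publishes `Someone_Waiting` before it goes to sleep, and a producer only issues `futex_broadcast` if it successfully flips that flag back to `Nobody_Waiting`. Below is a minimal, self-contained sketch of the same pattern; it uses C++20 `std::atomic` wait/notify in place of the compiler's `futex_wait`/`futex_broadcast` helpers, and the `WakeGate`, `gate_notify`, and `gate_wait` names are illustrative rather than part of the Odin codebase.

```cpp
// Sketch of the wake-gating pattern from the thread_pool.cpp hunks above.
// Assumption: C++20 atomic wait/notify stands in for futex_wait/futex_broadcast.
#include <atomic>
#include <cstdint>

enum BroadcastWaitState : int32_t {
    Nobody_Waiting  = 0,
    Someone_Waiting = 1,
};

struct WakeGate {
    std::atomic<int32_t> state{Nobody_Waiting};
};

// Producer side: only pay for a wake-up when a sleeper has actually registered.
inline void gate_notify(WakeGate &g) {
    int32_t expected = Someone_Waiting;
    if (g.state.compare_exchange_strong(expected, Nobody_Waiting)) {
        g.state.notify_all(); // analogous to futex_broadcast(&pool->tasks_available)
    }
}

// Consumer side: advertise that we are about to sleep, then block while the
// state is still Someone_Waiting. If a producer already flipped it back, the
// wait returns immediately, so the wake-up is not lost.
inline void gate_wait(WakeGate &g) {
    g.state.store(Someone_Waiting);
    g.state.wait(Someone_Waiting); // analogous to futex_wait(&pool->tasks_available, Someone_Waiting)
}
```

The point of the compare-exchange on the push path is that uncontended pushes skip the wake-up call entirely; in the pool itself the caller still re-checks its queues after waking, exactly as the worker loop does around `main_loop_continue`.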
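The threading.cpp hunks turn the short-lived `RecursiveMutex` into a spin-with-yield lock: a contended acquirer yields and retries instead of sleeping on a futex, so unlock no longer needs `futex_signal`. A rough sketch of that shape follows; the struct, field, and function names here are stand-ins, and the real RecursiveMutex obtains thread IDs and yields through the compiler's own helpers.

```cpp
// Sketch of a spin-with-yield recursive lock in the shape left by this change.
// Assumption: current thread id is passed in by the caller; std::this_thread::yield()
// stands in for yield_thread().
#include <atomic>
#include <cstdint>
#include <thread>

struct SpinRecursiveMutex {
    std::atomic<int32_t> owner{0}; // 0 means unowned, otherwise an owner thread id
    int32_t recursion = 0;         // only touched by the owning thread
};

inline void spin_mutex_lock(SpinRecursiveMutex &m, int32_t tid) {
    for (;;) {
        int32_t prev_owner = 0;
        bool acquired = m.owner.compare_exchange_strong(prev_owner, tid,
                                                        std::memory_order_acquire,
                                                        std::memory_order_acquire);
        if (acquired || prev_owner == tid) {
            // first acquisition, or re-entrant acquisition by the same thread
            m.recursion += 1;
            return;
        }
        // Contended: yield and retry rather than futex_wait/futex_signal, since the
        // kernel wake-up is the expensive part on macOS and these locks are held briefly.
        std::this_thread::yield();
    }
}

inline void spin_mutex_unlock(SpinRecursiveMutex &m) {
    m.recursion -= 1;
    if (m.recursion == 0) {
        m.owner.exchange(0, std::memory_order_release);
        // no futex_signal: waiters are yielding in a loop rather than sleeping
    }
}
```

The trade-off is the usual one for spin locks: waiters burn scheduler slices instead of sleeping, which is only a win when hold times are short, as the NOTE in the diff states for these recursive locks.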