Diffstat (limited to 'src')
-rw-r--r--   src/build_settings.cpp        6
-rw-r--r--   src/check_decl.cpp            9
-rw-r--r--   src/check_expr.cpp           31
-rw-r--r--   src/check_type.cpp           37
-rw-r--r--   src/checker.cpp               3
-rw-r--r--   src/checker.hpp               9
-rw-r--r--   src/entity.cpp                2
-rw-r--r--   src/llvm_abi.cpp             14
-rw-r--r--   src/llvm_backend.cpp          1
-rw-r--r--   src/llvm_backend.hpp          9
-rw-r--r--   src/llvm_backend_proc.cpp    56
-rw-r--r--   src/main.cpp                 12
-rw-r--r--   src/parser.cpp                1
-rw-r--r--   src/parser.hpp                9
-rw-r--r--   src/thread_pool.cpp         129
-rw-r--r--   src/threading.cpp            39
-rw-r--r--   src/types.cpp                 3
17 files changed, 300 insertions, 70 deletions
diff --git a/src/build_settings.cpp b/src/build_settings.cpp
index 4d3e20a7a..32640d732 100644
--- a/src/build_settings.cpp
+++ b/src/build_settings.cpp
@@ -1649,7 +1649,11 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta
if (!bc->custom_optimization_level) {
// NOTE(bill): when building with `-debug` but not specifying an optimization level
// default to `-o:none` to improve the debug symbol generation by default
- bc->optimization_level = -1; // -o:none
+ if (bc->ODIN_DEBUG) {
+ bc->optimization_level = -1; // -o:none
+ } else {
+ bc->optimization_level = 0; // -o:minimal
+ }
}
bc->optimization_level = gb_clamp(bc->optimization_level, -1, 3);
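
The hunk above changes the implicit default: building with -debug and no explicit -o: level now selects -o:none, while every other build without an explicit level falls back to -o:minimal. A minimal standalone sketch of that defaulting rule, using illustrative names rather than the compiler's BuildContext fields:

    // Sketch only: custom_level/requested/odin_debug stand in for the
    // BuildContext fields used in the hunk above.
    static int default_optimization_level(bool custom_level, int requested, bool odin_debug) {
        if (!custom_level) {
            // -debug with no explicit -o: means -o:none (-1); otherwise -o:minimal (0)
            requested = odin_debug ? -1 : 0;
        }
        // mirrors gb_clamp(bc->optimization_level, -1, 3)
        if (requested < -1) requested = -1;
        if (requested >  3) requested =  3;
        return requested;
    }
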
diff --git a/src/check_decl.cpp b/src/check_decl.cpp
index 7d81d102d..6828774e4 100644
--- a/src/check_decl.cpp
+++ b/src/check_decl.cpp
@@ -1869,5 +1869,14 @@ gb_internal bool check_proc_body(CheckerContext *ctx_, Token token, DeclInfo *de
add_deps_from_child_to_parent(decl);
+ for (VariadicReuseData const &vr : decl->variadic_reuses) {
+ GB_ASSERT(vr.slice_type->kind == Type_Slice);
+ Type *elem = vr.slice_type->Slice.elem;
+ i64 size = type_size_of(elem);
+ i64 align = type_align_of(elem);
+ decl->variadic_reuse_max_bytes = gb_max(decl->variadic_reuse_max_bytes, size*vr.max_count);
+ decl->variadic_reuse_max_align = gb_max(decl->variadic_reuse_max_align, align);
+ }
+
return true;
}
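
For every slice type used as a variadic parameter inside the body, the loop above folds the largest argument count recorded by the checker into one per-procedure budget: the byte requirement per type is size_of(elem) * max_count, and the procedure keeps the maximum byte count and alignment over all such types. A small aggregation sketch with illustrative types:

    // e.g. one call site passing at most 8 eight-byte elements and another passing
    // at most 3 sixteen-byte elements needs max(8*8, 3*16) = 64 bytes of backing storage.
    struct ReuseEntry { long long elem_size, elem_align, max_count; };

    static void fold_reuse(long long &max_bytes, long long &max_align, ReuseEntry const &vr) {
        long long bytes = vr.elem_size * vr.max_count;
        if (bytes > max_bytes)         max_bytes = bytes;
        if (vr.elem_align > max_align) max_align = vr.elem_align;
    }
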
diff --git a/src/check_expr.cpp b/src/check_expr.cpp
index 12acca0cb..82f64738f 100644
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -6033,6 +6033,22 @@ gb_internal CallArgumentError check_call_arguments_internal(CheckerContext *c, A
Entity *vt = pt->params->Tuple.variables[pt->variadic_index];
o.type = vt->type;
+
+ // NOTE(bill, 2024-07-14): minimize the stack usage for variadic parameters with the backing array
+ if (c->decl) {
+ bool found = false;
+ for (auto &vr : c->decl->variadic_reuses) {
+ if (are_types_identical(vt->type, vr.slice_type)) {
+ vr.max_count = gb_max(vr.max_count, variadic_operands.count);
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ array_add(&c->decl->variadic_reuses, VariadicReuseData{vt->type, variadic_operands.count});
+ }
+ }
+
} else {
dummy_argument_count += 1;
o.type = t_untyped_nil;
@@ -7888,12 +7904,15 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c
// NOTE: Due to restrictions in LLVM you can not inline calls with a superset of features.
if (is_call_inlined) {
- GB_ASSERT(c->curr_proc_decl);
- GB_ASSERT(c->curr_proc_decl->entity);
- GB_ASSERT(c->curr_proc_decl->entity->type->kind == Type_Proc);
- String scope_features = c->curr_proc_decl->entity->type->Proc.enable_target_feature;
- if (!check_target_feature_is_superset_of(scope_features, pt->Proc.enable_target_feature, &invalid)) {
- error(call, "Inlined procedure enables target feature '%.*s', this requires the calling procedure to at least enable the same feature", LIT(invalid));
+ if (c->curr_proc_decl == nullptr) {
+ error(call, "Calling a '#force_inline' procedure that enables target features is not allowed at file scope");
+ } else {
+ GB_ASSERT(c->curr_proc_decl->entity);
+ GB_ASSERT(c->curr_proc_decl->entity->type->kind == Type_Proc);
+ String scope_features = c->curr_proc_decl->entity->type->Proc.enable_target_feature;
+ if (!check_target_feature_is_superset_of(scope_features, pt->Proc.enable_target_feature, &invalid)) {
+ error(call, "Inlined procedure enables target feature '%.*s', this requires the calling procedure to at least enable the same feature", LIT(invalid));
+ }
}
}
}
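
The first hunk records, per procedure declaration, the largest number of variadic operands seen for each variadic slice type, which the backend later uses to size a shared backing array. The second hunk replaces an assertion with a proper diagnostic when a '#force_inline' call that enables target features occurs outside any procedure; the rule being enforced is that the caller's enabled feature set must be a superset of the inlined callee's. A self-contained sketch of such a superset check over comma-separated feature strings (illustrative only, not check_target_feature_is_superset_of itself):

    #include <sstream>
    #include <string>
    #include <unordered_set>

    // Returns true when every feature enabled by the callee is also enabled by the caller.
    static bool features_are_superset(std::string const &caller, std::string const &callee,
                                      std::string *first_missing) {
        std::unordered_set<std::string> have;
        std::stringstream cs(caller);
        for (std::string f; std::getline(cs, f, ','); ) have.insert(f);
        std::stringstream ns(callee);
        for (std::string f; std::getline(ns, f, ','); ) {
            if (!f.empty() && have.find(f) == have.end()) {
                if (first_missing) *first_missing = f; // reported in the error above
                return false;
            }
        }
        return true;
    }
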
diff --git a/src/check_type.cpp b/src/check_type.cpp
index dd8559114..fea937e4e 100644
--- a/src/check_type.cpp
+++ b/src/check_type.cpp
@@ -1953,6 +1953,10 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
error(name, "'#by_ptr' can only be applied to variable fields");
p->flags &= ~FieldFlag_by_ptr;
}
+ if (p->flags&FieldFlag_no_capture) {
+ error(name, "'#no_capture' can only be applied to variable fields");
+ p->flags &= ~FieldFlag_no_capture;
+ }
param = alloc_entity_type_name(scope, name->Ident.token, type, EntityState_Resolved);
param->TypeName.is_type_alias = true;
@@ -2054,6 +2058,28 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
p->flags &= ~FieldFlag_by_ptr; // Remove the flag
}
}
+ if (p->flags&FieldFlag_no_capture) {
+ if (is_variadic && variadic_index == variables.count) {
+ if (p->flags & FieldFlag_c_vararg) {
+ error(name, "'#no_capture' cannot be applied to a #c_vararg parameter");
+ p->flags &= ~FieldFlag_no_capture;
+ } else {
+ error(name, "'#no_capture' is already implied on all variadic parameters");
+ }
+ } else if (is_type_polymorphic(type)) {
+ // ignore
+ } else {
+ if (is_type_internally_pointer_like(type)) {
+ error(name, "'#no_capture' is currently reserved for future use");
+ } else {
+ ERROR_BLOCK();
+ error(name, "'#no_capture' can only be applied to pointer-like types");
+ error_line("\t'#no_capture' does not currently do anything useful\n");
+ p->flags &= ~FieldFlag_no_capture;
+ }
+ }
+ }
+
if (is_poly_name) {
if (p->flags&FieldFlag_no_alias) {
@@ -2072,6 +2098,11 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
error(name, "'#by_ptr' can only be applied to variable fields");
p->flags &= ~FieldFlag_by_ptr;
}
+ if (p->flags&FieldFlag_no_capture) {
+ error(name, "'#no_capture' can only be applied to variable fields");
+ p->flags &= ~FieldFlag_no_capture;
+ }
+
if (!is_type_polymorphic(type) && check_constant_parameter_value(type, params[i])) {
// failed
@@ -2091,6 +2122,8 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
param->flags |= EntityFlag_Ellipsis;
if (is_c_vararg) {
param->flags |= EntityFlag_CVarArg;
+ } else {
+ param->flags |= EntityFlag_NoCapture;
}
}
@@ -2115,6 +2148,10 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para
if (p->flags&FieldFlag_by_ptr) {
param->flags |= EntityFlag_ByPtr;
}
+ if (p->flags&FieldFlag_no_capture) {
+ param->flags |= EntityFlag_NoCapture;
+ }
+
param->state = EntityState_Resolved; // NOTE(bill): This should have been resolved whilst determining it
add_entity(ctx, scope, name, param);
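
Taken together, these hunks give '#no_capture' the following behaviour: it is rejected on type and constant fields, rejected on '#c_vararg' parameters, reported as redundant on ordinary variadic parameters (which now get EntityFlag_NoCapture implicitly), ignored on polymorphic types until instantiation, and otherwise accepted only on pointer-like types, where it is still reported as reserved for future use. A condensed decision sketch with illustrative names:

    enum class NoCaptureVerdict { Implied, Ignored, ReservedPointerLike, Rejected };

    // Mirrors the branch structure above; not the checker's real types or flags.
    static NoCaptureVerdict classify_no_capture(bool is_variadic, bool is_c_vararg,
                                                bool is_polymorphic, bool is_pointer_like) {
        if (is_variadic)     return is_c_vararg ? NoCaptureVerdict::Rejected   // error on #c_vararg
                                                : NoCaptureVerdict::Implied;   // error: already implied
        if (is_polymorphic)  return NoCaptureVerdict::Ignored;                 // decided after instantiation
        if (is_pointer_like) return NoCaptureVerdict::ReservedPointerLike;     // error: reserved for future use
        return NoCaptureVerdict::Rejected;                                     // error: pointer-like types only
    }
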
diff --git a/src/checker.cpp b/src/checker.cpp
index 8756cce1a..336440d32 100644
--- a/src/checker.cpp
+++ b/src/checker.cpp
@@ -184,6 +184,9 @@ gb_internal void init_decl_info(DeclInfo *d, Scope *scope, DeclInfo *parent) {
ptr_set_init(&d->deps, 0);
ptr_set_init(&d->type_info_deps, 0);
d->labels.allocator = heap_allocator();
+ d->variadic_reuses.allocator = heap_allocator();
+ d->variadic_reuse_max_bytes = 0;
+ d->variadic_reuse_max_align = 1;
}
gb_internal DeclInfo *make_decl_info(Scope *scope, DeclInfo *parent) {
diff --git a/src/checker.hpp b/src/checker.hpp
index 781737140..d76e4c7d0 100644
--- a/src/checker.hpp
+++ b/src/checker.hpp
@@ -181,6 +181,11 @@ char const *ProcCheckedState_strings[ProcCheckedState_COUNT] {
"Checked",
};
+struct VariadicReuseData {
+ Type *slice_type; // ..elem_type
+ i64 max_count;
+};
+
// DeclInfo is used to store information of certain declarations to allow for "any order" usage
struct DeclInfo {
DeclInfo * parent; // NOTE(bill): only used for procedure literals at the moment
@@ -219,6 +224,10 @@ struct DeclInfo {
Array<BlockLabel> labels;
+ Array<VariadicReuseData> variadic_reuses;
+ i64 variadic_reuse_max_bytes;
+ i64 variadic_reuse_max_align;
+
// NOTE(bill): this is to prevent a race condition since these procedure literals can be created anywhere at any time
struct lbModule *code_gen_module;
};
diff --git a/src/entity.cpp b/src/entity.cpp
index 41d84e0f7..db6ffdd52 100644
--- a/src/entity.cpp
+++ b/src/entity.cpp
@@ -45,7 +45,7 @@ enum EntityFlag : u64 {
EntityFlag_Value = 1ull<<11,
EntityFlag_BitFieldField = 1ull<<12,
-
+ EntityFlag_NoCapture = 1ull<<13, // #no_capture
EntityFlag_PolyConst = 1ull<<15,
EntityFlag_NotExported = 1ull<<16,
diff --git a/src/llvm_abi.cpp b/src/llvm_abi.cpp
index b2e485d01..c21cd0a46 100644
--- a/src/llvm_abi.cpp
+++ b/src/llvm_abi.cpp
@@ -15,6 +15,7 @@ struct lbArgType {
LLVMAttributeRef align_attribute; // Optional
i64 byval_alignment;
bool is_byval;
+ bool no_capture;
};
@@ -159,6 +160,11 @@ gb_internal void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType
LLVMAddAttributeAtIndex(fn, arg_index+1, arg->align_attribute);
}
+ if (arg->no_capture) {
+ LLVMAddAttributeAtIndex(fn, arg_index+1, nocapture_attr);
+ }
+
+
if (ft->multiple_return_original_type) {
if (ft->original_arg_count <= i) {
LLVMAddAttributeAtIndex(fn, arg_index+1, noalias_attr);
@@ -645,10 +651,10 @@ namespace lbAbiAmd64SysV {
if (is_mem_cls(cls, attribute_kind)) {
LLVMAttributeRef attribute = nullptr;
if (attribute_kind == Amd64TypeAttribute_ByVal) {
- // if (!is_calling_convention_odin(calling_convention)) {
- return lb_arg_type_indirect_byval(c, type);
- // }
- // attribute = nullptr;
+ if (is_calling_convention_odin(calling_convention)) {
+ return lb_arg_type_indirect(type, attribute);
+ }
+ return lb_arg_type_indirect_byval(c, type);
} else if (attribute_kind == Amd64TypeAttribute_StructRect) {
attribute = lb_create_enum_attribute_with_type(c, "sret", type);
}
diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp
index 52661dfa7..ae46186ed 100644
--- a/src/llvm_backend.cpp
+++ b/src/llvm_backend.cpp
@@ -1570,6 +1570,7 @@ gb_internal WORKER_TASK_PROC(lb_llvm_module_pass_worker_proc) {
switch (build_context.optimization_level) {
case -1:
+ array_add(&passes, "function(annotation-remarks)");
break;
case 0:
array_add(&passes, "always-inline");
diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp
index 005358734..deb05528f 100644
--- a/src/llvm_backend.hpp
+++ b/src/llvm_backend.hpp
@@ -296,6 +296,11 @@ enum lbProcedureFlag : u32 {
lbProcedureFlag_DebugAllocaCopy = 1<<1,
};
+struct lbVariadicReuseSlices {
+ Type *slice_type;
+ lbAddr slice_addr;
+};
+
struct lbProcedure {
u32 flags;
u16 state_flags;
@@ -336,8 +341,10 @@ struct lbProcedure {
bool in_multi_assignment;
Array<LLVMValueRef> raw_input_parameters;
- LLVMValueRef temp_callee_return_struct_memory;
+ Array<lbVariadicReuseSlices> variadic_reuses;
+ lbAddr variadic_reuse_base_array_ptr;
+ LLVMValueRef temp_callee_return_struct_memory;
Ast *curr_stmt;
Array<Scope *> scope_stack;
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp
index 610c34de2..5270d6c30 100644
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -253,6 +253,11 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i
if (e->flags&EntityFlag_NoAlias) {
lb_add_proc_attribute_at_index(p, offset+parameter_index, "noalias");
}
+ if (e->flags&EntityFlag_NoCapture) {
+ if (is_type_internally_pointer_like(e->type)) {
+ lb_add_proc_attribute_at_index(p, offset+parameter_index, "nocapture");
+ }
+ }
parameter_index += 1;
}
}
@@ -517,6 +522,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) {
lb_start_block(p, p->entry_block);
map_init(&p->direct_parameters);
+ p->variadic_reuses.allocator = heap_allocator();
GB_ASSERT(p->type != nullptr);
@@ -3450,17 +3456,59 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
}
isize slice_len = var_args.count;
if (slice_len > 0) {
- lbAddr slice = lb_add_local_generated(p, slice_type, true);
- lbAddr base_array = lb_add_local_generated(p, alloc_type_array(elem_type, slice_len), true);
+ lbAddr slice = {};
+
+ for (auto const &vr : p->variadic_reuses) {
+ if (are_types_identical(vr.slice_type, slice_type)) {
+ slice = vr.slice_addr;
+ break;
+ }
+ }
+
+ DeclInfo *d = decl_info_of_entity(p->entity);
+ if (d != nullptr && slice.addr.value == nullptr) {
+ for (auto const &vr : d->variadic_reuses) {
+ if (are_types_identical(vr.slice_type, slice_type)) {
+ #if LLVM_VERSION_MAJOR >= 13
+ // NOTE(bill): No point wasting even more memory, just reuse this stack variable too
+ if (p->variadic_reuses.count > 0) {
+ slice = p->variadic_reuses[0].slice_addr;
+ } else {
+ slice = lb_add_local_generated(p, slice_type, true);
+ }
+ // NOTE(bill): Change the underlying type to match the specific type
+ slice.addr.type = alloc_type_pointer(slice_type);
+ #else
+ slice = lb_add_local_generated(p, slice_type, true);
+ #endif
+ array_add(&p->variadic_reuses, lbVariadicReuseSlices{slice_type, slice});
+ break;
+ }
+ }
+ }
+
+ lbValue base_array_ptr = p->variadic_reuse_base_array_ptr.addr;
+ if (d != nullptr && base_array_ptr.value == nullptr) {
+ i64 max_bytes = d->variadic_reuse_max_bytes;
+ i64 max_align = gb_max(d->variadic_reuse_max_align, 16);
+ p->variadic_reuse_base_array_ptr = lb_add_local_generated(p, alloc_type_array(t_u8, max_bytes), true);
+ lb_try_update_alignment(p->variadic_reuse_base_array_ptr.addr, cast(unsigned)max_align);
+ base_array_ptr = p->variadic_reuse_base_array_ptr.addr;
+ }
+
+ GB_ASSERT(base_array_ptr.value != nullptr);
+ GB_ASSERT(slice.addr.value != nullptr);
+
+ base_array_ptr = lb_emit_conv(p, base_array_ptr, alloc_type_pointer(alloc_type_array(elem_type, slice_len)));
for (isize i = 0; i < var_args.count; i++) {
- lbValue addr = lb_emit_array_epi(p, base_array.addr, cast(i32)i);
+ lbValue addr = lb_emit_array_epi(p, base_array_ptr, cast(i32)i);
lbValue var_arg = var_args[i];
var_arg = lb_emit_conv(p, var_arg, elem_type);
lb_emit_store(p, addr, var_arg);
}
- lbValue base_elem = lb_emit_array_epi(p, base_array.addr, 0);
+ lbValue base_elem = lb_emit_array_epi(p, base_array_ptr, 0);
lbValue len = lb_const_int(p->module, t_int, slice_len);
lb_fill_slice(p, slice, base_elem, len);
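
Using the sizes computed by the checker, a procedure now materialises at most one slice header per distinct variadic slice type and a single max-sized, at-least-16-byte-aligned u8 backing array; each variadic call site re-casts that array to [N]elem for its own argument count instead of allocating a fresh local array. A standalone sketch of the reuse shape (the 64-byte capacity and the template are illustrative, not the backend's code):

    #include <cstddef>
    #include <cstring>

    struct SharedVarargBuffer {
        alignas(16) unsigned char bytes[64]; // plays the role of the variadic_reuse_max_bytes array
    };

    // Each call site copies its arguments into the shared buffer and builds its slice
    // over it, so multiple call sites in one procedure reuse the same stack space.
    template <typename Elem, size_t N>
    static Elem *fill_varargs(SharedVarargBuffer &buf, Elem const (&args)[N]) {
        static_assert(sizeof(Elem) * N <= sizeof(buf.bytes), "buffer covers the largest call site");
        Elem *slot = reinterpret_cast<Elem *>(buf.bytes);
        std::memcpy(slot, args, sizeof(Elem) * N);
        return slot; // slice data pointer; the slice length is N
    }
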
diff --git a/src/main.cpp b/src/main.cpp
index e6a0aecf0..388184be9 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -399,6 +399,8 @@ enum BuildFlagKind {
BuildFlag_Sanitize,
+ BuildFlag_FastBuild,
+
#if defined(GB_SYSTEM_WINDOWS)
BuildFlag_IgnoreVsSearch,
BuildFlag_ResourceFile,
@@ -605,6 +607,9 @@ gb_internal bool parse_build_flags(Array<String> args) {
add_flag(&build_flags, BuildFlag_Sanitize, str_lit("sanitize"), BuildFlagParam_String, Command__does_build, true);
+ add_flag(&build_flags, BuildFlag_FastBuild, str_lit("fast-build"), BuildFlagParam_None, Command__does_build);
+
+
#if defined(GB_SYSTEM_WINDOWS)
add_flag(&build_flags, BuildFlag_IgnoreVsSearch, str_lit("ignore-vs-search"), BuildFlagParam_None, Command__does_build);
add_flag(&build_flags, BuildFlag_ResourceFile, str_lit("resource"), BuildFlagParam_String, Command__does_build);
@@ -1441,6 +1446,13 @@ gb_internal bool parse_build_flags(Array<String> args) {
}
break;
+
+ case BuildFlag_FastBuild:
+ build_context.custom_optimization_level = true;
+ build_context.optimization_level = -1;
+ build_context.use_separate_modules = true;
+ break;
+
#if defined(GB_SYSTEM_WINDOWS)
case BuildFlag_IgnoreVsSearch: {
GB_ASSERT(value.kind == ExactValue_Invalid);
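
The new -fast-build flag is shorthand for the fastest compile configuration: it forces the optimization level to -1 (-o:none) and enables use_separate_modules (splitting code generation across multiple LLVM modules). A hedged usage example (how it interacts with an explicitly passed -o: flag is not shown in these hunks):

    odin build . -fast-build
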
diff --git a/src/parser.cpp b/src/parser.cpp
index 9ce3d563d..a6a146cfd 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -4014,6 +4014,7 @@ struct ParseFieldPrefixMapping {
gb_global ParseFieldPrefixMapping const parse_field_prefix_mappings[] = {
{str_lit("using"), Token_using, FieldFlag_using},
{str_lit("no_alias"), Token_Hash, FieldFlag_no_alias},
+ {str_lit("no_capture"), Token_Hash, FieldFlag_no_capture},
{str_lit("c_vararg"), Token_Hash, FieldFlag_c_vararg},
{str_lit("const"), Token_Hash, FieldFlag_const},
{str_lit("any_int"), Token_Hash, FieldFlag_any_int},
diff --git a/src/parser.hpp b/src/parser.hpp
index 86b3393af..451cdf53d 100644
--- a/src/parser.hpp
+++ b/src/parser.hpp
@@ -331,8 +331,10 @@ enum FieldFlag : u32 {
FieldFlag_by_ptr = 1<<8,
FieldFlag_no_broadcast = 1<<9, // disallow array programming
+ FieldFlag_no_capture = 1<<11,
+
// Internal use by the parser only
- FieldFlag_Tags = 1<<10,
+ FieldFlag_Tags = 1<<15,
FieldFlag_Results = 1<<16,
@@ -340,7 +342,10 @@ enum FieldFlag : u32 {
FieldFlag_Invalid = 1u<<31,
// Parameter List Restrictions
- FieldFlag_Signature = FieldFlag_ellipsis|FieldFlag_using|FieldFlag_no_alias|FieldFlag_c_vararg|FieldFlag_const|FieldFlag_any_int|FieldFlag_by_ptr|FieldFlag_no_broadcast,
+ FieldFlag_Signature = FieldFlag_ellipsis|FieldFlag_using|FieldFlag_no_alias|FieldFlag_c_vararg|
+ FieldFlag_const|FieldFlag_any_int|FieldFlag_by_ptr|FieldFlag_no_broadcast|
+ FieldFlag_no_capture,
+
FieldFlag_Struct = FieldFlag_using|FieldFlag_subtype|FieldFlag_Tags,
};
diff --git a/src/thread_pool.cpp b/src/thread_pool.cpp
index 5dbbe37c4..62cca6de6 100644
--- a/src/thread_pool.cpp
+++ b/src/thread_pool.cpp
@@ -10,13 +10,18 @@ gb_internal void thread_pool_destroy(ThreadPool *pool);
gb_internal bool thread_pool_add_task(ThreadPool *pool, WorkerTaskProc *proc, void *data);
gb_internal void thread_pool_wait(ThreadPool *pool);
+enum GrabState {
+ Grab_Success = 0,
+ Grab_Empty = 1,
+ Grab_Failed = 2,
+};
+
struct ThreadPool {
- gbAllocator threads_allocator;
- Slice<Thread> threads;
+ gbAllocator threads_allocator;
+ Slice<Thread> threads;
std::atomic<bool> running;
Futex tasks_available;
-
Futex tasks_left;
};
@@ -46,7 +51,7 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) {
for_array_off(i, 1, pool->threads) {
Thread *t = &pool->threads[i];
- pool->tasks_available.fetch_add(1, std::memory_order_relaxed);
+ pool->tasks_available.fetch_add(1, std::memory_order_acquire);
futex_broadcast(&pool->tasks_available);
thread_join_and_destroy(t);
}
@@ -54,51 +59,86 @@ gb_internal void thread_pool_destroy(ThreadPool *pool) {
gb_free(pool->threads_allocator, pool->threads.data);
}
-void thread_pool_queue_push(Thread *thread, WorkerTask task) {
- u64 capture;
- u64 new_capture;
- do {
- capture = thread->head_and_tail.load();
-
- u64 mask = thread->capacity - 1;
- u64 head = (capture >> 32) & mask;
- u64 tail = ((u32)capture) & mask;
+TaskRingBuffer *task_ring_grow(TaskRingBuffer *ring, isize bottom, isize top) {
+ TaskRingBuffer *new_ring = task_ring_init(ring->size * 2);
+ for (isize i = top; i < bottom; i++) {
+ new_ring->buffer[i % new_ring->size] = ring->buffer[i % ring->size];
+ }
+ return new_ring;
+}
- u64 new_head = (head + 1) & mask;
- GB_ASSERT_MSG(new_head != tail, "Thread Queue Full!");
+void thread_pool_queue_push(Thread *thread, WorkerTask task) {
+ isize bot = thread->queue.bottom.load(std::memory_order_relaxed);
+ isize top = thread->queue.top.load(std::memory_order_acquire);
+ TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_relaxed);
+
+ isize size = bot - top;
+ if (size > (cur_ring->size - 1)) {
+ // Queue is full
+ thread->queue.ring = task_ring_grow(thread->queue.ring, bot, top);
+ cur_ring = thread->queue.ring.load(std::memory_order_relaxed);
+ }
- // This *must* be done in here, to avoid a potential race condition where we no longer own the slot by the time we're assigning
- thread->queue[head] = task;
- new_capture = (new_head << 32) | tail;
- } while (!thread->head_and_tail.compare_exchange_weak(capture, new_capture));
+ cur_ring->buffer[bot % cur_ring->size] = task;
+ std::atomic_thread_fence(std::memory_order_release);
+ thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
thread->pool->tasks_left.fetch_add(1, std::memory_order_release);
thread->pool->tasks_available.fetch_add(1, std::memory_order_relaxed);
futex_broadcast(&thread->pool->tasks_available);
}
-bool thread_pool_queue_pop(Thread *thread, WorkerTask *task) {
- u64 capture;
- u64 new_capture;
- do {
- capture = thread->head_and_tail.load(std::memory_order_acquire);
-
- u64 mask = thread->capacity - 1;
- u64 head = (capture >> 32) & mask;
- u64 tail = ((u32)capture) & mask;
+GrabState thread_pool_queue_take(Thread *thread, WorkerTask *task) {
+ isize bot = thread->queue.bottom.load(std::memory_order_relaxed) - 1;
+ TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_relaxed);
+ thread->queue.bottom.store(bot, std::memory_order_relaxed);
+ std::atomic_thread_fence(std::memory_order_seq_cst);
+
+ isize top = thread->queue.top.load(std::memory_order_relaxed);
+ if (top <= bot) {
+
+ // Queue is not empty
+ *task = cur_ring->buffer[bot % cur_ring->size];
+ if (top == bot) {
+ // Only one entry left in queue
+ if (!thread->queue.top.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+ // Race failed
+ thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
+ return Grab_Empty;
+ }
- u64 new_tail = (tail + 1) & mask;
- if (tail == head) {
- return false;
+ thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
+ return Grab_Success;
}
- // Making a copy of the task before we increment the tail, avoiding the same potential race condition as above
- *task = thread->queue[tail];
-
- new_capture = (head << 32) | new_tail;
- } while (!thread->head_and_tail.compare_exchange_weak(capture, new_capture, std::memory_order_release));
+ // We got a task without hitting a race
+ return Grab_Success;
+ } else {
+ // Queue is empty
+ thread->queue.bottom.store(bot + 1, std::memory_order_relaxed);
+ return Grab_Empty;
+ }
+}
- return true;
+GrabState thread_pool_queue_steal(Thread *thread, WorkerTask *task) {
+ isize top = thread->queue.top.load(std::memory_order_acquire);
+ std::atomic_thread_fence(std::memory_order_seq_cst);
+ isize bot = thread->queue.bottom.load(std::memory_order_acquire);
+
+ GrabState ret = Grab_Empty;
+ if (top < bot) {
+ // Queue is not empty
+ TaskRingBuffer *cur_ring = thread->queue.ring.load(std::memory_order_consume);
+ *task = cur_ring->buffer[top % cur_ring->size];
+
+ if (!thread->queue.top.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+ // Race failed
+ ret = Grab_Failed;
+ } else {
+ ret = Grab_Success;
+ }
+ }
+ return ret;
}
gb_internal bool thread_pool_add_task(ThreadPool *pool, WorkerTaskProc *proc, void *data) {
@@ -115,12 +155,11 @@ gb_internal void thread_pool_wait(ThreadPool *pool) {
while (pool->tasks_left.load(std::memory_order_acquire)) {
// if we've got tasks on our queue, run them
- while (thread_pool_queue_pop(current_thread, &task)) {
+ while (!thread_pool_queue_take(current_thread, &task)) {
task.do_work(task.data);
pool->tasks_left.fetch_sub(1, std::memory_order_release);
}
-
// is this mem-barriered enough?
// This *must* be executed in this order, so the futex wakes immediately
// if rem_tasks has changed since we checked last, otherwise the program
@@ -145,7 +184,7 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
usize finished_tasks = 0;
i32 state;
- while (thread_pool_queue_pop(current_thread, &task)) {
+ while (!thread_pool_queue_take(current_thread, &task)) {
task.do_work(task.data);
pool->tasks_left.fetch_sub(1, std::memory_order_release);
@@ -167,7 +206,12 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
Thread *thread = &pool->threads.data[idx];
WorkerTask task;
- if (thread_pool_queue_pop(thread, &task)) {
+
+ GrabState ret = thread_pool_queue_steal(thread, &task);
+ switch (ret) {
+ case Grab_Empty:
+ continue;
+ case Grab_Success:
task.do_work(task.data);
pool->tasks_left.fetch_sub(1, std::memory_order_release);
@@ -175,6 +219,8 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
futex_signal(&pool->tasks_left);
}
+ /*fallthrough*/
+ case Grab_Failed:
goto main_loop_continue;
}
}
@@ -182,6 +228,7 @@ gb_internal THREAD_PROC(thread_pool_thread_proc) {
// if we've done all our work, and there's nothing to steal, go to sleep
state = pool->tasks_available.load(std::memory_order_acquire);
+ if (!pool->running) { break; }
futex_wait(&pool->tasks_available, state);
main_loop_continue:;
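
The fixed-capacity queue guarded by a packed head/tail CAS is replaced with a growable single-owner, multi-thief deque: the owning thread pushes and takes at the bottom, other workers steal from the top with a compare-exchange, and GrabState distinguishes an empty victim (Grab_Empty, move on to the next thread) from a lost steal race (Grab_Failed). A deliberately simplified, self-contained sketch of the same discipline (fixed capacity and default seq_cst orderings for clarity; the real TaskQueue uses weaker orderings with explicit fences plus a growable ring):

    #include <atomic>
    #include <cstddef>

    enum Grab { GrabSuccess, GrabEmpty, GrabFailed };

    template <typename T, size_t N>  // fixed capacity; the real ring grows on demand
    struct MiniDeque {
        std::atomic<long> top{0};
        std::atomic<long> bottom{0};
        T buffer[N];

        void push(T v) {             // owner thread only; keep fewer than N items outstanding
            long b = bottom.load();
            buffer[b % N] = v;
            bottom.store(b + 1);
        }
        Grab take(T *out) {          // owner thread only; pops from the bottom
            long b = bottom.load() - 1;
            bottom.store(b);
            long t = top.load();
            if (t > b) { bottom.store(b + 1); return GrabEmpty; }
            *out = buffer[b % N];
            if (t == b) {            // last element: race against concurrent stealers
                Grab r = top.compare_exchange_strong(t, t + 1) ? GrabSuccess : GrabEmpty;
                bottom.store(b + 1);
                return r;
            }
            return GrabSuccess;
        }
        Grab steal(T *out) {         // any other thread; pops from the top
            long t = top.load();
            long b = bottom.load();
            if (t >= b) return GrabEmpty;
            *out = buffer[t % N];
            return top.compare_exchange_strong(t, t + 1) ? GrabSuccess : GrabFailed;
        }
    };
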
diff --git a/src/threading.cpp b/src/threading.cpp
index 717dcb874..ff0fdfcde 100644
--- a/src/threading.cpp
+++ b/src/threading.cpp
@@ -46,6 +46,18 @@ typedef struct WorkerTask {
void *data;
} WorkerTask;
+typedef struct TaskRingBuffer {
+ std::atomic<isize> size;
+ std::atomic<WorkerTask *> buffer;
+} TaskRingBuffer;
+
+typedef struct TaskQueue {
+ std::atomic<isize> top;
+ std::atomic<isize> bottom;
+
+ std::atomic<TaskRingBuffer *> ring;
+} TaskQueue;
+
struct Thread {
#if defined(GB_SYSTEM_WINDOWS)
void *win32_handle;
@@ -54,12 +66,9 @@ struct Thread {
#endif
isize idx;
+ isize stack_size;
- WorkerTask *queue;
- size_t capacity;
- std::atomic<uint64_t> head_and_tail;
-
- isize stack_size;
+ struct TaskQueue queue;
struct ThreadPool *pool;
};
@@ -551,6 +560,18 @@ gb_internal void *internal_thread_proc(void *arg) {
}
#endif
+TaskRingBuffer *task_ring_init(isize size) {
+ TaskRingBuffer *ring = gb_alloc_item(heap_allocator(), TaskRingBuffer);
+ ring->size = size;
+ ring->buffer = gb_alloc_array(heap_allocator(), WorkerTask, ring->size);
+ return ring;
+}
+
+void thread_queue_destroy(TaskQueue *q) {
+ gb_free(heap_allocator(), (*q->ring).buffer);
+ gb_free(heap_allocator(), q->ring);
+}
+
gb_internal void thread_init(ThreadPool *pool, Thread *t, isize idx) {
gb_zero_item(t);
#if defined(GB_SYSTEM_WINDOWS)
@@ -559,14 +580,12 @@ gb_internal void thread_init(ThreadPool *pool, Thread *t, isize idx) {
t->posix_handle = 0;
#endif
- t->capacity = 1 << 14; // must be a power of 2
- t->queue = gb_alloc_array(heap_allocator(), WorkerTask, t->capacity);
- t->head_and_tail = 0;
+ // Size must be a power of 2
+ t->queue.ring = task_ring_init(1 << 14);
t->pool = pool;
t->idx = idx;
}
-
gb_internal void thread_init_and_start(ThreadPool *pool, Thread *t, isize idx) {
thread_init(pool, t, idx);
isize stack_size = 0;
@@ -598,7 +617,7 @@ gb_internal void thread_join_and_destroy(Thread *t) {
t->posix_handle = 0;
#endif
- gb_free(heap_allocator(), t->queue);
+ thread_queue_destroy(&t->queue);
}
gb_internal void thread_set_name(Thread *t, char const *name) {
diff --git a/src/types.cpp b/src/types.cpp
index c3a5fb539..92b187cdb 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -2923,11 +2923,14 @@ gb_internal Type *c_vararg_promote_type(Type *type) {
if (core->kind == Type_Basic) {
switch (core->Basic.kind) {
+ case Basic_f16:
case Basic_f32:
case Basic_UntypedFloat:
return t_f64;
+ case Basic_f16le:
case Basic_f32le:
return t_f64le;
+ case Basic_f16be:
case Basic_f32be:
return t_f64be;
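
This extends C vararg promotion to the half-precision types: f16, f16le and f16be now promote to the matching f64 variant, in line with C's default argument promotions, which already turn float into double when it is passed through a "..." parameter list. The C-side rule, for reference:

    #include <cstdarg>
    #include <cstdio>

    // Variadic C ABI: the caller promotes float arguments to double,
    // so the callee must read them back as double.
    static void print_all(int count, ...) {
        va_list args;
        va_start(args, count);
        for (int i = 0; i < count; i++) {
            double v = va_arg(args, double);
            std::printf("%f\n", v);
        }
        va_end(args);
    }
    // print_all(2, 1.5f, 2.5f); // both float arguments arrive as double
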