diff options
| author | Andre Weissflog <floooh@gmail.com> | 2025-01-29 20:18:21 +0100 |
|---|---|---|
| committer | Andre Weissflog <floooh@gmail.com> | 2025-01-29 20:18:21 +0100 |
| commit | dd3c869e7daa2e35c0ba5bc107af412c262cd8e7 (patch) | |
| tree | ae9151039652a03043bd45d92cee8d60157ec936 | |
| parent | 353c384768a1cbc33fb4eea7ca312552e16bb797 (diff) | |
sokol_gfx.h mtl: sg_dispatch working :)
| -rw-r--r-- | sokol_gfx.h | 43 |
1 files changed, 36 insertions, 7 deletions
diff --git a/sokol_gfx.h b/sokol_gfx.h index 4cf39ae9..36abb484 100644 --- a/sokol_gfx.h +++ b/sokol_gfx.h @@ -5875,6 +5875,9 @@ typedef struct { MTLCullMode cull_mode; MTLWinding winding; uint32_t stencil_ref; + struct { + MTLSize max_threads_per_group; + } compute; int cps; // MTLComputePipelineState int rps; // MTLRenderPipelineState int dss; // MTLDepthStencilState @@ -13096,6 +13099,14 @@ _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_pipeline(_sg_pipeline_t* pip, _s } // NOTE: no easy way to set the label on a compute pipeline pip->mtl.cps = _sg_mtl_add_resource(mtl_cps); + + // compute threads-per-thread-group dispatch arg + // (see: https://developer.apple.com/documentation/metal/calculating-threadgroup-and-grid-sizes?language=objc) + const NSUInteger w = mtl_cps.threadExecutionWidth; + SOKOL_ASSERT(w > 0); + const NSUInteger h = mtl_cps.maxTotalThreadsPerThreadgroup / w; + SOKOL_ASSERT(h > 0); + pip->mtl.compute.max_threads_per_group = MTLSizeMake(w, h, 1); _SG_OBJC_RELEASE(mtl_cps); } else { sg_primitive_type prim_type = desc->primitive_type; @@ -13849,6 +13860,22 @@ _SOKOL_PRIVATE void _sg_mtl_draw(int base_element, int num_elements, int num_ins } } +_SOKOL_PRIVATE void _sg_mtl_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + SOKOL_ASSERT(_sg.mtl.state_cache.cur_pipeline && (_sg.mtl.state_cache.cur_pipeline->slot.id == _sg.mtl.state_cache.cur_pipeline_id.id)); + // NOTE: we assume the `Nonuniform threadgroup size` feature is supported (iPhone8 and later) + const _sg_pipeline_t* cur_pip = _sg.mtl.state_cache.cur_pipeline; + const NSUInteger ngx = (NSUInteger)num_groups_x; + const NSUInteger ngy = (NSUInteger)num_groups_y; + const NSUInteger ngz = (NSUInteger)num_groups_z; + const MTLSize threads = MTLSizeMake(ngx, ngy, ngz); + MTLSize tpg = cur_pip->mtl.compute.max_threads_per_group; + tpg.width = _sg_min(tpg.width, ngx); + tpg.height = _sg_min(tpg.height, ngy); + tpg.depth = _sg_min(tpg.depth, ngz); + [_sg.mtl.compute_cmd_encoder dispatchThreads:threads threadsPerThreadgroup:tpg]; +} + _SOKOL_PRIVATE void _sg_mtl_update_buffer(_sg_buffer_t* buf, const sg_range* data) { SOKOL_ASSERT(buf && data && data->ptr && (data->size > 0)); if (++buf->cmn.active_slot >= buf->cmn.num_slots) { @@ -16390,6 +16417,7 @@ static inline void _sg_dispatch(int num_groups_x, int num_groups_y, int num_grou #if defined(_SOKOL_ANY_GL) // FIXME #elif defined(SOKOL_METAL) + _sg_mtl_dispatch(num_groups_x, num_groups_y, num_groups_z); // FIXME #elif defined(SOKOL_D3D11) // FIMXE @@ -17388,7 +17416,9 @@ _SOKOL_PRIVATE bool _sg_validate_pipeline_desc(const sg_pipeline_desc* desc) { } // must only use readonly storage buffer bindings in render pipelines for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { - _SG_VALIDATE(shd->cmn.storage_buffers[i].readonly, VALIDATE_PIPELINEDESC_SHADER_READONLY_STORAGEBUFFERS); + if (shd->cmn.storage_buffers[i].stage != SG_SHADERSTAGE_NONE) { + _SG_VALIDATE(shd->cmn.storage_buffers[i].readonly, VALIDATE_PIPELINEDESC_SHADER_READONLY_STORAGEBUFFERS); + } } } for (int buf_index = 0; buf_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; buf_index++) { @@ -19463,9 +19493,7 @@ SOKOL_API_IMPL void sg_draw(int base_element, int num_elements, int num_instance return; } #endif - /* attempting to draw with zero elements or instances is not technically an - error, but might be handled as an error in the backend API (e.g. on Metal) - */ + // skip no-op draws if ((0 == num_elements) || (0 == num_instances)) { return; } @@ -19477,9 +19505,6 @@ SOKOL_API_IMPL void sg_dispatch(int num_groups_x, int num_groups_y, int num_grou SOKOL_ASSERT(_sg.valid); SOKOL_ASSERT(_sg.cur_pass.in_pass); SOKOL_ASSERT(_sg.cur_pass.is_compute); - SOKOL_ASSERT(num_groups_x > 0); - SOKOL_ASSERT(num_groups_y > 0); - SOKOL_ASSERT(num_groups_z > 0); _sg_stats_add(num_dispatch, 1); if (!_sg.cur_pass.valid) { return; @@ -19493,6 +19518,10 @@ SOKOL_API_IMPL void sg_dispatch(int num_groups_x, int num_groups_y, int num_grou return; } #endif + // skip no-op dispatches + if ((0 == num_groups_x) || (0 == num_groups_y) || (0 == num_groups_z)) { + return; + } _sg_dispatch(num_groups_x, num_groups_y, num_groups_z); _SG_TRACE_ARGS(dispatch, num_groups_x, num_groups_y, num_groups_z); } |