/* 4coder Metal render implementation */

// NOTE(review): The header names after the two #import directives were missing from the
// reviewed text. <simd/simd.h> and <MetalKit/MetalKit.h> are assumed here - confirm against the build.
// The project's clamp and function macros are removed around the imports, and function is
// redefined as static afterwards.
#undef clamp
#undef function
#import <simd/simd.h>
#import <MetalKit/MetalKit.h>
#define function static

////////////////////////////////

// NOTE(yuval): A GPU buffer plus the bookkeeping used by the renderer's reusable buffer cache.
// NOTE(review): The protocol qualifier on the buffer member is assumed (the reviewed text showed a bare id) - confirm.
struct Metal_Buffer{
    // NOTE(yuval): Links the buffer into the renderer's doubly linked buffer cache
    Node node;
    
    id<MTLBuffer> buffer;
    u32 size;
    u64 last_reuse_time;
};

////////////////////////////////

// NOTE(review): The protocol qualifier is assumed (the reviewed text showed a bare id) - confirm.
typedef id<MTLTexture> Metal_Texture;

// NOTE(yuval): This is a locator used to describe where a specific slot is located.
// The packed form is what gets handed out as a texture handle.
union Metal_Texture_Slot_Locator{
    u32 packed;
    
    struct{
        u16 bucket_index;
        u16 slot_index;
    };
};

// NOTE(yuval): This is the ACTUAL texture slot. Each slot contains the texture handle, the slot locator, and a pointer to the next slot in the free list (in case the slot is not occupied).
struct Metal_Texture_Slot{
    // NOTE(yuval): This is a pointer to the next texture in the free texture slots list
    Metal_Texture_Slot *next;
    
    Metal_Texture texture;
    Metal_Texture_Slot_Locator locator;
};

global_const u32 metal__texture_slots_per_bucket = 256;

// NOTE(yuval): This is a bucket of ACTUAL texture slots.
struct Metal_Texture_Slot_Bucket{
    Metal_Texture_Slot_Bucket *next;
    Metal_Texture_Slot slots[metal__texture_slots_per_bucket];
};

// NOTE(yuval): This is a struct containing all texture slot buckets and a list of the currently free slots.
struct Metal_Texture_Slot_List{
    // NOTE(yuval): Singly linked list of every bucket allocated so far (bucket_count is its length)
    Metal_Texture_Slot_Bucket *first_bucket;
    Metal_Texture_Slot_Bucket *last_bucket;
    u16 bucket_count;
    
    // NOTE(yuval): Queue of the slots that are currently free
    Metal_Texture_Slot *first_free_slot;
    Metal_Texture_Slot *last_free_slot;
};

// NOTE(yuval): A packed locator value that never refers to a slot
global_const u32 metal__invalid_texture_slot_locator = (u32)-1;

////////////////////////////////

// NOTE(review): The protocol qualifiers in this file (<MTKViewDelegate>, id<MTL...>) were missing
// from the reviewed text and are restored from how each object is used - confirm.
@interface Metal_Renderer : NSObject<MTKViewDelegate>

- (nonnull instancetype)initWithMetalKitView:(nonnull MTKView*)mtkView target:(Render_Target*)target;

- (u32)get_texture_of_dim:(Vec3_i32)dim kind:(Texture_Kind)kind;
- (b32)fill_texture:(u32)texture kind:(Texture_Kind)kind pos:(Vec3_i32)p dim:(Vec3_i32)dim data:(void*)data;
- (void)bind_texture:(u32)handle encoder:(id<MTLRenderCommandEncoder>)render_encoder;

- (Metal_Texture_Slot*)get_texture_slot_at_locator:(Metal_Texture_Slot_Locator)locator;
- (Metal_Texture_Slot*)get_texture_slot_at_handle:(u32)handle;

- (Metal_Buffer*)get_reusable_buffer_with_size:(NSUInteger)size;
- (void)add_reusable_buffer:(Metal_Buffer*)buffer;
@end

////////////////////////////////

// NOTE(review): The two #include targets and the texture2d_array element type were missing from
// the reviewed text. <metal_stdlib>, <simd/simd.h> and <half> (the sample is stored in a half) are assumed - confirm.
global_const char *metal__shaders_source = R"(
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

////////////////////////////////

typedef struct{
    float2 xy [[attribute(0)]];
    float3 uvw [[attribute(1)]];
    uint32_t color [[attribute(2)]];
    float half_thickness [[attribute(3)]];
} Vertex;

// NOTE(yuval): Vertex shader outputs and fragment shader inputs
typedef struct{
    // NOTE(yuval): Vertex shader output
    float4 position [[position]];
    
    // NOTE(yuval): Fragment shader inputs
    float4 color;
    float3 uvw;
    float2 xy;
    float2 adjusted_half_dim;
    float half_thickness;
} Rasterizer_Data;

////////////////////////////////

vertex Rasterizer_Data
vertex_shader(Vertex in [[stage_in]],
              constant float4x4 &proj [[buffer(1)]]){
    Rasterizer_Data out;
    
    // NOTE(yuval): Calculate position in NDC
    out.position = proj * float4(in.xy, 0.0, 1.0);
    
    // NOTE(yuval): Convert color to float4 format
    out.color.b = ((float((in.color       ) & 0xFFu)) / 255.0);
    out.color.g = ((float((in.color >>  8u) & 0xFFu)) / 255.0);
    out.color.r = ((float((in.color >> 16u) & 0xFFu)) / 255.0);
    out.color.a = ((float((in.color >> 24u) & 0xFFu)) / 255.0);
    
    // NOTE(yuval): Pass uvw coordinates to the fragment shader
    out.uvw = in.uvw;
    
    // NOTE(yuval): Calculate adjusted half dim
    float2 center = in.uvw.xy;
    float2 half_dim = abs(in.xy - center);
    out.adjusted_half_dim = (half_dim - in.uvw.zz + float2(0.5, 0.5));
    
    // NOTE(yuval): Pass half_thickness to the fragment shader
    out.half_thickness = in.half_thickness;
    
    // NOTE(yuval): Pass xy to the fragment shader
    out.xy = in.xy;
    
    return(out);
}

////////////////////////////////

float
rectangle_sd(float2 p, float2 b){
    float2 d = (abs(p) - b);
    float result = (length(max(d, float2(0.0, 0.0))) + min(max(d.x, d.y), 0.0));
    
    return(result);
}

fragment float4
fragment_shader(Rasterizer_Data in [[stage_in]],
                texture2d_array<half> in_texture [[texture(0)]]){
    float has_thickness = step(0.49, in.half_thickness);
    float does_not_have_thickness = (1.0 - has_thickness);
    
    constexpr sampler texture_sampler(coord::normalized, min_filter::linear, mag_filter::linear, mip_filter::linear);
    
    half sample_value = in_texture.sample(texture_sampler, in.uvw.xy, in.uvw.z).r;
    sample_value *= does_not_have_thickness;
    
    float2 center = in.uvw.xy;
    float roundness = in.uvw.z;
    float sd = rectangle_sd(in.xy - center, in.adjusted_half_dim);
    sd = sd - roundness;
    sd = (abs(sd + in.half_thickness) - in.half_thickness);
    float shape_value = (1.0 - smoothstep(-1.0, 0.0, sd));
    shape_value *= has_thickness;
    
    float4 out_color = float4(in.color.xyz, in.color.a * (sample_value + shape_value));
    return(out_color);
}
)";

////////////////////////////////

// NOTE(yuval): Allocates a Metal_Buffer wrapping a new device buffer of the given size.
// The buffer is stamped with the current time and is not linked into any list.
function Metal_Buffer*
metal__make_buffer(u32 size, id<MTLDevice> device){
    Metal_Buffer *result = (Metal_Buffer*)malloc(sizeof(Metal_Buffer));
    
    // NOTE(yuval): Create the vertex buffer
    MTLResourceOptions options = MTLResourceCPUCacheModeWriteCombined|MTLResourceStorageModeManaged;
    result->buffer = [device newBufferWithLength:size options:options];
    result->size = size;
    
    // NOTE(yuval): Set the last_reuse_time to the current time
    result->last_reuse_time = system_now_time();
    
    return result;
}

// NOTE(yuval): Releases the device buffer owned by a Metal_Buffer and frees the wrapper itself.
// The buffer must already be unlinked from the buffer cache.
function void
metal__free_buffer(Metal_Buffer *buffer){
    [buffer->buffer release];
    free(buffer);
}

////////////////////////////////

@implementation Metal_Renderer{
    Render_Target *_target;
    
    id<MTLDevice> _device;
    id<MTLRenderPipelineState> _pipeline_state;
    id<MTLCommandQueue> _command_queue;
    id<MTLCaptureScope> _capture_scope;
    
    // NOTE(yuval): Sentinel of the doubly linked list of buffers that are available for reuse
    Node _buffer_cache;
    u64 _last_buffer_cache_purge_time;
    
    Metal_Texture_Slot_List _texture_slots;
}

// NOTE(yuval): Sets up everything the renderer needs: compiles the shaders, builds the render
// pipeline, creates the command queue, initializes the buffer cache and the texture slot list,
// creates the 2x2 white fallback texture and creates a capture scope.
- (nonnull instancetype)initWithMetalKitView:(nonnull MTKView*)mtk_view target:(Render_Target*)target{
    self = [super init];
    if (self == nil){
        return(nil);
    }
    
    _target = target;
    
    NSError *error = nil;
    
    _device = mtk_view.device;
    
    // NOTE(yuval): Compile the shaders
    id<MTLFunction> vertex_function = nil;
    id<MTLFunction> fragment_function = nil;
    {
        NSString *shaders_source_str = [NSString stringWithUTF8String:metal__shaders_source];
        
        MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
        options.fastMathEnabled = YES;
        
        id<MTLLibrary> shader_library = [_device newLibraryWithSource:shaders_source_str
                                                              options:options
                                                                error:&error];
        vertex_function = [shader_library newFunctionWithName:@"vertex_shader"];
        fragment_function = [shader_library newFunctionWithName:@"fragment_shader"];
        
        // NOTE(yuval): The library is only needed to look up the two functions
        [shader_library release];
        [options release];
    }
    
    Assert(error == nil);
    Assert((vertex_function != nil) && (fragment_function != nil));
    
    // NOTE(yuval): Configure the pipeline descriptor
    {
        MTLVertexDescriptor *vertexDescriptor = [MTLVertexDescriptor vertexDescriptor];
        vertexDescriptor.attributes[0].offset = OffsetOfMember(Render_Vertex, xy);
        vertexDescriptor.attributes[0].format = MTLVertexFormatFloat2; // position
        vertexDescriptor.attributes[0].bufferIndex = 0;
        vertexDescriptor.attributes[1].offset = OffsetOfMember(Render_Vertex, uvw);
        vertexDescriptor.attributes[1].format = MTLVertexFormatFloat3; // texCoords
        vertexDescriptor.attributes[1].bufferIndex = 0;
        vertexDescriptor.attributes[2].offset = OffsetOfMember(Render_Vertex, color);
        vertexDescriptor.attributes[2].format = MTLVertexFormatUInt; // color
        vertexDescriptor.attributes[2].bufferIndex = 0;
        vertexDescriptor.attributes[3].offset = OffsetOfMember(Render_Vertex, half_thickness);
        vertexDescriptor.attributes[3].format = MTLVertexFormatFloat; // half thickness
        vertexDescriptor.attributes[3].bufferIndex = 0;
        vertexDescriptor.layouts[0].stepRate = 1;
        vertexDescriptor.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
        vertexDescriptor.layouts[0].stride = sizeof(Render_Vertex);
        
        MTLRenderPipelineDescriptor *pipeline_state_descriptor = [[MTLRenderPipelineDescriptor alloc] init];
        pipeline_state_descriptor.label = @"4coder Metal Renderer Pipeline";
        pipeline_state_descriptor.vertexFunction = vertex_function;
        pipeline_state_descriptor.fragmentFunction = fragment_function;
        pipeline_state_descriptor.vertexDescriptor = vertexDescriptor;
        pipeline_state_descriptor.colorAttachments[0].pixelFormat = mtk_view.colorPixelFormat;
        pipeline_state_descriptor.colorAttachments[0].blendingEnabled = YES;
        pipeline_state_descriptor.colorAttachments[0].alphaBlendOperation = MTLBlendOperationAdd;
        pipeline_state_descriptor.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd;
        pipeline_state_descriptor.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha;
        pipeline_state_descriptor.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
        pipeline_state_descriptor.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne;
        pipeline_state_descriptor.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
        
        _pipeline_state = [_device newRenderPipelineStateWithDescriptor:pipeline_state_descriptor
                                                                  error:&error];
        
        // NOTE(yuval): The descriptor was created with alloc/init here, so it is released here
        [pipeline_state_descriptor release];
    }
    
    // NOTE(yuval): The functions were obtained from new... methods and are not used after pipeline creation
    [vertex_function release];
    [fragment_function release];
    
    Assert(error == nil);
    
    // NOTE(yuval): Create the command queue
    _command_queue = [_device newCommandQueue];
    
    // NOTE(yuval): Initialize buffer caching
    dll_init_sentinel(&_buffer_cache);
    _last_buffer_cache_purge_time = system_now_time();
    
    // NOTE(yuval): Initialize the texture slot list
    block_zero_struct(&_texture_slots);
    
    // NOTE(yuval): Create the fallback texture
    _target->fallback_texture_id = [self get_texture_of_dim:V3i32(2, 2, 1)
                                                       kind:TextureKind_Mono];
    u8 white_block[] = {0xFF, 0xFF, 0xFF, 0xFF};
    [self fill_texture:_target->fallback_texture_id
                  kind:TextureKind_Mono
                   pos:V3i32(0, 0, 0)
                   dim:V3i32(2, 2, 1)
                  data:white_block];
    
    // NOTE(yuval): Create a capture scope for gpu frame capture
    _capture_scope = [[MTLCaptureManager sharedCaptureManager] newCaptureScopeWithDevice:_device];
    _capture_scope.label = @"4coder Metal Capture Scope";
    
    return(self);
}

- (void)mtkView:(nonnull MTKView*)view drawableSizeWillChange:(CGSize)size{
    // NOTE(yuval): Nothing to do here because we use the render target's dimensions for rendering
}

// NOTE(yuval): Renders one frame: recycles the texture slots queued for freeing, uploads the
// vertices of every render group into one vertex buffer, issues one draw per group, presents
// the drawable and hands the vertex buffer back to the cache once the GPU has finished.
- (void)drawInMTKView:(nonnull MTKView*)view{
#if FRED_INTERNAL
    [_capture_scope beginScope];
#endif
    
    // HACK(yuval): This is the best way I found to force valid width and height without drawing on the next draw cycle (1 frame delay).
    CGSize drawable_size = [view drawableSize];
    i32 width = (i32)Min(_target->width, drawable_size.width);
    i32 height = (i32)Min(_target->height, drawable_size.height);
    
    Font_Set *font_set = (Font_Set*)_target->font_set;
    
    // NOTE(yuval): Free any textures in the target's texture free list
    for (Render_Free_Texture *free_texture = _target->free_texture_first;
         free_texture;
         free_texture = free_texture->next){
        Metal_Texture_Slot *texture_slot = [self get_texture_slot_at_handle:free_texture->tex_id];
        if (texture_slot){
            // NOTE(yuval): Drop the slot's texture before recycling the slot, otherwise the
            // texture is overwritten (and lost) the next time the slot is handed out.
            [texture_slot->texture release];
            texture_slot->texture = nil;
            
            sll_queue_push(_texture_slots.first_free_slot, _texture_slots.last_free_slot, texture_slot);
        }
    }
    _target->free_texture_first = 0;
    _target->free_texture_last = 0;
    
    // NOTE(yuval): Create the command buffer
    id<MTLCommandBuffer> command_buffer = [_command_queue commandBuffer];
    command_buffer.label = @"4coder Metal Render Command";
    
    // NOTE(yuval): Obtain the render pass descriptor from the renderer's view
    MTLRenderPassDescriptor *render_pass_descriptor = view.currentRenderPassDescriptor;
    if (render_pass_descriptor != nil){
        render_pass_descriptor.colorAttachments[0].clearColor = MTLClearColorMake(0.0f, 0.0f, 0.0f, 1.0f);
        
        // NOTE(yuval): Create the render command encoder
        id<MTLRenderCommandEncoder> render_encoder = [command_buffer renderCommandEncoderWithDescriptor:render_pass_descriptor];
        render_encoder.label = @"4coder Render Encoder";
        
        // NOTE(yuval): Set the region of the drawable to draw into
        [render_encoder setViewport:(MTLViewport){0.0, 0.0, (double)width, (double)height, 0.0, 1.0}];
        
        // NOTE(yuval): Set the render pipeline to use for drawing
        [render_encoder setRenderPipelineState:_pipeline_state];
        
        // NOTE(yuval): Calculate the projection matrix
        float left = 0, right = (float)width;
        float bottom = (float)height, top = 0;
        float near_depth = -1.0f, far_depth = 1.0f;
        float proj[16] = {
            2.0f / (right - left), 0.0f, 0.0f, 0.0f,
            0.0f, 2.0f / (top - bottom), 0.0f, 0.0f,
            0.0f, 0.0f, -1.0f / (far_depth - near_depth), 0.0f,
            -((right + left) / (right - left)), -((top + bottom) / (top - bottom)),
            -(near_depth / (far_depth - near_depth)), 1.0f
        };
        
        // NOTE(yuval): Calculate required vertex buffer size
        i32 all_vertex_count = 0;
        for (Render_Group *group = _target->group_first;
             group;
             group = group->next){
            all_vertex_count += group->vertex_list.vertex_count;
        }
        
        u32 vertex_buffer_size = (all_vertex_count * sizeof(Render_Vertex));
        
        // NOTE(yuval): Find & Get a vertex buffer matching the required size
        Metal_Buffer *buffer = [self get_reusable_buffer_with_size:vertex_buffer_size];
        
        // NOTE(yuval): Pass the vertex buffer to the vertex shader
        [render_encoder setVertexBuffer:buffer->buffer
                                 offset:0
                                atIndex:0];
        
        // NOTE(yuval): Pass the projection matrix to the vertex shader
        [render_encoder setVertexBytes:&proj
                                length:sizeof(proj)
                               atIndex:1];
        
        u32 buffer_offset = 0;
        for (Render_Group *group = _target->group_first;
             group;
             group = group->next){
            // NOTE(yuval): Clamp the clip box to the render area. The math is done in signed
            // integers so an inverted or off-screen box becomes an empty rect instead of
            // wrapping around to a huge unsigned width/height.
            Rect_i32 box = Ri32(group->clip_box);
            i32 x0 = Min(Max(0, box.x0), width);
            i32 x1 = Min(Max(x0, box.x1), width);
            i32 y0 = Min(Max(0, box.y0), height);
            i32 y1 = Min(Max(y0, box.y1), height);
            
            i32 vertex_count = group->vertex_list.vertex_count;
            
            // NOTE(yuval): A group with no vertices or with an empty clip rect cannot produce any pixels
            if ((vertex_count <= 0) || (x1 <= x0) || (y1 <= y0)){
                continue;
            }
            
            // NOTE(yuval): Set scissor rect
            {
                MTLScissorRect scissor_rect;
                scissor_rect.x = (NSUInteger)x0;
                scissor_rect.y = (NSUInteger)y0;
                scissor_rect.width = (NSUInteger)(x1 - x0);
                scissor_rect.height = (NSUInteger)(y1 - y0);
                
                [render_encoder setScissorRect:scissor_rect];
            }
            
            // NOTE(yuval): Bind a texture
            {
                Face* face = font_set_face_from_id(font_set, group->face_id);
                if (face != 0){
                    // NOTE(yuval): Bind face texture
                    [self bind_texture:face->texture
                               encoder:render_encoder];
                }
                else{
                    // NOTE(yuval): Bind fallback texture
                    [self bind_texture:_target->fallback_texture_id
                               encoder:render_encoder];
                }
            }
            
            // NOTE(yuval): Copy the vertex data to the vertex buffer
            {
                u8 *group_buffer_contents = (u8*)[buffer->buffer contents] + buffer_offset;
                u8 *cursor = group_buffer_contents;
                for (Render_Vertex_Array_Node *node = group->vertex_list.first;
                     node;
                     node = node->next){
                    i32 size = node->vertex_count * sizeof(*node->vertices);
                    memcpy(cursor, node->vertices, size);
                    cursor += size;
                }
                
                // NOTE(yuval): Tell Metal which part of the managed buffer was written by the CPU
                NSUInteger data_size = (NSUInteger)(cursor - group_buffer_contents);
                NSRange modify_range = NSMakeRange(buffer_offset, data_size);
                [buffer->buffer didModifyRange:modify_range];
            }
            
            // NOTE(yuval): Set the vertex buffer offset to the beginning of the group's vertices
            [render_encoder setVertexBufferOffset:buffer_offset atIndex:0];
            
            // NOTE(yuval): Draw the vertices
            [render_encoder drawPrimitives:MTLPrimitiveTypeTriangle
                               vertexStart:0
                               vertexCount:vertex_count];
            
            buffer_offset += (vertex_count * sizeof(Render_Vertex));
        }
        
        [render_encoder endEncoding];
        
        // NOTE(yuval): Schedule a present once the framebuffer is complete using the current drawable
        [command_buffer presentDrawable:view.currentDrawable];
        
        // NOTE(yuval): Return the vertex buffer to the cache (on the main queue) once the GPU is done with it
        [command_buffer addCompletedHandler:^(id<MTLCommandBuffer> completed_buffer){
             dispatch_async(dispatch_get_main_queue(), ^{
                                [self add_reusable_buffer:buffer];
                            });
         }];
    }
    
    // NOTE(yuval): Finalize rendering here and push the command buffer to the GPU
    [command_buffer commit];
    
#if FRED_INTERNAL
    [_capture_scope endScope];
#endif
}

// NOTE(yuval): Creates an R8 2D array texture of dim.x by dim.y with dim.z layers, stores it in a
// free texture slot and returns the slot's packed locator as the texture handle. Returns
// metal__invalid_texture_slot_locator if no slot could be obtained. The kind parameter is not used.
- (u32)get_texture_of_dim:(Vec3_i32)dim kind:(Texture_Kind)kind{
    u32 handle = metal__invalid_texture_slot_locator;
    
    // NOTE(yuval): Check for a free texture slot and allocate another slot bucket if no free slot has been found
    if (!_texture_slots.first_free_slot){
        // NOTE(yuval): Assert that the next bucket's index can fit in a u16
        Assert(_texture_slots.bucket_count < ((u16)-1));
        
        Metal_Texture_Slot_Bucket *bucket = (Metal_Texture_Slot_Bucket*)system_memory_allocate(sizeof(Metal_Texture_Slot_Bucket), file_name_line_number_lit_u8);
        
        for (u16 slot_index = 0;
             slot_index < ArrayCount(bucket->slots);
             ++slot_index){
            Metal_Texture_Slot *slot = &bucket->slots[slot_index];
            block_zero_struct(slot);
            slot->locator.bucket_index = _texture_slots.bucket_count;
            slot->locator.slot_index = slot_index;
            
            sll_queue_push(_texture_slots.first_free_slot, _texture_slots.last_free_slot, slot);
        }
        
        sll_queue_push(_texture_slots.first_bucket, _texture_slots.last_bucket, bucket);
        _texture_slots.bucket_count += 1;
    }
    
    // NOTE(yuval): Get the first free texture slot and remove it from the free list (a slot is guaranteed to exist because we assert that above).
    if (_texture_slots.first_free_slot){
        Metal_Texture_Slot *texture_slot = _texture_slots.first_free_slot;
        sll_queue_pop(_texture_slots.first_free_slot, _texture_slots.last_free_slot);
        texture_slot->next = 0;
        
        // NOTE(yuval): Create a texture descriptor. For a 2D array texture the layer count
        // goes in arrayLength; depth is only meaningful for 3D textures and stays 1.
        MTLTextureDescriptor *texture_descriptor = [[MTLTextureDescriptor alloc] init];
        texture_descriptor.textureType = MTLTextureType2DArray;
        texture_descriptor.pixelFormat = MTLPixelFormatR8Unorm;
        texture_descriptor.width = dim.x;
        texture_descriptor.height = dim.y;
        texture_descriptor.depth = 1;
        texture_descriptor.arrayLength = (NSUInteger)Max(dim.z, 1);
        
        // NOTE(yuval): Create the texture from the device using the descriptor and store it in the slot.
        Metal_Texture texture = [_device newTextureWithDescriptor:texture_descriptor];
        texture_slot->texture = texture;
        
        // NOTE(yuval): The descriptor was created with alloc/init here, so it is released here
        [texture_descriptor release];
        
        handle = texture_slot->locator.packed;
    }
    
    return handle;
}

// NOTE(yuval): Copies one-byte-per-pixel data into the region of the texture that starts at p and
// spans dim, where z selects the array layer(s). Returns true if the handle refers to a slot
// that holds a texture and data is not null. The kind parameter is not used.
- (b32)fill_texture:(u32)handle kind:(Texture_Kind)kind pos:(Vec3_i32)p dim:(Vec3_i32)dim data:(void*)data{
    b32 result = false;
    
    if (data){
        Metal_Texture_Slot *texture_slot = [self get_texture_slot_at_handle:handle];
        if (texture_slot){
            Metal_Texture texture = texture_slot->texture;
            if (texture != 0){
                MTLRegion slice_region = MTLRegionMake2D((NSUInteger)p.x, (NSUInteger)p.y,
                                                         (NSUInteger)dim.x, (NSUInteger)dim.y);
                NSUInteger bytes_per_row = (NSUInteger)dim.x;
                NSUInteger bytes_per_slice = bytes_per_row*(NSUInteger)dim.y;
                
                // NOTE(yuval): Fill the texture with data, one array layer at a time.
                // NOTE(review): Assumes the layers in data are tightly packed one after another - confirm against callers.
                u8 *slice_data = (u8*)data;
                for (i32 z = 0; z < dim.z; z += 1){
                    [texture replaceRegion:slice_region
                               mipmapLevel:0
                                     slice:(NSUInteger)(p.z + z)
                                 withBytes:slice_data
                               bytesPerRow:bytes_per_row
                             bytesPerImage:0];
                    slice_data += bytes_per_slice;
                }
                
                result = true;
            }
        }
    }
    
    return(result);
}

// NOTE(yuval): Binds the texture stored at handle to fragment texture index 0. Does nothing
// if the handle does not refer to a slot or the slot holds no texture.
- (void)bind_texture:(u32)handle encoder:(id<MTLRenderCommandEncoder>)render_encoder{
    Metal_Texture_Slot *texture_slot = [self get_texture_slot_at_handle:handle];
    if (texture_slot){
        Metal_Texture texture = texture_slot->texture;
        if (texture != 0){
            [render_encoder setFragmentTexture:texture
                                       atIndex:0];
        }
    }
}

// NOTE(yuval): Resolves a locator to its slot by walking the bucket list. Returns 0 for the
// invalid locator, for a bucket index past the end of the list or for an out of range slot index.
- (Metal_Texture_Slot*)get_texture_slot_at_locator:(Metal_Texture_Slot_Locator)locator{
    Metal_Texture_Slot *result = 0;
    
    if (locator.packed != metal__invalid_texture_slot_locator){
        // NOTE(yuval): Advance to the bucket at locator.bucket_index (bucket ends up 0 if the list is too short)
        Metal_Texture_Slot_Bucket *bucket = _texture_slots.first_bucket;
        for (u16 bucket_index = 0;
             (bucket_index < locator.bucket_index) && bucket;
             ++bucket_index, bucket = bucket->next);
        
        if (bucket && (locator.slot_index < metal__texture_slots_per_bucket)){
            result = &bucket->slots[locator.slot_index];
        }
    }
    
    return(result);
}

// NOTE(yuval): Same as get_texture_slot_at_locator, taking the packed form of the locator.
- (Metal_Texture_Slot*)get_texture_slot_at_handle:(u32)handle{
    Metal_Texture_Slot_Locator locator;
    locator.packed = handle;
    
    Metal_Texture_Slot *result = [self get_texture_slot_at_locator:locator];
    return(result);
}

// NOTE(yuval): Returns a buffer of at least the requested size. A cached buffer is removed from
// the cache and returned if one is large enough, otherwise a new buffer is made. The caller
// gives the buffer back with add_reusable_buffer.
- (Metal_Buffer*)get_reusable_buffer_with_size:(NSUInteger)size{
    // NOTE(yuval): This routine is a modified version of Dear ImGui's MetalContext::dequeueReusableBufferOfLength in imgui_impl_metal.mm
    
    u64 now = system_now_time();
    
    // NOTE(yuval): Purge old buffers that haven't been useful for a while
    if ((now - _last_buffer_cache_purge_time) > 1000000){
        // NOTE(yuval): Detach the whole list. The last detached node still points at the
        // sentinel's address, which is what terminates the walk below.
        Node *node = _buffer_cache.next;
        dll_init_sentinel(&_buffer_cache);
        
        while (node != &_buffer_cache){
            // NOTE(yuval): Grab the successor first; both re-inserting and freeing invalidate node->next
            Node *next_node = node->next;
            
            Metal_Buffer *candidate = CastFromMember(Metal_Buffer, node, node);
            if (candidate->last_reuse_time > _last_buffer_cache_purge_time){
                dll_insert(&_buffer_cache, node);
            }
            else{
                metal__free_buffer(candidate);
            }
            
            node = next_node;
        }
        
        _last_buffer_cache_purge_time = now;
    }
    
    // NOTE(yuval): See if we have a buffer we can reuse (of those big enough, take the one with the oldest reuse time)
    Metal_Buffer *best_candidate = 0;
    for (Node *node = _buffer_cache.next;
         node != &_buffer_cache;
         node = node->next){
        Metal_Buffer *candidate = CastFromMember(Metal_Buffer, node, node);
        if ((candidate->size >= size) && ((!best_candidate) || (best_candidate->last_reuse_time > candidate->last_reuse_time))){
            best_candidate = candidate;
        }
    }
    
    Metal_Buffer *result;
    if (best_candidate){
        // NOTE(yuval): A best candidate has been found! Remove it from the buffer list and set its last reuse time.
        dll_remove(&best_candidate->node);
        best_candidate->last_reuse_time = now;
        result = best_candidate;
    }
    else{
        // NOTE(yuval): No luck; make a new buffer.
        result = metal__make_buffer(size, _device);
    }
    
    return(result);
}

// NOTE(yuval): Puts a buffer (back) into the cache of reusable buffers.
- (void)add_reusable_buffer:(Metal_Buffer*)buffer{
    // NOTE(yuval): This routine is a modified version of Dear ImGui's MetalContext::enqueueReusableBuffer in imgui_impl_metal.mm
    
    dll_insert(&_buffer_cache, &buffer->node);
}
@end