From 29592b404108726f9d32099507abc116a79aa0c0 Mon Sep 17 00:00:00 2001
From: Amber
Date: Mon, 31 Oct 2022 11:42:19 +0100
Subject: [PATCH] Add simple compute shader example.

---
 meson.build             |   3 +-
 src/compute_simple.comp |  16 +
 src/main.c              |   2 +-
 src/vlk_compute.c       | 755 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 774 insertions(+), 2 deletions(-)
 create mode 100644 src/compute_simple.comp
 create mode 100644 src/vlk_compute.c

diff --git a/meson.build b/meson.build
index 9838cec..7e53f1d 100644
--- a/meson.build
+++ b/meson.build
@@ -4,7 +4,7 @@ deps = [dependency('glfw3'), dependency('vulkan'), dependency('dl'), dependency(
 
 glslc = find_program('glslc')
 
-shaders = ['src/vertex_simple.vert', 'src/fragment_simple.frag']
+shaders = ['src/vertex_simple.vert', 'src/fragment_simple.frag', 'src/compute_simple.comp']
 
 foreach shader : shaders
   out = (shader.split('.')[-2] + '.spv').split('/')[-1]
@@ -18,3 +18,4 @@ foreach shader : shaders
 endforeach
 
 executable('triangle', 'src/main.c', dependencies: deps)
+executable('compute', 'src/vlk_compute.c', dependencies: deps)
diff --git a/src/compute_simple.comp b/src/compute_simple.comp
new file mode 100644
index 0000000..8b62771
--- /dev/null
+++ b/src/compute_simple.comp
@@ -0,0 +1,16 @@
+#version 450
+#extension GL_EXT_debug_printf : enable
+
+layout(local_size_x_id = 1) in;
+
+layout(binding = 0) uniform Input {
+    uint num;
+} in_vals;
+
+layout(binding = 1) buffer Output {
+    uint num;
+} out_vals;
+
+void main() {
+    out_vals.num = in_vals.num;
+}
diff --git a/src/main.c b/src/main.c
index 4b0bb97..6973931 100644
--- a/src/main.c
+++ b/src/main.c
@@ -921,7 +921,7 @@ void create_graphics_pipeline(ApplicationState* state) {
 
     VkPipelineDepthStencilStateCreateInfo depth_stencil = {0};
     depth_stencil.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
-    depth_stencil.depthTestEnable = VK_TRUE;
+    depth_stencil.depthTestEnable = VK_FALSE;
     depth_stencil.depthWriteEnable = VK_TRUE;
     depth_stencil.depthCompareOp = VK_COMPARE_OP_LESS;
     depth_stencil.depthBoundsTestEnable = VK_FALSE;
diff --git a/src/vlk_compute.c b/src/vlk_compute.c
new file mode 100644
index 0000000..05f913f
--- /dev/null
+++ b/src/vlk_compute.c
@@ -0,0 +1,755 @@
+#define GLFW_INCLUDE_VULKAN
+#define GLM_FORCE_DEPTH_ZERO_TO_ONE
+// NOTE(review): the header names on the #include lines were lost when this
+// patch was extracted. The list below is rebuilt from what this file uses;
+// confirm it against the original commit.
+#include <GLFW/glfw3.h>
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"
+
+#define WIDTH 2560
+#define HEIGHT 1440
+#define VALIDATE true
+
+//Element count of a real array. The second divisor evaluates to 0 when
+//sizeof(x) is not a multiple of the element size, so some accidental uses on
+//a pointer produce a division-by-zero diagnostic at compile time.
+#define ARRSIZE(x) ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
+
+const char* validation_layers[] = {
+    "VK_LAYER_KHRONOS_validation",
+};
+
+//the compute example needs no device extensions
+const char* required_extensions[] = {
+};
+
+typedef struct SwapChainSupportDetails {
+    VkSurfaceCapabilitiesKHR capabilities;
+    uint32_t num_formats;
+    VkSurfaceFormatKHR* formats;
+    uint32_t num_modes;
+    VkPresentModeKHR* present_modes;
+} SwapChainSupportDetails;
+
+//every Vulkan handle the example creates; released again in terminate()
+typedef struct ApplicationState {
+    VkInstance instance;
+    VkPhysicalDevice physical_device;
+    VkDevice device;
+
+    struct {
+        VkQueue queue;
+
+        VkCommandPool command_pool;
+        VkCommandBuffer command_buffer;
+
+        VkDescriptorSetLayout descriptor_set_layout;
+        VkDescriptorPool descriptor_pool;
+        VkDescriptorSet descriptor_sets;
+        VkPipelineLayout pipeline_layout;
+        VkPipeline pipeline;
+
+        VkBuffer input_buffer;
+        VkDeviceMemory input_buffer_memory;
+
+        VkBuffer output_buffer;
+        VkDeviceMemory output_buffer_memory;
+
+        VkFence compute_done;
+    } compute;
+} ApplicationState;
+
+typedef struct QueueFamilyIndices {
+    bool compute_present;
+    uint32_t compute_family;
+} QueueFamilyIndices;
+
+//malloc that ends the program instead of handing back NULL on failure
+static void* xmalloc(size_t size) {
+    void* ptr = malloc(size);
+    if (ptr == NULL && size != 0) {
+        printf("out of memory\n");
+        exit(1);
+    }
+    return ptr;
+}
+
+//Prints every instance layer the loader reports and returns true only when
+//each entry of validation_layers is among them.
+bool check_validation_layer_support(void) {
+    uint32_t layer_count = 0;
+    vkEnumerateInstanceLayerProperties(&layer_count, NULL);
+
+    VkLayerProperties* available_layers = xmalloc(sizeof(*available_layers) * layer_count);
+    vkEnumerateInstanceLayerProperties(&layer_count, available_layers);
+
+    printf("supported validation layers:\n");
+    for (uint32_t i = 0; i < layer_count; i++) {
+        printf("\t%s\n", available_layers[i].layerName);
+    }
+
+    bool all_found = true;
+    for (size_t i = 0; i < ARRSIZE(validation_layers) && all_found; i++) {
+        bool found = false;
+        for (uint32_t j = 0; j < layer_count && !found; j++) {
+            found = !strcmp(available_layers[j].layerName, validation_layers[i]);
+        }
+        all_found = found;
+    }
+
+    //single exit so the list is released whether or not every layer was found
+    free(available_layers);
+
+    return all_found;
+}
+
+//we first create an instance, filling it in with our application information.
+//Exits the process when validation is requested but unavailable or when
+//vkCreateInstance fails.
+void create_instance(ApplicationState* state) {
+    if (VALIDATE && !check_validation_layer_support()) {
+        printf("Requested validation layers but they're not supported\n");
+        exit(1);
+    }
+
+    VkApplicationInfo appinfo = {0};
+
+    appinfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+    appinfo.pApplicationName = "Vulkan learning triangle example";
+    appinfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
+    appinfo.pEngineName = "No Engine";
+    appinfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
+    appinfo.apiVersion = VK_API_VERSION_1_0;
+
+    VkInstanceCreateInfo create_info = {0};
+    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    create_info.pApplicationInfo = &appinfo;
+
+    //The compute example never opens a window, so it asks for no instance
+    //extensions. Querying GLFW for its required extensions is not possible
+    //here because glfwInit() is never called in this program.
+    create_info.enabledExtensionCount = 0;
+    create_info.ppEnabledExtensionNames = NULL;
+
+    if (VALIDATE) {
+        create_info.enabledLayerCount = ARRSIZE(validation_layers);
+        create_info.ppEnabledLayerNames = validation_layers;
+    } else {
+        create_info.enabledLayerCount = 0;
+    }
+
+    //now we actually create the instance
+    if (vkCreateInstance(&create_info, NULL, &state->instance) != VK_SUCCESS) {
+        printf("error creating instance\n");
+        exit(1);
+    }
+
+    printf("Instance successfully created\n");
+
+    //let's just list all supported vulkan instance extensions
+    uint32_t extension_num = 0;
+    vkEnumerateInstanceExtensionProperties(NULL, &extension_num, NULL);
+    VkExtensionProperties* extensions = xmalloc(sizeof(*extensions) * extension_num);
+    vkEnumerateInstanceExtensionProperties(NULL, &extension_num, extensions);
+
+    printf("instance extensions:\n");
+
+    for (uint32_t i = 0; i < extension_num; i++) {
+        printf("\t%s\n", extensions[i].extensionName);
+    }
+
+    free(extensions);
+}
+
+//Returns the index of the first memory type that is allowed by type_filter
+//and has every flag in props set; exits the process when there is none.
+uint32_t find_memory_type (ApplicationState* state, uint32_t type_filter, VkMemoryPropertyFlags props) {
+    VkPhysicalDeviceMemoryProperties mem_props;
+    vkGetPhysicalDeviceMemoryProperties(state->physical_device, &mem_props);
+
+    for (uint32_t i = 0; i < mem_props.memoryTypeCount; i++) {
+        //unsigned literal: shifting a signed 1 into bit 31 would be undefined
+        if ((type_filter & (1u << i)) && (mem_props.memoryTypes[i].propertyFlags & props) == props) {
+            return i;
+        }
+    }
+
+    printf("no suitable memory type\n");
+    exit(1);
+}
+
+//Creates an exclusive-mode buffer of the given size and usage, allocates
+//memory with the requested properties and binds it at offset 0. The handles
+//are written to *buffer and *memory; any failure exits the process.
+void create_buffer(ApplicationState* state, VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties,
+        VkBuffer* buffer, VkDeviceMemory* memory) {
+    VkBufferCreateInfo buffer_info = {0};
+    buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    buffer_info.size = size;
+    buffer_info.usage = usage;
+    buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+    if (vkCreateBuffer(state->device, &buffer_info, NULL, buffer) != VK_SUCCESS) {
+        printf("Failed to create buffer\n");
+        exit(1);
+    }
+
+    VkMemoryRequirements mem_req;
+    vkGetBufferMemoryRequirements(state->device, *buffer, &mem_req);
+
+    VkMemoryAllocateInfo alloc_info = {0};
+    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    alloc_info.allocationSize = mem_req.size;
+    alloc_info.memoryTypeIndex = find_memory_type(state, mem_req.memoryTypeBits, properties);
+
+    if (vkAllocateMemory(state->device, &alloc_info, NULL, memory) != VK_SUCCESS) {
+        printf("failed to allocate memory for buffer\n");
+        exit(1);
+    }
+
+    if (vkBindBufferMemory(state->device, *buffer, *memory, 0) != VK_SUCCESS) {
+        printf("failed to bind memory to buffer\n");
+        exit(1);
+    }
+}
+
+//find the queue families from which we can allocate all the types of queues we need
+//When several families support compute, the one with the highest index is reported.
+QueueFamilyIndices find_queue_families(ApplicationState* state, VkPhysicalDevice device) {
+    (void)state;
+    struct QueueFamilyIndices ret = {0};
+    uint32_t queue_family_count = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, NULL);
+    VkQueueFamilyProperties* queue_fam_props = xmalloc(sizeof(*queue_fam_props) * queue_family_count);
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, queue_fam_props);
+
+    for (uint32_t i = 0; i < queue_family_count; i++) {
+        if (queue_fam_props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
+            ret.compute_present = true;
+            ret.compute_family = i;
+        }
+    }
+
+    free(queue_fam_props);
+
+    return ret;
+}
+
+//Prints the device extensions of `device` and returns true only when every
+//entry of required_extensions is among them.
+bool extensions_supported(ApplicationState* state, VkPhysicalDevice device) {
+    (void)state;
+    uint32_t num_extensions = 0;
+    vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions, NULL);
+
+    VkExtensionProperties* available_extensions = xmalloc(sizeof(*available_extensions) * num_extensions);
+    vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions, available_extensions);
+
+    printf("Device extensions:\n");
+    for (uint32_t i = 0; i < num_extensions; i++) {
+        printf("\t%s\n", available_extensions[i].extensionName);
+    }
+
+    bool all_found = true;
+    for (size_t i = 0; i < ARRSIZE(required_extensions) && all_found; i++) {
+        bool found = false;
+        for (uint32_t j = 0; j < num_extensions && !found; j++) {
+            found = !strcmp(required_extensions[i], available_extensions[j].extensionName);
+        }
+        all_found = found;
+    }
+
+    //single exit so the list is released whether or not everything was found
+    free(available_extensions);
+
+    return all_found;
+}
+
+//check all the requirements for a physical device
+//Stores the first device with a compute-capable queue family in
+//state->physical_device; exits the process when no device qualifies.
+void pick_physical_device(ApplicationState* state) {
+    uint32_t device_count = 0;
+    vkEnumeratePhysicalDevices(state->instance, &device_count, NULL);
+
+    if (device_count == 0) {
+        printf("vulkan is not supported\n");
+        exit(1);
+    }
+
+    VkPhysicalDevice* devices = xmalloc(sizeof(*devices) * device_count);
+    vkEnumeratePhysicalDevices(state->instance, &device_count, devices);
+
+    //now let's check if any device is suitable for our usage
+    //for now we only require that a compute queue is present
+    bool found = false;
+    for (uint32_t i = 0; i < device_count && !found; i++) {
+        QueueFamilyIndices families = find_queue_families(state, devices[i]);
+
+        if (families.compute_present) {
+            state->physical_device = devices[i];
+            found = true;
+        } else {
+            printf("device does not support all necessary extensions and queues\n");
+        }
+    }
+
+    free(devices);
+
+    //without a usable device there is nothing to run on
+    if (!found) {
+        printf("no suitable vulkan device found\n");
+        exit(1);
+    }
+}
+
+//qsort comparator for uint32_t values; compares instead of subtracting so
+//large values cannot overflow
+int cmp(const void* a, const void* b) {
+    uint32_t lhs = *(const uint32_t*)a;
+    uint32_t rhs = *(const uint32_t*)b;
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+//Creates the logical device with one queue per distinct queue family we
+//need and fetches the compute queue handle into state->compute.queue.
+void create_logical_device(ApplicationState* state) {
+    //we already ensured that all of the required queues exist
+    QueueFamilyIndices indices = find_queue_families(state, state->physical_device);
+
+    //now we need to find out the number of unique queues we need to create
+    //since there aren't going to be many queues in this case we don't need
+    //a particularly efficient algorithm
+    uint32_t indices_array[] = {indices.compute_family};
+    qsort(indices_array, ARRSIZE(indices_array), sizeof(uint32_t), cmp);
+
+    //after sorting, duplicates are adjacent: keep an entry only when it
+    //differs from the one before it
+    uint32_t unique_queues[ARRSIZE(indices_array)] = {0};
+    uint32_t j = 0;
+
+    for (size_t i = 0; i < ARRSIZE(indices_array); i++) {
+        if (i == 0 || indices_array[i] != indices_array[i - 1]) {
+            unique_queues[j++] = indices_array[i];
+        }
+    }
+    printf("number of queues: %u\n", j);
+
+    VkDeviceQueueCreateInfo* queue_create_infos = calloc(j, sizeof(*queue_create_infos));
+    if (queue_create_infos == NULL) {
+        printf("out of memory\n");
+        exit(1);
+    }
+
+    float prio = 1.0f;
+    for (uint32_t i = 0; i < j; i++) {
+        queue_create_infos[i].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+        queue_create_infos[i].queueFamilyIndex = unique_queues[i];
+        queue_create_infos[i].queueCount = 1;
+        queue_create_infos[i].pQueuePriorities = &prio;
+    }
+
+    //the compute shader needs no optional device features
+    VkPhysicalDeviceFeatures device_features = {0};
+
+    //now create a logical device knowing which features and queues we need
+    VkDeviceCreateInfo create_info = {0};
+    create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    create_info.pQueueCreateInfos = queue_create_infos;
+    create_info.queueCreateInfoCount = j;
+    create_info.pEnabledFeatures = &device_features;
+    create_info.enabledExtensionCount = ARRSIZE(required_extensions);
+    create_info.ppEnabledExtensionNames = required_extensions;
+
+    if (VALIDATE) {
+        create_info.enabledLayerCount = ARRSIZE(validation_layers);
+        create_info.ppEnabledLayerNames = validation_layers;
+    } else {
+        create_info.enabledLayerCount = 0;
+    }
+
+    printf("creating logical device\n");
+    VkResult result = vkCreateDevice(state->physical_device, &create_info, NULL, &state->device);
+    //the create infos are only read during the call
+    free(queue_create_infos);
+    if (result != VK_SUCCESS) {
+        printf("Error creating logical device\n");
+        exit(1);
+    }
+
+    //get a handle to the queue
+    vkGetDeviceQueue(state->device, indices.compute_family, 0, &state->compute.queue);
+
+    printf("Logical device created\n");
+}
+
+//Creates the command pool for the compute queue family and allocates the
+//single primary command buffer the example records into.
+void setup_compute_queues(ApplicationState* state) {
+    QueueFamilyIndices families = find_queue_families(state, state->physical_device);
+    VkCommandPoolCreateInfo pool_info = {0};
+    pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    //Allow command buffers to be rerecorded individually
+    pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+    pool_info.queueFamilyIndex = families.compute_family;
+
+    if (vkCreateCommandPool(state->device, &pool_info, NULL, &state->compute.command_pool) != VK_SUCCESS) {
+        printf("failed to create command pool\n");
+        exit(1);
+    }
+
+    printf("created command pools\n");
+
+    VkCommandBufferAllocateInfo command_buf_allocate_info = {0};
+    command_buf_allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+    command_buf_allocate_info.commandPool = state->compute.command_pool;
+    //VK_COMMAND_BUFFER_LEVEL_PRIMARY: Can be submitted to a queue for execution, but cannot be called from other command buffers.
+    //VK_COMMAND_BUFFER_LEVEL_SECONDARY: Cannot be submitted directly, but can be called from primary command buffers
+    command_buf_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    command_buf_allocate_info.commandBufferCount = 1;
+
+    if (vkAllocateCommandBuffers(state->device, &command_buf_allocate_info, &state->compute.command_buffer) != VK_SUCCESS) {
+        printf("failed to allocate command buffer\n");
+        exit(1);
+    }
+    printf("created command buffer\n");
+}
+
+//Declares the two bindings of compute_simple.comp: a uniform buffer at
+//binding 0 (input) and a storage buffer at binding 1 (output).
+void create_descriptor_set_layout(ApplicationState* state) {
+    VkDescriptorSetLayoutBinding input_binding = {0};
+    input_binding.binding = 0;
+    input_binding.descriptorCount = 1;
+    input_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    input_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+
+    VkDescriptorSetLayoutBinding output_binding = {0};
+    output_binding.binding = 1;
+    output_binding.descriptorCount = 1;
+    output_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    output_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+
+    VkDescriptorSetLayoutBinding bindings[2] = {input_binding, output_binding};
+
+    VkDescriptorSetLayoutCreateInfo layout_info = {0};
+    layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    layout_info.bindingCount = ARRSIZE(bindings);
+    layout_info.pBindings = bindings;
+
+    if (vkCreateDescriptorSetLayout(state->device, &layout_info, NULL, &state->compute.descriptor_set_layout) != VK_SUCCESS) {
+        printf("could not create descriptor set layout\n");
+        exit(1);
+    }
+    printf("created descriptor set layout\n");
+}
+
+//Reads the whole file `name` into a freshly allocated buffer that the
+//caller must free() and stores its length in *size. Exits the process when
+//the file cannot be opened, measured or read.
+char* load_file(const char* name, size_t* size) {
+    FILE* fileptr = fopen(name, "rb");
+    if (fileptr == NULL) {
+        printf("could not open file %s\n", name);
+        exit(1);
+    }
+
+    //ftell reports failure as -1, so keep the length signed until checked
+    long filelen = -1;
+    if (fseek(fileptr, 0, SEEK_END) == 0) {
+        filelen = ftell(fileptr);
+    }
+    if (filelen < 0) {
+        printf("could not determine size of file %s\n", name);
+        exit(1);
+    }
+    rewind(fileptr);
+
+    char* buffer = xmalloc((size_t)filelen);
+    if (filelen > 0 && fread(buffer, (size_t)filelen, 1, fileptr) != 1) {
+        printf("could not read file %s\n", name);
+        exit(1);
+    }
+    fclose(fileptr);
+
+    *size = (size_t)filelen;
+
+    return buffer;
+}
+
+//Wraps `size` bytes of SPIR-V in a shader module; exits the process on
+//failure. The caller destroys the module.
+VkShaderModule create_shader_module(ApplicationState* state, const char* code, size_t size) {
+    VkShaderModuleCreateInfo create_info = {0};
+    create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    create_info.codeSize = size;
+    create_info.pCode = (const uint32_t*)code;
+
+    VkShaderModule ret;
+
+    if (vkCreateShaderModule(state->device, &create_info, NULL, &ret) != VK_SUCCESS) {
+        printf("shader creation failed\n");
+        exit(1);
+    }
+
+    return ret;
+}
+
+//Builds the pipeline layout and the compute pipeline from
+//./compute_simple.spv, using "main" as the entry point.
+void create_compute_pipeline(ApplicationState* state) {
+    size_t compute_shader_size;
+    char* compute_shader = load_file("./compute_simple.spv", &compute_shader_size);
+
+    VkShaderModule compute_shader_module = create_shader_module(state, compute_shader, compute_shader_size);
+    //the module has been created, so the file contents can go
+    free(compute_shader);
+
+    VkPipelineShaderStageCreateInfo compute_shader_stage_info = {0};
+    compute_shader_stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    compute_shader_stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
+    compute_shader_stage_info.module = compute_shader_module;
+    compute_shader_stage_info.pName = "main";
+
+    VkPipelineLayoutCreateInfo pipeline_layout_create_info = {0};
+    pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+    pipeline_layout_create_info.setLayoutCount = 1;
+    pipeline_layout_create_info.pSetLayouts = &state->compute.descriptor_set_layout;
+    pipeline_layout_create_info.pushConstantRangeCount = 0;
+    pipeline_layout_create_info.pPushConstantRanges = NULL;
+
+    if (vkCreatePipelineLayout(state->device, &pipeline_layout_create_info, NULL, &state->compute.pipeline_layout) != VK_SUCCESS) {
+        printf("failed to create pipeline layout\n");
+        exit(1);
+    }
+
+    printf("created pipeline layout\n");
+
+    VkComputePipelineCreateInfo create_info = {0};
+    create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+    create_info.layout = state->compute.pipeline_layout;
+    create_info.basePipelineHandle = VK_NULL_HANDLE;
+    create_info.stage = compute_shader_stage_info;
+    create_info.flags = 0;
+
+    if (vkCreateComputePipelines(state->device, VK_NULL_HANDLE, 1, &create_info, NULL, &state->compute.pipeline) != VK_SUCCESS) {
+        printf("failed to create compute pipeline\n");
+        exit(1);
+    }
+
+    printf("created compute pipeline\n");
+
+    vkDestroyShaderModule(state->device, compute_shader_module, NULL);
+}
+
+//Creates a pool sized for exactly one set holding one uniform-buffer and
+//one storage-buffer descriptor.
+void create_descriptor_pool(ApplicationState* state) {
+    VkDescriptorPoolSize pool_size[2] = {0};
+    pool_size[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    pool_size[0].descriptorCount = 1;
+    pool_size[1].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    pool_size[1].descriptorCount = 1;
+
+    VkDescriptorPoolCreateInfo pool_info = {0};
+    pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+    pool_info.poolSizeCount = ARRSIZE(pool_size);
+    pool_info.pPoolSizes = pool_size;
+    pool_info.maxSets = 1;
+
+    if (vkCreateDescriptorPool(state->device, &pool_info, NULL, &state->compute.descriptor_pool) != VK_SUCCESS) {
+        printf("failed to create descriptor pool\n");
+        exit(1);
+    }
+
+    printf("created descriptor pool\n");
+}
+
+//Allocates the descriptor set, creates the host-visible input and output
+//buffers (one uint32_t each) and points bindings 0 and 1 at them.
+void create_descriptor_sets(ApplicationState* state) {
+    VkDescriptorSetAllocateInfo alloc_info = {0};
+    alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    alloc_info.descriptorPool = state->compute.descriptor_pool;
+    alloc_info.descriptorSetCount = 1;
+    alloc_info.pSetLayouts = &state->compute.descriptor_set_layout;
+
+    if (vkAllocateDescriptorSets(state->device, &alloc_info, &state->compute.descriptor_sets) != VK_SUCCESS) {
+        printf("failed to allocate descriptor sets\n");
+        exit(1);
+    }
+
+    create_buffer(state, sizeof(uint32_t), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+            &state->compute.input_buffer, &state->compute.input_buffer_memory);
+    create_buffer(state, sizeof(uint32_t), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+            &state->compute.output_buffer, &state->compute.output_buffer_memory);
+
+    VkDescriptorBufferInfo in_buffer_info = {0};
+    VkDescriptorBufferInfo out_buffer_info = {0};
+
+    in_buffer_info.buffer = state->compute.input_buffer;
+    in_buffer_info.range = sizeof(uint32_t);
+
+    out_buffer_info.buffer = state->compute.output_buffer;
+    out_buffer_info.range = sizeof(uint32_t);
+
+    VkWriteDescriptorSet descriptor_writes[2] = {0};
+
+    descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    descriptor_writes[0].dstSet = state->compute.descriptor_sets;
+    descriptor_writes[0].dstBinding = 0;
+    descriptor_writes[0].dstArrayElement = 0;
+    descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    descriptor_writes[0].descriptorCount = 1;
+    descriptor_writes[0].pBufferInfo = &in_buffer_info;
+    descriptor_writes[0].pImageInfo = NULL;
+    descriptor_writes[0].pTexelBufferView = NULL;
+
+    descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    descriptor_writes[1].dstSet = state->compute.descriptor_sets;
+    descriptor_writes[1].dstBinding = 1;
+    descriptor_writes[1].dstArrayElement = 0;
+    descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    descriptor_writes[1].descriptorCount = 1;
+    descriptor_writes[1].pBufferInfo = &out_buffer_info;
+    descriptor_writes[1].pImageInfo = NULL;
+    descriptor_writes[1].pTexelBufferView = NULL;
+
+    vkUpdateDescriptorSets(state->device, ARRSIZE(descriptor_writes), descriptor_writes, 0, NULL);
+
+    printf("allocated and written descriptor sets\n");
+}
+
+//Creates the fence, initially unsignalled, that main() waits on after
+//submitting the compute work.
+void create_sync_objects(ApplicationState* state) {
+    VkFenceCreateInfo fence_info = {0};
+    fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+
+    if (vkCreateFence(state->device, &fence_info, NULL, &state->compute.compute_done) != VK_SUCCESS) {
+        printf("failed to create sync objects\n");
+        exit(1);
+    }
+}
+
+//Runs every setup step; the order follows the dependencies between them.
+void init_vulkan(ApplicationState* state) {
+    create_instance(state);
+    pick_physical_device(state);
+    create_logical_device(state);
+    create_descriptor_set_layout(state);
+    create_compute_pipeline(state);
+    create_descriptor_pool(state);
+    create_descriptor_sets(state);
+    setup_compute_queues(state);
+    create_sync_objects(state);
+}
+
+//Destroys everything init_vulkan() created. Destroying the command pool
+//also frees the command buffer allocated from it.
+void terminate(ApplicationState* state) {
+    vkDestroyFence(state->device, state->compute.compute_done, NULL);
+
+    //buffers go before the memory that backs them
+    vkDestroyBuffer(state->device, state->compute.input_buffer, NULL);
+    vkDestroyBuffer(state->device, state->compute.output_buffer, NULL);
+    vkFreeMemory(state->device, state->compute.input_buffer_memory, NULL);
+    vkFreeMemory(state->device, state->compute.output_buffer_memory, NULL);
+
+    vkDestroyDescriptorPool(state->device, state->compute.descriptor_pool, NULL);
+    vkDestroyPipeline(state->device, state->compute.pipeline, NULL);
+    vkDestroyPipelineLayout(state->device, state->compute.pipeline_layout, NULL);
+    vkDestroyDescriptorSetLayout(state->device, state->compute.descriptor_set_layout, NULL);
+    vkDestroyCommandPool(state->device, state->compute.command_pool, NULL);
+    vkDestroyDevice(state->device, NULL);
+    vkDestroyInstance(state->instance, NULL);
+}
+
+//Records one dispatch of the compute shader, feeds it the value 15 through
+//the input buffer and prints what the shader wrote to the output buffer.
+int main(void) {
+    ApplicationState state = {0};
+    init_vulkan(&state);
+
+    VkCommandBufferBeginInfo begin_info = {0};
+    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+    begin_info.flags = 0;
+    begin_info.pInheritanceInfo = NULL;
+
+    printf("start recording command buffer\n");
+
+    if (vkBeginCommandBuffer(state.compute.command_buffer, &begin_info) != VK_SUCCESS) {
+        printf("failed to begin recording\n");
+        exit(1);
+    }
+
+    vkCmdBindPipeline(state.compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state.compute.pipeline);
+    vkCmdBindDescriptorSets(state.compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+            state.compute.pipeline_layout, 0, 1, &state.compute.descriptor_sets, 0, NULL);
+    vkCmdDispatch(state.compute.command_buffer, 1, 1, 1);
+
+    //Waiting on the fence alone does not make the shader's write visible to
+    //the host; that takes a memory barrier towards the host stage.
+    VkMemoryBarrier host_read_barrier = {0};
+    host_read_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    host_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+    host_read_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
+    vkCmdPipelineBarrier(state.compute.command_buffer,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
+            0, 1, &host_read_barrier, 0, NULL, 0, NULL);
+
+    if (vkEndCommandBuffer(state.compute.command_buffer) != VK_SUCCESS) {
+        printf("failed to record command buffer\n");
+        exit(1);
+    }
+
+    printf("command buffer recorded\n");
+
+    VkSubmitInfo compute_submit_info = {0};
+    compute_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    compute_submit_info.commandBufferCount = 1;
+    compute_submit_info.pCommandBuffers = &state.compute.command_buffer;
+    compute_submit_info.signalSemaphoreCount = 0;
+    compute_submit_info.waitSemaphoreCount = 0;
+
+    void* mapped = NULL;
+    if (vkMapMemory(state.device, state.compute.input_buffer_memory, 0, sizeof(uint32_t), 0, &mapped) != VK_SUCCESS) {
+        printf("failed to map input buffer\n");
+        exit(1);
+    }
+    const uint32_t input_value = 15;
+    memcpy(mapped, &input_value, sizeof(input_value));
+    vkUnmapMemory(state.device, state.compute.input_buffer_memory);
+
+    //a failed submit never signals the fence, so waiting on it would hang
+    if (vkQueueSubmit(state.compute.queue, 1, &compute_submit_info, state.compute.compute_done) != VK_SUCCESS) {
+        printf("submit failed\n");
+        exit(1);
+    }
+
+    if (vkWaitForFences(state.device, 1, &state.compute.compute_done, VK_TRUE, UINT64_MAX) != VK_SUCCESS) {
+        printf("waiting for the compute fence failed\n");
+        exit(1);
+    }
+    vkQueueWaitIdle(state.compute.queue);
+
+    if (vkMapMemory(state.device, state.compute.output_buffer_memory, 0, sizeof(uint32_t), 0, &mapped) != VK_SUCCESS) {
+        printf("failed to map output buffer\n");
+        exit(1);
+    }
+    uint32_t result = 0;
+    memcpy(&result, mapped, sizeof(result));
+    printf("submit complete, result: %X\n", (unsigned)result);
+    vkUnmapMemory(state.device, state.compute.output_buffer_memory);
+
+    terminate(&state);
+
+    return 0;
+}