Browse Source

Modify texture for the triangle example using a compute shader.

master
Amber 2 years ago
parent
commit
c4f1801f3b
  1. 2
      meson.build
  2. 198
      src/main.c
  3. 57
      src/postprocess.comp

2
meson.build

@ -4,7 +4,7 @@ deps = [dependency('glfw3'), dependency('vulkan'), dependency('dl'), dependency( @@ -4,7 +4,7 @@ deps = [dependency('glfw3'), dependency('vulkan'), dependency('dl'), dependency(
glslc = find_program('glslc')
shaders = ['src/vertex_simple.vert', 'src/fragment_simple.frag', 'src/compute_simple.comp', 'src/compute_simple2.comp']
shaders = ['src/vertex_simple.vert', 'src/fragment_simple.frag', 'src/compute_simple.comp', 'src/compute_simple2.comp', 'src/postprocess.comp']
foreach shader : shaders
out = (shader.split('.')[-2] + '.spv').split('/')[-1]

198
src/main.c

@ -100,6 +100,24 @@ typedef struct ApplicationState { @@ -100,6 +100,24 @@ typedef struct ApplicationState {
VkImage depth_image;
VkDeviceMemory depth_image_memory;
VkImageView depth_image_view;
struct {
VkQueue queue;
VkCommandPool command_pool;
VkCommandBuffer command_buffer;
VkDescriptorSetLayout descriptor_set_layout;
VkDescriptorPool descriptor_pool;
VkDescriptorSet descriptor_sets;
VkPipelineLayout pipeline_layout;
VkPipeline pipeline[2];
VkBuffer input_buffer;
VkDeviceMemory input_buffer_memory;
VkFence compute_done;
} compute;
} ApplicationState;
typedef struct QueueFamilyIndices {
@ -107,6 +125,8 @@ typedef struct QueueFamilyIndices { @@ -107,6 +125,8 @@ typedef struct QueueFamilyIndices {
uint32_t graphics_queue;
bool present_present;
uint32_t present_queue;
bool compute_present;
uint32_t compute_queue;
} QueueFamilyIndices;
typedef struct Vertex {
@ -257,6 +277,11 @@ QueueFamilyIndices find_queue_families(ApplicationState* state, VkPhysicalDevice @@ -257,6 +277,11 @@ QueueFamilyIndices find_queue_families(ApplicationState* state, VkPhysicalDevice
ret.present_present = true;
ret.present_queue = i;
}
//remember a queue family that can execute compute work
if (queue_fam_props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
    ret.compute_present = true;
    //store the index in compute_queue (not present_queue) so the present
    //family selected above is kept and compute_queue is actually initialised
    ret.compute_queue = i;
}
}
free(queue_fam_props);
@ -382,7 +407,7 @@ void create_logical_device(ApplicationState* state) {; @@ -382,7 +407,7 @@ void create_logical_device(ApplicationState* state) {;
//now we need to find out the number of unique queues we need to create
//since there aren't going to be many queues in this case we don't need
//a particularly efficient algorithm
uint32_t indices_array[] = {indices.present_queue, indices.graphics_queue};
uint32_t indices_array[] = {indices.present_queue, indices.graphics_queue, indices.compute_queue};
qsort(indices_array, ARRSIZE(indices_array), sizeof(uint32_t), cmp);
int unique_queues[ARRSIZE(indices_array)] = {0};
@ -435,6 +460,7 @@ void create_logical_device(ApplicationState* state) {; @@ -435,6 +460,7 @@ void create_logical_device(ApplicationState* state) {;
//get a handle to the queue
vkGetDeviceQueue(state->device, indices.graphics_queue, 0, &state->graphics_queue);
vkGetDeviceQueue(state->device, indices.present_queue, 0, &state->present_queue);
vkGetDeviceQueue(state->device, indices.compute_queue, 0, &state->compute.queue);
printf("Logical device created\n");
}
@ -527,6 +553,7 @@ void create_swap_chain(ApplicationState* state) { @@ -527,6 +553,7 @@ void create_swap_chain(ApplicationState* state) {
create_info.imageFormat = format.format;
create_info.imageColorSpace = format.colorSpace;
create_info.imageExtent = extent;
create_info.imageUsage = VK_IMAGE_USAGE_STORAGE_BIT;
//1 unless stereoscopic 3d
create_info.imageArrayLayers = 1;
//we directly render to this image
@ -1205,7 +1232,7 @@ void transition_image_layout(ApplicationState* state, VkImage image, VkFormat fo @@ -1205,7 +1232,7 @@ void transition_image_layout(ApplicationState* state, VkImage image, VkFormat fo
source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destination_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
} else if (old_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
} else if (old_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && (new_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL || new_layout == VK_IMAGE_LAYOUT_GENERAL)) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
@ -1308,14 +1335,14 @@ void create_texture_image(ApplicationState* state) { @@ -1308,14 +1335,14 @@ void create_texture_image(ApplicationState* state) {
vkUnmapMemory(state->device, staging_buffer_memory);
stbi_image_free(pixels);
create_image(state, width, height, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
create_image(state, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
&state->texture_image, &state->texture_image_memory);
printf("created image texture\n");
transition_image_layout(state, state->texture_image, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
transition_image_layout(state, state->texture_image, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
copy_buffer_to_image(state, staging_buffer, state->texture_image, width, height);
transition_image_layout(state, state->texture_image, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
transition_image_layout(state, state->texture_image, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL);
vkDestroyBuffer(state->device, staging_buffer, NULL);
vkFreeMemory(state->device, staging_buffer_memory, NULL);
@ -1326,7 +1353,7 @@ void create_texture_image_view(ApplicationState* state) { @@ -1326,7 +1353,7 @@ void create_texture_image_view(ApplicationState* state) {
view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view_info.image = state->texture_image;
view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
view_info.format = VK_FORMAT_R8G8B8A8_SRGB;
view_info.format = VK_FORMAT_R8G8B8A8_UNORM;
view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view_info.subresourceRange.baseMipLevel = 0;
view_info.subresourceRange.levelCount = 1;
@ -1414,7 +1441,7 @@ void create_descriptor_sets(ApplicationState* state) { @@ -1414,7 +1441,7 @@ void create_descriptor_sets(ApplicationState* state) {
buffer_info.range = sizeof(UBO);
VkDescriptorImageInfo image_info = {0};
image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
image_info.imageView = state->texture_image_view;
image_info.sampler = state->texture_sampler;
@ -1470,7 +1497,27 @@ void record_command_buffer(ApplicationState* state, VkCommandBuffer command_buff @@ -1470,7 +1497,27 @@ void record_command_buffer(ApplicationState* state, VkCommandBuffer command_buff
exit(1);
}
printf("start recording command buffer\n");
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->compute.pipeline[0]);
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
state->compute.pipeline_layout, 0, 1, &state->compute.descriptor_sets, 0, NULL);
vkCmdDispatch(command_buffer, 256, 256, 1);
struct VkImageMemoryBarrier barrier = {0};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier.image = state->texture_image;
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = 1;
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.layerCount = 1;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier);
VkRenderPassBeginInfo render_pass_info = {0};
render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
@ -1532,8 +1579,6 @@ void record_command_buffer(ApplicationState* state, VkCommandBuffer command_buff @@ -1532,8 +1579,6 @@ void record_command_buffer(ApplicationState* state, VkCommandBuffer command_buff
printf("failed to record command buffer\n");
exit(1);
}
printf("recorded command buffer\n");
}
void create_sync_objects(ApplicationState* state) {
@ -1555,6 +1600,138 @@ void create_sync_objects(ApplicationState* state) { @@ -1555,6 +1600,138 @@ void create_sync_objects(ApplicationState* state) {
}
}
//Create every object the compute post-process pass needs and store it in
//state->compute:
//  - a descriptor set layout with a single storage image at binding 0
//  - a pipeline layout and the compute pipeline (pipeline[0]) built from
//    ./postprocess.spv
//  - a descriptor pool and one descriptor set that points at
//    state->texture_image_view in VK_IMAGE_LAYOUT_GENERAL
//  - a command pool for the compute queue family and one primary command buffer
//Reads state->device, state->physical_device and state->texture_image_view.
//Any failure prints a message and terminates the process with exit(1).
//NOTE(review): pipeline[1], input_buffer, input_buffer_memory and compute_done
//are not initialised here.
void setup_compute(ApplicationState* state) {
    //--- descriptor set layout: one storage image visible to the compute stage
    VkDescriptorSetLayoutBinding input_binding = {0};
    input_binding.binding = 0;
    input_binding.descriptorCount = 1;
    input_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    input_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;

    VkDescriptorSetLayoutBinding bindings[1] = {input_binding};

    VkDescriptorSetLayoutCreateInfo layout_info = {0};
    layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
    layout_info.bindingCount = ARRSIZE(bindings);
    layout_info.pBindings = bindings;

    if (vkCreateDescriptorSetLayout(state->device, &layout_info, NULL, &state->compute.descriptor_set_layout) != VK_SUCCESS) {
        printf("could not create descriptor set layout\n");
        exit(1);
    }

    //--- shader module
    VkShaderModule compute_shader_module0;
    size_t compute_shader_size0;
    char* compute_shader0 = load_file("./postprocess.spv", &compute_shader_size0);
    //guard against a missing shader file before handing the buffer to Vulkan
    if (compute_shader0 == NULL) {
        printf("failed to load ./postprocess.spv\n");
        exit(1);
    }
    compute_shader_module0 = create_shader_module(state, compute_shader0, compute_shader_size0);
    //NOTE(review): compute_shader0 is never released here; if load_file returns
    //heap memory owned by the caller it should be freed after this point - confirm.

    VkPipelineShaderStageCreateInfo compute_shader_stage_info0 = {0};
    compute_shader_stage_info0.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    compute_shader_stage_info0.stage = VK_SHADER_STAGE_COMPUTE_BIT;
    compute_shader_stage_info0.module = compute_shader_module0;
    compute_shader_stage_info0.pName = "main";

    //--- pipeline layout: the single descriptor set layout, no push constants
    VkPipelineLayoutCreateInfo pipeline_layout_create_info0 = {0};
    pipeline_layout_create_info0.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
    pipeline_layout_create_info0.setLayoutCount = 1;
    pipeline_layout_create_info0.pSetLayouts = &state->compute.descriptor_set_layout;
    pipeline_layout_create_info0.pushConstantRangeCount = 0;
    pipeline_layout_create_info0.pPushConstantRanges = NULL;

    if (vkCreatePipelineLayout(state->device, &pipeline_layout_create_info0, NULL, &state->compute.pipeline_layout) != VK_SUCCESS) {
        printf("failed to create pipeline layout\n");
        exit(1);
    }
    printf("created pipeline layout\n");

    //--- compute pipeline
    VkComputePipelineCreateInfo create_info0 = {0};
    create_info0.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
    create_info0.layout = state->compute.pipeline_layout;
    create_info0.basePipelineHandle = VK_NULL_HANDLE;
    create_info0.stage = compute_shader_stage_info0;
    create_info0.flags = 0;

    if (vkCreateComputePipelines(state->device, VK_NULL_HANDLE, 1, &create_info0, NULL, &state->compute.pipeline[0]) != VK_SUCCESS) {
        printf("failed to create compute pipeline\n");
        exit(1);
    }
    //the module is destroyed once the pipeline has been created
    vkDestroyShaderModule(state->device, compute_shader_module0, NULL);

    //--- descriptor pool sized for exactly one storage-image descriptor set
    VkDescriptorPoolSize pool_size[1] = {0};
    pool_size[0].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    pool_size[0].descriptorCount = 1;

    VkDescriptorPoolCreateInfo pool_info = {0};
    pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
    pool_info.poolSizeCount = ARRSIZE(pool_size);
    pool_info.pPoolSizes = pool_size;
    pool_info.maxSets = 1;

    if (vkCreateDescriptorPool(state->device, &pool_info, NULL, &state->compute.descriptor_pool) != VK_SUCCESS) {
        printf("failed to create descriptor pool\n");
        exit(1);
    }

    //--- descriptor set
    VkDescriptorSetAllocateInfo alloc_info = {0};
    alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    alloc_info.descriptorPool = state->compute.descriptor_pool;
    alloc_info.descriptorSetCount = 1;
    alloc_info.pSetLayouts = &state->compute.descriptor_set_layout;

    if (vkAllocateDescriptorSets(state->device, &alloc_info, &state->compute.descriptor_sets) != VK_SUCCESS) {
        printf("failed to allocate descriptor sets\n");
        exit(1);
    }

    //point binding 0 at the texture image view; no sampler is set for it
    VkWriteDescriptorSet descriptor_writes[1] = {0};
    VkDescriptorImageInfo image_info = {0};
    image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
    image_info.imageView = state->texture_image_view;

    descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    descriptor_writes[0].dstSet = state->compute.descriptor_sets;
    descriptor_writes[0].dstBinding = 0;
    descriptor_writes[0].dstArrayElement = 0;
    descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    descriptor_writes[0].descriptorCount = 1;
    descriptor_writes[0].pImageInfo = &image_info;

    vkUpdateDescriptorSets(state->device, ARRSIZE(descriptor_writes), descriptor_writes, 0, NULL);

    //--- command pool on the compute queue family
    QueueFamilyIndices families = find_queue_families(state, state->physical_device);

    VkCommandPoolCreateInfo command_pool_info = {0};
    command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    //Allow command buffers to be rerecorded individually
    command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
    command_pool_info.queueFamilyIndex = families.compute_queue;

    if (vkCreateCommandPool(state->device, &command_pool_info, NULL, &state->compute.command_pool) != VK_SUCCESS) {
        printf("failed to create command pool\n");
        exit(1);
    }

    //--- one primary command buffer from that pool
    VkCommandBufferAllocateInfo command_buf_allocate_info = {0};
    command_buf_allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    command_buf_allocate_info.commandPool = state->compute.command_pool;
    //VK_COMMAND_BUFFER_LEVEL_PRIMARY: Can be submitted to a queue for execution, but cannot be called from other command buffers.
    //VK_COMMAND_BUFFER_LEVEL_SECONDARY: Cannot be submitted directly, but can be called from primary command buffers
    command_buf_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    command_buf_allocate_info.commandBufferCount = 1;

    if (vkAllocateCommandBuffers(state->device, &command_buf_allocate_info, &state->compute.command_buffer) != VK_SUCCESS) {
        printf("failed to allocate command buffer\n");
        exit(1);
    }
    printf("created compute command buffer\n");
}
void init_vulkan(ApplicationState* state) {
create_instance(state);
create_surface(state);
@ -1578,6 +1755,7 @@ void init_vulkan(ApplicationState* state) { @@ -1578,6 +1755,7 @@ void init_vulkan(ApplicationState* state) {
create_descriptor_sets(state);
create_command_buffer(state);
create_sync_objects(state);
setup_compute(state);
}
void update_uniform_buffer(ApplicationState* state, uint32_t current_frame) {

57
src/postprocess.comp

@ -0,0 +1,57 @@ @@ -0,0 +1,57 @@
//taken from https://github.com/SaschaWillems/Vulkan/blob/master/data/shaders/glsl/computeshader/emboss.comp, modified
/*
The MIT License (MIT)
Copyright (c) 2016 Sascha Willems
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#version 450
layout (local_size_x = 16, local_size_y = 16) in;
layout (binding = 0, rgba8) uniform image2D resultImage;
// Weighted sum of nine samples: multiplies each data value by the matching
// kernel weight, adds the products up, divides the total by denom, adds
// offset and clamps the result to the range [0, 1].
float conv(in float[9] kernel, in float[9] data, in float denom, in float offset)
{
	float sum = 0.0;
	int idx = 0;
	while (idx < 9)
	{
		sum += kernel[idx] * data[idx];
		idx++;
	}
	float shifted = sum / denom + offset;
	return clamp(shifted, 0.0, 1.0);
}
// Scratch storage for the 3x3 neighbourhood sampled in main(): each entry of
// avg holds the mean of the r, g and b channels of one loaded texel.
// imageData is the single global instance that main() fills and passes to conv().
struct ImageData
{
float avg[9];
} imageData;
// Per-texel entry point: gathers the grey value of the 3x3 neighbourhood
// around the invocation's texel, convolves it with a fixed kernel and writes
// the result back to the same image as an opaque grey pixel.
// NOTE(review): resultImage is both read and written, so a neighbouring
// invocation may already have stored its result into a texel loaded here;
// a separate read-only input image would avoid that hazard.
void main()
{
	ivec2 size = imageSize(resultImage);
	ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

	// The dispatch size is not tied to the image size, so invocations that
	// fall outside the image have nothing to do.
	if (pixel.x >= size.x || pixel.y >= size.y)
	{
		return;
	}

	// Fetch neighbouring texels; coordinates are clamped to the image so
	// border texels reuse the nearest valid texel instead of loading from
	// out-of-range coordinates.
	int n = -1;
	for (int i=-1; i<2; ++i)
	{
		for(int j=-1; j<2; ++j)
		{
			n++;
			ivec2 coord = clamp(pixel + ivec2(i, j), ivec2(0), size - ivec2(1));
			vec3 rgb = imageLoad(resultImage, coord).rgb;
			imageData.avg[n] = (rgb.r + rgb.g + rgb.b) / 3.0;
		}
	}

	// Kernel weights, applied with denominator 0.1 and offset 0.0.
	float[9] kernel;
	kernel[0] = 1.0/8.0; kernel[1] = -1.0/8.0; kernel[2] = -1.0/8.0;
	kernel[3] = -1.0/8.0; kernel[4] = 1.0; kernel[5] = -1.0/8.0;
	kernel[6] = -1.0/8.0; kernel[7] = 1.0/8.0; kernel[8] = -1.0/8.0;

	vec4 res = vec4(vec3(conv(kernel, imageData.avg, 0.1, 0.0)), 1.0);
	imageStore(resultImage, pixel, res);
}
Loading…
Cancel
Save