early-access version 1520

main
pineappleEA 2021-03-14 09:08:46 +01:00
parent 7c0c4aee76
commit 5f8c5c9a16
4 changed files with 30 additions and 32 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 1519. This is the source code for early-access 1520.
## Legal Notice ## Legal Notice

View File

@ -111,6 +111,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height); glUniform2ui(1, tile_size.width, tile_size.height);
// Ensure buffer data is valid before dispatching
glFlush();
for (const SwizzleParameters& swizzle : swizzles) { for (const SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset; const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
@ -133,8 +135,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset); image.guest_size_bytes - swizzle.buffer_offset);
// Ensure buffer data is valid before dispatching compute
glFinish();
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
} }
program_manager.RestoreGuestCompute(); program_manager.RestoreGuestCompute();

View File

@ -472,30 +472,33 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
if (!data_buffer) { if (!data_buffer) {
MakeDataBuffer(); MakeDataBuffer();
} }
const VkPipeline vk_pipeline = *pipeline;
const VkImageAspectFlags aspect_mask = image.AspectMask(); const VkImageAspectFlags aspect_mask = image.AspectMask();
const VkImage vk_image = image.Handle(); const VkImage vk_image = image.Handle();
const bool is_initialized = image.ExchangeInitialization(); const bool is_initialized = image.ExchangeInitialization();
scheduler.Record([vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { scheduler.Record(
const VkImageMemoryBarrier image_barrier{ [vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, const VkImageMemoryBarrier image_barrier{
.pNext = nullptr, .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .pNext = nullptr,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
.newLayout = VK_IMAGE_LAYOUT_GENERAL, .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .newLayout = VK_IMAGE_LAYOUT_GENERAL,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = vk_image, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange{ .image = vk_image,
.aspectMask = aspect_mask, .subresourceRange{
.baseMipLevel = 0, .aspectMask = aspect_mask,
.levelCount = VK_REMAINING_MIP_LEVELS, .baseMipLevel = 0,
.baseArrayLayer = 0, .levelCount = VK_REMAINING_MIP_LEVELS,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .baseArrayLayer = 0,
}, .layerCount = VK_REMAINING_ARRAY_LAYERS,
}; },
cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); };
}); cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
});
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset; const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
@ -522,14 +525,13 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
const VkPipelineLayout vk_layout = *layout; const VkPipelineLayout vk_layout = *layout;
const VkPipeline vk_pipeline = *pipeline;
// To unswizzle the ASTC data // To unswizzle the ASTC data
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
scheduler.Record([vk_layout, vk_pipeline, num_dispatches_x, num_dispatches_y, block_dims, params, set](vk::CommandBuffer cmdbuf) {
num_dispatches_z, block_dims, params, set](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{ const AstcPushConstants uniforms{
.blocks_dims = block_dims, .blocks_dims = block_dims,
.bytes_per_block_log2 = params.bytes_per_block_log2, .bytes_per_block_log2 = params.bytes_per_block_log2,
@ -539,7 +541,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
.block_height = params.block_height, .block_height = params.block_height,
.block_height_mask = params.block_height_mask, .block_height_mask = params.block_height_mask,
}; };
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);

View File

@ -129,7 +129,4 @@ struct AstcBufferData {
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
} constexpr ASTC_BUFFER_DATA; } constexpr ASTC_BUFFER_DATA;
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
} // namespace Tegra::Texture::ASTC } // namespace Tegra::Texture::ASTC