early-access version 3938

This commit is contained in:
pineappleEA
2023-10-19 17:53:53 +02:00
parent 4d49e282ae
commit ec28817fce
36 changed files with 376 additions and 144 deletions

View File

@@ -48,8 +48,14 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
SetInlineIndexBuffer(regs.inline_index_4x8.index3);
break;
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
DrawArrayInstanced(regs.vertex_array_instance_first.topology.Value(),
regs.vertex_array_instance_first.start.Value(),
regs.vertex_array_instance_first.count.Value(), false);
break;
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): {
LOG_WARNING(HW_GPU, "(STUBBED) called");
DrawArrayInstanced(regs.vertex_array_instance_subsequent.topology.Value(),
regs.vertex_array_instance_subsequent.start.Value(),
regs.vertex_array_instance_subsequent.count.Value(), true);
break;
}
case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
@@ -84,6 +90,22 @@ void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 ve
ProcessDraw(false, num_instances);
}
void DrawManager::DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
bool subsequent) {
draw_state.topology = topology;
draw_state.vertex_buffer.first = vertex_first;
draw_state.vertex_buffer.count = vertex_count;
if (!subsequent) {
draw_state.instance_count = 1;
}
draw_state.base_instance = draw_state.instance_count - 1;
draw_state.draw_mode = DrawMode::Instance;
draw_state.instance_count++;
ProcessDraw(false, 1);
}
void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count,
u32 base_index, u32 base_instance, u32 num_instances) {
const auto& regs{maxwell3d->regs};

View File

@@ -66,6 +66,8 @@ public:
void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
u32 base_instance, u32 num_instances);
void DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
bool subsequent);
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);

View File

@@ -86,7 +86,10 @@ public:
uncommitted_operations.emplace_back(std::move(func));
}
pending_operations.emplace_back(std::move(uncommitted_operations));
QueueFence(new_fence);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
QueueFence(new_fence);
}
if (!delay_fence) {
func();
}

View File

@@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() {
break;
}
if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
#if defined(__unix__)
// Some linux decoding backends are reported to crash with this config method
// TODO(ameerj): Properly support this method
if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
// skip zero-copy decoders, we don't currently support them
LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
av_hwdevice_get_type_name(type), config->methods);
continue;
}
#endif
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
av_codec_ctx->pix_fmt = config->pix_fmt;
return true;

View File

@@ -19,6 +19,7 @@ set(SHADER_FILES
block_linear_unswizzle_2d.comp
block_linear_unswizzle_3d.comp
convert_abgr8_to_d24s8.frag
convert_abgr8_to_d32f.frag
convert_d32f_to_abgr8.frag
convert_d24s8_to_abgr8.frag
convert_depth_to_float.frag

View File

@@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
layout(binding = 0) uniform sampler2D color_texture;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
vec4 color = texelFetch(color_texture, coord, 0).abgr;
float value = color.a * (color.r + color.g + color.b) / 3.0f;
gl_FragDepth = value;
}

View File

@@ -9,6 +9,6 @@ layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
float depth = textureLod(depth_tex, coord, 0).r;
float depth = texelFetch(depth_tex, coord, 0).r;
color = vec4(depth, depth, depth, 1.0);
}

View File

@@ -8,6 +8,7 @@
#include "common/settings.h"
#include "video_core/host_shaders/blit_color_float_frag_spv.h"
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
#include "video_core/host_shaders/convert_abgr8_to_d32f_frag_spv.h"
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_d32f_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
@@ -434,6 +435,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)),
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
@@ -559,6 +561,13 @@ void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipelineDepthTargetEx(convert_abgr8_to_d32f_pipeline, dst_framebuffer->RenderPass(),
convert_abgr8_to_d32f_frag);
Convert(*convert_abgr8_to_d32f_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD32FToABGR8(const Framebuffer* dst_framebuffer,
ImageView& src_image_view) {
ConvertPipelineColorTargetEx(convert_d32f_to_abgr8_pipeline, dst_framebuffer->RenderPass(),

View File

@@ -67,6 +67,8 @@ public:
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
@@ -130,6 +132,7 @@ private:
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
vk::ShaderModule convert_abgr8_to_d32f_frag;
vk::ShaderModule convert_d32f_to_abgr8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
vk::ShaderModule convert_s8d24_to_abgr8_frag;
@@ -149,6 +152,7 @@ private:
vk::Pipeline convert_d16_to_r16_pipeline;
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
vk::Pipeline convert_abgr8_to_d32f_pipeline;
vk::Pipeline convert_d32f_to_abgr8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
vk::Pipeline convert_s8d24_to_abgr8_pipeline;

View File

@@ -132,12 +132,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
Frame* frame = present_manager.GetRenderFrame();
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
scheduler.Flush(*frame->render_ready);
present_manager.Present(frame);
{
std::scoped_lock lock{rasterizer.LockCaches()};
RenderScreenshot(*framebuffer, use_accelerated);
Frame* frame = present_manager.GetRenderFrame();
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
scheduler.Flush(*frame->render_ready);
present_manager.Present(frame);
}
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();

View File

@@ -198,7 +198,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
if (!pipeline) {
return;
}
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
std::scoped_lock lock{LockCaches()};
// update engine as channel may be different.
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
@@ -708,6 +708,7 @@ void RasterizerVulkan::TiledCacheBarrier() {
}
void RasterizerVulkan::FlushCommands() {
std::scoped_lock lock{LockCaches()};
if (draw_counter == 0) {
return;
}
@@ -805,6 +806,7 @@ void RasterizerVulkan::FlushWork() {
if ((++draw_counter & 7) != 7) {
return;
}
std::scoped_lock lock{LockCaches()};
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
@@ -1499,7 +1501,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
CreateChannel(channel);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
std::scoped_lock lock{LockCaches()};
texture_cache.CreateChannel(channel);
buffer_cache.CreateChannel(channel);
}
@@ -1512,7 +1514,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
const s32 channel_id = channel.bind_id;
BindToChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
std::scoped_lock lock{LockCaches()};
texture_cache.BindToChannel(channel_id);
buffer_cache.BindToChannel(channel_id);
}
@@ -1525,7 +1527,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
EraseChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
std::scoped_lock lock{LockCaches()};
texture_cache.EraseChannel(channel_id);
buffer_cache.EraseChannel(channel_id);
}

View File

@@ -133,6 +133,10 @@ public:
void ReleaseChannel(s32 channel_id) override;
std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() {
return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex};
}
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;

View File

@@ -1194,6 +1194,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
return blit_image_helper.ConvertD16ToR16(dst, src_view);
}
break;
case PixelFormat::A8B8G8R8_SRGB:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::A8B8G8R8_UNORM:
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
@@ -1205,6 +1210,16 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::B8G8R8A8_SRGB:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::B8G8R8A8_UNORM:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32ToR32(dst, src_view);
@@ -1222,6 +1237,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
}
break;
case PixelFormat::D32_FLOAT:
if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
src_view.format == PixelFormat::B8G8R8A8_UNORM ||
src_view.format == PixelFormat::A8B8G8R8_SRGB ||
src_view.format == PixelFormat::B8G8R8A8_SRGB) {
return blit_image_helper.ConvertABGR8ToD32F(dst, src_view);
}
if (src_view.format == PixelFormat::R32_FLOAT) {
return blit_image_helper.ConvertR32ToD32(dst, src_view);
}
@@ -2034,7 +2055,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
},
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([barrier = barrier](vk::CommandBuffer cmdbuf) {
scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
});

View File

@@ -68,6 +68,7 @@ struct LevelInfo {
Extent2D tile_size;
u32 bpp_log2;
u32 tile_width_spacing;
u32 num_levels;
};
[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
@@ -118,11 +119,11 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
u32 level) {
u32 level, u32 num_levels) {
return {
.width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
.height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
.depth = level == 0
.depth = level == 0 && num_levels == 1
? block_size.depth
: AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
};
@@ -166,7 +167,7 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
if (level == 0) {
if (level == 0 && info.num_levels == 1) {
return Extent3D{
.width = info.block.width,
.height = info.block.height,
@@ -257,7 +258,7 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
u32 tile_width_spacing) {
u32 tile_width_spacing, u32 num_levels) {
const u32 bytes_per_block = BytesPerBlock(format);
return {
.size =
@@ -270,16 +271,18 @@ template <u32 GOB_EXTENT>
.tile_size = DefaultBlockSize(format),
.bpp_log2 = BytesPerBlockLog2(bytes_per_block),
.tile_width_spacing = tile_width_spacing,
.num_levels = num_levels,
};
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing);
return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing,
info.resources.levels);
}
[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
u32 tile_width_spacing, u32 level) {
const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing);
const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing, level);
u32 offset = 0;
for (u32 current_level = 0; current_level < level; ++current_level) {
offset += CalculateLevelSize(info, current_level);
@@ -466,7 +469,7 @@ template <u32 GOB_EXTENT>
};
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0, info.resources.levels);
return Extent3D{
.width = Common::AlignUpLog2(num_tiles.width, alignment),
.height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
@@ -533,7 +536,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
UNIMPLEMENTED_IF(copy.image_extent != level_size);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
const Extent3D block =
AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
size_t host_offset = copy.buffer_offset;
@@ -698,7 +702,7 @@ u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
const Extent2D tile_size = DefaultBlockSize(info.format);
const Extent3D level_size = AdjustMipSize(info.size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level, info.resources.levels);
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
}
@@ -887,7 +891,8 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
.image_extent = level_size,
};
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
const Extent3D block =
AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
size_t guest_layer_offset = 0;
@@ -1041,7 +1046,7 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
const Extent2D tile_size = DefaultBlockSize(info.format);
const Extent3D level_size = AdjustMipSize(info.size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
return AdjustMipBlockSize(num_tiles, level_info.block, level);
return AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
}
boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
@@ -1063,7 +1068,8 @@ boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const I
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
const Extent3D block =
AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
params[level] = SwizzleParameters{
.num_tiles = num_tiles,
.block = block,
@@ -1292,11 +1298,11 @@ u32 MapSizeBytes(const ImageBase& image) {
}
}
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) ==
0x7f8000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);