From e03f443390e8abd9d5e9b0171f5868923e47abee Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 5 Nov 2022 19:21:57 +0100 Subject: [PATCH] early-access version 3089 --- README.md | 2 +- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_glasm.cpp | 3 + .../glasm/emit_glasm_bitwise_conversion.cpp | 4 + .../glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glasm/emit_glasm_instructions.h | 2 + .../backend/glasm/glasm_emit_context.cpp | 7 +- .../glsl/emit_glsl_bitwise_conversion.cpp | 4 + .../glsl/emit_glsl_context_get_set.cpp | 6 +- .../backend/glsl/emit_glsl_instructions.h | 2 + .../backend/glsl/glsl_emit_context.cpp | 3 + .../backend/spirv/emit_spirv.h | 4 + .../spirv/emit_spirv_bitwise_conversion.cpp | 4 + .../spirv/emit_spirv_context_get_set.cpp | 15 ++- .../backend/spirv/emit_spirv_instructions.h | 4 +- .../backend/spirv/spirv_emit_context.cpp | 31 ++++++ .../backend/spirv/spirv_emit_context.h | 4 + src/shader_recompiler/environment.h | 4 + .../frontend/ir/ir_emitter.cpp | 13 +++ .../frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 2 + src/shader_recompiler/frontend/ir/type.h | 31 +++--- src/shader_recompiler/frontend/ir/value.cpp | 3 + src/shader_recompiler/frontend/ir/value.h | 12 ++ .../frontend/maxwell/translate_program.cpp | 4 +- src/shader_recompiler/host_translate_info.h | 1 + src/shader_recompiler/ir_opt/passes.h | 7 +- .../ir_opt/position_pass.cpp | 77 +++++++++++++ src/shader_recompiler/ir_opt/texture_pass.cpp | 51 ++++++++- src/shader_recompiler/shader_info.h | 11 ++ src/video_core/engines/maxwell_3d.h | 4 +- .../renderer_opengl/gl_buffer_cache.cpp | 15 +-- .../renderer_opengl/gl_graphics_pipeline.cpp | 11 ++ .../renderer_opengl/gl_rasterizer.cpp | 10 ++ .../renderer_opengl/gl_shader_cache.cpp | 5 +- .../renderer_opengl/gl_state_tracker.cpp | 4 +- .../renderer_vulkan/pipeline_helper.h | 10 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +++- .../renderer_vulkan/vk_graphics_pipeline.h | 4 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 16 +++ .../renderer_vulkan/vk_state_tracker.cpp | 2 +- src/video_core/shader_environment.cpp | 104 ++++++++++++++++-- src/video_core/shader_environment.h | 21 +++- src/video_core/texture_cache/util.cpp | 1 - 46 files changed, 491 insertions(+), 63 deletions(-) create mode 100755 src/shader_recompiler/ir_opt/position_pass.cpp diff --git a/README.md b/README.md index 4ac71db49..7a55fd381 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3088. +This is the source code for early-access 3089. ## Legal Notice diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d7a7126d4..0c232db79 100755 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -224,6 +224,7 @@ add_library(shader_recompiler STATIC ir_opt/lower_fp16_to_fp32.cpp ir_opt/lower_int64_to_int32.cpp ir_opt/passes.h + ir_opt/position_pass.cpp ir_opt/rescaling_pass.cpp ir_opt/ssa_rewrite_pass.cpp ir_opt/texture_pass.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index f35486d5d..b82cc8f69 100755 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -450,6 +450,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I if (program.info.uses_rescaling_uniform) { header += "PARAM scaling[1]={program.local[0..0]};"; } + if (program.info.uses_render_area) { + header += "PARAM render_area[1]={program.local[1..1]};"; + } header += "TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { header += fmt::format("R{},", index); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 748215904..b3c734ff6 100755 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -43,6 +43,10 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } +void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 6de120815..804d5a467 100755 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -386,7 +386,7 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { ctx.Add("SHL.U {}.x, primitive.vertexcount, 16;", inst); break; default: - LOG_WARNING(Shader, "(STUBBED) EmitInvocationInfo"); + LOG_WARNING(Shader, "(STUBBED) called"); ctx.Add("MOV.S {}.x,0x00ff0000;", inst); } } @@ -408,6 +408,10 @@ void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.F {}.x,scaling[0].z;", inst); } +void EmitRenderArea(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.F {},render_area[0];", inst); +} + void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index d6ce0cb15..267a2ba7c 100755 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -74,6 +74,7 @@ void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); +void EmitRenderArea(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); @@ -196,6 +197,7 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); diff --git a/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp b/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp index 758d07572..1b7207acf 100755 --- a/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp @@ -95,10 +95,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.uses_invocation_id) { Add("ATTRIB primitive_invocation=primitive.invocation;"); } - if (info.uses_invocation_info) { - if (stage == Stage::TessellationControl || stage == Stage::TessellationEval) { - Add("ATTRIB primitive_vertexcount = primitive.vertexcount;"); - } + if (info.uses_invocation_info && + (stage == Stage::TessellationControl || stage == Stage::TessellationEval)) { + Add("ATTRIB primitive_vertexcount = primitive.vertexcount;"); } if (info.stores_tess_level_outer) { Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 3bff45c4e..2b53d62bc 100755 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -48,6 +48,10 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); } +void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=ftoi({});", inst, value); +} + void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3e4d86667..90fffe0cd 100755 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -406,7 +406,7 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=uint(gl_PatchVerticesIn)<<16;", inst); break; default: - LOG_WARNING(Shader, "(STUBBED) EmitInvocationInfo"); + LOG_WARNING(Shader, "(STUBBED) called"); ctx.AddU32("{}=uint(0x00ff0000);", inst); } } @@ -428,6 +428,10 @@ void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { ctx.AddF32("{}=scaling.z;", inst); } +void EmitRenderArea(EmitContext& ctx, IR::Inst& inst) { + ctx.AddF32x4("{}=render_area;", inst); +} + void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { ctx.AddU32("{}=lmem[{}];", inst, word_offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 473c71d5c..aac05325f 100755 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -88,6 +88,7 @@ void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); +void EmitRenderArea(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); @@ -230,6 +231,7 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index b08352b67..c836ddb47 100755 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -358,6 +358,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.uses_rescaling_uniform) { header += "layout(location=0) uniform vec4 scaling;"; } + if (info.uses_render_area) { + header += "layout(location=1) uniform vec4 render_area;"; + } DefineConstantBuffers(bindings); DefineConstantBufferIndirect(); DefineStorageBuffers(bindings); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index f579f812d..497c02da7 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -23,8 +23,12 @@ struct RescalingLayout { alignas(16) std::array rescaling_images; u32 down_factor; }; +struct RenderAreaLayout { + std::array render_area; +}; constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); +constexpr u32 RENDERAREA_LAYOUT_OFFSET = offsetof(RenderAreaLayout, render_area); [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index df6992d2a..7e182dbdb 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -18,6 +18,10 @@ void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitBitCastS32F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ba610cab4..c74c78dfd 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -353,7 +353,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::TessellationEvaluationPointV: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); - default: throw NotImplementedException("Read attribute {}", attr); } @@ -519,9 +518,8 @@ Id EmitInvocationInfo(EmitContext& ctx) { case Stage::TessellationEval: return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.patch_vertices_in), ctx.Const(16u)); - break; default: - LOG_WARNING(Shader, "(STUBBED) EmitInvocationInfo"); + LOG_WARNING(Shader, "(STUBBED) called"); return ctx.Const(0x00ff0000u); } } @@ -550,6 +548,17 @@ Id EmitResolutionDownFactor(EmitContext& ctx) { } } +Id EmitRenderArea(EmitContext& ctx) { + if (ctx.profile.unified_descriptor_binding) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[4])}; + const Id index{ctx.Const(ctx.render_are_member_index)}; + const Id pointer{ctx.OpAccessChain(pointer_type, ctx.render_area_push_constant, index)}; + return ctx.OpLoad(ctx.F32[4], pointer); + } else { + throw NotImplementedException("SPIR-V Instruction"); + } +} + Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 573bf81c2..d69e07583 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -77,6 +77,7 @@ Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitYDirection(EmitContext& ctx); Id EmitResolutionDownFactor(EmitContext& ctx); +Id EmitRenderArea(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); @@ -178,7 +179,8 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); +void EmitBitCastS32F32(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext&); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); Id EmitPackUint2x32(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 51a50916f..237029d47 100755 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -473,6 +473,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); DefineRescalingInput(program.info); + DefineRenderArea(program.info); } EmitContext::~EmitContext() = default; @@ -982,6 +983,36 @@ void EmitContext::DefineRescalingInputUniformConstant() { } } +void EmitContext::DefineRenderArea(const Info& info) { + if (!info.uses_render_area) { + return; + } + + if (profile.unified_descriptor_binding) { + boost::container::static_vector members{}; + u32 member_index{0}; + + members.push_back(F32[4]); + render_are_member_index = member_index++; + + const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))}; + Decorate(push_constant_struct, spv::Decoration::Block); + Name(push_constant_struct, "RenderAreaInfo"); + + MemberDecorate(push_constant_struct, render_are_member_index, spv::Decoration::Offset, 0); + MemberName(push_constant_struct, render_are_member_index, "render_area"); + + const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)}; + render_area_push_constant = + AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant); + Name(render_area_push_constant, "render_area_push_constants"); + + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(render_area_push_constant); + } + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 8640b2863..efcc5210b 100755 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -244,6 +244,9 @@ public: u32 texture_rescaling_index{}; u32 image_rescaling_index{}; + Id render_area_push_constant{}; + u32 render_are_member_index{}; + Id local_memory{}; Id shared_memory_u8{}; @@ -319,6 +322,7 @@ private: void DefineRescalingInput(const Info& info); void DefineRescalingInputPushConstant(); void DefineRescalingInputUniformConstant(); + void DefineRenderArea(const Info& info); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 441f1b161..277e05e2a 100755 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -22,6 +22,10 @@ public: [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; + [[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0; + + [[nodiscard]] virtual u32 ReadViewportTransformState() = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; [[nodiscard]] virtual u32 LocalMemorySize() const = 0; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7fe704b0c..15d893e75 100755 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -382,6 +382,14 @@ F32 IREmitter::ResolutionDownFactor() { return Inst(Opcode::ResolutionDownFactor); } +F32 IREmitter::RenderAreaWidth() { + return F32(CompositeExtract(Inst(Opcode::RenderArea), 0)); +} + +F32 IREmitter::RenderAreaHeight() { + return F32(CompositeExtract(Inst(Opcode::RenderArea), 1)); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } @@ -687,6 +695,11 @@ IR::U32 IREmitter::BitCast(const IR::F32& value) { return Inst(Opcode::BitCastU32F32, value); } +template <> +IR::S32 IREmitter::BitCast(const IR::F32& value) { + return Inst(Opcode::BitCastS32F32, value); +} + template <> IR::F32 IREmitter::BitCast(const IR::U32& value) { return Inst(Opcode::BitCastF32U32, value); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5b53e50f2..c35830a0e 100755 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -104,6 +104,9 @@ public: [[nodiscard]] F32 ResolutionDownFactor(); + [[nodiscard]] F32 RenderAreaWidth(); + [[nodiscard]] F32 RenderAreaHeight(); + [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LoadGlobalU8(const U64& address); diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 3b54b58fe..f497228a0 100755 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -37,6 +37,7 @@ constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; constexpr Type U32{Type::U32}; constexpr Type U64{Type::U64}; +constexpr Type S32{Type::S32}; constexpr Type F16{Type::F16}; constexpr Type F32{Type::F32}; constexpr Type F64{Type::F64}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ff9ad0514..5cedd23a9 100755 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -64,6 +64,7 @@ OPCODE(SampleId, U32, OPCODE(IsHelperInvocation, U1, ) OPCODE(YDirection, F32, ) OPCODE(ResolutionDownFactor, F32, ) +OPCODE(RenderArea, F32x4, ) // Undefined OPCODE(UndefU1, U1, ) @@ -174,6 +175,7 @@ OPCODE(SelectF64, F64, U1, OPCODE(BitCastU16F16, U16, F16, ) OPCODE(BitCastU32F32, U32, F32, ) OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastS32F32, S32, F32, ) OPCODE(BitCastF16U16, F16, U16, ) OPCODE(BitCastF32U32, F32, U32, ) OPCODE(BitCastF64U64, F64, U64, ) diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index b74068078..1bf293ac7 100755 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -24,21 +24,22 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 = 1 << 9, - F16 = 1 << 10, - F32 = 1 << 11, - F64 = 1 << 12, - U32x2 = 1 << 13, - U32x3 = 1 << 14, - U32x4 = 1 << 15, - F16x2 = 1 << 16, - F16x3 = 1 << 17, - F16x4 = 1 << 18, - F32x2 = 1 << 19, - F32x3 = 1 << 20, - F32x4 = 1 << 21, - F64x2 = 1 << 22, - F64x3 = 1 << 23, - F64x4 = 1 << 24, + S32 = 1 << 10, + F16 = 1 << 11, + F32 = 1 << 12, + F64 = 1 << 13, + U32x2 = 1 << 14, + U32x3 = 1 << 15, + U32x4 = 1 << 16, + F16x2 = 1 << 17, + F16x3 = 1 << 18, + F16x4 = 1 << 19, + F32x2 = 1 << 20, + F32x3 = 1 << 21, + F32x4 = 1 << 22, + F64x2 = 1 << 23, + F64x3 = 1 << 24, + F64x4 = 1 << 25, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 55caca27b..c17a5b350 100755 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -23,6 +23,8 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} +Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {} + Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} @@ -69,6 +71,7 @@ bool Value::operator==(const Value& other) const { return imm_u16 == other.imm_u16; case Type::U32: case Type::F32: + case Type::S32: return imm_u32 == other.imm_u32; case Type::U64: case Type::F64: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index ee818f9cb..a56de263e 100755 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -44,6 +44,7 @@ public: explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; explicit Value(u32 value) noexcept; + explicit Value(s32 value) noexcept; explicit Value(f32 value) noexcept; explicit Value(u64 value) noexcept; explicit Value(f64 value) noexcept; @@ -66,6 +67,7 @@ public: [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; [[nodiscard]] u32 U32() const; + [[nodiscard]] s32 S32() const; [[nodiscard]] f32 F32() const; [[nodiscard]] u64 U64() const; [[nodiscard]] f64 F64() const; @@ -85,6 +87,7 @@ private: u8 imm_u8; u16 imm_u16; u32 imm_u32; + s32 imm_s32; f32 imm_f32; u64 imm_u64; f64 imm_f64; @@ -266,6 +269,7 @@ using U8 = TypedValue; using U16 = TypedValue; using U32 = TypedValue; using U64 = TypedValue; +using S32 = TypedValue; using F16 = TypedValue; using F32 = TypedValue; using F64 = TypedValue; @@ -377,6 +381,14 @@ inline u32 Value::U32() const { return imm_u32; } +inline s32 Value::S32() const { + if (IsIdentity()) { + return inst->Arg(0).S32(); + } + DEBUG_ASSERT(type == Type::S32); + return imm_s32; +} + inline f32 Value::F32() const { if (IsIdentity()) { return inst->Arg(0).F32(); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 87cc1112c..06a67eabe 100755 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -220,8 +220,10 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +namespace { +struct PositionInst { + IR::Inst* inst; + IR::Block* block; + IR::Attribute attr; +}; +using PositionInstVector = boost::container::small_vector; +} // Anonymous namespace + +void PositionPass(Environment& env, IR::Program& program) { + if (env.ShaderStage() != Stage::VertexB || env.ReadViewportTransformState()) { + return; + } + + Info& info{program.info}; + info.uses_render_area = true; + + PositionInstVector to_replace; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::SetAttribute: { + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: { + to_replace.push_back(PositionInst{.inst = &inst, .block = block, .attr = attr}); + break; + } + default: + break; + } + break; + } + default: + break; + } + } + } + + for (PositionInst& position_inst : to_replace) { + IR::IREmitter ir{*position_inst.block, + IR::Block::InstructionList::s_iterator_to(*position_inst.inst)}; + const IR::F32 value(position_inst.inst->Arg(1)); + const IR::F32F64 scale(ir.Imm32(2.f)); + const IR::F32 negative_one{ir.Imm32(-1.f)}; + switch (position_inst.attr) { + case IR::Attribute::PositionX: { + position_inst.inst->SetArg( + 1, + ir.FPFma(value, ir.FPMul(ir.FPRecip(ir.RenderAreaWidth()), scale), negative_one)); + break; + } + case IR::Attribute::PositionY: { + position_inst.inst->SetArg( + 1, + ir.FPFma(value, ir.FPMul(ir.FPRecip(ir.RenderAreaHeight()), scale), negative_one)); + break; + } + default: + break; + } + } +} +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index ead049048..339f122eb 100755 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" @@ -363,6 +364,14 @@ TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { return env.ReadTextureType(lhs_raw | rhs_raw); } +TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTexturePixelFormat(lhs_raw | rhs_raw); +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -451,9 +460,41 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { ir.FPMul(IR::F32(ir.CompositeExtract(coord, 1)), ir.FPRecip(ir.ConvertUToF(32, 32, ir.CompositeExtract(texture_size, 1)))))); } + +void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_format) { + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + auto get_max_value = [pixel_format]() -> float { + switch (pixel_format) { + case TexturePixelFormat::A8B8G8R8_SNORM: + case TexturePixelFormat::R8G8_SNORM: + case TexturePixelFormat::R8_SNORM: + return 1.f / std::numeric_limits::max(); + case TexturePixelFormat::R16G16B16A16_SNORM: + case TexturePixelFormat::R16G16_SNORM: + case TexturePixelFormat::R16_SNORM: + return 1.f / std::numeric_limits::max(); + default: + throw InvalidArgument("Invalid texture pixel format"); + } + }; + + const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; + const IR::F32 x(ir.CompositeExtract(new_inst, 0)); + const IR::F32 y(ir.CompositeExtract(new_inst, 1)); + const IR::F32 z(ir.CompositeExtract(new_inst, 2)); + const IR::F32 w(ir.CompositeExtract(new_inst, 3)); + const IR::F16F32F64 max_value(ir.Imm32(get_max_value())); + const IR::Value converted = + ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast(x)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast(y)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast(z)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast(w)), max_value)); + inst.ReplaceUsesWith(converted); +} } // Anonymous namespace -void TexturePass(Environment& env, IR::Program& program) { +void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info) { TextureInstVector to_replace; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -597,6 +638,14 @@ void TexturePass(Environment& env, IR::Program& program) { } else { inst->SetArg(0, IR::Value{}); } + + if (!host_info.support_snorm_render_buffer && inst->GetOpcode() == IR::Opcode::ImageFetch && + flags.type == TextureType::Buffer) { + const auto pixel_format = ReadTexturePixelFormat(env, cbuf); + if (pixel_format != TexturePixelFormat::OTHER) { + PatchTexelFetch(*texture_inst.block, *texture_inst.inst, pixel_format); + } + } } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4c4556170..db1d87cf5 100755 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -29,6 +29,16 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 9; +enum class TexturePixelFormat : u32 { + A8B8G8R8_SNORM, + R8_SNORM, + R8G8_SNORM, + R16G16B16A16_SNORM, + R16G16_SNORM, + R16_SNORM, + OTHER +}; + enum class ImageFormat : u32 { Typeless, R8_UINT, @@ -183,6 +193,7 @@ struct Info { bool uses_shadow_lod{}; bool uses_rescaling_uniform{}; bool uses_cbuf_indirect{}; + bool uses_render_area{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index deca60c18..314d7fda8 100755 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -2970,7 +2970,7 @@ public: CullFace gl_cull_face; ///< 0x1920 Viewport::PixelCenter viewport_pixel_center; ///< 0x1924 INSERT_PADDING_BYTES_NOINIT(0x4); - u32 viewport_scale_offset_enbled; ///< 0x192C + u32 viewport_scale_offset_enabled; ///< 0x192C INSERT_PADDING_BYTES_NOINIT(0xC); ViewportClipControl viewport_clip_control; ///< 0x193C UserClip::Op user_clip_op; ///< 0x1940 @@ -3482,7 +3482,7 @@ ASSERT_REG_POSITION(gl_cull_test_enabled, 0x1918); ASSERT_REG_POSITION(gl_front_face, 0x191C); ASSERT_REG_POSITION(gl_cull_face, 0x1920); ASSERT_REG_POSITION(viewport_pixel_center, 0x1924); -ASSERT_REG_POSITION(viewport_scale_offset_enbled, 0x192C); +ASSERT_REG_POSITION(viewport_scale_offset_enabled, 0x192C); ASSERT_REG_POSITION(viewport_clip_control, 0x193C); ASSERT_REG_POSITION(user_clip_op, 0x1940); ASSERT_REG_POSITION(render_enable_override, 0x1944); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6a4b512e2..2a39e11bb 100755 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -29,17 +29,17 @@ constexpr std::array PROGRAM_LUT{ [[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { switch (gl_format) { case GL_RGBA8_SNORM: - return GL_RGBA8; + return GL_RGBA8I; case GL_R8_SNORM: - return GL_R8; + return GL_R8I; case GL_RGBA16_SNORM: - return GL_RGBA16; + return GL_RGBA16I; case GL_R16_SNORM: - return GL_R16; + return GL_R16I; case GL_RG16_SNORM: - return GL_RG16; + return GL_RG16I; case GL_RG8_SNORM: - return GL_RG8; + return GL_RG8I; default: return gl_format; } @@ -96,9 +96,6 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { texture.Create(GL_TEXTURE_BUFFER); const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; const GLenum texture_format{GetTextureBufferFormat(gl_format)}; - if (texture_format != gl_format) { - LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); - } glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); views.push_back({ .offset = offset, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index f0aa4ae27..eda08164a 100755 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -503,6 +503,17 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { float_image_scaling_mask, down_factor, 0.0f); } } + if (info.uses_render_area) { + const auto render_area_width(static_cast(regs.surface_clip.width)); + const auto render_area_height(static_cast(regs.surface_clip.height)); + if (use_assembly) { + glProgramLocalParameter4fARB(AssemblyStage(stage), 1, render_area_width, + render_area_height, 0.0f, 0.0f); + } else { + glProgramUniform4f(source_programs[stage].handle, 1, render_area_width, + render_area_height, 0.0f, 0.0f); + } + } }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 579daf4bd..4fd0ec152 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -618,6 +618,16 @@ void RasterizerOpenGL::SyncViewport() { } flags[Dirty::Viewport0 + index] = false; + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast(regs.surface_clip.x); + const auto y = static_cast(regs.surface_clip.y); + const auto width = static_cast(regs.surface_clip.width); + const auto height = static_cast(regs.surface_clip.height); + glViewportIndexedf(static_cast(index), x, y, width != 0.0f ? width : 1.0f, + height != 0.0f ? height : 1.0f); + continue; + } + const auto& src = regs.viewport_transform[index]; GLfloat x = conv(src.translate_x - src.scale_x); GLfloat y = conv(src.translate_y - src.scale_y); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a2ced4052..f53f52beb 100755 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -49,7 +49,7 @@ using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; -constexpr u32 CACHE_VERSION = 6; +constexpr u32 CACHE_VERSION = 7; template auto MakeSpan(Container& container) { @@ -76,7 +76,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, } break; case Shader::Stage::TessellationEval: - // invert the face + // Flip the face, as opengl's drawing is also flipped info.tess_clockwise = key.tessellation_clockwise == 0; info.tess_primitive = [&key] { switch (key.tessellation_primitive) { @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_float16 = false, .support_int64 = device.HasShaderInt64(), .needs_demote_reorder = device.IsAmd(), + .support_snorm_render_buffer = false, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index fe8bcec5c..fe5784b0c 100755 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -70,8 +70,8 @@ void SetupDirtyViewports(Tables& tables) { FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports); FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports); - tables[0][OFF(viewport_scale_offset_enbled)] = ViewportTransform; - tables[1][OFF(viewport_scale_offset_enbled)] = Viewports; + tables[0][OFF(viewport_scale_offset_enabled)] = ViewportTransform; + tables[1][OFF(viewport_scale_offset_enabled)] = Viewports; } void SetupDirtyScissors(Tables& tables) { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index aa1bd2381..5af812c5a 100755 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -68,13 +68,15 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + using Shader::Backend::SPIRV::RenderAreaLayout; using Shader::Backend::SPIRV::RescalingLayout; const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u; const VkPushConstantRange range{ .stageFlags = static_cast( is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), .offset = 0, - .size = static_cast(sizeof(RescalingLayout)) - size_offset, + .size = static_cast(sizeof(RescalingLayout)) - size_offset + + static_cast(sizeof(RenderAreaLayout)), }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -167,6 +169,12 @@ private: u32 image_bit{1u}; }; +class RenderAreaPushConstant { +public: + bool uses_render_area{}; + std::array words{}; +}; + inline void PushImageDescriptors(TextureCache& texture_cache, UpdateDescriptorQueue& update_descriptor_queue, const Shader::Info& info, RescalingPushConstant& rescaling, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7b9ad34bf..c3cd7836b 100755 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -31,6 +31,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Shader::Backend::SPIRV::RENDERAREA_LAYOUT_OFFSET; using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET; using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; @@ -433,12 +434,19 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { update_descriptor_queue.Acquire(); RescalingPushConstant rescaling; + RenderAreaPushConstant render_area; const VkSampler* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling, samplers_it, views_it); + const auto& info{stage_infos[0]}; + if (info.uses_render_area) { + render_area.uses_render_area = true; + render_area.words = {static_cast(regs.surface_clip.width), + static_cast(regs.surface_clip.height)}; + } }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); @@ -455,10 +463,11 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } - ConfigureDraw(rescaling); + ConfigureDraw(rescaling, render_area); } -void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { +void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, + const RenderAreaPushConstant& render_area) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -474,7 +483,9 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), - is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) { + is_rescaling, update_rescaling, + uses_render_area = render_area.uses_render_area, + render_area_data = render_area.words](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } @@ -488,6 +499,11 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), &scale_down_factor); } + if (uses_render_area) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RENDERAREA_LAYOUT_OFFSET, sizeof(render_area_data), + &render_area_data); + } if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 9eaab6da7..ce8115460 100755 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -62,6 +62,7 @@ class Device; class PipelineStatistics; class RenderPassCache; class RescalingPushConstant; +class RenderAreaPushConstant; class Scheduler; class UpdateDescriptorQueue; @@ -119,7 +120,8 @@ private: template void ConfigureImpl(bool is_indexed); - void ConfigureDraw(const RescalingPushConstant& rescaling); + void ConfigureDraw(const RescalingPushConstant& rescaling, + const RenderAreaPushConstant& render_are); void MakePipeline(VkRenderPass render_pass); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2808d6c69..cc2a0bf48 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -53,7 +53,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 6; +constexpr u32 CACHE_VERSION = 7; template auto MakeSpan(Container& container) { @@ -326,6 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_int64 = device.IsShaderInt64Supported(), .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, + .support_snorm_render_buffer = true, }; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b58d253ee..253281c11 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -683,6 +683,22 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg if (!state_tracker.TouchViewports()) { return; } + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast(regs.surface_clip.x); + const auto y = static_cast(regs.surface_clip.y); + const auto width = static_cast(regs.surface_clip.width); + const auto height = static_cast(regs.surface_clip.height); + VkViewport viewport{ + .x = x, + .y = y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); }); + return; + } const bool is_rescaling{texture_cache.IsRescaling()}; const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; const std::array viewports{ diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 77ec8e677..13770f0be 100755 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -51,7 +51,7 @@ Flags MakeInvalidationFlags() { void SetupDirtyViewports(Tables& tables) { FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports); FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports); - tables[0][OFF(viewport_scale_offset_enbled)] = Viewports; + tables[0][OFF(viewport_scale_offset_enabled)] = Viewports; tables[1][OFF(window_origin)] = Viewports; } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 462f4d8f5..0f12ca474 100755 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -19,6 +19,7 @@ #include "video_core/engines/kepler_compute.h" #include "video_core/memory_manager.h" #include "video_core/shader_environment.h" +#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/textures/texture.h" namespace VideoCommon { @@ -33,7 +34,7 @@ static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } -static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { +static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& entry) { switch (entry.texture_type) { case Tegra::Texture::TextureType::Texture1D: return Shader::TextureType::Color1D; @@ -59,6 +60,26 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { } } +static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) { + switch (PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type, + entry.a_type, entry.srgb_conversion)) { + case VideoCore::Surface::PixelFormat::A8B8G8R8_SNORM: + return Shader::TexturePixelFormat::A8B8G8R8_SNORM; + case VideoCore::Surface::PixelFormat::R8_SNORM: + return Shader::TexturePixelFormat::R8_SNORM; + case VideoCore::Surface::PixelFormat::R8G8_SNORM: + return Shader::TexturePixelFormat::R8G8_SNORM; + case VideoCore::Surface::PixelFormat::R16G16B16A16_SNORM: + return Shader::TexturePixelFormat::R16G16B16A16_SNORM; + case VideoCore::Surface::PixelFormat::R16G16_SNORM: + return Shader::TexturePixelFormat::R16G16_SNORM; + case VideoCore::Surface::PixelFormat::R16_SNORM: + return Shader::TexturePixelFormat::R16_SNORM; + default: + return Shader::TexturePixelFormat::OTHER; + } +} + static std::string_view StageToPrefix(Shader::Stage stage) { switch (stage) { case Shader::Stage::VertexB: @@ -178,22 +199,31 @@ void GenericEnvironment::Dump(u64 hash) { void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 code_size{static_cast(CachedSize())}; const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_texture_pixel_formats{static_cast(texture_pixel_formats.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_texture_pixel_formats), + sizeof(num_texture_pixel_formats)) .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast(&viewport_transform_state), + sizeof(viewport_transform_state)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(reinterpret_cast(code.data()), code_size); for (const auto& [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + for (const auto& [key, format] : texture_pixel_formats) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&format), sizeof(format)); + } for (const auto& [key, type] : cbuf_values) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); @@ -237,15 +267,13 @@ std::optional GenericEnvironment::TryFindSize() { return std::nullopt; } -Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, - bool via_header_index, u32 raw) { +Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - const Shader::TextureType result{ConvertType(entry)}; - texture_types.emplace(raw, result); - return result; + return entry; } GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -305,8 +333,27 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; - return ReadTextureTypeImpl(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, - handle); + auto entry = + ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle); + const Shader::TextureType result{ConvertTextureType(entry)}; + texture_types.emplace(handle, result); + return result; +} + +Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; + auto entry = + ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle); + const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry)); + texture_pixel_formats.emplace(handle, result); + return result; +} + +u32 GraphicsEnvironment::ReadViewportTransformState() { + const auto& regs{maxwell3d->regs}; + viewport_transform_state = regs.viewport_scale_offset_enabled; + return viewport_transform_state; } ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, @@ -337,21 +384,41 @@ u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); + auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); + const Shader::TextureType result{ConvertTextureType(entry)}; + texture_types.emplace(handle, result); + return result; +} + +Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); + const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry)); + texture_pixel_formats.emplace(handle, result); + return result; +} + +u32 ComputeEnvironment::ReadViewportTransformState() { + return viewport_transform_state; } void FileEnvironment::Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_texture_pixel_formats{}; u64 num_cbuf_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_texture_pixel_formats), + sizeof(num_texture_pixel_formats)) .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&viewport_transform_state), sizeof(viewport_transform_state)) .read(reinterpret_cast(&stage), sizeof(stage)); code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); @@ -362,6 +429,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_texture_pixel_formats; ++i) { + u32 key; + Shader::TexturePixelFormat format; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&format), sizeof(format)); + texture_pixel_formats.emplace(key, format); + } for (size_t i = 0; i < num_cbuf_values; ++i) { u64 key; u32 value; @@ -409,6 +483,18 @@ Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { return it->second; } +Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) { + const auto it{texture_pixel_formats.find(handle)}; + if (it == texture_pixel_formats.end()) { + throw Shader::LogicError("Uncached read texture pixel format"); + } + return it->second; +} + +u32 FileEnvironment::ReadViewportTransformState() { + return viewport_transform_state; +} + u32 FileEnvironment::LocalMemorySize() const { return local_memory_size; } diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 6a7f51fad..05e4d312c 100755 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -63,14 +63,15 @@ public: protected: std::optional TryFindSize(); - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw); + Tegra::Texture::TICEntry ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw); Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; std::vector code; std::unordered_map texture_types; + std::unordered_map texture_pixel_formats; std::unordered_map cbuf_values; u32 local_memory_size{}; @@ -85,6 +86,8 @@ protected: u32 cached_highest = 0; u32 initial_offset = 0; + u32 viewport_transform_state = 1; + bool has_unbound_instructions = false; }; @@ -102,6 +105,10 @@ public: Shader::TextureType ReadTextureType(u32 handle) override; + Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + + u32 ReadViewportTransformState() override; + private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -120,6 +127,10 @@ public: Shader::TextureType ReadTextureType(u32 handle) override; + Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + + u32 ReadViewportTransformState() override; + private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -143,6 +154,10 @@ public: [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + + [[nodiscard]] u32 ReadViewportTransformState() override; + [[nodiscard]] u32 LocalMemorySize() const override; [[nodiscard]] u32 SharedMemorySize() const override; @@ -156,6 +171,7 @@ public: private: std::unique_ptr code; std::unordered_map texture_types; + std::unordered_map texture_pixel_formats; std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; @@ -164,6 +180,7 @@ private: u32 read_lowest{}; u32 read_highest{}; u32 initial_offset{}; + u32 viewport_transform_state = 1; }; void SerializePipeline(std::span key, std::span envs, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 50de186b0..3ef1f366a 100755 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -516,7 +516,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; - UNIMPLEMENTED_IF(info.tile_width_spacing > 0); UNIMPLEMENTED_IF(copy.image_offset.x != 0); UNIMPLEMENTED_IF(copy.image_offset.y != 0); UNIMPLEMENTED_IF(copy.image_offset.z != 0);