early-access version 3656

main
pineappleEA 2023-06-10 20:07:01 +02:00
parent c313f4ceb4
commit 126c3c9ad0
13 changed files with 268 additions and 1 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 3655.
This is the source code for early-access 3656.
## Legal Notice

View File

@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate_program.h
host_translate_info.h
ir_opt/collect_shader_info_pass.cpp
ir_opt/conditional_barrier_pass.cpp
ir_opt/constant_propagation_pass.cpp
ir_opt/dead_code_elimination_pass.cpp
ir_opt/dual_vertex_pass.cpp
@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC
ir_opt/identity_removal_pass.cpp
ir_opt/layer_pass.cpp
ir_opt/lower_fp16_to_fp32.cpp
ir_opt/lower_fp64_to_fp32.cpp
ir_opt/lower_int64_to_int32.cpp
ir_opt/passes.h
ir_opt/position_pass.cpp

View File

@ -280,12 +280,18 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
RemoveUnreachableBlocks(program);
// Replace instructions before the SSA rewrite
if (!host_info.support_float64) {
Optimization::LowerFp64ToFp32(program);
}
if (!host_info.support_float16) {
Optimization::LowerFp16ToFp32(program);
}
if (!host_info.support_int64) {
Optimization::LowerInt64ToInt32(program);
}
if (!host_info.support_conditional_barrier) {
Optimization::ConditionalBarrierPass(program);
}
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(env, program);

View File

@ -10,6 +10,7 @@ namespace Shader {
/// Misc information about the host
struct HostTranslateInfo {
bool support_float64{}; ///< True when the device supports 64-bit floats
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
@ -17,6 +18,8 @@ struct HostTranslateInfo {
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
///< control flow
};
} // namespace Shader

View File

@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
/// Walks the program's syntax list in order and turns every Barrier instruction that is
/// reached while inside an If/Loop region, or after a Return/Unreachable that was itself
/// inside such a region, into an Identity instruction. A warning is logged for each
/// barrier that is neutralized.
/// @param program Program whose syntax list and blocks are inspected and modified in place.
void ConditionalBarrierPass(IR::Program& program) {
    using NodeType = IR::AbstractSyntaxNode::Type;

    // Current nesting depth of If/Loop regions.
    s32 nesting_depth{0};
    // Number of Return/Unreachable nodes seen while nested; once non-zero, every later
    // barrier is treated as conditional.
    s32 nested_exit_count{0};

    for (IR::AbstractSyntaxNode& node : program.syntax_list) {
        const NodeType type{node.type};
        if (type == NodeType::If || type == NodeType::Loop) {
            ++nesting_depth;
            continue;
        }
        if (type == NodeType::EndIf || type == NodeType::Repeat) {
            --nesting_depth;
            continue;
        }
        if (type == NodeType::Unreachable || type == NodeType::Return) {
            if (nesting_depth > 0) {
                ++nested_exit_count;
            }
            continue;
        }
        if (type != NodeType::Block) {
            continue;
        }
        // The counters cannot change while scanning a single block.
        const bool is_conditional{nesting_depth > 0 || nested_exit_count > 0};
        for (IR::Inst& inst : node.data.block->Instructions()) {
            if (!is_conditional) {
                continue;
            }
            if (inst.GetOpcode() != IR::Opcode::Barrier) {
                continue;
            }
            LOG_WARNING(Shader, "Barrier within conditional control flow");
            inst.ReplaceOpcode(IR::Opcode::Identity);
        }
    }
    // Every If/Loop must have been closed by a matching EndIf/Repeat.
    ASSERT(nesting_depth == 0);
}
} // namespace Shader::Optimization

View File

@ -0,0 +1,189 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Deltas added to a biased IEEE-754 exponent to re-bias it for the other precision.
// binary64 uses a bias of 1023 and binary32 a bias of 127, so the delta is always
// (destination bias - source bias): e.g. 1.0 has exponent 1023 as a double and 127 as a float.
constexpr s32 F64ToF32Exp = +127 - 1023;
constexpr s32 F32ToF64Exp = +1023 - 127;
/// Emits IR that rebuilds a 32-bit float from a double given as a pair of 32-bit words.
/// Element 0 of @p packed is used as the low word and element 1 as the high word.
/// The sign is copied, the exponent is re-biased with F64ToF32Exp (a zero exponent stays
/// zero and 0x7ff maps to 0xff), and only the top 23 mantissa bits are kept; the remaining
/// low mantissa bits are dropped. No other exponent range handling is performed here.
/// @param ir     Emitter used to insert the generated instructions.
/// @param packed Two-component value holding the raw bits of the double.
/// @return The reassembled bit pattern, bit-cast to F32.
IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
    const IR::U32 low_word{ir.CompositeExtract(packed, 0)};
    const IR::U32 high_word{ir.CompositeExtract(packed, 1)};

    // Fields of the double: sign (bit 31 of high), exponent (bits 20..30 of high),
    // mantissa (low 20 bits of high followed by the low word).
    const IR::U32 sign_bit{ir.BitFieldExtract(high_word, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp64{ir.BitFieldExtract(high_word, ir.Imm32(20), ir.Imm32(11))};
    const IR::U32 frac_high{ir.BitFieldExtract(high_word, ir.Imm32(0), ir.Imm32(20))};
    const IR::U32 frac_low{ir.BitFieldExtract(low_word, ir.Imm32(29), ir.Imm32(3))};

    // 20 bits from the high word plus the top 3 bits of the low word form the 23-bit mantissa.
    const IR::U32 frac_high_shifted{ir.ShiftLeftLogical(frac_high, ir.Imm32(3))};
    const IR::U32 frac32{ir.BitwiseOr(frac_high_shifted, frac_low)};

    // Zero exponent is preserved as zero, otherwise the exponent is re-biased.
    const IR::U1 exp_is_zero{ir.IEqual(exp64, ir.Imm32(0))};
    const IR::U32 rebiased_exp{ir.IAdd(exp64, ir.Imm32(F64ToF32Exp))};
    const IR::U32 exp_or_zero{ir.Select(exp_is_zero, ir.Imm32(0), rebiased_exp)};

    // An all-ones exponent (infinity / NaN) maps to the all-ones float exponent.
    const IR::U1 exp_is_max{ir.IEqual(exp64, ir.Imm32(0x7ff))};
    const IR::U32 exp32{ir.Select(exp_is_max, ir.Imm32(0xff), exp_or_zero)};

    // Assemble sign | exponent | mantissa.
    const IR::U32 sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
    const IR::U32 exp_field{ir.ShiftLeftLogical(exp32, ir.Imm32(23))};
    const IR::U32 exp_and_frac{ir.BitwiseOr(exp_field, frac32)};
    const IR::U32 bits{ir.BitwiseOr(sign_field, exp_and_frac)};
    return ir.BitCast<IR::F32>(bits);
}
/// Emits IR that expands a 32-bit float into the two 32-bit words of a double.
/// The sign is copied, the exponent is re-biased with F32ToF64Exp (a zero exponent stays
/// zero and 0xff maps to 0x7ff), and the 23 mantissa bits are placed at the top of the
/// double's mantissa; the remaining low mantissa bits of the result are zero.
/// @param ir  Emitter used to insert the generated instructions.
/// @param raw Value that is reinterpreted as an F32.
/// @return Composite of (low word, high word).
IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
    const IR::U32 bits{ir.BitCast<IR::U32>(IR::F32(raw))};

    // Fields of the float: sign (bit 31), exponent (bits 23..30), mantissa (bits 0..22).
    const IR::U32 sign_bit{ir.BitFieldExtract(bits, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp32{ir.BitFieldExtract(bits, ir.Imm32(23), ir.Imm32(8))};
    const IR::U32 frac32{ir.BitFieldExtract(bits, ir.Imm32(0), ir.Imm32(23))};

    // Upper 20 mantissa bits go to the high word, the lower 3 to the top of the low word.
    const IR::U32 frac_high{ir.BitFieldExtract(frac32, ir.Imm32(3), ir.Imm32(20))};
    const IR::U32 frac_low{ir.BitFieldExtract(frac32, ir.Imm32(0), ir.Imm32(3))};

    // Zero exponent is preserved as zero, otherwise the exponent is re-biased.
    const IR::U1 exp_is_zero{ir.IEqual(exp32, ir.Imm32(0))};
    const IR::U32 rebiased_exp{ir.IAdd(exp32, ir.Imm32(F32ToF64Exp))};
    const IR::U32 exp_or_zero{ir.Select(exp_is_zero, ir.Imm32(0), rebiased_exp)};

    // An all-ones exponent (infinity / NaN) maps to the all-ones double exponent.
    const IR::U1 exp_is_max{ir.IEqual(exp32, ir.Imm32(0xff))};
    const IR::U32 exp64{ir.Select(exp_is_max, ir.Imm32(0x7ff), exp_or_zero)};

    const IR::U32 low_word{ir.ShiftLeftLogical(frac_low, ir.Imm32(29))};

    // High word: sign | exponent | upper mantissa bits.
    const IR::U32 sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
    const IR::U32 exp_field{ir.ShiftLeftLogical(exp64, ir.Imm32(20))};
    const IR::U32 exp_and_frac{ir.BitwiseOr(exp_field, frac_high)};
    const IR::U32 high_word{ir.BitwiseOr(sign_field, exp_and_frac)};

    return ir.CompositeConstruct(low_word, high_word);
}
/// Maps a 64-bit floating point opcode to its 32-bit counterpart.
/// Conversions between F32 and F64 become Identity, since both sides end up as F32.
/// Any opcode without a mapping is returned unchanged.
///
/// NOTE(review): PackFloat2x16 / UnpackFloat2x16 are deliberately not remapped here. By
/// their names they are 16-bit float operations, so rewriting them in the fp64 lowering
/// would change them while leaving their operands untouched on hosts that keep native
/// fp16. Presumably the fp16 lowering pass is the one responsible for them - confirm.
/// @param op Opcode of the instruction being lowered.
/// @return The replacement opcode, or @p op itself when no lowering applies.
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    // Arithmetic and rounding
    case IR::Opcode::FPAbs64:
        return IR::Opcode::FPAbs32;
    case IR::Opcode::FPAdd64:
        return IR::Opcode::FPAdd32;
    case IR::Opcode::FPCeil64:
        return IR::Opcode::FPCeil32;
    case IR::Opcode::FPFloor64:
        return IR::Opcode::FPFloor32;
    case IR::Opcode::FPFma64:
        return IR::Opcode::FPFma32;
    case IR::Opcode::FPMul64:
        return IR::Opcode::FPMul32;
    case IR::Opcode::FPNeg64:
        return IR::Opcode::FPNeg32;
    case IR::Opcode::FPRoundEven64:
        return IR::Opcode::FPRoundEven32;
    case IR::Opcode::FPSaturate64:
        return IR::Opcode::FPSaturate32;
    case IR::Opcode::FPClamp64:
        return IR::Opcode::FPClamp32;
    case IR::Opcode::FPTrunc64:
        return IR::Opcode::FPTrunc32;
    // Composites
    case IR::Opcode::CompositeConstructF64x2:
        return IR::Opcode::CompositeConstructF32x2;
    case IR::Opcode::CompositeConstructF64x3:
        return IR::Opcode::CompositeConstructF32x3;
    case IR::Opcode::CompositeConstructF64x4:
        return IR::Opcode::CompositeConstructF32x4;
    case IR::Opcode::CompositeExtractF64x2:
        return IR::Opcode::CompositeExtractF32x2;
    case IR::Opcode::CompositeExtractF64x3:
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF64x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF64x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF64x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF64x4:
        return IR::Opcode::CompositeInsertF32x4;
    // Comparisons
    case IR::Opcode::FPOrdEqual64:
        return IR::Opcode::FPOrdEqual32;
    case IR::Opcode::FPUnordEqual64:
        return IR::Opcode::FPUnordEqual32;
    case IR::Opcode::FPOrdNotEqual64:
        return IR::Opcode::FPOrdNotEqual32;
    case IR::Opcode::FPUnordNotEqual64:
        return IR::Opcode::FPUnordNotEqual32;
    case IR::Opcode::FPOrdLessThan64:
        return IR::Opcode::FPOrdLessThan32;
    case IR::Opcode::FPUnordLessThan64:
        return IR::Opcode::FPUnordLessThan32;
    case IR::Opcode::FPOrdGreaterThan64:
        return IR::Opcode::FPOrdGreaterThan32;
    case IR::Opcode::FPUnordGreaterThan64:
        return IR::Opcode::FPUnordGreaterThan32;
    case IR::Opcode::FPOrdLessThanEqual64:
        return IR::Opcode::FPOrdLessThanEqual32;
    case IR::Opcode::FPUnordLessThanEqual64:
        return IR::Opcode::FPUnordLessThanEqual32;
    case IR::Opcode::FPOrdGreaterThanEqual64:
        return IR::Opcode::FPOrdGreaterThanEqual32;
    case IR::Opcode::FPUnordGreaterThanEqual64:
        return IR::Opcode::FPUnordGreaterThanEqual32;
    case IR::Opcode::FPIsNan64:
        return IR::Opcode::FPIsNan32;
    // Float to integer conversions
    case IR::Opcode::ConvertS16F64:
        return IR::Opcode::ConvertS16F32;
    case IR::Opcode::ConvertS32F64:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS64F64:
        return IR::Opcode::ConvertS64F32;
    case IR::Opcode::ConvertU16F64:
        return IR::Opcode::ConvertU16F32;
    case IR::Opcode::ConvertU32F64:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU64F64:
        return IR::Opcode::ConvertU64F32;
    // F32 <-> F64 conversions are no-ops once F64 is represented as F32
    case IR::Opcode::ConvertF32F64:
    case IR::Opcode::ConvertF64F32:
        return IR::Opcode::Identity;
    // Integer to float conversions
    case IR::Opcode::ConvertF64S8:
        return IR::Opcode::ConvertF32S8;
    case IR::Opcode::ConvertF64S16:
        return IR::Opcode::ConvertF32S16;
    case IR::Opcode::ConvertF64S32:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF64S64:
        return IR::Opcode::ConvertF32S64;
    case IR::Opcode::ConvertF64U8:
        return IR::Opcode::ConvertF32U8;
    case IR::Opcode::ConvertF64U16:
        return IR::Opcode::ConvertF32U16;
    case IR::Opcode::ConvertF64U32:
        return IR::Opcode::ConvertF32U32;
    case IR::Opcode::ConvertF64U64:
        return IR::Opcode::ConvertF32U64;
    default:
        return op;
    }
}
/// Lowers a single instruction.
/// PackDouble2x32 and UnpackDouble2x32 have their uses replaced by a bit-level conversion
/// emitted at the instruction's position; every other instruction has its opcode passed
/// through Replace().
/// @param block Block that owns @p inst; new instructions are emitted into it.
/// @param inst  Instruction to lower.
void Lower(IR::Block& block, IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};

    if (opcode == IR::Opcode::PackDouble2x32) {
        // The packed words are turned directly into an F32.
        IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
        inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
        return;
    }
    if (opcode == IR::Opcode::UnpackDouble2x32) {
        // The operand is treated as an F32 and expanded to the two words of a double.
        IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
        inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
        return;
    }
    inst.ReplaceOpcode(Replace(opcode));
}
} // Anonymous namespace
/// Applies the fp64 lowering to every instruction of every block in the program.
/// @param program Program modified in place.
void LowerFp64ToFp32(IR::Program& program) {
    for (IR::Block* const block_ptr : program.blocks) {
        IR::Block& current_block{*block_ptr};
        for (IR::Inst& instruction : current_block.Instructions()) {
            Lower(current_block, instruction);
        }
    }
}
} // namespace Shader::Optimization

View File

@ -13,10 +13,12 @@ struct HostTranslateInfo;
namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConditionalBarrierPass(IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
void RescalingPass(IR::Program& program);

View File

@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux)) && !strict_context_required;
use_driver_cache = is_nvidia;
supports_conditional_barriers = !is_intel;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);

View File

@ -188,6 +188,10 @@ public:
return strict_context_required;
}
/// Returns the stored flag telling whether barriers inside conditional control flow are
/// considered supported on this device.
bool SupportsConditionalBarriers() const {
return supports_conditional_barriers;
}
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@ -233,6 +237,7 @@ private:
bool has_bool_ref_bug{};
bool can_report_memory{};
bool strict_context_required{};
bool supports_conditional_barriers{};
std::string vendor_name;
};

View File

@ -232,12 +232,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
},
host_info{
.support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();

View File

@ -350,6 +350,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
host_info = Shader::HostTranslateInfo{
.support_float64 = device.IsFloat64Supported(),
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder =

View File

@ -386,6 +386,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
supports_conditional_barriers = !(is_intel_anv || is_intel_windows);
CollectPhysicalMemoryInfo();
CollectToolingInfo();

View File

@ -300,6 +300,11 @@ public:
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
}
/// Returns true if the device supports float64 natively.
bool IsFloat64Supported() const {
return features.features.shaderFloat64;
}
/// Returns true if the device supports float16 natively.
bool IsFloat16Supported() const {
return features.shader_float16_int8.shaderFloat16;
@ -580,6 +585,10 @@ public:
return properties.properties.limits.maxVertexInputBindings;
}
/// Returns true if the device allows barriers in conditional control flow.
bool SupportsConditionalBarriers() const {
return supports_conditional_barriers;
}
private:
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
@ -683,6 +692,7 @@ private:
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool