diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d7813bb4..6044e311a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,7 +166,7 @@ macro(yuzu_find_packages) # Capitalization matters here. We need the naming to match the generated paths from Conan set(REQUIRED_LIBS # Cmake Pkg Prefix Version Conan Pkg - "Catch2 2.13 catch2/2.13.0" + "Catch2 2.13.7 catch2/2.13.7" "fmt 8.0 fmt/8.0.0" "lz4 1.8 lz4/1.9.2" "nlohmann_json 3.8 nlohmann_json/3.8.0" diff --git a/README.md b/README.md index c141aecc4..34af1bd09 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2204. +This is the source code for early-access 2208. ## Legal Notice diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index 5a30f55a7..14a2845b1 100755 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -87,6 +87,14 @@ static void VolumeAdjustSamples(std::vector& samples, float game_volume) { } void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) { + auto now = std::chrono::steady_clock::now(); + auto duration = now.time_since_epoch(); + auto nanoseconds = std::chrono::duration_cast(duration); + + if (nanoseconds > expected_cb_time) { + ns_late = nanoseconds - expected_cb_time; + } + if (!IsPlaying()) { // Ensure we are in playing state before playing the next buffer sink_stream.Flush(); @@ -121,6 +129,7 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) { ns_late = {}; } + expected_cb_time = nanoseconds + (buffer_release_ns - ns_late); core_timing.ScheduleEvent(buffer_release_ns - ns_late, release_event, {}); } diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h index dbd97ec9c..111404e76 100755 --- a/src/audio_core/stream.h +++ b/src/audio_core/stream.h @@ -117,13 +117,14 @@ private: ReleaseCallback release_callback; ///< Buffer release callback for the stream State state{State::Stopped}; ///< Playback state of the stream std::shared_ptr - release_event; ///< Core timing release event for the stream - BufferPtr active_buffer; ///< Actively playing buffer in the stream - std::queue queued_buffers; ///< Buffers queued to be played in the stream - std::queue released_buffers; ///< Buffers recently released from the stream - SinkStream& sink_stream; ///< Output sink for the stream - Core::Timing::CoreTiming& core_timing; ///< Core timing instance. - std::string name; ///< Name of the stream, must be unique + release_event; ///< Core timing release event for the stream + BufferPtr active_buffer; ///< Actively playing buffer in the stream + std::queue queued_buffers; ///< Buffers queued to be played in the stream + std::queue released_buffers; ///< Buffers recently released from the stream + SinkStream& sink_stream; ///< Output sink for the stream + Core::Timing::CoreTiming& core_timing; ///< Core timing instance. + std::string name; ///< Name of the stream, must be unique + std::chrono::nanoseconds expected_cb_time = {}; ///< Estimated time of next callback }; using StreamPtr = std::shared_ptr; diff --git a/src/common/settings.h b/src/common/settings.h index ec8de1dfc..d805b176f 100755 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -596,6 +596,7 @@ struct Values { BasicSetting program_args{std::string(), "program_args"}; BasicSetting dump_exefs{false, "dump_exefs"}; BasicSetting dump_nso{false, "dump_nso"}; + BasicSetting dump_shaders{false, "dump_shaders"}; BasicSetting enable_fs_access_log{false, "enable_fs_access_log"}; BasicSetting reporting_services{false, "reporting_services"}; BasicSetting quest_flag{false, "quest_flag"}; diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 8369d0d84..102003f2e 100755 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -27,6 +27,8 @@ public: [[nodiscard]] virtual std::array WorkgroupSize() const = 0; + virtual void Dump(u64 hash) = 0; + [[nodiscard]] const ProgramHeader& SPH() const noexcept { return sph; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ef67628..4e86addb5 100755 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -422,6 +422,11 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + + if (Settings::values.dump_shaders) { + env.Dump(key.unique_hashes[index]); + } + if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -506,8 +511,12 @@ std::unique_ptr ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + if (Settings::values.dump_shaders) { + env.Dump(key.Hash()); + } + + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb8b4e08b..7b7e913ad 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -516,6 +516,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (Settings::values.dump_shaders) { + env.Dump(key.unique_hashes[index]); + } if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -611,6 +614,12 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + + // Dump it before error. + if (Settings::values.dump_shaders) { + env.Dump(key.Hash()); + } + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 05850afd0..c47f5aa8b 100755 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include #include @@ -14,6 +15,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "shader_recompiler/environment.h" #include "video_core/engines/kepler_compute.h" @@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { } } +static std::string_view StageToPreffix(Shader::Stage stage) { + switch (stage) { + case Shader::Stage::VertexB: + return "VB"; + case Shader::Stage::TessellationControl: + return "TC"; + case Shader::Stage::TessellationEval: + return "TE"; + case Shader::Stage::Geometry: + return "GS"; + case Shader::Stage::Fragment: + return "FS"; + case Shader::Stage::Compute: + return "CS"; + case Shader::Stage::VertexA: + return "VA"; + default: + return "UK"; + } +} + +static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest, + u32 initial_offset, Shader::Stage stage) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; + const auto base_dir{shader_dir / fmt::format("shaders")}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories"); + return; + } + const auto prefix = StageToPreffix(stage); + const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)}; + const size_t real_size = read_highest - read_lowest + initial_offset; + const size_t padding_needed = ((32 - (real_size % 32)) % 32); + std::fstream shader_file(name, std::ios::out | std::ios::binary); + const size_t jump_index = initial_offset / sizeof(u64); + shader_file.write(reinterpret_cast(code + jump_index), real_size); + for (size_t i = 0; i < padding_needed; i++) { + shader_file.put(0); + } +} + GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, u32 start_address_) : gpu_memory{&gpu_memory_}, program_base{program_base_} { @@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const { return Common::CityHash64(data.get(), size); } +void GenericEnvironment::Dump(u64 hash) { + DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage); +} + void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 code_size{static_cast(CachedSize())}; const u64 num_texture_types{static_cast(texture_types.size())}; @@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + initial_offset = sizeof(sph); gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) { if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + initial_offset = 0; } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + initial_offset = sizeof(sph); if (stage == Shader::Stage::Geometry) { file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); } } } +void FileEnvironment::Dump(u64 [[maybe_unused]] hash) { + DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage); +} + u64 FileEnvironment::ReadInstruction(u32 address) { if (address < read_lowest || address > read_highest) { throw Shader::LogicError("Out of bounds address {}", address); diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 6640e53d0..a3ae34218 100755 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -57,6 +57,8 @@ public: [[nodiscard]] u64 CalculateHash() const; + void Dump(u64 hash); + void Serialize(std::ofstream& file) const; protected: @@ -82,6 +84,7 @@ protected: u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; + u32 initial_offset = 0; bool has_unbound_instructions = false; }; @@ -149,6 +152,8 @@ public: [[nodiscard]] std::array WorkgroupSize() const override; + void Dump(u64 hash); + private: std::unique_ptr code; std::unordered_map texture_types; @@ -159,6 +164,7 @@ private: u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; + u32 initial_offset{}; }; void SerializePipeline(std::span key, std::span envs, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index ba066f98f..06954963d 100755 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -66,15 +66,14 @@ float TSCEntry::MaxAnisotropy() const noexcept { return 1.0f; } const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); - u32 new_max_anisotropic{}; + u32 added_anisotropic{}; if (anisotropic_settings == 0) { - const auto anisotropic_based_onscale = Settings::values.resolution_info.up_scale >> - Settings::values.resolution_info.down_shift; - new_max_anisotropic = std::max(anisotropic_based_onscale + 1U, 1U); + added_anisotropic = Settings::values.resolution_info.up_scale >> + Settings::values.resolution_info.down_shift; } else { - new_max_anisotropic = Settings::values.max_anisotropy.GetValue(); + added_anisotropic = Settings::values.max_anisotropy.GetValue() - 1U; } - return static_cast(1U << std::min(max_anisotropy + anisotropic_settings - 1, 31U)); + return static_cast(1U << (max_anisotropy + added_anisotropic)); } } // namespace Tegra::Texture diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 07bfa0360..5cfa3412c 100755 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -50,6 +50,8 @@ void ConfigureDebug::SetConfiguration() { ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); ui->enable_nsight_aftermath->setEnabled(runtime_lock); ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); + ui->dump_shaders->setEnabled(runtime_lock); + ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); ui->disable_loop_safety_checks->setEnabled(runtime_lock); @@ -71,6 +73,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.dump_shaders = ui->dump_shaders->isChecked(); Settings::values.disable_shader_loop_safety_checks = ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index b884a56b0..6b12cc223 100755 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -105,6 +105,19 @@ + + + + true + + + When checked, it will dump all the original assembler shaders from the disk shader cache or game as found + + + Dump Game Shaders + + +