early-access version 2642

This commit is contained in:
pineappleEA 2022-04-04 00:41:58 +02:00
parent 3236e6f599
commit ec43dfdade
9 changed files with 64 additions and 58 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2641. This is the source code for early-access 2642.
## Legal Notice ## Legal Notice

View File

@ -10,25 +10,49 @@
#include "common/uint128.h" #include "common/uint128.h"
#include "common/x64/native_clock.h" #include "common/x64/native_clock.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Common { namespace Common {
#ifdef _MSC_VER
/// MSVC variant: returns the value produced by __rdtsc(), with an _mm_lfence()
/// followed by a _ReadWriteBarrier() issued both before and after the read.
/// The statement order below is the whole point of this helper; do not reorder it.
__forceinline static u64 FencedRDTSC() {
// Leading fence + barrier, issued before the counter is sampled.
// NOTE(review): _ReadWriteBarrier() is presumably here as a compiler-level
// reordering barrier to complement the hardware fence - confirm against the MSVC docs.
_mm_lfence();
_ReadWriteBarrier();
// Sample the counter; the value is captured before the trailing fence runs.
const u64 result = __rdtsc();
// Trailing fence + barrier, mirroring the leading pair.
_mm_lfence();
_ReadWriteBarrier();
return result;
}
#else
/// Non-MSVC variant: executes `lfence; rdtsc; lfence` as a single inline-assembly
/// statement and returns the counter value as one 64-bit integer.
static u64 FencedRDTSC() {
u64 result;
// Instruction sequence, kept in one `asm volatile` statement:
//   lfence            - fence ahead of the counter read
//   rdtsc             - read the counter
//   shl $32, %rdx     - move the RDX half into the upper 32 bits
//   or  %rdx, %0      - merge it into operand 0, which is bound to RAX ("=a")
//   lfence            - fence after the counter read
// Clobbers: "rdx" because it is overwritten as scratch, "cc" because shl/or
// modify the flags, and "memory" - presumably so the compiler does not move
// memory accesses across the statement (NOTE(review): confirm intent).
asm volatile("lfence\n\t"
"rdtsc\n\t"
"shl $32, %%rdx\n\t"
"or %%rdx, %0\n\t"
"lfence"
: "=a"(result)
:
: "rdx", "memory", "cc");
return result;
}
#endif
u64 EstimateRDTSCFrequency() { u64 EstimateRDTSCFrequency() {
// Discard the first result measuring the rdtsc. // Discard the first result measuring the rdtsc.
_mm_mfence(); FencedRDTSC();
__rdtsc();
std::this_thread::sleep_for(std::chrono::milliseconds{1}); std::this_thread::sleep_for(std::chrono::milliseconds{1});
_mm_mfence(); FencedRDTSC();
__rdtsc();
// Get the current time. // Get the current time.
const auto start_time = std::chrono::steady_clock::now(); const auto start_time = std::chrono::steady_clock::now();
_mm_mfence(); const u64 tsc_start = FencedRDTSC();
const u64 tsc_start = __rdtsc();
// Wait for 200 milliseconds. // Wait for 200 milliseconds.
std::this_thread::sleep_for(std::chrono::milliseconds{200}); std::this_thread::sleep_for(std::chrono::milliseconds{200});
const auto end_time = std::chrono::steady_clock::now(); const auto end_time = std::chrono::steady_clock::now();
_mm_mfence(); const u64 tsc_end = FencedRDTSC();
const u64 tsc_end = __rdtsc();
// Calculate differences. // Calculate differences.
const u64 timer_diff = static_cast<u64>( const u64 timer_diff = static_cast<u64>(
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 rtsc_frequency_) u64 rtsc_frequency_)
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
rtsc_frequency_} { rtsc_frequency_} {
_mm_mfence(); time_point.inner.last_measure = FencedRDTSC();
time_point.inner.last_measure = __rdtsc();
time_point.inner.accumulated_ticks = 0U; time_point.inner.accumulated_ticks = 0U;
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do { do {
_mm_mfence(); const u64 current_measure = FencedRDTSC();
const u64 current_measure = __rdtsc();
u64 diff = current_measure - current_time_point.inner.last_measure; u64 diff = current_measure - current_time_point.inner.last_measure;
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do { do {
new_time_point.pack = current_time_point.pack; new_time_point.pack = current_time_point.pack;
_mm_mfence(); new_time_point.inner.last_measure = FencedRDTSC();
new_time_point.inner.last_measure = __rdtsc();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack, current_time_point.pack)); current_time_point.pack, current_time_point.pack));
} }

View File

@ -40,9 +40,6 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
} }
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
// Mark any pre-existing rasterizer memory in this range as remapped
rasterizer->ModifyGPUMemory(gpu_addr, size);
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
if (it != map_ranges.end() && it->first == gpu_addr) { if (it != map_ranges.end() && it->first == gpu_addr) {
it->second = size; it->second = size;

View File

@ -200,7 +200,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
}); });
} }
const void* const descriptor_data{update_descriptor_queue.UpdateData()}; const void* const descriptor_data{update_descriptor_queue.UpdateData()};
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); const bool is_rescaling = info.uses_rescaling_uniform;
scheduler.Record([this, descriptor_data, is_rescaling, scheduler.Record([this, descriptor_data, is_rescaling,
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

View File

@ -238,6 +238,7 @@ GraphicsPipeline::GraphicsPipeline(
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors); num_textures += Shader::NumDescriptors(info->texture_descriptors);
uses_rescale_unfiorm |= info->uses_rescaling_uniform;
} }
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
@ -471,7 +472,8 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
}); });
} }
const bool is_rescaling{texture_cache.IsRescaling()}; const bool is_rescaling{texture_cache.IsRescaling()};
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; const bool update_rescaling{uses_rescale_unfiorm ? scheduler.UpdateRescaling(is_rescaling)
: false};
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
const void* const descriptor_data{update_descriptor_queue.UpdateData()}; const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
@ -479,10 +481,12 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
if (bind_pipeline) { if (bind_pipeline) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
} }
if (uses_rescale_unfiorm) {
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
rescaling_data.data()); rescaling_data.data());
if (update_rescaling) { }
if (uses_rescale_unfiorm && update_rescaling) {
const f32 config_down_factor{Settings::values.resolution_info.down_factor}; const f32 config_down_factor{Settings::values.resolution_info.down_factor};
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,

View File

@ -151,6 +151,7 @@ private:
std::mutex build_mutex; std::mutex build_mutex;
std::atomic_bool is_built{false}; std::atomic_bool is_built{false};
bool uses_push_descriptor{false}; bool uses_push_descriptor{false};
bool uses_rescale_unfiorm{false};
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@ -234,12 +234,9 @@ void RasterizerVulkan::Clear() {
const VkExtent2D render_area = framebuffer->RenderArea(); const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer); scheduler.RequestRenderpass(framebuffer);
u32 up_scale = 1; const bool is_rescaling = texture_cache.IsRescaling();
u32 down_shift = 0; const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
if (texture_cache.IsRescaling()) { const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
UpdateViewportsState(regs); UpdateViewportsState(regs);
VkClearRect clear_rect{ VkClearRect clear_rect{
@ -695,12 +692,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) { if (!state_tracker.TouchScissors()) {
return; return;
} }
u32 up_scale = 1; const bool is_rescaling = texture_cache.IsRescaling();
u32 down_shift = 0; const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
if (texture_cache.IsRescaling()) { const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
const std::array scissors{ const std::array scissors{
GetScissorState(regs, 0, up_scale, down_shift), GetScissorState(regs, 0, up_scale, down_shift),
GetScissorState(regs, 1, up_scale, down_shift), GetScissorState(regs, 1, up_scale, down_shift),

View File

@ -328,7 +328,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
} }
const bool rescaled = RescaleRenderTargets(is_clear); const bool rescaled = RescaleRenderTargets(is_clear);
if (is_rescaling != rescaled) { const auto& resolution_info = Settings::values.resolution_info;
if (resolution_info.active && is_rescaling != rescaled) {
flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleViewports] = true;
flags[Dirty::RescaleScissors] = true; flags[Dirty::RescaleScissors] = true;
is_rescaling = rescaled; is_rescaling = rescaled;
@ -345,12 +346,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
for (size_t index = 0; index < NUM_RT; ++index) { for (size_t index = 0; index < NUM_RT; ++index) {
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
} }
u32 up_scale = 1; const u32 up_scale = is_rescaling ? resolution_info.up_scale : 1U;
u32 down_shift = 0; const u32 down_shift = is_rescaling ? resolution_info.down_shift : 0U;
if (is_rescaling) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
render_targets.size = Extent2D{ render_targets.size = Extent2D{
(maxwell3d.regs.render_area.width * up_scale) >> down_shift, (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
(maxwell3d.regs.render_area.height * up_scale) >> down_shift, (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
@ -454,12 +451,6 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
}); });
for (const ImageId image_id : images) { for (const ImageId image_id : images) {
DownloadImage(image_id);
}
}
template <class P>
void TextureCache<P>::DownloadImage(ImageId image_id) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info); const auto copies = FullDownloadCopies(image.info);
@ -467,6 +458,7 @@ void TextureCache<P>::DownloadImage(ImageId image_id) {
runtime.Finish(); runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
} }
}
template <class P> template <class P>
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
@ -1063,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : ignore_textures) { for (const ImageId overlap_id : ignore_textures) {
Image& overlap = slot_images[overlap_id]; Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) { if (True(overlap.flags & ImageFlagBits::GpuModified)) {
DownloadImage(overlap_id); UNIMPLEMENTED();
} }
if (True(overlap.flags & ImageFlagBits::Tracked)) { if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id); UntrackImage(overlap, overlap_id);

View File

@ -139,9 +139,6 @@ public:
/// Download contents of host images to guest memory in a region /// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size); void DownloadMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory
void DownloadImage(ImageId image_id);
/// Remove images in a region /// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size); void UnmapMemory(VAddr cpu_addr, size_t size);