early-access version 2156
This commit is contained in:
parent
238cebb24e
commit
f85f34d123
@ -1,7 +1,7 @@
|
|||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 2153.
|
This is the source code for early-access 2156.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
2656
externals/FidelityFX-FSR/ffx-fsr/ffx_a.h
vendored
Executable file
2656
externals/FidelityFX-FSR/ffx-fsr/ffx_a.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1199
externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h
vendored
Executable file
1199
externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
19
externals/FidelityFX-FSR/license.txt
vendored
Executable file
19
externals/FidelityFX-FSR/license.txt
vendored
Executable file
@ -0,0 +1,19 @@
|
|||||||
|
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
@ -48,8 +48,8 @@ struct Rectangle {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns a copy of this rectangle resized by the factor s with its top-left corner kept fixed.
 * Only the extent (width and height) is multiplied by s; left and top are carried over as-is.
 * @param s Scale factor applied to the width and the height.
 * @return The resized rectangle.
 */
[[nodiscard]] Rectangle<T> Scale(const float s) const {
    // Scale the extent rather than the absolute edge: multiplying (left + width) by s would
    // also scale the origin offset and yield the wrong size whenever left/top are non-zero.
    const float scaled_width = static_cast<float>(GetWidth()) * s;
    const float scaled_height = static_cast<float>(GetHeight()) * s;
    return Rectangle{left, top, static_cast<T>(static_cast<float>(left) + scaled_width),
                     static_cast<T>(static_cast<float>(top) + scaled_height)};
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -47,7 +47,9 @@ void LogSettings() {
|
|||||||
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
|
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
|
||||||
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
|
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
|
||||||
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
|
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
|
||||||
log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue());
|
log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
|
||||||
|
log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue());
|
||||||
|
log_setting("Renderer_AntiAliasing", values.anti_aliasing.GetValue());
|
||||||
log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue());
|
log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue());
|
||||||
log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue());
|
log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue());
|
||||||
log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
|
log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
|
||||||
@ -105,6 +107,55 @@ float Volume() {
|
|||||||
return values.volume.GetValue() / 100.0f;
|
return values.volume.GetValue() / 100.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UpdateRescalingInfo() {
|
||||||
|
const auto setup = values.resolution_setup.GetValue();
|
||||||
|
auto& info = values.resolution_info;
|
||||||
|
info.downscale = false;
|
||||||
|
switch (setup) {
|
||||||
|
case ResolutionSetup::Res1_2X:
|
||||||
|
info.up_scale = 1;
|
||||||
|
info.down_shift = 1;
|
||||||
|
info.downscale = true;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res3_4X:
|
||||||
|
info.up_scale = 3;
|
||||||
|
info.down_shift = 2;
|
||||||
|
info.downscale = true;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res1X:
|
||||||
|
info.up_scale = 1;
|
||||||
|
info.down_shift = 0;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res2X:
|
||||||
|
info.up_scale = 2;
|
||||||
|
info.down_shift = 0;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res3X:
|
||||||
|
info.up_scale = 3;
|
||||||
|
info.down_shift = 0;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res4X:
|
||||||
|
info.up_scale = 4;
|
||||||
|
info.down_shift = 0;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res5X:
|
||||||
|
info.up_scale = 5;
|
||||||
|
info.down_shift = 0;
|
||||||
|
break;
|
||||||
|
case ResolutionSetup::Res6X:
|
||||||
|
info.up_scale = 6;
|
||||||
|
info.down_shift = 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
info.up_scale = 1;
|
||||||
|
info.down_shift = 0;
|
||||||
|
}
|
||||||
|
info.up_factor = static_cast<f32>(info.up_scale) / (1U << info.down_shift);
|
||||||
|
info.down_factor = static_cast<f32>(1U << info.down_shift) / info.up_scale;
|
||||||
|
info.active = info.up_scale != 1 || info.down_shift != 0;
|
||||||
|
}
|
||||||
|
|
||||||
void RestoreGlobalState(bool is_powered_on) {
|
void RestoreGlobalState(bool is_powered_on) {
|
||||||
// If a game is running, DO NOT restore the global settings state
|
// If a game is running, DO NOT restore the global settings state
|
||||||
if (is_powered_on) {
|
if (is_powered_on) {
|
||||||
|
@ -52,6 +52,56 @@ enum class NvdecEmulation : u32 {
|
|||||||
GPU = 2,
|
GPU = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Selectable internal resolution presets. The enumerator name encodes the ratio
// (Res1_2X = 1/2, Res3_4X = 3/4, ResNX = N times).
enum class ResolutionSetup : u32 {
|
||||||
|
Res1_2X = 0, // 0.5x
|
||||||
|
Res3_4X = 1, // 0.75x
|
||||||
|
Res1X = 2, // 1x
|
||||||
|
Res2X = 3, // 2x
|
||||||
|
Res3X = 4, // 3x
|
||||||
|
Res4X = 5, // 4x
|
||||||
|
Res5X = 6, // 5x
|
||||||
|
Res6X = 7, // 6x
|
||||||
|
};
|
||||||
|
|
||||||
|
// Identifiers for the selectable scaling filters.
enum class ScalingFilter : u32 {
|
||||||
|
NearestNeighbor = 0,
|
||||||
|
Bilinear = 1,
|
||||||
|
Bicubic = 2,
|
||||||
|
Gaussian = 3,
|
||||||
|
ScaleForce = 4,
|
||||||
|
Fsr = 5,
|
||||||
|
LastFilter = Fsr, // Alias of the highest-valued filter; keep in sync when adding entries.
|
||||||
|
};
|
||||||
|
|
||||||
|
// Identifiers for the selectable anti-aliasing modes.
enum class AntiAliasing : u32 {
|
||||||
|
None = 0,
|
||||||
|
Fxaa = 1,
|
||||||
|
LastAA = Fxaa, // Alias of the highest-valued mode; keep in sync when adding entries.
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ResolutionScalingInfo {
|
||||||
|
u32 up_scale{1};
|
||||||
|
u32 down_shift{0};
|
||||||
|
f32 up_factor{1.0f};
|
||||||
|
f32 down_factor{1.0f};
|
||||||
|
bool active{};
|
||||||
|
bool downscale{};
|
||||||
|
|
||||||
|
s32 ScaleUp(s32 value) const {
|
||||||
|
if (value == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return std::max((value * static_cast<s32>(up_scale)) >> static_cast<s32>(down_shift), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 ScaleUp(u32 value) const {
|
||||||
|
if (value == 0U) {
|
||||||
|
return 0U;
|
||||||
|
}
|
||||||
|
return std::max((value * up_scale) >> down_shift, 1U);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/** The BasicSetting class is a simple resource manager. It defines a label and default value
|
/** The BasicSetting class is a simple resource manager. It defines a label and default value
|
||||||
* alongside the actual value of the setting for simpler and less-error prone use with frontend
|
* alongside the actual value of the setting for simpler and less-error prone use with frontend
|
||||||
* configurations. Setting a default value and label is required, though subclasses may deviate from
|
* configurations. Setting a default value and label is required, though subclasses may deviate from
|
||||||
@ -451,7 +501,10 @@ struct Values {
|
|||||||
"disable_shader_loop_safety_checks"};
|
"disable_shader_loop_safety_checks"};
|
||||||
Setting<int> vulkan_device{0, "vulkan_device"};
|
Setting<int> vulkan_device{0, "vulkan_device"};
|
||||||
|
|
||||||
Setting<u16> resolution_factor{1, "resolution_factor"};
|
ResolutionScalingInfo resolution_info{};
|
||||||
|
Setting<ResolutionSetup> resolution_setup{ResolutionSetup::Res1X, "resolution_setup"};
|
||||||
|
Setting<ScalingFilter> scaling_filter{ScalingFilter::Bilinear, "scaling_filter"};
|
||||||
|
Setting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"};
|
||||||
// *nix platforms may have issues with the borderless windowed fullscreen mode.
|
// *nix platforms may have issues with the borderless windowed fullscreen mode.
|
||||||
// Default to exclusive fullscreen on these platforms for now.
|
// Default to exclusive fullscreen on these platforms for now.
|
||||||
RangedSetting<FullscreenMode> fullscreen_mode{
|
RangedSetting<FullscreenMode> fullscreen_mode{
|
||||||
@ -596,6 +649,8 @@ std::string GetTimeZoneString();
|
|||||||
|
|
||||||
void LogSettings();
|
void LogSettings();
|
||||||
|
|
||||||
|
void UpdateRescalingInfo();
|
||||||
|
|
||||||
// Restore the global state of all applicable settings in the Values struct
|
// Restore the global state of all applicable settings in the Values struct
|
||||||
void RestoreGlobalState(bool is_powered_on);
|
void RestoreGlobalState(bool is_powered_on);
|
||||||
|
|
||||||
|
@ -44,16 +44,13 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds the default frame layout for the current docked/undocked screen size multiplied by
// res_scale. The scaled width and height are truncated when converted back to u32.
FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale) {
    // Start from the undocked size and switch to the docked one when docked mode is enabled.
    u32 base_width = ScreenUndocked::Width;
    u32 base_height = ScreenUndocked::Height;
    if (Settings::values.use_docked_mode.GetValue()) {
        base_width = ScreenDocked::Width;
        base_height = ScreenDocked::Height;
    }

    const auto scale_extent = [res_scale](u32 extent) {
        return static_cast<u32>(static_cast<f32>(extent) * res_scale);
    };
    return DefaultFrameLayout(scale_extent(base_width), scale_extent(base_height));
}
|
||||||
|
@ -60,7 +60,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height);
|
|||||||
* Convenience method to get frame layout by resolution scale
|
* Convenience method to get frame layout by resolution scale
|
||||||
* @param res_scale resolution scale factor
|
* @param res_scale resolution scale factor
|
||||||
*/
|
*/
|
||||||
FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale);
|
FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience method to determine emulation aspect ratio
|
* Convenience method to determine emulation aspect ratio
|
||||||
|
@ -801,15 +801,11 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext&
|
|||||||
rb.Push(ResultSuccess);
|
rb.Push(ResultSuccess);
|
||||||
|
|
||||||
if (Settings::values.use_docked_mode.GetValue()) {
|
if (Settings::values.use_docked_mode.GetValue()) {
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
|
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
|
||||||
} else {
|
} else {
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
|
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -543,11 +543,8 @@ private:
|
|||||||
switch (transaction) {
|
switch (transaction) {
|
||||||
case TransactionId::Connect: {
|
case TransactionId::Connect: {
|
||||||
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
|
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
|
||||||
IGBPConnectResponseParcel response{
|
IGBPConnectResponseParcel response{static_cast<u32>(DisplayResolution::UndockedWidth),
|
||||||
static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
|
static_cast<u32>(DisplayResolution::UndockedHeight)};
|
||||||
Settings::values.resolution_factor.GetValue()),
|
|
||||||
static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
|
|
||||||
Settings::values.resolution_factor.GetValue())};
|
|
||||||
|
|
||||||
buffer_queue.Connect();
|
buffer_queue.Connect();
|
||||||
|
|
||||||
@ -777,15 +774,11 @@ private:
|
|||||||
rb.Push(ResultSuccess);
|
rb.Push(ResultSuccess);
|
||||||
|
|
||||||
if (Settings::values.use_docked_mode.GetValue()) {
|
if (Settings::values.use_docked_mode.GetValue()) {
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
|
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
|
||||||
} else {
|
} else {
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
|
||||||
rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
|
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games.
|
rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games.
|
||||||
@ -1065,10 +1058,8 @@ private:
|
|||||||
// This only returns the fixed values of 1280x720 and makes no distinguishing
|
// This only returns the fixed values of 1280x720 and makes no distinguishing
|
||||||
// between docked and undocked dimensions. We take the liberty of applying
|
// between docked and undocked dimensions. We no longer apply
|
||||||
// the resolution scaling factor here.
|
// the resolution scaling factor here.
|
||||||
rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) *
|
rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth));
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight));
|
||||||
rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) *
|
|
||||||
static_cast<u32>(Settings::values.resolution_factor.GetValue()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetLayerScalingMode(Kernel::HLERequestContext& ctx) {
|
void SetLayerScalingMode(Kernel::HLERequestContext& ctx) {
|
||||||
@ -1101,8 +1092,6 @@ private:
|
|||||||
LOG_WARNING(Service_VI, "(STUBBED) called");
|
LOG_WARNING(Service_VI, "(STUBBED) called");
|
||||||
|
|
||||||
DisplayInfo display_info;
|
DisplayInfo display_info;
|
||||||
display_info.width *= static_cast<u64>(Settings::values.resolution_factor.GetValue());
|
|
||||||
display_info.height *= static_cast<u64>(Settings::values.resolution_factor.GetValue());
|
|
||||||
ctx.WriteBuffer(&display_info, sizeof(DisplayInfo));
|
ctx.WriteBuffer(&display_info, sizeof(DisplayInfo));
|
||||||
IPC::ResponseBuilder rb{ctx, 4};
|
IPC::ResponseBuilder rb{ctx, 4};
|
||||||
rb.Push(ResultSuccess);
|
rb.Push(ResultSuccess);
|
||||||
|
@ -229,8 +229,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
|
|||||||
AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue());
|
AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue());
|
||||||
AddField(field_type, "Renderer_Backend",
|
AddField(field_type, "Renderer_Backend",
|
||||||
TranslateRenderer(Settings::values.renderer_backend.GetValue()));
|
TranslateRenderer(Settings::values.renderer_backend.GetValue()));
|
||||||
AddField(field_type, "Renderer_ResolutionFactor",
|
|
||||||
Settings::values.resolution_factor.GetValue());
|
|
||||||
AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue());
|
AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue());
|
||||||
AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue());
|
AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue());
|
||||||
AddField(field_type, "Renderer_UseDiskShaderCache",
|
AddField(field_type, "Renderer_UseDiskShaderCache",
|
||||||
|
@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC
|
|||||||
ir_opt/lower_fp16_to_fp32.cpp
|
ir_opt/lower_fp16_to_fp32.cpp
|
||||||
ir_opt/lower_int64_to_int32.cpp
|
ir_opt/lower_int64_to_int32.cpp
|
||||||
ir_opt/passes.h
|
ir_opt/passes.h
|
||||||
|
ir_opt/rescaling_pass.cpp
|
||||||
ir_opt/ssa_rewrite_pass.cpp
|
ir_opt/ssa_rewrite_pass.cpp
|
||||||
ir_opt/texture_pass.cpp
|
ir_opt/texture_pass.cpp
|
||||||
ir_opt/verification_pass.cpp
|
ir_opt/verification_pass.cpp
|
||||||
|
@ -14,6 +14,8 @@ struct Bindings {
|
|||||||
u32 storage_buffer{};
|
u32 storage_buffer{};
|
||||||
u32 texture{};
|
u32 texture{};
|
||||||
u32 image{};
|
u32 image{};
|
||||||
|
u32 texture_scaling_index{};
|
||||||
|
u32 image_scaling_index{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Shader::Backend
|
} // namespace Shader::Backend
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include "shader_recompiler/backend/bindings.h"
|
#include "shader_recompiler/backend/bindings.h"
|
||||||
#include "shader_recompiler/backend/glasm/emit_context.h"
|
#include "shader_recompiler/backend/glasm/emit_context.h"
|
||||||
|
#include "shader_recompiler/backend/glasm/emit_glasm.h"
|
||||||
#include "shader_recompiler/frontend/ir/program.h"
|
#include "shader_recompiler/frontend/ir/program.h"
|
||||||
#include "shader_recompiler/profile.h"
|
#include "shader_recompiler/profile.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
@ -55,7 +56,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
|||||||
}
|
}
|
||||||
if (!runtime_info.glasm_use_storage_buffers) {
|
if (!runtime_info.glasm_use_storage_buffers) {
|
||||||
if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
|
if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
|
||||||
Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
|
const size_t index{num + PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE};
|
||||||
|
Add("PARAM c[{}]={{program.local[0..{}]}};", index, index - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage = program.stage;
|
stage = program.stage;
|
||||||
|
@ -448,6 +448,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I
|
|||||||
header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
|
header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
|
||||||
header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
|
header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
|
||||||
}
|
}
|
||||||
|
if (program.info.uses_rescaling_uniform) {
|
||||||
|
header += "PARAM scaling[1]={program.local[0..0]};";
|
||||||
|
}
|
||||||
header += "TEMP ";
|
header += "TEMP ";
|
||||||
for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
|
for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
|
||||||
header += fmt::format("R{},", index);
|
header += fmt::format("R{},", index);
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
|
|
||||||
namespace Shader::Backend::GLASM {
|
namespace Shader::Backend::GLASM {
|
||||||
|
|
||||||
|
// Offset added to storage buffer indices within the program.local parameter array.
// NOTE(review): slot 0 appears to be reserved for the `scaling` PARAM
// (program.local[0..0]) declared by EmitGLASM — confirm against the emitters.
constexpr u32 PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE = 1;
|
||||||
|
|
||||||
[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
|
[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
IR::Program& program, Bindings& bindings);
|
IR::Program& program, Bindings& bindings);
|
||||||
|
|
||||||
|
@ -608,6 +608,24 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re
|
|||||||
ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
|
ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits GLASM that tests bit `index` of scaling[0].x and defines `inst` as the
// "not equal to zero" result. Only immediate indices are supported.
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (index.IsImmediate()) {
        // One bit per texture inside the scaling word.
        const auto texture_bit = 1u << index.U32();
        ctx.Add("AND.U RC.x,scaling[0].x,{};"
                "SNE.S {},RC.x,0;",
                texture_bit, ctx.reg_alloc.Define(inst));
        return;
    }
    throw NotImplementedException("Non-constant texture rescaling");
}
|
||||||
|
|
||||||
|
// Emits GLASM that tests bit `index` of scaling[0].y and defines `inst` as the
// "not equal to zero" result. Only immediate indices are supported.
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (index.IsImmediate()) {
        // One bit per image inside the scaling word.
        const auto image_bit = 1u << index.U32();
        ctx.Add("AND.U RC.x,scaling[0].y,{};"
                "SNE.S {},RC.x,0;",
                image_bit, ctx.reg_alloc.Define(inst));
        return;
    }
    throw NotImplementedException("Non-constant texture rescaling");
}
|
||||||
|
|
||||||
void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
|
void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
|
||||||
ScalarU32 value) {
|
ScalarU32 value) {
|
||||||
ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
|
ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
|
||||||
|
@ -72,6 +72,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
|
|||||||
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
|
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
|
||||||
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
|
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
|
||||||
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
|
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
|
||||||
|
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst);
|
||||||
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
|
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
|
||||||
void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
|
void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
|
||||||
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
|
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
|
||||||
@ -303,6 +304,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
|
|||||||
void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
|
void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
|
||||||
void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
|
void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
|
||||||
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
|
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
|
||||||
|
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
|
||||||
|
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
|
||||||
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
|
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
|
||||||
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
|
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
|
||||||
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
|
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
|
||||||
@ -553,6 +556,8 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||||||
void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
|
void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
|
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
|
||||||
Register color);
|
Register color);
|
||||||
|
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
|
||||||
|
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
|
||||||
void EmitBindlessImageAtomicIAdd32(EmitContext&);
|
void EmitBindlessImageAtomicIAdd32(EmitContext&);
|
||||||
void EmitBindlessImageAtomicSMin32(EmitContext&);
|
void EmitBindlessImageAtomicSMin32(EmitContext&);
|
||||||
void EmitBindlessImageAtomicUMin32(EmitContext&);
|
void EmitBindlessImageAtomicUMin32(EmitContext&);
|
||||||
|
@ -90,6 +90,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
|
|||||||
ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
|
ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a GLASM signed division (DIV.S) of a by b, written to the x component of inst.
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
|
||||||
|
ctx.Add("DIV.S {}.x,{},{};", inst, a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a GLASM unsigned division (DIV.U) of a by b, written to the x component of inst.
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
|
||||||
|
ctx.Add("DIV.U {}.x,{},{};", inst, a, b);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
|
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
|
||||||
if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
|
if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
|
||||||
ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
|
ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
|
||||||
|
@ -210,6 +210,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
|
|||||||
ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
|
ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a GLASM float move of scaling[0].z into the x component of inst.
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) {
|
||||||
|
ctx.Add("MOV.F {}.x,scaling[0].z;", inst);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
|
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
|
||||||
ctx.Add("MOV.S {}.x,0;", inst);
|
ctx.Add("MOV.S {}.x,0;", inst);
|
||||||
}
|
}
|
||||||
|
@ -393,6 +393,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
|||||||
DefineGenericOutput(index, program.invocations);
|
DefineGenericOutput(index, program.invocations);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (info.uses_rescaling_uniform) {
|
||||||
|
header += "layout(location=0) uniform vec4 scaling;";
|
||||||
|
}
|
||||||
DefineConstantBuffers(bindings);
|
DefineConstantBuffers(bindings);
|
||||||
DefineStorageBuffers(bindings);
|
DefineStorageBuffers(bindings);
|
||||||
SetupImages(bindings);
|
SetupImages(bindings);
|
||||||
|
@ -445,6 +445,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
|
|||||||
ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
|
ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a GLSL float assignment of scaling.z to inst.
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) {
|
||||||
|
ctx.AddF32("{}=scaling.z;", inst);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
|
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
|
||||||
ctx.AddU32("{}=lmem[{}];", inst, word_offset);
|
ctx.AddU32("{}=lmem[{}];", inst, word_offset);
|
||||||
}
|
}
|
||||||
|
@ -612,6 +612,22 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value
|
|||||||
value);
|
value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a GLSL boolean that is true when bit `index` of ftou(scaling.x) is set.
// Only immediate indices are supported.
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (index.IsImmediate()) {
        // One bit per texture inside the scaling word.
        ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << index.U32());
        return;
    }
    throw NotImplementedException("Non-constant texture rescaling");
}
|
||||||
|
|
||||||
|
// Emits a GLSL boolean that is true when bit `index` of ftou(scaling.y) is set.
// Only immediate indices are supported.
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
    if (index.IsImmediate()) {
        // One bit per image inside the scaling word.
        ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << index.U32());
        return;
    }
    throw NotImplementedException("Non-constant texture rescaling");
}
|
||||||
|
|
||||||
void EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
void EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
||||||
NotImplemented();
|
NotImplemented();
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
|
|||||||
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
|
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
|
||||||
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
|
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
|
||||||
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
|
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
|
||||||
|
void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst);
|
||||||
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
|
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
|
||||||
void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
|
void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
|
||||||
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
|
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
|
||||||
@ -362,6 +363,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
|
|||||||
void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
||||||
void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
||||||
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
||||||
|
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
||||||
|
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
|
||||||
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
@ -627,6 +630,8 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||||||
std::string_view coords);
|
std::string_view coords);
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
||||||
std::string_view coords, std::string_view color);
|
std::string_view coords, std::string_view color);
|
||||||
|
void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
|
||||||
|
void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
|
||||||
void EmitBindlessImageAtomicIAdd32(EmitContext&);
|
void EmitBindlessImageAtomicIAdd32(EmitContext&);
|
||||||
void EmitBindlessImageAtomicSMin32(EmitContext&);
|
void EmitBindlessImageAtomicSMin32(EmitContext&);
|
||||||
void EmitBindlessImageAtomicUMin32(EmitContext&);
|
void EmitBindlessImageAtomicUMin32(EmitContext&);
|
||||||
|
@ -78,6 +78,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
|
|||||||
ctx.AddU32("{}=uint({}*{});", inst, a, b);
|
ctx.AddU32("{}=uint({}*{});", inst, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GLSL backend: signed 32-bit division. Both operands are cast to int, divided,
// and the quotient is cast back to uint; the formatted statement is handed to
// ctx.AddU32 together with the result instruction.
void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
|
||||||
|
ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// GLSL backend: unsigned 32-bit division. The operands are divided as-is (no
// casts) and the formatted statement is handed to ctx.AddU32 together with the
// result instruction.
void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
|
||||||
|
ctx.AddU32("{}={}/{};", inst, a, b);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
ctx.AddU32("{}=uint(-({}));", inst, value);
|
ctx.AddU32("{}=uint(-({}));", inst, value);
|
||||||
}
|
}
|
||||||
|
@ -7,11 +7,14 @@
|
|||||||
#include <climits>
|
#include <climits>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
|
||||||
|
#include <boost/container/static_vector.hpp>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/div_ceil.h"
|
#include "common/div_ceil.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_context.h"
|
#include "shader_recompiler/backend/spirv/emit_context.h"
|
||||||
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
namespace {
|
namespace {
|
||||||
@ -456,8 +459,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
|
|||||||
|
|
||||||
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
||||||
IR::Program& program, Bindings& bindings)
|
IR::Program& program, Bindings& bindings)
|
||||||
: Sirit::Module(profile_.supported_spirv), profile{profile_},
|
: Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_},
|
||||||
runtime_info{runtime_info_}, stage{program.stage} {
|
stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index},
|
||||||
|
image_rescaling_index{bindings.image_scaling_index} {
|
||||||
const bool is_unified{profile.unified_descriptor_binding};
|
const bool is_unified{profile.unified_descriptor_binding};
|
||||||
u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
|
u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
|
||||||
u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
|
u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
|
||||||
@ -474,10 +478,11 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
|
|||||||
DefineStorageBuffers(program.info, storage_binding);
|
DefineStorageBuffers(program.info, storage_binding);
|
||||||
DefineTextureBuffers(program.info, texture_binding);
|
DefineTextureBuffers(program.info, texture_binding);
|
||||||
DefineImageBuffers(program.info, image_binding);
|
DefineImageBuffers(program.info, image_binding);
|
||||||
DefineTextures(program.info, texture_binding);
|
DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
|
||||||
DefineImages(program.info, image_binding);
|
DefineImages(program.info, image_binding, bindings.image_scaling_index);
|
||||||
DefineAttributeMemAccess(program.info);
|
DefineAttributeMemAccess(program.info);
|
||||||
DefineGlobalMemoryFunctions(program.info);
|
DefineGlobalMemoryFunctions(program.info);
|
||||||
|
DefineRescalingInput(program.info);
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitContext::~EmitContext() = default;
|
EmitContext::~EmitContext() = default;
|
||||||
@ -920,6 +925,73 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
|
|||||||
define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
|
define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitContext::DefineRescalingInput(const Info& info) {
|
||||||
|
if (!info.uses_rescaling_uniform) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (profile.unified_descriptor_binding) {
|
||||||
|
DefineRescalingInputPushConstant();
|
||||||
|
} else {
|
||||||
|
DefineRescalingInputUniformConstant();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitContext::DefineRescalingInputPushConstant() {
|
||||||
|
boost::container::static_vector<Id, 3> members{};
|
||||||
|
u32 member_index{0};
|
||||||
|
|
||||||
|
rescaling_textures_type = TypeArray(U32[1], Const(4u));
|
||||||
|
Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u);
|
||||||
|
members.push_back(rescaling_textures_type);
|
||||||
|
rescaling_textures_member_index = member_index++;
|
||||||
|
|
||||||
|
rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS));
|
||||||
|
Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u);
|
||||||
|
members.push_back(rescaling_images_type);
|
||||||
|
rescaling_images_member_index = member_index++;
|
||||||
|
|
||||||
|
if (stage != Stage::Compute) {
|
||||||
|
members.push_back(F32[1]);
|
||||||
|
rescaling_downfactor_member_index = member_index++;
|
||||||
|
}
|
||||||
|
const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))};
|
||||||
|
Decorate(push_constant_struct, spv::Decoration::Block);
|
||||||
|
Name(push_constant_struct, "ResolutionInfo");
|
||||||
|
|
||||||
|
MemberDecorate(push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset,
|
||||||
|
static_cast<u32>(offsetof(RescalingLayout, rescaling_textures)));
|
||||||
|
MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures");
|
||||||
|
|
||||||
|
MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset,
|
||||||
|
static_cast<u32>(offsetof(RescalingLayout, rescaling_images)));
|
||||||
|
MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images");
|
||||||
|
|
||||||
|
if (stage != Stage::Compute) {
|
||||||
|
MemberDecorate(push_constant_struct, rescaling_downfactor_member_index,
|
||||||
|
spv::Decoration::Offset,
|
||||||
|
static_cast<u32>(offsetof(RescalingLayout, down_factor)));
|
||||||
|
MemberName(push_constant_struct, rescaling_downfactor_member_index, "down_factor");
|
||||||
|
}
|
||||||
|
const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)};
|
||||||
|
rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant);
|
||||||
|
Name(rescaling_push_constants, "rescaling_push_constants");
|
||||||
|
|
||||||
|
if (profile.supported_spirv >= 0x00010400) {
|
||||||
|
interfaces.push_back(rescaling_push_constants);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitContext::DefineRescalingInputUniformConstant() {
|
||||||
|
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, F32[4])};
|
||||||
|
rescaling_uniform_constant =
|
||||||
|
AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant);
|
||||||
|
Decorate(rescaling_uniform_constant, spv::Decoration::Location, 0u);
|
||||||
|
|
||||||
|
if (profile.supported_spirv >= 0x00010400) {
|
||||||
|
interfaces.push_back(rescaling_uniform_constant);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
|
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
|
||||||
if (info.constant_buffer_descriptors.empty()) {
|
if (info.constant_buffer_descriptors.empty()) {
|
||||||
return;
|
return;
|
||||||
@ -1108,7 +1180,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineTextures(const Info& info, u32& binding) {
|
void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
|
||||||
textures.reserve(info.texture_descriptors.size());
|
textures.reserve(info.texture_descriptors.size());
|
||||||
for (const TextureDescriptor& desc : info.texture_descriptors) {
|
for (const TextureDescriptor& desc : info.texture_descriptors) {
|
||||||
const Id image_type{ImageType(*this, desc)};
|
const Id image_type{ImageType(*this, desc)};
|
||||||
@ -1130,13 +1202,14 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
|
|||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
++binding;
|
++binding;
|
||||||
|
++scaling_index;
|
||||||
}
|
}
|
||||||
if (info.uses_atomic_image_u32) {
|
if (info.uses_atomic_image_u32) {
|
||||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineImages(const Info& info, u32& binding) {
|
void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) {
|
||||||
images.reserve(info.image_descriptors.size());
|
images.reserve(info.image_descriptors.size());
|
||||||
for (const ImageDescriptor& desc : info.image_descriptors) {
|
for (const ImageDescriptor& desc : info.image_descriptors) {
|
||||||
if (desc.count != 1) {
|
if (desc.count != 1) {
|
||||||
@ -1157,6 +1230,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) {
|
|||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
++binding;
|
++binding;
|
||||||
|
++scaling_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,6 +235,16 @@ public:
|
|||||||
Id indexed_load_func{};
|
Id indexed_load_func{};
|
||||||
Id indexed_store_func{};
|
Id indexed_store_func{};
|
||||||
|
|
||||||
|
Id rescaling_uniform_constant{};
|
||||||
|
Id rescaling_push_constants{};
|
||||||
|
Id rescaling_textures_type{};
|
||||||
|
Id rescaling_images_type{};
|
||||||
|
u32 rescaling_textures_member_index{};
|
||||||
|
u32 rescaling_images_member_index{};
|
||||||
|
u32 rescaling_downfactor_member_index{};
|
||||||
|
u32 texture_rescaling_index{};
|
||||||
|
u32 image_rescaling_index{};
|
||||||
|
|
||||||
Id local_memory{};
|
Id local_memory{};
|
||||||
|
|
||||||
Id shared_memory_u8{};
|
Id shared_memory_u8{};
|
||||||
@ -299,10 +309,13 @@ private:
|
|||||||
void DefineStorageBuffers(const Info& info, u32& binding);
|
void DefineStorageBuffers(const Info& info, u32& binding);
|
||||||
void DefineTextureBuffers(const Info& info, u32& binding);
|
void DefineTextureBuffers(const Info& info, u32& binding);
|
||||||
void DefineImageBuffers(const Info& info, u32& binding);
|
void DefineImageBuffers(const Info& info, u32& binding);
|
||||||
void DefineTextures(const Info& info, u32& binding);
|
void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
|
||||||
void DefineImages(const Info& info, u32& binding);
|
void DefineImages(const Info& info, u32& binding, u32& scaling_index);
|
||||||
void DefineAttributeMemAccess(const Info& info);
|
void DefineAttributeMemAccess(const Info& info);
|
||||||
void DefineGlobalMemoryFunctions(const Info& info);
|
void DefineGlobalMemoryFunctions(const Info& info);
|
||||||
|
void DefineRescalingInput(const Info& info);
|
||||||
|
void DefineRescalingInputPushConstant();
|
||||||
|
void DefineRescalingInputUniformConstant();
|
||||||
|
|
||||||
void DefineInputs(const IR::Program& program);
|
void DefineInputs(const IR::Program& program);
|
||||||
void DefineOutputs(const IR::Program& program);
|
void DefineOutputs(const IR::Program& program);
|
||||||
|
@ -16,6 +16,19 @@
|
|||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
|
constexpr u32 NUM_TEXTURE_SCALING_WORDS = 4;
|
||||||
|
constexpr u32 NUM_IMAGE_SCALING_WORDS = 2;
|
||||||
|
constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
|
||||||
|
NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS;
|
||||||
|
|
||||||
|
// Layout of the rescaling data. The offsets of these members (via offsetof) are
// used to decorate the members of the SPIR-V push constant block declared by
// EmitContext::DefineRescalingInputPushConstant. Every member is explicitly
// aligned to 16 bytes.
struct RescalingLayout {
|
||||||
|
// Scaling words for textures (NUM_TEXTURE_SCALING_WORDS 32-bit words).
alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
|
||||||
|
// Scaling words for images (NUM_IMAGE_SCALING_WORDS 32-bit words).
alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
|
||||||
|
// NOTE(review): declared as u32 here, while the matching SPIR-V struct member
// is declared as a 32-bit float; presumably this holds the bit pattern of a
// float - confirm against the code that fills this struct.
alignas(16) u32 down_factor;
|
||||||
|
};
|
||||||
|
constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
|
||||||
|
constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
|
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
IR::Program& program, Bindings& bindings);
|
IR::Program& program, Bindings& bindings);
|
||||||
|
|
||||||
|
@ -542,6 +542,18 @@ Id EmitYDirection(EmitContext& ctx) {
|
|||||||
return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
|
return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Loads the resolution down factor as a 32-bit float.
/// Without unified descriptor bindings it is component 2 of the rescaling uniform constant;
/// otherwise it is read from the down factor member of the rescaling push constant block.
Id EmitResolutionDownFactor(EmitContext& ctx) {
    if (!ctx.profile.unified_descriptor_binding) {
        const Id rescaling_vector{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
        return ctx.OpCompositeExtract(ctx.F32[1], rescaling_vector, 2u);
    }
    const Id f32_pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])};
    const Id member{ctx.Const(ctx.rescaling_downfactor_member_index)};
    const Id address{ctx.OpAccessChain(f32_pointer_type, ctx.rescaling_push_constants, member)};
    return ctx.OpLoad(ctx.F32[1], address);
}
|
||||||
|
|
||||||
Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
|
Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
|
||||||
const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
|
const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
|
||||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||||
|
@ -224,6 +224,36 @@ Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx
|
|||||||
Decorate(ctx, inst, sample);
|
Decorate(ctx, inst, sample);
|
||||||
return ctx.OpCompositeExtract(result_type, sample, 1U);
|
return ctx.OpCompositeExtract(result_type, sample, 1U);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tests the scaling bit of a texture or image in the rescaling push constant block.
///
/// @param ctx          Emit context that owns the push constant block.
/// @param index        Descriptor index, either immediate or computed at runtime.
/// @param member_index Constant id of the struct member holding the scaling words to test.
/// @param base_index   Offset added to index before it is split into word and bit position.
/// @return Boolean id that is true when the selected bit is set.
///
/// The bit for entry N lives in word N / 32 at bit position N % 32. Both the immediate and the
/// dynamic path load that word from the push constants before testing the bit.
Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_index) {
    const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])};
    Id bit{};
    if (index.IsImmediate()) {
        // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL.
        // LOP32I.NZ is used to set the predicate rather than BFE+ISETP.
        const u32 index_value{index.U32() + base_index};
        const Id word_index{ctx.Const(index_value / 32)};
        const Id bit_index_mask{ctx.Const(1u << (index_value % 32))};
        const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants,
                                           member_index, word_index)};
        const Id word{ctx.OpLoad(ctx.U32[1], pointer)};
        bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask);
    } else {
        Id index_value{ctx.Def(index)};
        if (base_index != 0) {
            index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index));
        }
        // Select the 32-bit word that contains the requested bit and load it, mirroring the
        // immediate path. The bit must be extracted from this word, not from the index itself.
        const Id word_index{ctx.OpShiftRightLogical(ctx.U32[1], index_value, ctx.Const(5u))};
        const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants,
                                           member_index, word_index)};
        const Id word{ctx.OpLoad(ctx.U32[1], pointer)};
        const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))};
        bit = ctx.OpBitFieldUExtract(ctx.U32[1], word, bit_index, ctx.Const(1u));
    }
    return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value);
}
|
||||||
|
|
||||||
|
/// Returns a boolean id that is true when bit number `bit` of `mask` is set.
/// The mask is shifted right by `bit`, masked with 1 and compared against zero.
Id BitTest(EmitContext& ctx, Id mask, Id bit) {
    const Id moved_to_lsb{ctx.OpShiftRightLogical(ctx.U32[1], mask, bit)};
    const Id lowest_bit{ctx.OpBitwiseAnd(ctx.U32[1], moved_to_lsb, ctx.Const(1u))};
    return ctx.OpINotEqual(ctx.U1, lowest_bit, ctx.u32_zero_value);
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
||||||
@ -470,4 +500,28 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
|
|||||||
ctx.OpImageWrite(Image(ctx, index, info), coords, color);
|
ctx.OpImageWrite(Image(ctx, index, info), coords, color);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a test for whether the texture at `index` is flagged as scaled.
/// Without unified descriptor bindings the mask is component 0 of the rescaling uniform
/// constant, reinterpreted as u32; otherwise the texture words of the push constants are used.
Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) {
    if (!ctx.profile.unified_descriptor_binding) {
        const Id rescaling_vector{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
        const Id mask_as_float{ctx.OpCompositeExtract(ctx.F32[1], rescaling_vector, 0u)};
        const Id texture_mask{ctx.OpBitcast(ctx.U32[1], mask_as_float)};
        return BitTest(ctx, texture_mask, ctx.Def(index));
    }
    const Id textures_member{ctx.Const(ctx.rescaling_textures_member_index)};
    return IsScaled(ctx, index, textures_member, ctx.texture_rescaling_index);
}
|
||||||
|
|
||||||
|
/// Emits a test for whether the image at `index` is flagged as scaled.
/// Without unified descriptor bindings the mask is component 1 of the rescaling uniform
/// constant, reinterpreted as u32; otherwise the image words of the push constants are used.
Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index) {
    if (!ctx.profile.unified_descriptor_binding) {
        const Id rescaling_vector{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
        const Id mask_as_float{ctx.OpCompositeExtract(ctx.F32[1], rescaling_vector, 1u)};
        const Id image_mask{ctx.OpBitcast(ctx.U32[1], mask_as_float)};
        return BitTest(ctx, image_mask, ctx.Def(index));
    }
    const Id images_member{ctx.Const(ctx.rescaling_images_member_index)};
    return IsScaled(ctx, index, images_member, ctx.image_rescaling_index);
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
@ -75,6 +75,7 @@ Id EmitInvocationId(EmitContext& ctx);
|
|||||||
Id EmitSampleId(EmitContext& ctx);
|
Id EmitSampleId(EmitContext& ctx);
|
||||||
Id EmitIsHelperInvocation(EmitContext& ctx);
|
Id EmitIsHelperInvocation(EmitContext& ctx);
|
||||||
Id EmitYDirection(EmitContext& ctx);
|
Id EmitYDirection(EmitContext& ctx);
|
||||||
|
Id EmitResolutionDownFactor(EmitContext& ctx);
|
||||||
Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
|
Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
|
||||||
void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
|
void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
|
||||||
Id EmitUndefU1(EmitContext& ctx);
|
Id EmitUndefU1(EmitContext& ctx);
|
||||||
@ -283,6 +284,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
|
|||||||
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
||||||
Id EmitISub64(EmitContext& ctx, Id a, Id b);
|
Id EmitISub64(EmitContext& ctx, Id a, Id b);
|
||||||
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
|
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
|
||||||
|
Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
|
||||||
|
Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
|
||||||
Id EmitINeg32(EmitContext& ctx, Id value);
|
Id EmitINeg32(EmitContext& ctx, Id value);
|
||||||
Id EmitINeg64(EmitContext& ctx, Id value);
|
Id EmitINeg64(EmitContext& ctx, Id value);
|
||||||
Id EmitIAbs32(EmitContext& ctx, Id value);
|
Id EmitIAbs32(EmitContext& ctx, Id value);
|
||||||
@ -510,6 +513,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
|||||||
Id derivates, Id offset, Id lod_clamp);
|
Id derivates, Id offset, Id lod_clamp);
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
|
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
|
||||||
|
Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
|
||||||
|
Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index);
|
||||||
Id EmitBindlessImageAtomicIAdd32(EmitContext&);
|
Id EmitBindlessImageAtomicIAdd32(EmitContext&);
|
||||||
Id EmitBindlessImageAtomicSMin32(EmitContext&);
|
Id EmitBindlessImageAtomicSMin32(EmitContext&);
|
||||||
Id EmitBindlessImageAtomicUMin32(EmitContext&);
|
Id EmitBindlessImageAtomicUMin32(EmitContext&);
|
||||||
|
@ -72,6 +72,14 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
|
|||||||
return ctx.OpIMul(ctx.U32[1], a, b);
|
return ctx.OpIMul(ctx.U32[1], a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SPIR-V backend: returns the id of an OpSDiv (signed division) of a by b,
// with ctx.U32[1] as the result type.
Id EmitSDiv32(EmitContext& ctx, Id a, Id b) {
|
||||||
|
return ctx.OpSDiv(ctx.U32[1], a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// SPIR-V backend: returns the id of an OpUDiv (unsigned division) of a by b,
// with ctx.U32[1] as the result type.
Id EmitUDiv32(EmitContext& ctx, Id a, Id b) {
|
||||||
|
return ctx.OpUDiv(ctx.U32[1], a, b);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitINeg32(EmitContext& ctx, Id value) {
|
Id EmitINeg32(EmitContext& ctx, Id value) {
|
||||||
return ctx.OpSNegate(ctx.U32[1], value);
|
return ctx.OpSNegate(ctx.U32[1], value);
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,11 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
|
|||||||
PrependNewInst(end(), op, args);
|
PrependNewInst(end(), op, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a copy of `base_inst` in the instruction pool and inserts it into this block
/// immediately before `insertion_point`. Returns an iterator to the inserted instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) {
    Inst& copied_inst{*inst_pool->Create(base_inst)};
    return instructions.insert(insertion_point, copied_inst);
}
|
||||||
|
|
||||||
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
|
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
|
||||||
std::initializer_list<Value> args, u32 flags) {
|
std::initializer_list<Value> args, u32 flags) {
|
||||||
Inst* const inst{inst_pool->Create(op, flags)};
|
Inst* const inst{inst_pool->Create(op, flags)};
|
||||||
|
@ -40,6 +40,9 @@ public:
|
|||||||
/// Appends a new instruction to the end of this basic block.
|
/// Appends a new instruction to the end of this basic block.
|
||||||
void AppendNewInst(Opcode op, std::initializer_list<Value> args);
|
void AppendNewInst(Opcode op, std::initializer_list<Value> args);
|
||||||
|
|
||||||
|
/// Prepends a copy of an instruction to this basic block before the insertion point.
|
||||||
|
iterator PrependNewInst(iterator insertion_point, const Inst& base_inst);
|
||||||
|
|
||||||
/// Prepends a new instruction to this basic block before the insertion point.
|
/// Prepends a new instruction to this basic block before the insertion point.
|
||||||
iterator PrependNewInst(iterator insertion_point, Opcode op,
|
iterator PrependNewInst(iterator insertion_point, Opcode op,
|
||||||
std::initializer_list<Value> args = {}, u32 flags = 0);
|
std::initializer_list<Value> args = {}, u32 flags = 0);
|
||||||
|
@ -375,6 +375,10 @@ F32 IREmitter::YDirection() {
|
|||||||
return Inst<F32>(Opcode::YDirection);
|
return Inst<F32>(Opcode::YDirection);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a ResolutionDownFactor instruction (no operands) and returns its F32 result.
F32 IREmitter::ResolutionDownFactor() {
|
||||||
|
return Inst<F32>(Opcode::ResolutionDownFactor);
|
||||||
|
}
|
||||||
|
|
||||||
U32 IREmitter::LaneId() {
|
U32 IREmitter::LaneId() {
|
||||||
return Inst<U32>(Opcode::LaneId);
|
return Inst<U32>(Opcode::LaneId);
|
||||||
}
|
}
|
||||||
@ -1141,6 +1145,10 @@ U32 IREmitter::IMul(const U32& a, const U32& b) {
|
|||||||
return Inst<U32>(Opcode::IMul32, a, b);
|
return Inst<U32>(Opcode::IMul32, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a 32-bit integer division of `a` by `b`.
/// SDiv32 is emitted when `is_signed` is true, UDiv32 otherwise.
U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) {
    if (is_signed) {
        return Inst<U32>(Opcode::SDiv32, a, b);
    }
    return Inst<U32>(Opcode::UDiv32, a, b);
}
|
||||||
|
|
||||||
U32U64 IREmitter::INeg(const U32U64& value) {
|
U32U64 IREmitter::INeg(const U32U64& value) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
@ -1938,6 +1946,14 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
|
|||||||
return Inst(op, Flags{info}, handle, coords, value);
|
return Inst(op, Flags{info}, handle, coords, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits an IsTextureScaled instruction for the given index and returns its U1 result.
U1 IREmitter::IsTextureScaled(const U32& index) {
|
||||||
|
return Inst<U1>(Opcode::IsTextureScaled, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits an IsImageScaled instruction for the given index and returns its U1 result.
U1 IREmitter::IsImageScaled(const U32& index) {
|
||||||
|
return Inst<U1>(Opcode::IsImageScaled, index);
|
||||||
|
}
|
||||||
|
|
||||||
U1 IREmitter::VoteAll(const U1& value) {
|
U1 IREmitter::VoteAll(const U1& value) {
|
||||||
return Inst<U1>(Opcode::VoteAll, value);
|
return Inst<U1>(Opcode::VoteAll, value);
|
||||||
}
|
}
|
||||||
|
@ -102,6 +102,8 @@ public:
|
|||||||
[[nodiscard]] U1 IsHelperInvocation();
|
[[nodiscard]] U1 IsHelperInvocation();
|
||||||
[[nodiscard]] F32 YDirection();
|
[[nodiscard]] F32 YDirection();
|
||||||
|
|
||||||
|
[[nodiscard]] F32 ResolutionDownFactor();
|
||||||
|
|
||||||
[[nodiscard]] U32 LaneId();
|
[[nodiscard]] U32 LaneId();
|
||||||
|
|
||||||
[[nodiscard]] U32 LoadGlobalU8(const U64& address);
|
[[nodiscard]] U32 LoadGlobalU8(const U64& address);
|
||||||
@ -207,6 +209,7 @@ public:
|
|||||||
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
|
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
|
||||||
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
|
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
|
||||||
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
|
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
|
||||||
|
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
|
||||||
[[nodiscard]] U32U64 INeg(const U32U64& value);
|
[[nodiscard]] U32U64 INeg(const U32U64& value);
|
||||||
[[nodiscard]] U32 IAbs(const U32& value);
|
[[nodiscard]] U32 IAbs(const U32& value);
|
||||||
[[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
|
[[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
|
||||||
@ -356,6 +359,10 @@ public:
|
|||||||
TextureInstInfo info);
|
TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
|
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
|
||||||
const Value& value, TextureInstInfo info);
|
const Value& value, TextureInstInfo info);
|
||||||
|
|
||||||
|
[[nodiscard]] U1 IsTextureScaled(const U32& index);
|
||||||
|
[[nodiscard]] U1 IsImageScaled(const U32& index);
|
||||||
|
|
||||||
[[nodiscard]] U1 VoteAll(const U1& value);
|
[[nodiscard]] U1 VoteAll(const U1& value);
|
||||||
[[nodiscard]] U1 VoteAny(const U1& value);
|
[[nodiscard]] U1 VoteAny(const U1& value);
|
||||||
[[nodiscard]] U1 VoteEqual(const U1& value);
|
[[nodiscard]] U1 VoteEqual(const U1& value);
|
||||||
|
@ -46,6 +46,17 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copy-constructs an instruction with the opcode, flags and arguments of `base`.
/// Phi nodes are rejected with NotImplementedException. For every other opcode the
/// argument storage is constructed in place and each argument is copied through SetArg.
Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} {
    const bool copying_phi{base.op == Opcode::Phi};
    if (copying_phi) {
        throw NotImplementedException("Copying phi node");
    }
    std::construct_at(&args);
    const size_t arg_count{base.NumArgs()};
    for (size_t arg_index{}; arg_index < arg_count; ++arg_index) {
        SetArg(arg_index, base.Arg(arg_index));
    }
}
|
||||||
|
|
||||||
Inst::~Inst() {
|
Inst::~Inst() {
|
||||||
if (op == Opcode::Phi) {
|
if (op == Opcode::Phi) {
|
||||||
std::destroy_at(&phi_args);
|
std::destroy_at(&phi_args);
|
||||||
|
@ -62,6 +62,7 @@ OPCODE(InvocationId, U32,
|
|||||||
OPCODE(SampleId, U32, )
|
OPCODE(SampleId, U32, )
|
||||||
OPCODE(IsHelperInvocation, U1, )
|
OPCODE(IsHelperInvocation, U1, )
|
||||||
OPCODE(YDirection, F32, )
|
OPCODE(YDirection, F32, )
|
||||||
|
OPCODE(ResolutionDownFactor, F32, )
|
||||||
|
|
||||||
// Undefined
|
// Undefined
|
||||||
OPCODE(UndefU1, U1, )
|
OPCODE(UndefU1, U1, )
|
||||||
@ -286,6 +287,8 @@ OPCODE(IAdd64, U64, U64,
|
|||||||
OPCODE(ISub32, U32, U32, U32, )
|
OPCODE(ISub32, U32, U32, U32, )
|
||||||
OPCODE(ISub64, U64, U64, U64, )
|
OPCODE(ISub64, U64, U64, U64, )
|
||||||
OPCODE(IMul32, U32, U32, U32, )
|
OPCODE(IMul32, U32, U32, U32, )
|
||||||
|
OPCODE(SDiv32, U32, U32, U32, )
|
||||||
|
OPCODE(UDiv32, U32, U32, U32, )
|
||||||
OPCODE(INeg32, U32, U32, )
|
OPCODE(INeg32, U32, U32, )
|
||||||
OPCODE(INeg64, U64, U64, )
|
OPCODE(INeg64, U64, U64, )
|
||||||
OPCODE(IAbs32, U32, U32, )
|
OPCODE(IAbs32, U32, U32, )
|
||||||
@ -490,6 +493,9 @@ OPCODE(ImageGradient, F32x4, Opaq
|
|||||||
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
||||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
||||||
|
|
||||||
|
OPCODE(IsTextureScaled, U1, U32, )
|
||||||
|
OPCODE(IsImageScaled, U1, U32, )
|
||||||
|
|
||||||
// Atomic Image operations
|
// Atomic Image operations
|
||||||
|
|
||||||
OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
|
OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
|
||||||
|
@ -116,10 +116,10 @@ public:
|
|||||||
class Inst : public boost::intrusive::list_base_hook<> {
|
class Inst : public boost::intrusive::list_base_hook<> {
|
||||||
public:
|
public:
|
||||||
explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
|
explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
|
||||||
|
explicit Inst(const Inst& base);
|
||||||
~Inst();
|
~Inst();
|
||||||
|
|
||||||
Inst& operator=(const Inst&) = delete;
|
Inst& operator=(const Inst&) = delete;
|
||||||
Inst(const Inst&) = delete;
|
|
||||||
|
|
||||||
Inst& operator=(Inst&&) = delete;
|
Inst& operator=(Inst&&) = delete;
|
||||||
Inst(Inst&&) = delete;
|
Inst(Inst&&) = delete;
|
||||||
|
@ -177,6 +177,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||||||
Optimization::TexturePass(env, program);
|
Optimization::TexturePass(env, program);
|
||||||
|
|
||||||
Optimization::ConstantPropagationPass(program);
|
Optimization::ConstantPropagationPass(program);
|
||||||
|
|
||||||
|
if (Settings::values.resolution_info.active) {
|
||||||
|
Optimization::RescalingPass(program);
|
||||||
|
}
|
||||||
Optimization::DeadCodeEliminationPass(program);
|
Optimization::DeadCodeEliminationPass(program);
|
||||||
if (Settings::values.renderer_debug) {
|
if (Settings::values.renderer_debug) {
|
||||||
Optimization::VerificationPass(program);
|
Optimization::VerificationPass(program);
|
||||||
|
@ -430,6 +430,11 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||||||
case IR::Opcode::IsHelperInvocation:
|
case IR::Opcode::IsHelperInvocation:
|
||||||
info.uses_is_helper_invocation = true;
|
info.uses_is_helper_invocation = true;
|
||||||
break;
|
break;
|
||||||
|
case IR::Opcode::ResolutionDownFactor:
|
||||||
|
case IR::Opcode::IsTextureScaled:
|
||||||
|
case IR::Opcode::IsImageScaled:
|
||||||
|
info.uses_rescaling_uniform = true;
|
||||||
|
break;
|
||||||
case IR::Opcode::LaneId:
|
case IR::Opcode::LaneId:
|
||||||
info.uses_subgroup_invocation_id = true;
|
info.uses_subgroup_invocation_id = true;
|
||||||
break;
|
break;
|
||||||
|
@ -19,6 +19,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
|||||||
void IdentityRemovalPass(IR::Program& program);
|
void IdentityRemovalPass(IR::Program& program);
|
||||||
void LowerFp16ToFp32(IR::Program& program);
|
void LowerFp16ToFp32(IR::Program& program);
|
||||||
void LowerInt64ToInt32(IR::Program& program);
|
void LowerInt64ToInt32(IR::Program& program);
|
||||||
|
void RescalingPass(IR::Program& program);
|
||||||
void SsaRewritePass(IR::Program& program);
|
void SsaRewritePass(IR::Program& program);
|
||||||
void TexturePass(Environment& env, IR::Program& program);
|
void TexturePass(Environment& env, IR::Program& program);
|
||||||
void VerificationPass(const IR::Program& program);
|
void VerificationPass(const IR::Program& program);
|
||||||
|
295
src/shader_recompiler/ir_opt/rescaling_pass.cpp
Executable file
295
src/shader_recompiler/ir_opt/rescaling_pass.cpp
Executable file
@ -0,0 +1,295 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/alignment.h"
|
||||||
|
#include "common/settings.h"
|
||||||
|
#include "shader_recompiler/environment.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/program.h"
|
||||||
|
#include "shader_recompiler/frontend/ir/value.h"
|
||||||
|
#include "shader_recompiler/ir_opt/passes.h"
|
||||||
|
#include "shader_recompiler/shader_info.h"
|
||||||
|
|
||||||
|
namespace Shader::Optimization {
|
||||||
|
namespace {
|
||||||
|
// Reports whether texture accesses of the given type are rewritten by this pass.
// Only the two-dimensional color types (plain and array) qualify; every other
// enumerator is reported as not rescalable.
[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) {
    switch (type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        return false;
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
        return true;
    }
    // Only reachable for a value that matches none of the enumerators above.
    return false;
}
|
||||||
|
|
||||||
|
// Tags fragment-position reads that feed a subgroup shuffle.
//
// For ShuffleIndex/ShuffleUp/ShuffleDown/ShuffleButterfly, operand 0 is followed
// through a BitCastU32F32 to the instruction that produced the float. When that
// producer is a GetAttribute of PositionX or PositionY, its flags are overwritten
// with the 0xDEADBEEF marker, which Visit() checks before patching the read.
void VisitMark(const IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    const bool is_shuffle{opcode == IR::Opcode::ShuffleIndex ||
                          opcode == IR::Opcode::ShuffleUp ||
                          opcode == IR::Opcode::ShuffleDown ||
                          opcode == IR::Opcode::ShuffleButterfly};
    if (!is_shuffle) {
        return;
    }
    const IR::Value shuffled{inst.Arg(0)};
    if (shuffled.IsImmediate()) {
        return;
    }
    const IR::Inst* const bitcast{shuffled.InstRecursive()};
    if (bitcast->GetOpcode() != IR::Opcode::BitCastU32F32) {
        return;
    }
    const IR::Value source{bitcast->Arg(0)};
    if (source.IsImmediate()) {
        return;
    }
    IR::Inst* const producer{source.InstRecursive()};
    if (producer->GetOpcode() != IR::Opcode::GetAttribute) {
        return;
    }
    const IR::Attribute attr{producer->Arg(0).Attribute()};
    if (attr == IR::Attribute::PositionX || attr == IR::Attribute::PositionY) {
        producer->SetFlags<u32>(0xDEADBEEF);
    }
}
|
||||||
|
|
||||||
|
// Redirects every use of a fragment-position read to the same attribute
// multiplied by the resolution down factor.
//
// The replacement is built from a newly emitted GetAttribute rather than from
// `inst` itself, so `inst` is not an operand of the value its uses are
// redirected to.
void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
    const auto insertion_point{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, insertion_point};
    const IR::F32 factor{ir.ResolutionDownFactor()};
    const IR::Attribute attribute{inst.Arg(0).Attribute()};
    const IR::F32 raw_coord{ir.GetAttribute(attribute)};
    const IR::F32 native_coord{ir.FPMul(raw_coord, factor)};
    inst.ReplaceUsesWith(native_coord);
}
|
||||||
|
|
||||||
|
// Emits the upscaled form of an integer value and selects between it and the
// untouched value depending on `is_scaled`.
//
// The upscaled form is `value * up_scale` followed by an arithmetic right shift
// by `down_shift`; both settings come from Settings::values.resolution_info.
// A multiply by 1 or a shift by 0 is not emitted at all.
[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const auto& resolution{Settings::values.resolution_info};
    const u32 up_scale = resolution.up_scale;
    const u32 down_shift = resolution.down_shift;

    IR::U32 result{value};
    if (up_scale != 1) {
        result = ir.IMul(result, ir.Imm32(up_scale));
    }
    if (down_shift != 0) {
        result = ir.ShiftRightArithmetic(result, ir.Imm32(down_shift));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
|
||||||
|
|
||||||
|
// Emits a scaled integer coordinate that also carries the fragment's offset
// inside its upscaled pixel, and selects it when `is_scaled` is true.
//
// With f = the fragment attribute `attrib`, the emitted value is
//     value * up_factor + (f - up_factor * floor(f * down_factor))
// converted back to an unsigned 32-bit integer. Both factors are embedded as
// immediates read from Settings::values.resolution_info.
[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
                               const IR::Attribute attrib) {
    const auto& resolution{Settings::values.resolution_info};
    const IR::F32 up_factor{ir.Imm32(resolution.up_factor)};
    const IR::F32 down_factor{ir.Imm32(resolution.down_factor)};

    // Scaled origin of the requested texel.
    const IR::F16F32F64 value_as_float{ir.ConvertUToF(32, 32, value)};
    const IR::F32 base{ir.FPMul(value_as_float, up_factor)};

    // Fragment position, and that position snapped to the unscaled pixel grid.
    const IR::F32 frag_coord{ir.GetAttribute(attrib)};
    const IR::F16F32F64 unscaled_pixel{ir.FPFloor(ir.FPMul(frag_coord, down_factor))};
    const IR::F32 floor{ir.FPMul(up_factor, unscaled_pixel)};

    // Offset of the fragment inside its upscaled pixel, added to the origin.
    const IR::F16F32F64 sub_offset{ir.FPAdd(frag_coord, ir.FPNeg(floor))};
    const IR::F16F32F64 deviation{ir.FPAdd(base, sub_offset)};
    return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)};
}
|
||||||
|
|
||||||
|
// Inverse of Scale(): emits `(value << down_shift) / up_scale` and selects it
// over the untouched value when `is_scaled` is true.
//
// Both settings come from Settings::values.resolution_info. A shift by 0 or a
// division by 1 is not emitted at all.
[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const auto& resolution{Settings::values.resolution_info};
    const u32 down_shift = resolution.down_shift;
    const u32 up_scale = resolution.up_scale;

    IR::U32 result{value};
    if (down_shift != 0) {
        result = ir.ShiftLeftLogical(result, ir.Imm32(down_shift));
    }
    if (up_scale != 1) {
        result = ir.IDiv(result, ir.Imm32(up_scale));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
|
||||||
|
|
||||||
|
// Rewrites an ImageQueryDimensions on a 2D or 2D-array texture so that callers
// see downscaled width and height.
//
// A copy of the query is placed in front of the original, components 0 and 1
// are passed through DownScale(), components 2 and 3 are forwarded untouched,
// and every use of the original instruction is redirected to the rebuilt
// four-component composite. Other texture types are left alone.
void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    // Check the type before emitting anything, like the other patchers in this
    // file, so non-2D queries do not gain an unused IsTextureScaled instruction.
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, it};
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    const IR::Value query{&*block.PrependNewInst(it, inst)};

    const IR::U32 raw_width{ir.CompositeExtract(query, 0)};
    const IR::U32 width{DownScale(ir, is_scaled, raw_width)};
    const IR::U32 raw_height{ir.CompositeExtract(query, 1)};
    const IR::U32 height{DownScale(ir, is_scaled, raw_height)};

    // Extract the pass-through components in separate statements. As arguments
    // of a single call their evaluation order is unspecified, so the order of
    // the emitted instructions would depend on the compiler.
    const IR::Value component_2{ir.CompositeExtract(query, 2)};
    const IR::Value component_3{ir.CompositeExtract(query, 3)};

    const IR::Value replacement{ir.CompositeConstruct(width, height, component_2, component_3)};
    inst.ReplaceUsesWith(replacement);
}
|
||||||
|
|
||||||
|
// Scales the x and y components of the integer composite stored in argument
// `index` of `inst` and writes the rebuilt composite back into that argument.
//
// An empty argument is left alone. For ColorArray2D the third component is
// carried over unscaled. For every other non-2D texture type the argument is
// not replaced.
void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                           size_t index) {
    const IR::Value vector{inst.Arg(index)};
    if (vector.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 raw_x{ir.CompositeExtract(vector, 0)};
    const IR::U32 scaled_x{Scale(ir, is_scaled, raw_x)};
    const IR::U32 raw_y{ir.CompositeExtract(vector, 1)};
    const IR::U32 scaled_y{Scale(ir, is_scaled, raw_y)};
    switch (info.type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    case TextureType::Color2D:
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y));
        break;
    case TextureType::ColorArray2D: {
        const IR::U32 layer{ir.CompositeExtract(vector, 2)};
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y, layer));
        break;
    }
    }
}
|
||||||
|
|
||||||
|
// Replaces the coordinate composite in argument 1 of `inst` with one whose x
// and y went through SubScale(), using PositionX and PositionY respectively.
//
// For ColorArray2D the third component is carried over unchanged. For every
// other non-2D texture type the argument is not replaced.
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::Value coords{inst.Arg(1)};
    const IR::U32 raw_x{ir.CompositeExtract(coords, 0)};
    const IR::U32 raw_y{ir.CompositeExtract(coords, 1)};

    const IR::U32 new_x{SubScale(ir, is_scaled, raw_x, IR::Attribute::PositionX)};
    const IR::U32 new_y{SubScale(ir, is_scaled, raw_y, IR::Attribute::PositionY)};
    switch (info.type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    case TextureType::Color2D:
        inst.SetArg(1, ir.CompositeConstruct(new_x, new_y));
        break;
    case TextureType::ColorArray2D: {
        const IR::U32 layer{ir.CompositeExtract(coords, 2)};
        inst.SetArg(1, ir.CompositeConstruct(new_x, new_y, layer));
        break;
    }
    }
}
|
||||||
|
|
||||||
|
// ImageFetch patch used by Visit() for fragment shaders: coordinates go through
// SubScaleCoord(), the offset in argument 2 through ScaleIntegerComposite().
// Texture types that are not rescalable are left untouched.
void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::U32 descriptor{ir.Imm32(info.descriptor_index)};
    const IR::U1 is_scaled{ir.IsTextureScaled(descriptor)};
    SubScaleCoord(ir, inst, is_scaled);
    // Scale ImageFetch offset
    ScaleIntegerComposite(ir, inst, is_scaled, 2);
}
|
||||||
|
|
||||||
|
// ImageRead patch used by Visit() for fragment shaders: coordinates go through
// SubScaleCoord(), conditioned on IsImageScaled for the instruction's
// descriptor. Texture types that are not rescalable are left untouched.
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::U32 descriptor{ir.Imm32(info.descriptor_index)};
    const IR::U1 is_scaled{ir.IsImageScaled(descriptor)};
    SubScaleCoord(ir, inst, is_scaled);
}
|
||||||
|
|
||||||
|
// ImageFetch patch used by Visit() for non-fragment stages: both the coordinate
// composite (argument 1) and the offset composite (argument 2) are scaled with
// ScaleIntegerComposite(). Texture types that are not rescalable are skipped.
void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::U32 descriptor{ir.Imm32(info.descriptor_index)};
    const IR::U1 is_scaled{ir.IsTextureScaled(descriptor)};
    ScaleIntegerComposite(ir, inst, is_scaled, 1);
    // Scale ImageFetch offset
    ScaleIntegerComposite(ir, inst, is_scaled, 2);
}
|
||||||
|
|
||||||
|
// ImageRead patch used by Visit() for non-fragment stages: the coordinate
// composite in argument 1 is scaled with ScaleIntegerComposite(), conditioned
// on IsImageScaled. Texture types that are not rescalable are skipped.
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::U32 descriptor{ir.Imm32(info.descriptor_index)};
    const IR::U1 is_scaled{ir.IsImageScaled(descriptor)};
    ScaleIntegerComposite(ir, inst, is_scaled, 1);
}
|
||||||
|
|
||||||
|
// Dispatches a single instruction to the matching rescaling patch.
//
// - GetAttribute of PositionX/PositionY: patched only in fragment shaders, and
//   only when the instruction does not carry the 0xDEADBEEF marker set by
//   VisitMark().
// - ImageQueryDimensions: always handed to PatchImageQueryDimensions().
// - ImageFetch / ImageRead: fragment shaders use the SubScale* variants, all
//   other stages the Patch* variants.
// Any other opcode is ignored.
void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
    const bool in_fragment_stage{program.stage == Stage::Fragment};
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        const bool reads_position{attr == IR::Attribute::PositionX ||
                                  attr == IR::Attribute::PositionY};
        if (reads_position && in_fragment_stage && inst.Flags<u32>() != 0xDEADBEEF) {
            PatchFragCoord(block, inst);
        }
        return;
    }
    case IR::Opcode::ImageQueryDimensions:
        PatchImageQueryDimensions(block, inst);
        return;
    case IR::Opcode::ImageFetch:
        if (in_fragment_stage) {
            SubScaleImageFetch(block, inst);
            return;
        }
        PatchImageFetch(block, inst);
        return;
    case IR::Opcode::ImageRead:
        if (in_fragment_stage) {
            SubScaleImageRead(block, inst);
            return;
        }
        PatchImageRead(block, inst);
        return;
    default:
        return;
    }
}
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
// Entry point of the rescaling pass.
//
// Runs up to two sweeps over every instruction of every block in
// program.post_order_blocks: for fragment shaders a marking sweep (VisitMark)
// comes first, then the patching sweep (Visit) runs for every stage.
void RescalingPass(IR::Program& program) {
    const auto for_each_instruction{[&program](auto&& visitor) {
        for (IR::Block* const block : program.post_order_blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                visitor(*block, inst);
            }
        }
    }};
    if (program.stage == Stage::Fragment) {
        for_each_instruction([](IR::Block&, IR::Inst& inst) { VisitMark(inst); });
    }
    for_each_instruction(
        [&program](IR::Block& block, IR::Inst& inst) { Visit(program, block, inst); });
}
|
||||||
|
|
||||||
|
} // namespace Shader::Optimization
|
@ -172,6 +172,7 @@ struct Info {
|
|||||||
bool uses_global_memory{};
|
bool uses_global_memory{};
|
||||||
bool uses_atomic_image_u32{};
|
bool uses_atomic_image_u32{};
|
||||||
bool uses_shadow_lod{};
|
bool uses_shadow_lod{};
|
||||||
|
bool uses_rescaling_uniform{};
|
||||||
|
|
||||||
IR::Type used_constant_buffer_types{};
|
IR::Type used_constant_buffer_types{};
|
||||||
IR::Type used_storage_buffer_types{};
|
IR::Type used_storage_buffer_types{};
|
||||||
@ -190,4 +191,13 @@ struct Info {
|
|||||||
ImageDescriptors image_descriptors;
|
ImageDescriptors image_descriptors;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Descriptors>
|
||||||
|
u32 NumDescriptors(const Descriptors& descriptors) {
|
||||||
|
u32 num{};
|
||||||
|
for (const auto& desc : descriptors) {
|
||||||
|
num += desc.count;
|
||||||
|
}
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
@ -130,6 +130,8 @@ add_library(video_core STATIC
|
|||||||
renderer_vulkan/vk_descriptor_pool.h
|
renderer_vulkan/vk_descriptor_pool.h
|
||||||
renderer_vulkan/vk_fence_manager.cpp
|
renderer_vulkan/vk_fence_manager.cpp
|
||||||
renderer_vulkan/vk_fence_manager.h
|
renderer_vulkan/vk_fence_manager.h
|
||||||
|
renderer_vulkan/vk_fsr.cpp
|
||||||
|
renderer_vulkan/vk_fsr.h
|
||||||
renderer_vulkan/vk_graphics_pipeline.cpp
|
renderer_vulkan/vk_graphics_pipeline.cpp
|
||||||
renderer_vulkan/vk_graphics_pipeline.h
|
renderer_vulkan/vk_graphics_pipeline.h
|
||||||
renderer_vulkan/vk_master_semaphore.cpp
|
renderer_vulkan/vk_master_semaphore.cpp
|
||||||
|
@ -853,12 +853,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
|||||||
}
|
}
|
||||||
if constexpr (USE_MEMORY_MAPS) {
|
if constexpr (USE_MEMORY_MAPS) {
|
||||||
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
|
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||||
|
runtime.PreCopyBarrier();
|
||||||
for (auto& [copy, buffer_id] : downloads) {
|
for (auto& [copy, buffer_id] : downloads) {
|
||||||
// Have in mind the staging buffer offset for the copy
|
// Have in mind the staging buffer offset for the copy
|
||||||
copy.dst_offset += download_staging.offset;
|
copy.dst_offset += download_staging.offset;
|
||||||
const std::array copies{copy};
|
const std::array copies{copy};
|
||||||
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
|
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
|
||||||
}
|
}
|
||||||
|
runtime.PostCopyBarrier();
|
||||||
runtime.Finish();
|
runtime.Finish();
|
||||||
for (const auto& [copy, buffer_id] : downloads) {
|
for (const auto& [copy, buffer_id] : downloads) {
|
||||||
const Buffer& buffer = slot_buffers[buffer_id];
|
const Buffer& buffer = slot_buffers[buffer_id];
|
||||||
|
@ -29,6 +29,8 @@ enum : u8 {
|
|||||||
ColorBuffer6,
|
ColorBuffer6,
|
||||||
ColorBuffer7,
|
ColorBuffer7,
|
||||||
ZetaBuffer,
|
ZetaBuffer,
|
||||||
|
RescaleViewports,
|
||||||
|
RescaleScissors,
|
||||||
|
|
||||||
VertexBuffers,
|
VertexBuffers,
|
||||||
VertexBuffer0,
|
VertexBuffer0,
|
||||||
|
@ -1,3 +1,11 @@
|
|||||||
|
set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
|
||||||
|
|
||||||
|
set(GLSL_INCLUDES
|
||||||
|
fidelityfx_fsr.comp
|
||||||
|
${FIDELITYFX_INCLUDE_DIR}/ffx_a.h
|
||||||
|
${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h
|
||||||
|
)
|
||||||
|
|
||||||
set(SHADER_FILES
|
set(SHADER_FILES
|
||||||
astc_decoder.comp
|
astc_decoder.comp
|
||||||
block_linear_unswizzle_2d.comp
|
block_linear_unswizzle_2d.comp
|
||||||
@ -5,14 +13,25 @@ set(SHADER_FILES
|
|||||||
convert_depth_to_float.frag
|
convert_depth_to_float.frag
|
||||||
convert_float_to_depth.frag
|
convert_float_to_depth.frag
|
||||||
full_screen_triangle.vert
|
full_screen_triangle.vert
|
||||||
|
fxaa.frag
|
||||||
|
fxaa.vert
|
||||||
opengl_copy_bc4.comp
|
opengl_copy_bc4.comp
|
||||||
opengl_present.frag
|
opengl_present.frag
|
||||||
opengl_present.vert
|
opengl_present.vert
|
||||||
|
opengl_present_scaleforce.frag
|
||||||
pitch_unswizzle.comp
|
pitch_unswizzle.comp
|
||||||
|
present_bicubic.frag
|
||||||
|
present_gaussian.frag
|
||||||
vulkan_blit_color_float.frag
|
vulkan_blit_color_float.frag
|
||||||
vulkan_blit_depth_stencil.frag
|
vulkan_blit_depth_stencil.frag
|
||||||
|
vulkan_fidelityfx_fsr_easu_fp16.comp
|
||||||
|
vulkan_fidelityfx_fsr_easu_fp32.comp
|
||||||
|
vulkan_fidelityfx_fsr_rcas_fp16.comp
|
||||||
|
vulkan_fidelityfx_fsr_rcas_fp32.comp
|
||||||
vulkan_present.frag
|
vulkan_present.frag
|
||||||
vulkan_present.vert
|
vulkan_present.vert
|
||||||
|
vulkan_present_scaleforce_fp16.frag
|
||||||
|
vulkan_present_scaleforce_fp32.frag
|
||||||
vulkan_quad_indexed.comp
|
vulkan_quad_indexed.comp
|
||||||
vulkan_uint8.comp
|
vulkan_uint8.comp
|
||||||
)
|
)
|
||||||
@ -76,7 +95,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
|
|||||||
OUTPUT
|
OUTPUT
|
||||||
${SPIRV_HEADER_FILE}
|
${SPIRV_HEADER_FILE}
|
||||||
COMMAND
|
COMMAND
|
||||||
${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
|
${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
|
||||||
MAIN_DEPENDENCY
|
MAIN_DEPENDENCY
|
||||||
${SOURCE_FILE}
|
${SOURCE_FILE}
|
||||||
)
|
)
|
||||||
@ -84,9 +103,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
|
|||||||
endif()
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
|
set(SHADER_SOURCES ${SHADER_FILES})
|
||||||
|
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
|
||||||
|
|
||||||
add_custom_target(host_shaders
|
add_custom_target(host_shaders
|
||||||
DEPENDS
|
DEPENDS
|
||||||
${SHADER_HEADERS}
|
${SHADER_HEADERS}
|
||||||
SOURCES
|
SOURCES
|
||||||
${SHADER_FILES}
|
${SHADER_SOURCES}
|
||||||
)
|
)
|
||||||
|
116
src/video_core/host_shaders/fidelityfx_fsr.comp
Executable file
116
src/video_core/host_shaders/fidelityfx_fsr.comp
Executable file
@ -0,0 +1,116 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
//!#version 460 core
|
||||||
|
#extension GL_ARB_separate_shader_objects : enable
|
||||||
|
#extension GL_ARB_shading_language_420pack : enable
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types : require
|
||||||
|
|
||||||
|
// FidelityFX Super Resolution Sample
|
||||||
|
//
|
||||||
|
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files(the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions :
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
|
||||||
|
layout( push_constant ) uniform constants {
|
||||||
|
uvec4 Const0;
|
||||||
|
uvec4 Const1;
|
||||||
|
uvec4 Const2;
|
||||||
|
uvec4 Const3;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set=0,binding=0) uniform sampler2D InputTexture;
|
||||||
|
layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
|
||||||
|
|
||||||
|
#define A_GPU 1
|
||||||
|
#define A_GLSL 1
|
||||||
|
|
||||||
|
// Texture access callbacks expected by ffx_fsr1.h. The 32-bit float set is used
// unless YUZU_USE_FP16 is defined, in which case the half-precision set is
// compiled instead. USE_EASU / USE_RCAS select which group of callbacks exists.
#ifndef YUZU_USE_FP16
#include "ffx_a.h"

#if USE_EASU
#define FSR_EASU_F 1
// Gather one color channel (R, G, B) of the 2x2 footprint at `p`.
AF4 FsrEasuRF(AF2 p) {
    return textureGather(InputTexture, p, 0);
}
AF4 FsrEasuGF(AF2 p) {
    return textureGather(InputTexture, p, 1);
}
AF4 FsrEasuBF(AF2 p) {
    return textureGather(InputTexture, p, 2);
}
#endif
#if USE_RCAS
#define FSR_RCAS_F 1
// Fetch a single texel from mip level 0.
AF4 FsrRcasLoadF(ASU2 p) {
    return texelFetch(InputTexture, ASU2(p), 0);
}
// Intentionally empty: the loaded color is used as-is.
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {
}
#endif
#else
#define A_HALF
#include "ffx_a.h"

#if USE_EASU
#define FSR_EASU_H 1
// Gather one color channel (R, G, B) of the 2x2 footprint at `p`, as halves.
AH4 FsrEasuRH(AF2 p) {
    return AH4(textureGather(InputTexture, p, 0));
}
AH4 FsrEasuGH(AF2 p) {
    return AH4(textureGather(InputTexture, p, 1));
}
AH4 FsrEasuBH(AF2 p) {
    return AH4(textureGather(InputTexture, p, 2));
}
#endif
#if USE_RCAS
#define FSR_RCAS_H 1
// Fetch a single texel from mip level 0, converted to halves.
AH4 FsrRcasLoadH(ASW2 p) {
    return AH4(texelFetch(InputTexture, ASU2(p), 0));
}
// Intentionally empty: the loaded color is used as-is.
void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {
}
#endif
#endif
|
||||||
|
|
||||||
|
#include "ffx_fsr1.h"
|
||||||
|
|
||||||
|
// Computes one output pixel at `pos` and stores it into OutputTexture.
// Which filter runs is chosen at compile time by USE_BILINEAR, USE_EASU or
// USE_RCAS; YUZU_USE_FP16 switches EASU/RCAS to their half-precision variants.
// EASU and RCAS results are written with alpha set to 1.
void CurrFilter(AU2 pos) {
#if USE_BILINEAR
    // Map the output pixel to a normalized input coordinate using the
    // scale/offset pairs packed into Const0 and Const1.
    AF2 scaled_pos = AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw);
    AF2 sample_uv = scaled_pos * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
    imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, sample_uv, 0.0));
#endif
#if USE_EASU
#ifdef YUZU_USE_FP16
    AH3 color;
    FsrEasuH(color, pos, Const0, Const1, Const2, Const3);
    imageStore(OutputTexture, ASU2(pos), AH4(color, 1));
#else
    AF3 color;
    FsrEasuF(color, pos, Const0, Const1, Const2, Const3);
    imageStore(OutputTexture, ASU2(pos), AF4(color, 1));
#endif
#endif
#if USE_RCAS
#ifdef YUZU_USE_FP16
    AH3 color;
    FsrRcasH(color.r, color.g, color.b, pos, Const0);
    imageStore(OutputTexture, ASU2(pos), AH4(color, 1));
#else
    AF3 color;
    FsrRcasF(color.r, color.g, color.b, pos, Const0);
    imageStore(OutputTexture, ASU2(pos), AF4(color, 1));
#endif
#endif
}
|
||||||
|
|
||||||
|
layout(local_size_x=64) in;
|
||||||
|
// Each invocation filters four pixels of its workgroup's 16x16 tile: the
// remapped local position, then the positions 8 to the right, 8 right and
// 8 down, and 8 down.
void main() {
    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
    AU2 tile_origin = AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
    AU2 base = ARmp8x8(gl_LocalInvocationID.x) + tile_origin;
    CurrFilter(base);
    CurrFilter(base + AU2(8u, 0u));
    CurrFilter(base + AU2(8u, 8u));
    CurrFilter(base + AU2(0u, 8u));
}
|
76
src/video_core/host_shaders/fxaa.frag
Executable file
76
src/video_core/host_shaders/fxaa.frag
Executable file
@ -0,0 +1,76 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
// Source code is adapted from
|
||||||
|
// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/
|
||||||
|
|
||||||
|
#version 460
|
||||||
|
|
||||||
|
#ifdef VULKAN
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 1
|
||||||
|
|
||||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 0
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
layout (location = 0) in vec4 posPos;
|
||||||
|
|
||||||
|
layout (location = 0) out vec4 frag_color;
|
||||||
|
|
||||||
|
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
|
||||||
|
|
||||||
|
const float FXAA_SPAN_MAX = 8.0;
|
||||||
|
const float FXAA_REDUCE_MUL = 1.0 / 8.0;
|
||||||
|
const float FXAA_REDUCE_MIN = 1.0 / 128.0;
|
||||||
|
|
||||||
|
#define FxaaTexLod0(t, p) textureLod(t, p, 0.0)
|
||||||
|
#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o)
|
||||||
|
|
||||||
|
// FXAA resolve for one fragment.
//   posPos.xy - texture coordinate of the fragment
//   posPos.zw - texture coordinate used for the four corner samples
// Returns the anti-aliased color: the four-tap blend along the detected edge
// direction, or the two-tap blend when the four-tap luma falls outside the
// luma range of the local neighborhood.
vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
    // Neighborhood colors: four corners plus the center.
    vec3 color_nw = FxaaTexLod0(tex, posPos.zw).xyz;
    vec3 color_ne = FxaaTexOff(tex, posPos.zw, ivec2(1, 0)).xyz;
    vec3 color_sw = FxaaTexOff(tex, posPos.zw, ivec2(0, 1)).xyz;
    vec3 color_se = FxaaTexOff(tex, posPos.zw, ivec2(1, 1)).xyz;
    vec3 color_m = FxaaTexLod0(tex, posPos.xy).xyz;

    // Luma of every sample.
    vec3 luma_weights = vec3(0.299, 0.587, 0.114);
    float luma_nw = dot(color_nw, luma_weights);
    float luma_ne = dot(color_ne, luma_weights);
    float luma_sw = dot(color_sw, luma_weights);
    float luma_se = dot(color_se, luma_weights);
    float luma_m = dot(color_m, luma_weights);

    // Luma range of the neighborhood.
    float luma_min = min(luma_m, min(min(luma_nw, luma_ne), min(luma_sw, luma_se)));
    float luma_max = max(luma_m, max(max(luma_nw, luma_ne), max(luma_sw, luma_se)));

    // Edge direction from the luma differences across the corners.
    vec2 dir;
    dir.x = -((luma_nw + luma_ne) - (luma_sw + luma_se));
    dir.y = ((luma_nw + luma_sw) - (luma_ne + luma_se));

    // Scale the direction, clamp it to the maximum span and convert to texels.
    float dir_reduce = max((luma_nw + luma_ne + luma_sw + luma_se) * (0.25 * FXAA_REDUCE_MUL),
                           FXAA_REDUCE_MIN);
    float rcp_dir_min = 1.0 / (min(abs(dir.x), abs(dir.y)) + dir_reduce);
    vec2 span_limit = vec2(FXAA_SPAN_MAX, FXAA_SPAN_MAX);
    dir = min(span_limit, max(-span_limit, dir * rcp_dir_min)) / textureSize(tex, 0);

    // Two inner taps, then two outer taps blended in along the edge.
    vec3 inner_sum = FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz +
                     FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz;
    vec3 two_tap = 0.5 * inner_sum;
    vec3 outer_sum = FxaaTexLod0(tex, posPos.xy + dir * (-0.5)).xyz +
                     FxaaTexLod0(tex, posPos.xy + dir * (0.5)).xyz;
    vec3 four_tap = two_tap * 0.5 + 0.25 * outer_sum;

    float luma_four_tap = dot(four_tap, luma_weights);
    bool out_of_range = (luma_four_tap < luma_min) || (luma_four_tap > luma_max);
    return out_of_range ? two_tap : four_tap;
}
|
||||||
|
|
||||||
|
// Writes the FXAA-resolved color for this fragment with alpha forced to 1.
void main() {
    vec3 resolved = FxaaPixelShader(posPos, input_texture);
    frag_color = vec4(resolved, 1.0);
}
|
38
src/video_core/host_shaders/fxaa.vert
Executable file
38
src/video_core/host_shaders/fxaa.vert
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 460
|
||||||
|
|
||||||
|
out gl_PerVertex {
|
||||||
|
vec4 gl_Position;
|
||||||
|
};
|
||||||
|
|
||||||
|
const vec2 vertices[4] =
|
||||||
|
vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));
|
||||||
|
|
||||||
|
layout (location = 0) out vec4 posPos;
|
||||||
|
|
||||||
|
#ifdef VULKAN
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 0
|
||||||
|
#define VERTEX_ID gl_VertexIndex
|
||||||
|
|
||||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 0
|
||||||
|
#define VERTEX_ID gl_VertexID
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
|
||||||
|
|
||||||
|
const float FXAA_SUBPIX_SHIFT = 0;
|
||||||
|
|
||||||
|
// Emits one corner of the quad stored in `vertices` and the texture coordinates
// consumed by the FXAA fragment shader:
//   posPos.xy - the corner mapped from [-1, 1] to [0, 1]
//   posPos.zw - the same coordinate shifted back by (0.5 + FXAA_SUBPIX_SHIFT)
//               texels of input_texture
void main() {
    vec2 corner = vertices[VERTEX_ID];
    gl_Position = vec4(corner, 0.0, 1.0);
    vec2 uv = (corner + 1.0) / 2.0;
    vec2 texel_shift = (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0);
    posPos = vec4(uv, uv - texel_shift);
}
|
130
src/video_core/host_shaders/opengl_present_scaleforce.frag
Executable file
130
src/video_core/host_shaders/opengl_present_scaleforce.frag
Executable file
@ -0,0 +1,130 @@
|
|||||||
|
// MIT License
|
||||||
|
//
|
||||||
|
// Copyright (c) 2020 BreadFish64
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in all
|
||||||
|
// copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
// SOFTWARE.
|
||||||
|
|
||||||
|
// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
|
||||||
|
|
||||||
|
//! #version 460
|
||||||
|
|
||||||
|
#extension GL_ARB_separate_shader_objects : enable
|
||||||
|
|
||||||
|
#ifdef YUZU_USE_FP16
|
||||||
|
|
||||||
|
#extension GL_AMD_gpu_shader_half_float : enable
|
||||||
|
#extension GL_NV_gpu_shader5 : enable
|
||||||
|
|
||||||
|
#define lfloat float16_t
|
||||||
|
#define lvec2 f16vec2
|
||||||
|
#define lvec3 f16vec3
|
||||||
|
#define lvec4 f16vec4
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define lfloat float
|
||||||
|
#define lvec2 vec2
|
||||||
|
#define lvec3 vec3
|
||||||
|
#define lvec4 vec4
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef VULKAN
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 1
|
||||||
|
|
||||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 0
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
layout (location = 0) in vec2 tex_coord;
|
||||||
|
|
||||||
|
layout (location = 0) out vec4 frag_color;
|
||||||
|
|
||||||
|
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
|
||||||
|
|
||||||
|
const bool ignore_alpha = true;
|
||||||
|
|
||||||
|
// Distance between two colors, measured as the length of their difference
// after converting that difference to YCbCr.
// When ignore_alpha is false, the color term is weighted by both alphas and
// the alpha difference is added before taking the square root.
lfloat ColorDist1(lvec4 a, lvec4 b) {
    // Luma coefficients:
    // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
    const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
    const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
    const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);

    lvec4 delta = a - b;
    lfloat luma = dot(delta.rgb, K);
    lvec3 ycbcr = lvec3(luma, scaleB * (delta.b - luma), scaleR * (delta.r - luma));
    lfloat dist = length(ycbcr);

    return ignore_alpha ? dist : sqrt(a.a * b.a * dist * dist + delta.a * delta.a);
}
|
||||||
|
|
||||||
|
// Computes ColorDist1 between `ref` and each of A, B, C and D, packing the
// four results into the x, y, z and w components respectively.
lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
    lvec4 dist;
    dist.x = ColorDist1(ref, A);
    dist.y = ColorDist1(ref, B);
    dist.z = ColorDist1(ref, C);
    dist.w = ColorDist1(ref, D);
    return dist;
}
|
||||||
|
|
||||||
|
// Samples `tex` at `tex_coord`, nudged by an offset derived from how the
// centre texel differs from its eight neighbours.
// Returns the unmodified centre texel when all neighbours match it.
vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
    // 3x3 neighbourhood around the sample position ("top" is +1 in y).
    lvec4 center = lvec4(texture(tex, tex_coord));
    lvec4 top_left = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
    lvec4 top_mid = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
    lvec4 top_right = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
    lvec4 mid_left = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
    lvec4 mid_right = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
    lvec4 bottom_left = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
    lvec4 bottom_mid = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
    lvec4 bottom_right = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));

    // Each component pairs a neighbour with its point-mirrored counterpart:
    // x/z hold diagonal neighbours, y/w hold axis-aligned neighbours.
    lvec4 dist_upper = ColorDist(center, top_left, top_mid, top_right, mid_right);
    lvec4 dist_lower = ColorDist(center, bottom_right, bottom_mid, bottom_left, mid_left);

    // Calculate how different the centre texel is from the texels around it
    const lfloat plus_weight = lfloat(1.5);
    const lfloat cross_weight = lfloat(1.5);
    lvec4 weights = lvec4(cross_weight, plus_weight, cross_weight, plus_weight);
    lfloat total_dist = dot(dist_upper + dist_lower, weights);

    // Flat neighbourhood: nothing to steer towards (also avoids dividing by zero below).
    if (total_dist == lfloat(0.0)) {
        return center;
    }

    // Add together all the distances with direction taken into account
    lvec4 signed_dist = dist_upper - dist_lower;
    lvec2 axis_term = signed_dist.wy * plus_weight;
    lvec2 diagonal_term = (signed_dist.zz + lvec2(-signed_dist.x, signed_dist.x)) * cross_weight;
    lvec2 total_offset = axis_term + diagonal_term;

    // When the image has thin points, they tend to split apart.
    // This is because the texels all around are different and total_offset reaches into clear areas.
    // This works pretty well to keep the offset in bounds for these cases.
    lfloat clamp_val = length(total_offset) / total_dist;
    vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);

    return texture(tex, tex_coord - final_offset);
}
|
||||||
|
|
||||||
|
// Fragment entry point: writes the Scaleforce-filtered sample of the input texture.
void main() {
    vec4 filtered = Scaleforce(input_texture, tex_coord);
    frag_color = filtered;
}
|
67
src/video_core/host_shaders/present_bicubic.frag
Executable file
67
src/video_core/host_shaders/present_bicubic.frag
Executable file
@ -0,0 +1,67 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
|
||||||
|
#ifdef VULKAN
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 1
|
||||||
|
|
||||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 0
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
layout (location = 0) in vec2 frag_tex_coord;
|
||||||
|
|
||||||
|
layout (location = 0) out vec4 color;
|
||||||
|
|
||||||
|
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
|
||||||
|
|
||||||
|
// Returns four cubic filter weights for the fractional position `v`.
// The weights are built so that they sum to 1 (x + y + z + w == 6 before scaling).
vec4 cubic(float v) {
    vec4 base = vec4(1.0, 2.0, 3.0, 4.0) - v;
    vec4 cubed = base * base * base;

    vec4 weights;
    weights.x = cubed.x;
    weights.y = cubed.y - 4.0 * cubed.x;
    weights.z = cubed.z - 4.0 * cubed.y + 6.0 * cubed.x;
    // The last weight is whatever remains of the total.
    weights.w = 6.0 - weights.x - weights.y - weights.z;

    return weights * (1.0 / 6.0);
}
|
||||||
|
|
||||||
|
// Bicubic texture lookup built from four texture() fetches whose positions
// and blend factors are derived from the cubic() weights.
vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
    vec2 size = textureSize(textureSampler, 0);
    vec2 inv_size = 1.0 / size;

    // Move to texel space, then split into integer texel and fractional part.
    texCoords = texCoords * size - 0.5;
    vec2 cell_frac = fract(texCoords);
    texCoords -= cell_frac;

    vec4 weights_x = cubic(cell_frac.x);
    vec4 weights_y = cubic(cell_frac.y);

    vec4 base = texCoords.xxyy + vec2(-0.5, +1.5).xyxy;

    // Combined weight of each pair of neighbouring taps (x pairs in .xy, y pairs in .zw).
    vec4 pair_sum = vec4(weights_x.xz + weights_x.yw, weights_y.xz + weights_y.yw);
    vec4 sample_pos = base + vec4(weights_x.yw, weights_y.yw) / pair_sum;

    // Back to normalized coordinates.
    sample_pos *= inv_size.xxyy;

    vec4 tap0 = texture(textureSampler, sample_pos.xz);
    vec4 tap1 = texture(textureSampler, sample_pos.yz);
    vec4 tap2 = texture(textureSampler, sample_pos.xw);
    vec4 tap3 = texture(textureSampler, sample_pos.yw);

    float blend_x = pair_sum.x / (pair_sum.x + pair_sum.y);
    float blend_y = pair_sum.z / (pair_sum.z + pair_sum.w);

    vec4 row_w = mix(tap3, tap2, blend_x);
    vec4 row_z = mix(tap1, tap0, blend_x);
    return mix(row_w, row_z, blend_y);
}
|
||||||
|
|
||||||
|
// Fragment entry point: outputs the bicubic-filtered color with alpha forced to 1.
void main() {
    vec3 filtered_rgb = textureBicubic(color_texture, frag_tex_coord).rgb;
    color = vec4(filtered_rgb, 1.0f);
}
|
70
src/video_core/host_shaders/present_gaussian.frag
Executable file
70
src/video_core/host_shaders/present_gaussian.frag
Executable file
@ -0,0 +1,70 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
// Code adapted from the following sources:
|
||||||
|
// - https://learnopengl.com/Advanced-Lighting/Bloom
|
||||||
|
// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
|
||||||
|
#ifdef VULKAN
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 1
|
||||||
|
|
||||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
#define BINDING_COLOR_TEXTURE 0
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
layout(location = 0) in vec2 frag_tex_coord;
|
||||||
|
|
||||||
|
layout(location = 0) out vec4 color;
|
||||||
|
|
||||||
|
layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
|
||||||
|
|
||||||
|
const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308);
|
||||||
|
const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703);
|
||||||
|
|
||||||
|
// Accumulates the weighted off-centre taps (indices 1 and 2 of offset/weight)
// above and below `coord`. `norm` scales the tap offsets.
// The centre tap (index 0) is not included.
vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 sum = vec4(0.0f);
    for (int tap = 1; tap < 3; ++tap) {
        vec2 delta = vec2(0.0, offset[tap]) * norm;
        sum += texture(textureSampler, coord + delta) * weight[tap];
        sum += texture(textureSampler, coord - delta) * weight[tap];
    }
    return sum;
}
|
||||||
|
|
||||||
|
// Accumulates the weighted off-centre taps (indices 1 and 2 of offset/weight)
// to the left and right of `coord`. `norm` scales the tap offsets.
// The centre tap (index 0) is not included.
vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 sum = vec4(0.0f);
    for (int tap = 1; tap < 3; ++tap) {
        vec2 delta = vec2(offset[tap], 0.0) * norm;
        sum += texture(textureSampler, coord + delta) * weight[tap];
        sum += texture(textureSampler, coord - delta) * weight[tap];
    }
    return sum;
}
|
||||||
|
|
||||||
|
// Accumulates the weighted off-centre taps (indices 1 and 2 of offset/weight)
// along the diagonal through `coord`. `norm` scales the tap offsets, so
// flipping the sign of one of its components selects the other diagonal.
// The centre tap (index 0) is not included.
vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 sum = vec4(0.0f);
    for (int tap = 1; tap < 3; ++tap) {
        vec2 delta = vec2(offset[tap], offset[tap]) * norm;
        sum += texture(textureSampler, coord + delta) * weight[tap];
        sum += texture(textureSampler, coord - delta) * weight[tap];
    }
    return sum;
}
|
||||||
|
|
||||||
|
// Fragment entry point: Gaussian-style blur of color_texture.
// The centre tap is combined with the average of four directional blurs
// (horizontal, vertical and both diagonals); alpha is forced to 1.
void main() {
    // Centre tap, weighted by weight[0]; the blur helpers only add the off-centre taps.
    vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
    // Size of one texel in normalized texture coordinates.
    vec2 tex_offset = 1.0f / textureSize(color_texture, 0);

    // TODO(Blinkhawk): This code can be optimized through shader group instructions.
    vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
    vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
    // The diagonal terms must use blurDiagonal. Calling blurVertical here (as before)
    // ignored the x component entirely and simply duplicated the vertical blur.
    vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
    // Negating y selects the opposite diagonal.
    vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
    // Equal-weight average of the four directions.
    vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
    color = vec4(combination + base, 1.0f);
}
|
11
src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
Executable file
11
src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#define YUZU_USE_FP16
|
||||||
|
#define USE_EASU 1
|
||||||
|
|
||||||
|
#include "fidelityfx_fsr.comp"
|
10
src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
Executable file
10
src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#define USE_EASU 1
|
||||||
|
|
||||||
|
#include "fidelityfx_fsr.comp"
|
11
src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
Executable file
11
src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#define YUZU_USE_FP16
|
||||||
|
#define USE_RCAS 1
|
||||||
|
|
||||||
|
#include "fidelityfx_fsr.comp"
|
10
src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
Executable file
10
src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#define USE_RCAS 1
|
||||||
|
|
||||||
|
#include "fidelityfx_fsr.comp"
|
7
src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
Executable file
7
src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#version 460
|
||||||
|
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#define YUZU_USE_FP16
|
||||||
|
|
||||||
|
#include "opengl_present_scaleforce.frag"
|
5
src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
Executable file
5
src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
#version 460
|
||||||
|
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "opengl_present_scaleforce.frag"
|
@ -5,6 +5,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <span>
|
#include <span>
|
||||||
|
|
||||||
|
#include "shader_recompiler/backend/glasm/emit_glasm.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
@ -229,7 +230,9 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
|
|||||||
.padding = 0,
|
.padding = 0,
|
||||||
};
|
};
|
||||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||||
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
|
glProgramLocalParametersI4uivNV(
|
||||||
|
PROGRAM_LUT[stage],
|
||||||
|
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
|
||||||
reinterpret_cast<const GLuint*>(&ssbo));
|
reinterpret_cast<const GLuint*>(&ssbo));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,7 +253,9 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
|
|||||||
.padding = 0,
|
.padding = 0,
|
||||||
};
|
};
|
||||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||||
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
|
glProgramLocalParametersI4uivNV(
|
||||||
|
GL_COMPUTE_PROGRAM_NV,
|
||||||
|
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
|
||||||
reinterpret_cast<const GLuint*>(&ssbo));
|
reinterpret_cast<const GLuint*>(&ssbo));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,15 +19,6 @@ using VideoCommon::ImageId;
|
|||||||
constexpr u32 MAX_TEXTURES = 64;
|
constexpr u32 MAX_TEXTURES = 64;
|
||||||
constexpr u32 MAX_IMAGES = 16;
|
constexpr u32 MAX_IMAGES = 16;
|
||||||
|
|
||||||
template <typename Range>
|
|
||||||
u32 AccumulateCount(const Range& range) {
|
|
||||||
u32 num{};
|
|
||||||
for (const auto& desc : range) {
|
|
||||||
num += desc.count;
|
|
||||||
}
|
|
||||||
return num;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ComputePipelineKey::Hash() const noexcept {
|
size_t ComputePipelineKey::Hash() const noexcept {
|
||||||
return static_cast<size_t>(
|
return static_cast<size_t>(
|
||||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
|
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
|
||||||
@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
|
|||||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||||
uniform_buffer_sizes.begin());
|
uniform_buffer_sizes.begin());
|
||||||
|
|
||||||
num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
|
num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||||
num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
|
num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||||
|
|
||||||
const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
|
const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)};
|
||||||
ASSERT(num_textures <= MAX_TEXTURES);
|
ASSERT(num_textures <= MAX_TEXTURES);
|
||||||
|
|
||||||
const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
|
const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)};
|
||||||
ASSERT(num_images <= MAX_IMAGES);
|
ASSERT(num_images <= MAX_IMAGES);
|
||||||
|
|
||||||
const bool is_glasm{assembly_program.handle != 0};
|
const bool is_glasm{assembly_program.handle != 0};
|
||||||
const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
|
const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)};
|
||||||
use_storage_buffers =
|
use_storage_buffers =
|
||||||
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
|
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
|
||||||
writes_global_memory = !use_storage_buffers &&
|
writes_global_memory = !use_storage_buffers &&
|
||||||
@ -88,8 +79,7 @@ void ComputePipeline::Configure() {
|
|||||||
}
|
}
|
||||||
texture_cache.SynchronizeComputeDescriptors();
|
texture_cache.SynchronizeComputeDescriptors();
|
||||||
|
|
||||||
std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
|
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
|
||||||
boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
|
|
||||||
std::array<GLuint, MAX_TEXTURES> samplers;
|
std::array<GLuint, MAX_TEXTURES> samplers;
|
||||||
std::array<GLuint, MAX_TEXTURES> textures;
|
std::array<GLuint, MAX_TEXTURES> textures;
|
||||||
std::array<GLuint, MAX_IMAGES> images;
|
std::array<GLuint, MAX_IMAGES> images;
|
||||||
@ -119,33 +109,39 @@ void ComputePipeline::Configure() {
|
|||||||
}
|
}
|
||||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||||
}};
|
}};
|
||||||
const auto add_image{[&](const auto& desc) {
|
const auto add_image{[&](const auto& desc, bool blacklist) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices.push_back(handle.first);
|
views.push_back({
|
||||||
|
.index = handle.first,
|
||||||
|
.blacklist = blacklist,
|
||||||
|
.id = {},
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices.push_back(handle.first);
|
views.push_back({handle.first});
|
||||||
samplers[sampler_binding++] = 0;
|
samplers[sampler_binding++] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::ranges::for_each(info.image_buffer_descriptors, add_image);
|
for (const auto& desc : info.image_buffer_descriptors) {
|
||||||
|
add_image(desc, false);
|
||||||
|
}
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices.push_back(handle.first);
|
views.push_back({handle.first});
|
||||||
|
|
||||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||||
samplers[sampler_binding++] = sampler->Handle();
|
samplers[sampler_binding++] = sampler->Handle();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::ranges::for_each(info.image_descriptors, add_image);
|
for (const auto& desc : info.image_descriptors) {
|
||||||
|
add_image(desc, desc.is_written);
|
||||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
}
|
||||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
|
||||||
|
|
||||||
if (assembly_program.handle != 0) {
|
if (assembly_program.handle != 0) {
|
||||||
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
|
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
|
||||||
@ -161,7 +157,7 @@ void ComputePipeline::Configure() {
|
|||||||
if constexpr (is_image) {
|
if constexpr (is_image) {
|
||||||
is_written = desc.is_written;
|
is_written = desc.is_written;
|
||||||
}
|
}
|
||||||
ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
|
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
|
||||||
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
|
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
|
||||||
image_view.BufferSize(), image_view.format,
|
image_view.BufferSize(), image_view.format,
|
||||||
is_written, is_image);
|
is_written, is_image);
|
||||||
@ -177,23 +173,45 @@ void ComputePipeline::Configure() {
|
|||||||
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
|
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
|
||||||
buffer_cache.BindHostComputeBuffers();
|
buffer_cache.BindHostComputeBuffers();
|
||||||
|
|
||||||
const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
|
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
|
||||||
|
num_image_buffers};
|
||||||
texture_binding += num_texture_buffers;
|
texture_binding += num_texture_buffers;
|
||||||
image_binding += num_image_buffers;
|
image_binding += num_image_buffers;
|
||||||
|
|
||||||
|
u32 texture_scaling_mask{};
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
|
||||||
textures[texture_binding++] = image_view.Handle(desc.type);
|
textures[texture_binding] = image_view.Handle(desc.type);
|
||||||
|
if (texture_cache.IsRescaling(image_view)) {
|
||||||
|
texture_scaling_mask |= 1u << texture_binding;
|
||||||
|
}
|
||||||
|
++texture_binding;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
u32 image_scaling_mask{};
|
||||||
for (const auto& desc : info.image_descriptors) {
|
for (const auto& desc : info.image_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
|
||||||
if (desc.is_written) {
|
if (desc.is_written) {
|
||||||
texture_cache.MarkModification(image_view.image_id);
|
texture_cache.MarkModification(image_view.image_id);
|
||||||
}
|
}
|
||||||
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
|
images[image_binding] = image_view.StorageView(desc.type, desc.format);
|
||||||
|
if (texture_cache.IsRescaling(image_view)) {
|
||||||
|
image_scaling_mask |= 1u << image_binding;
|
||||||
|
}
|
||||||
|
++image_binding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info.uses_rescaling_uniform) {
|
||||||
|
const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
|
||||||
|
const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
|
||||||
|
if (assembly_program.handle != 0) {
|
||||||
|
glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask,
|
||||||
|
float_image_scaling_mask, 0.0f, 0.0f);
|
||||||
|
} else {
|
||||||
|
glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask,
|
||||||
|
float_image_scaling_mask, 0.0f, 0.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (texture_binding != 0) {
|
if (texture_binding != 0) {
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||||
#include "video_core/shader_notify.h"
|
#include "video_core/shader_notify.h"
|
||||||
#include "video_core/texture_cache/texture_cache_base.h"
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER) && defined(NDEBUG)
|
#if defined(_MSC_VER) && defined(NDEBUG)
|
||||||
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
|
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
|
||||||
@ -27,6 +27,7 @@ namespace OpenGL {
|
|||||||
namespace {
|
namespace {
|
||||||
using Shader::ImageBufferDescriptor;
|
using Shader::ImageBufferDescriptor;
|
||||||
using Shader::ImageDescriptor;
|
using Shader::ImageDescriptor;
|
||||||
|
using Shader::NumDescriptors;
|
||||||
using Shader::TextureBufferDescriptor;
|
using Shader::TextureBufferDescriptor;
|
||||||
using Shader::TextureDescriptor;
|
using Shader::TextureDescriptor;
|
||||||
using Tegra::Texture::TexturePair;
|
using Tegra::Texture::TexturePair;
|
||||||
@ -35,15 +36,6 @@ using VideoCommon::ImageId;
|
|||||||
constexpr u32 MAX_TEXTURES = 64;
|
constexpr u32 MAX_TEXTURES = 64;
|
||||||
constexpr u32 MAX_IMAGES = 8;
|
constexpr u32 MAX_IMAGES = 8;
|
||||||
|
|
||||||
template <typename Range>
|
|
||||||
u32 AccumulateCount(const Range& range) {
|
|
||||||
u32 num{};
|
|
||||||
for (const auto& desc : range) {
|
|
||||||
num += desc.count;
|
|
||||||
}
|
|
||||||
return num;
|
|
||||||
}
|
|
||||||
|
|
||||||
GLenum Stage(size_t stage_index) {
|
GLenum Stage(size_t stage_index) {
|
||||||
switch (stage_index) {
|
switch (stage_index) {
|
||||||
case 0:
|
case 0:
|
||||||
@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
|
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
|
||||||
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
|
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
|
||||||
|
|
||||||
base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
|
base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors);
|
||||||
base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
|
base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors);
|
||||||
}
|
}
|
||||||
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
|
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
|
||||||
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||||
|
|
||||||
const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
|
const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)};
|
||||||
num_texture_buffers[stage] += num_tex_buffer_bindings;
|
num_texture_buffers[stage] += num_tex_buffer_bindings;
|
||||||
num_textures += num_tex_buffer_bindings;
|
num_textures += num_tex_buffer_bindings;
|
||||||
|
|
||||||
const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
|
const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)};
|
||||||
num_image_buffers[stage] += num_img_buffers_bindings;
|
num_image_buffers[stage] += num_img_buffers_bindings;
|
||||||
num_images += num_img_buffers_bindings;
|
num_images += num_img_buffers_bindings;
|
||||||
|
|
||||||
num_textures += AccumulateCount(info.texture_descriptors);
|
num_textures += NumDescriptors(info.texture_descriptors);
|
||||||
num_images += AccumulateCount(info.image_descriptors);
|
num_images += NumDescriptors(info.image_descriptors);
|
||||||
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
|
num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors);
|
||||||
|
|
||||||
writes_global_memory |= std::ranges::any_of(
|
writes_global_memory |= std::ranges::any_of(
|
||||||
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
|
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
|
||||||
@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
|
|
||||||
template <typename Spec>
|
template <typename Spec>
|
||||||
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||||
std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
|
std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
|
||||||
std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
|
|
||||||
std::array<GLuint, MAX_TEXTURES> samplers;
|
std::array<GLuint, MAX_TEXTURES> samplers;
|
||||||
size_t image_view_index{};
|
size_t views_index{};
|
||||||
GLsizei sampler_binding{};
|
GLsizei sampler_binding{};
|
||||||
|
|
||||||
texture_cache.SynchronizeGraphicsDescriptors();
|
texture_cache.SynchronizeGraphicsDescriptors();
|
||||||
@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
}
|
}
|
||||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||||
}};
|
}};
|
||||||
const auto add_image{[&](const auto& desc) {
|
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices[image_view_index++] = handle.first;
|
views[views_index++] = {
|
||||||
|
.index = handle.first,
|
||||||
|
.blacklist = blacklist,
|
||||||
|
.id = {},
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
if constexpr (Spec::has_texture_buffers) {
|
if constexpr (Spec::has_texture_buffers) {
|
||||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices[image_view_index++] = handle.first;
|
views[views_index++] = {handle.first};
|
||||||
samplers[sampler_binding++] = 0;
|
samplers[sampler_binding++] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr (Spec::has_image_buffers) {
|
if constexpr (Spec::has_image_buffers) {
|
||||||
for (const auto& desc : info.image_buffer_descriptors) {
|
for (const auto& desc : info.image_buffer_descriptors) {
|
||||||
add_image(desc);
|
add_image(desc, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices[image_view_index++] = handle.first;
|
views[views_index++] = {handle.first};
|
||||||
|
|
||||||
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
||||||
samplers[sampler_binding++] = sampler->Handle();
|
samplers[sampler_binding++] = sampler->Handle();
|
||||||
@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
}
|
}
|
||||||
if constexpr (Spec::has_images) {
|
if constexpr (Spec::has_images) {
|
||||||
for (const auto& desc : info.image_descriptors) {
|
for (const auto& desc : info.image_descriptors) {
|
||||||
add_image(desc);
|
add_image(desc, desc.is_written);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
if constexpr (Spec::enabled_stages[4]) {
|
if constexpr (Spec::enabled_stages[4]) {
|
||||||
config_stage(4);
|
config_stage(4);
|
||||||
}
|
}
|
||||||
const std::span indices_span(image_view_indices.data(), image_view_index);
|
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index));
|
||||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
|
||||||
|
|
||||||
texture_cache.UpdateRenderTargets(false);
|
texture_cache.UpdateRenderTargets(false);
|
||||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||||
|
|
||||||
ImageId* texture_buffer_index{image_view_ids.data()};
|
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
|
||||||
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||||
size_t index{};
|
size_t index{};
|
||||||
const auto add_buffer{[&](const auto& desc) {
|
const auto add_buffer{[&](const auto& desc) {
|
||||||
@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
if constexpr (is_image) {
|
if constexpr (is_image) {
|
||||||
is_written = desc.is_written;
|
is_written = desc.is_written;
|
||||||
}
|
}
|
||||||
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
|
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
|
||||||
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
||||||
image_view.BufferSize(), image_view.format,
|
image_view.BufferSize(), image_view.format,
|
||||||
is_written, is_image);
|
is_written, is_image);
|
||||||
++index;
|
++index;
|
||||||
++texture_buffer_index;
|
++texture_buffer_it;
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
const Shader::Info& info{stage_infos[stage]};
|
const Shader::Info& info{stage_infos[stage]};
|
||||||
@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
add_buffer(desc);
|
add_buffer(desc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
|
||||||
texture_buffer_index += desc.count;
|
|
||||||
}
|
|
||||||
if constexpr (Spec::has_images) {
|
if constexpr (Spec::has_images) {
|
||||||
for (const auto& desc : info.image_descriptors) {
|
texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
|
||||||
texture_buffer_index += desc.count;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
if constexpr (Spec::enabled_stages[0]) {
|
if constexpr (Spec::enabled_stages[0]) {
|
||||||
@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
if (!is_built.load(std::memory_order::relaxed)) {
|
if (!is_built.load(std::memory_order::relaxed)) {
|
||||||
WaitForBuild();
|
WaitForBuild();
|
||||||
}
|
}
|
||||||
if (assembly_programs[0].handle != 0) {
|
const bool use_assembly{assembly_programs[0].handle != 0};
|
||||||
|
if (use_assembly) {
|
||||||
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
|
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
|
||||||
} else {
|
} else {
|
||||||
program_manager.BindSourcePrograms(source_programs);
|
program_manager.BindSourcePrograms(source_programs);
|
||||||
}
|
}
|
||||||
const ImageId* views_it{image_view_ids.data()};
|
const VideoCommon::ImageViewInOut* views_it{views.data()};
|
||||||
GLsizei texture_binding = 0;
|
GLsizei texture_binding = 0;
|
||||||
GLsizei image_binding = 0;
|
GLsizei image_binding = 0;
|
||||||
std::array<GLuint, MAX_TEXTURES> textures;
|
std::array<GLuint, MAX_TEXTURES> textures;
|
||||||
@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
views_it += num_texture_buffers[stage];
|
views_it += num_texture_buffers[stage];
|
||||||
views_it += num_image_buffers[stage];
|
views_it += num_image_buffers[stage];
|
||||||
|
|
||||||
|
u32 texture_scaling_mask{};
|
||||||
|
u32 image_scaling_mask{};
|
||||||
|
u32 stage_texture_binding{};
|
||||||
|
u32 stage_image_binding{};
|
||||||
|
|
||||||
const auto& info{stage_infos[stage]};
|
const auto& info{stage_infos[stage]};
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
|
||||||
textures[texture_binding++] = image_view.Handle(desc.type);
|
textures[texture_binding] = image_view.Handle(desc.type);
|
||||||
|
if (texture_cache.IsRescaling(image_view)) {
|
||||||
|
texture_scaling_mask |= 1u << stage_texture_binding;
|
||||||
|
}
|
||||||
|
++texture_binding;
|
||||||
|
++stage_texture_binding;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.image_descriptors) {
|
for (const auto& desc : info.image_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
|
||||||
if (desc.is_written) {
|
if (desc.is_written) {
|
||||||
texture_cache.MarkModification(image_view.image_id);
|
texture_cache.MarkModification(image_view.image_id);
|
||||||
}
|
}
|
||||||
images[image_binding++] = image_view.StorageView(desc.type, desc.format);
|
images[image_binding] = image_view.StorageView(desc.type, desc.format);
|
||||||
|
if (texture_cache.IsRescaling(image_view)) {
|
||||||
|
image_scaling_mask |= 1u << stage_image_binding;
|
||||||
|
}
|
||||||
|
++image_binding;
|
||||||
|
++stage_image_binding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info.uses_rescaling_uniform) {
|
||||||
|
const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
|
||||||
|
const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
|
||||||
|
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||||
|
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
|
||||||
|
const f32 down_factor{is_rescaling ? config_down_factor : 1.0f};
|
||||||
|
if (use_assembly) {
|
||||||
|
glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask,
|
||||||
|
float_image_scaling_mask, down_factor, 0.0f);
|
||||||
|
} else {
|
||||||
|
glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask,
|
||||||
|
float_image_scaling_mask, down_factor, 0.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
|
@ -186,6 +186,10 @@ void RasterizerOpenGL::Clear() {
|
|||||||
SyncRasterizeEnable();
|
SyncRasterizeEnable();
|
||||||
SyncStencilTestState();
|
SyncStencilTestState();
|
||||||
|
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.UpdateRenderTargets(true);
|
||||||
|
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||||
|
SyncViewport();
|
||||||
if (regs.clear_flags.scissor) {
|
if (regs.clear_flags.scissor) {
|
||||||
SyncScissorTest();
|
SyncScissorTest();
|
||||||
} else {
|
} else {
|
||||||
@ -194,10 +198,6 @@ void RasterizerOpenGL::Clear() {
|
|||||||
}
|
}
|
||||||
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
||||||
|
|
||||||
std::scoped_lock lock{texture_cache.mutex};
|
|
||||||
texture_cache.UpdateRenderTargets(true);
|
|
||||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
|
||||||
|
|
||||||
if (use_color) {
|
if (use_color) {
|
||||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||||
}
|
}
|
||||||
@ -216,8 +216,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
|
|
||||||
query_cache.UpdateCounters();
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
SyncState();
|
|
||||||
|
|
||||||
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
|
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
|
||||||
if (!pipeline) {
|
if (!pipeline) {
|
||||||
return;
|
return;
|
||||||
@ -225,6 +223,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||||
pipeline->Configure(is_indexed);
|
pipeline->Configure(is_indexed);
|
||||||
|
|
||||||
|
SyncState();
|
||||||
|
|
||||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||||
BeginTransformFeedback(pipeline, primitive_mode);
|
BeginTransformFeedback(pipeline, primitive_mode);
|
||||||
|
|
||||||
@ -535,7 +535,8 @@ void RasterizerOpenGL::SyncViewport() {
|
|||||||
auto& flags = maxwell3d.dirty.flags;
|
auto& flags = maxwell3d.dirty.flags;
|
||||||
const auto& regs = maxwell3d.regs;
|
const auto& regs = maxwell3d.regs;
|
||||||
|
|
||||||
const bool dirty_viewport = flags[Dirty::Viewports];
|
const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports];
|
||||||
|
const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
|
||||||
const bool dirty_clip_control = flags[Dirty::ClipControl];
|
const bool dirty_clip_control = flags[Dirty::ClipControl];
|
||||||
|
|
||||||
if (dirty_clip_control || flags[Dirty::FrontFace]) {
|
if (dirty_clip_control || flags[Dirty::FrontFace]) {
|
||||||
@ -555,8 +556,7 @@ void RasterizerOpenGL::SyncViewport() {
|
|||||||
}
|
}
|
||||||
glFrontFace(mode);
|
glFrontFace(mode);
|
||||||
}
|
}
|
||||||
|
if (dirty_viewport || dirty_clip_control) {
|
||||||
if (dirty_viewport || flags[Dirty::ClipControl]) {
|
|
||||||
flags[Dirty::ClipControl] = false;
|
flags[Dirty::ClipControl] = false;
|
||||||
|
|
||||||
bool flip_y = false;
|
bool flip_y = false;
|
||||||
@ -572,37 +572,58 @@ void RasterizerOpenGL::SyncViewport() {
|
|||||||
state_tracker.ClipControl(origin, depth);
|
state_tracker.ClipControl(origin, depth);
|
||||||
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
|
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
|
||||||
}
|
}
|
||||||
|
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||||
|
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
|
||||||
|
const auto conv = [scale](float value) -> GLfloat {
|
||||||
|
float new_value = value * scale;
|
||||||
|
if (scale < 1.0f) {
|
||||||
|
const bool sign = std::signbit(value);
|
||||||
|
new_value = std::round(std::abs(new_value));
|
||||||
|
new_value = sign ? -new_value : new_value;
|
||||||
|
}
|
||||||
|
return static_cast<GLfloat>(new_value);
|
||||||
|
};
|
||||||
|
|
||||||
if (dirty_viewport) {
|
if (dirty_viewport) {
|
||||||
flags[Dirty::Viewports] = false;
|
flags[Dirty::Viewports] = false;
|
||||||
|
|
||||||
const bool force = flags[Dirty::ViewportTransform];
|
const bool force = flags[Dirty::ViewportTransform] || rescale_viewports;
|
||||||
flags[Dirty::ViewportTransform] = false;
|
flags[Dirty::ViewportTransform] = false;
|
||||||
|
flags[VideoCommon::Dirty::RescaleViewports] = false;
|
||||||
|
|
||||||
for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
|
for (size_t index = 0; index < Maxwell::NumViewports; ++index) {
|
||||||
if (!force && !flags[Dirty::Viewport0 + i]) {
|
if (!force && !flags[Dirty::Viewport0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
flags[Dirty::Viewport0 + i] = false;
|
flags[Dirty::Viewport0 + index] = false;
|
||||||
|
|
||||||
const auto& src = regs.viewport_transform[i];
|
const auto& src = regs.viewport_transform[index];
|
||||||
const Common::Rectangle<f32> rect{src.GetRect()};
|
GLfloat x = conv(src.translate_x - src.scale_x);
|
||||||
glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
|
GLfloat y = conv(src.translate_y - src.scale_y);
|
||||||
rect.GetHeight());
|
GLfloat width = conv(src.scale_x * 2.0f);
|
||||||
|
GLfloat height = conv(src.scale_y * 2.0f);
|
||||||
|
|
||||||
|
if (height < 0) {
|
||||||
|
y += height;
|
||||||
|
height = -height;
|
||||||
|
}
|
||||||
|
glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
|
||||||
|
height != 0.0f ? height : 1.0f);
|
||||||
|
|
||||||
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
|
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
|
||||||
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
|
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
|
||||||
const GLdouble far_depth = src.translate_z + src.scale_z;
|
const GLdouble far_depth = src.translate_z + src.scale_z;
|
||||||
if (device.HasDepthBufferFloat()) {
|
if (device.HasDepthBufferFloat()) {
|
||||||
glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
|
glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth);
|
||||||
} else {
|
} else {
|
||||||
glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
|
glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!GLAD_GL_NV_viewport_swizzle) {
|
if (!GLAD_GL_NV_viewport_swizzle) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
|
glViewportSwizzleNV(static_cast<GLuint>(index),
|
||||||
|
MaxwellToGL::ViewportSwizzle(src.swizzle.x),
|
||||||
MaxwellToGL::ViewportSwizzle(src.swizzle.y),
|
MaxwellToGL::ViewportSwizzle(src.swizzle.y),
|
||||||
MaxwellToGL::ViewportSwizzle(src.swizzle.z),
|
MaxwellToGL::ViewportSwizzle(src.swizzle.z),
|
||||||
MaxwellToGL::ViewportSwizzle(src.swizzle.w));
|
MaxwellToGL::ViewportSwizzle(src.swizzle.w));
|
||||||
@ -905,14 +926,34 @@ void RasterizerOpenGL::SyncLogicOpState() {
|
|||||||
|
|
||||||
void RasterizerOpenGL::SyncScissorTest() {
|
void RasterizerOpenGL::SyncScissorTest() {
|
||||||
auto& flags = maxwell3d.dirty.flags;
|
auto& flags = maxwell3d.dirty.flags;
|
||||||
if (!flags[Dirty::Scissors]) {
|
if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
flags[Dirty::Scissors] = false;
|
flags[Dirty::Scissors] = false;
|
||||||
|
|
||||||
|
const bool force = flags[VideoCommon::Dirty::RescaleScissors];
|
||||||
|
flags[VideoCommon::Dirty::RescaleScissors] = false;
|
||||||
|
|
||||||
const auto& regs = maxwell3d.regs;
|
const auto& regs = maxwell3d.regs;
|
||||||
|
|
||||||
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
|
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||||
|
const u32 up_scale = is_rescaling ? resolution.up_scale : 1U;
|
||||||
|
const u32 down_shift = is_rescaling ? resolution.down_shift : 0U;
|
||||||
|
const auto scale_up = [up_scale, down_shift](u32 value) -> u32 {
|
||||||
|
if (value == 0) {
|
||||||
|
return 0U;
|
||||||
|
}
|
||||||
|
const u32 upset = value * up_scale;
|
||||||
|
u32 acumm{};
|
||||||
|
if ((up_scale >> down_shift) == 0) {
|
||||||
|
acumm = upset % 2;
|
||||||
|
}
|
||||||
|
const u32 converted_value = upset >> down_shift;
|
||||||
|
return std::max<u32>(converted_value + acumm, 1U);
|
||||||
|
};
|
||||||
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
|
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
|
||||||
if (!flags[Dirty::Scissor0 + index]) {
|
if (!force && !flags[Dirty::Scissor0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
flags[Dirty::Scissor0 + index] = false;
|
flags[Dirty::Scissor0 + index] = false;
|
||||||
@ -920,8 +961,8 @@ void RasterizerOpenGL::SyncScissorTest() {
|
|||||||
const auto& src = regs.scissor_test[index];
|
const auto& src = regs.scissor_test[index];
|
||||||
if (src.enable) {
|
if (src.enable) {
|
||||||
glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
|
glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
|
||||||
glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
|
glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y),
|
||||||
src.max_x - src.min_x, src.max_y - src.min_y);
|
scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y));
|
||||||
} else {
|
} else {
|
||||||
glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
|
glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
|
||||||
}
|
}
|
||||||
|
@ -166,7 +166,7 @@ void OGLFramebuffer::Create() {
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||||
glGenFramebuffers(1, &handle);
|
glCreateFramebuffers(1, &handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLFramebuffer::Release() {
|
void OGLFramebuffer::Release() {
|
||||||
|
@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
|||||||
// Normal path
|
// Normal path
|
||||||
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
|
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
|
||||||
|
|
||||||
for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
|
total_storage_buffers +=
|
||||||
total_storage_buffers += desc.count;
|
Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors);
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// VertexB path when VertexA is present.
|
// VertexB path when VertexA is present.
|
||||||
auto& program_va{programs[0]};
|
auto& program_va{programs[0]};
|
||||||
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||||
for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
|
total_storage_buffers +=
|
||||||
total_storage_buffers += desc.count;
|
Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);
|
||||||
}
|
|
||||||
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
|
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
|||||||
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
||||||
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||||
|
|
||||||
u32 num_storage_buffers{};
|
const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
|
||||||
for (const auto& desc : program.info.storage_buffers_descriptors) {
|
|
||||||
num_storage_buffers += desc.count;
|
|
||||||
}
|
|
||||||
Shader::RuntimeInfo info;
|
Shader::RuntimeInfo info;
|
||||||
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
|
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
|
||||||
|
|
||||||
|
@ -316,6 +316,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) {
|
||||||
|
const GLenum target = ImageTarget(info);
|
||||||
|
const GLsizei width = info.size.width;
|
||||||
|
const GLsizei height = info.size.height;
|
||||||
|
const GLsizei depth = info.size.depth;
|
||||||
|
const int max_host_mip_levels = std::bit_width(info.size.width);
|
||||||
|
const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
|
||||||
|
const GLsizei num_layers = info.resources.layers;
|
||||||
|
const GLsizei num_samples = info.num_samples;
|
||||||
|
|
||||||
|
GLuint handle = 0;
|
||||||
|
OGLTexture texture;
|
||||||
|
if (target != GL_TEXTURE_BUFFER) {
|
||||||
|
texture.Create(target);
|
||||||
|
handle = texture.handle;
|
||||||
|
}
|
||||||
|
switch (target) {
|
||||||
|
case GL_TEXTURE_1D_ARRAY:
|
||||||
|
glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
|
||||||
|
break;
|
||||||
|
case GL_TEXTURE_2D_ARRAY:
|
||||||
|
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
|
||||||
|
break;
|
||||||
|
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
|
||||||
|
// TODO: Where should 'fixedsamplelocations' come from?
|
||||||
|
const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
|
||||||
|
glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
|
||||||
|
height >> samples_y, num_layers, GL_FALSE);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case GL_TEXTURE_RECTANGLE:
|
||||||
|
glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
|
||||||
|
break;
|
||||||
|
case GL_TEXTURE_3D:
|
||||||
|
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
|
||||||
|
break;
|
||||||
|
case GL_TEXTURE_BUFFER:
|
||||||
|
UNREACHABLE();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return texture;
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
|
[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case PixelFormat::B5G6R5_UNORM:
|
case PixelFormat::B5G6R5_UNORM:
|
||||||
@ -359,7 +405,8 @@ ImageBufferMap::~ImageBufferMap() {
|
|||||||
|
|
||||||
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
|
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
|
||||||
StateTracker& state_tracker_)
|
StateTracker& state_tracker_)
|
||||||
: device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
|
: device{device_}, state_tracker{state_tracker_},
|
||||||
|
util_shaders(program_manager), resolution{Settings::values.resolution_info} {
|
||||||
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
|
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
|
||||||
for (size_t i = 0; i < TARGETS.size(); ++i) {
|
for (size_t i = 0; i < TARGETS.size(); ++i) {
|
||||||
const GLenum target = TARGETS[i];
|
const GLenum target = TARGETS[i];
|
||||||
@ -426,6 +473,17 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
|
|||||||
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
|
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
|
||||||
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
|
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
|
||||||
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
|
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
|
||||||
|
|
||||||
|
if (resolution.active) {
|
||||||
|
for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) {
|
||||||
|
rescale_draw_fbos[i].Create();
|
||||||
|
rescale_read_fbos[i].Create();
|
||||||
|
|
||||||
|
// Make sure the framebuffer is created without DSA
|
||||||
|
glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_draw_fbos[i].handle);
|
||||||
|
glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_read_fbos[i].handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCacheRuntime::~TextureCacheRuntime() = default;
|
TextureCacheRuntime::~TextureCacheRuntime() = default;
|
||||||
@ -605,13 +663,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
|
|||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
|
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||||
VAddr cpu_addr_)
|
VAddr cpu_addr_)
|
||||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
|
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
|
||||||
if (CanBeAccelerated(runtime, info)) {
|
if (CanBeAccelerated(*runtime, info)) {
|
||||||
flags |= ImageFlagBits::AcceleratedUpload;
|
flags |= ImageFlagBits::AcceleratedUpload;
|
||||||
}
|
}
|
||||||
if (IsConverted(runtime.device, info.format, info.type)) {
|
if (IsConverted(runtime->device, info.format, info.type)) {
|
||||||
flags |= ImageFlagBits::Converted;
|
flags |= ImageFlagBits::Converted;
|
||||||
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
|
gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
|
||||||
gl_format = GL_RGBA;
|
gl_format = GL_RGBA;
|
||||||
@ -622,58 +680,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
|
|||||||
gl_format = tuple.format;
|
gl_format = tuple.format;
|
||||||
gl_type = tuple.type;
|
gl_type = tuple.type;
|
||||||
}
|
}
|
||||||
const GLenum target = ImageTarget(info);
|
texture = MakeImage(info, gl_internal_format);
|
||||||
const GLsizei width = info.size.width;
|
current_texture = texture.handle;
|
||||||
const GLsizei height = info.size.height;
|
if (runtime->device.HasDebuggingToolAttached()) {
|
||||||
const GLsizei depth = info.size.depth;
|
|
||||||
const int max_host_mip_levels = std::bit_width(info.size.width);
|
|
||||||
const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
|
|
||||||
const GLsizei num_layers = info.resources.layers;
|
|
||||||
const GLsizei num_samples = info.num_samples;
|
|
||||||
|
|
||||||
GLuint handle = 0;
|
|
||||||
if (target != GL_TEXTURE_BUFFER) {
|
|
||||||
texture.Create(target);
|
|
||||||
handle = texture.handle;
|
|
||||||
}
|
|
||||||
switch (target) {
|
|
||||||
case GL_TEXTURE_1D_ARRAY:
|
|
||||||
glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
|
|
||||||
break;
|
|
||||||
case GL_TEXTURE_2D_ARRAY:
|
|
||||||
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
|
|
||||||
break;
|
|
||||||
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
|
|
||||||
// TODO: Where should 'fixedsamplelocations' come from?
|
|
||||||
const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
|
|
||||||
glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
|
|
||||||
height >> samples_y, num_layers, GL_FALSE);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case GL_TEXTURE_RECTANGLE:
|
|
||||||
glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
|
|
||||||
break;
|
|
||||||
case GL_TEXTURE_3D:
|
|
||||||
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
|
|
||||||
break;
|
|
||||||
case GL_TEXTURE_BUFFER:
|
|
||||||
UNREACHABLE();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (runtime.device.HasDebuggingToolAttached()) {
|
|
||||||
const std::string name = VideoCommon::Name(*this);
|
const std::string name = VideoCommon::Name(*this);
|
||||||
glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
|
glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE,
|
||||||
static_cast<GLsizei>(name.size()), name.data());
|
texture.handle, static_cast<GLsizei>(name.size()), name.data());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
|
||||||
|
|
||||||
Image::~Image() = default;
|
Image::~Image() = default;
|
||||||
|
|
||||||
void Image::UploadMemory(const ImageBufferMap& map,
|
void Image::UploadMemory(const ImageBufferMap& map,
|
||||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||||
|
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleDown(true);
|
||||||
|
}
|
||||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
|
||||||
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
|
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
|
||||||
|
|
||||||
@ -693,12 +718,18 @@ void Image::UploadMemory(const ImageBufferMap& map,
|
|||||||
}
|
}
|
||||||
CopyBufferToImage(copy, map.offset);
|
CopyBufferToImage(copy, map.offset);
|
||||||
}
|
}
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleUp();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Image::DownloadMemory(ImageBufferMap& map,
|
void Image::DownloadMemory(ImageBufferMap& map,
|
||||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||||
|
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleDown();
|
||||||
|
}
|
||||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
|
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
|
||||||
|
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
|
||||||
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
||||||
|
|
||||||
@ -716,6 +747,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
|
|||||||
}
|
}
|
||||||
CopyImageToBuffer(copy, map.offset);
|
CopyImageToBuffer(copy, map.offset);
|
||||||
}
|
}
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleUp(true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint Image::StorageHandle() noexcept {
|
GLuint Image::StorageHandle() noexcept {
|
||||||
@ -741,11 +775,11 @@ GLuint Image::StorageHandle() noexcept {
|
|||||||
return store_view.handle;
|
return store_view.handle;
|
||||||
}
|
}
|
||||||
store_view.Create();
|
store_view.Create();
|
||||||
glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
|
glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0,
|
||||||
info.resources.levels, 0, info.resources.layers);
|
info.resources.levels, 0, info.resources.layers);
|
||||||
return store_view.handle;
|
return store_view.handle;
|
||||||
default:
|
default:
|
||||||
return texture.handle;
|
return current_texture;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -849,6 +883,140 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Image::Scale(bool up_scale) {
|
||||||
|
const auto format_type = GetFormatType(info.format);
|
||||||
|
const GLenum attachment = [format_type] {
|
||||||
|
switch (format_type) {
|
||||||
|
case SurfaceType::ColorTexture:
|
||||||
|
return GL_COLOR_ATTACHMENT0;
|
||||||
|
case SurfaceType::Depth:
|
||||||
|
return GL_DEPTH_ATTACHMENT;
|
||||||
|
case SurfaceType::DepthStencil:
|
||||||
|
return GL_DEPTH_STENCIL_ATTACHMENT;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return GL_COLOR_ATTACHMENT0;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
const GLenum mask = [format_type] {
|
||||||
|
switch (format_type) {
|
||||||
|
case SurfaceType::ColorTexture:
|
||||||
|
return GL_COLOR_BUFFER_BIT;
|
||||||
|
case SurfaceType::Depth:
|
||||||
|
return GL_DEPTH_BUFFER_BIT;
|
||||||
|
case SurfaceType::DepthStencil:
|
||||||
|
return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return GL_COLOR_BUFFER_BIT;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
const size_t fbo_index = [format_type] {
|
||||||
|
switch (format_type) {
|
||||||
|
case SurfaceType::ColorTexture:
|
||||||
|
return 0;
|
||||||
|
case SurfaceType::Depth:
|
||||||
|
return 1;
|
||||||
|
case SurfaceType::DepthStencil:
|
||||||
|
return 2;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
const bool is_2d = info.type == ImageType::e2D;
|
||||||
|
const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0};
|
||||||
|
// Integer formats must use NEAREST filter
|
||||||
|
const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)};
|
||||||
|
const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST;
|
||||||
|
|
||||||
|
const auto& resolution = runtime->resolution;
|
||||||
|
const u32 scaled_width = resolution.ScaleUp(info.size.width);
|
||||||
|
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
|
||||||
|
const u32 original_width = info.size.width;
|
||||||
|
const u32 original_height = info.size.height;
|
||||||
|
|
||||||
|
if (!upscaled_backup.handle) {
|
||||||
|
auto dst_info = info;
|
||||||
|
dst_info.size.width = scaled_width;
|
||||||
|
dst_info.size.height = scaled_height;
|
||||||
|
upscaled_backup = MakeImage(dst_info, gl_internal_format);
|
||||||
|
}
|
||||||
|
const u32 src_width = up_scale ? original_width : scaled_width;
|
||||||
|
const u32 src_height = up_scale ? original_height : scaled_height;
|
||||||
|
const u32 dst_width = up_scale ? scaled_width : original_width;
|
||||||
|
const u32 dst_height = up_scale ? scaled_height : original_height;
|
||||||
|
const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle;
|
||||||
|
const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle;
|
||||||
|
|
||||||
|
// TODO (ameerj): Investigate other GL states that affect blitting.
|
||||||
|
glDisablei(GL_SCISSOR_TEST, 0);
|
||||||
|
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width),
|
||||||
|
static_cast<GLfloat>(dst_height));
|
||||||
|
|
||||||
|
const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle;
|
||||||
|
const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
|
||||||
|
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||||
|
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||||
|
const u32 src_level_width = std::max(1u, src_width >> level);
|
||||||
|
const u32 src_level_height = std::max(1u, src_height >> level);
|
||||||
|
const u32 dst_level_width = std::max(1u, dst_width >> level);
|
||||||
|
const u32 dst_level_height = std::max(1u, dst_height >> level);
|
||||||
|
|
||||||
|
glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer);
|
||||||
|
glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer);
|
||||||
|
|
||||||
|
glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
|
||||||
|
0, dst_level_width, dst_level_height, mask, filter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_texture = dst_handle;
|
||||||
|
auto& state_tracker = runtime->GetStateTracker();
|
||||||
|
state_tracker.NotifyViewport0();
|
||||||
|
state_tracker.NotifyScissor0();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Image::ScaleUp(bool ignore) {
|
||||||
|
if (True(flags & ImageFlagBits::Rescaled)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (gl_format == 0 && gl_type == 0) {
|
||||||
|
// compressed textures
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (info.type == ImageType::Linear) {
|
||||||
|
UNREACHABLE();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
flags |= ImageFlagBits::Rescaled;
|
||||||
|
if (!runtime->resolution.active) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
has_scaled = true;
|
||||||
|
if (ignore) {
|
||||||
|
current_texture = upscaled_backup.handle;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
Scale(true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Image::ScaleDown(bool ignore) {
|
||||||
|
if (False(flags & ImageFlagBits::Rescaled)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
flags &= ~ImageFlagBits::Rescaled;
|
||||||
|
if (!runtime->resolution.active) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (ignore) {
|
||||||
|
current_texture = texture.handle;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
Scale(false);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
|
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
|
||||||
ImageId image_id_, Image& image)
|
ImageId image_id_, Image& image)
|
||||||
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
|
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
|
||||||
@ -862,7 +1030,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
|||||||
flat_range = info.range;
|
flat_range = info.range;
|
||||||
set_object_label = device.HasDebuggingToolAttached();
|
set_object_label = device.HasDebuggingToolAttached();
|
||||||
is_render_target = info.IsRenderTarget();
|
is_render_target = info.IsRenderTarget();
|
||||||
original_texture = image.texture.handle;
|
original_texture = image.Handle();
|
||||||
num_samples = image.info.num_samples;
|
num_samples = image.info.num_samples;
|
||||||
if (!is_render_target) {
|
if (!is_render_target) {
|
||||||
swizzle[0] = info.x_source;
|
swizzle[0] = info.x_source;
|
||||||
@ -950,7 +1118,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
|||||||
const VideoCommon::ImageViewInfo& view_info)
|
const VideoCommon::ImageViewInfo& view_info)
|
||||||
: VideoCommon::ImageViewBase{info, view_info} {}
|
: VideoCommon::ImageViewBase{info, view_info} {}
|
||||||
|
|
||||||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
|
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
|
||||||
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
|
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
|
||||||
|
|
||||||
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
|
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
|
||||||
@ -1116,25 +1284,24 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||||||
void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image,
|
void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image,
|
||||||
std::span<const VideoCommon::ImageCopy> copies) {
|
std::span<const VideoCommon::ImageCopy> copies) {
|
||||||
static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
|
static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
|
||||||
const u32 requested_pbo_size =
|
const u32 img_bpp = BytesPerBlock(src_image.info.format);
|
||||||
std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes);
|
|
||||||
|
|
||||||
if (bgr_pbo_size < requested_pbo_size) {
|
|
||||||
bgr_pbo.Create();
|
|
||||||
bgr_pbo_size = requested_pbo_size;
|
|
||||||
glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
|
|
||||||
}
|
|
||||||
for (const ImageCopy& copy : copies) {
|
for (const ImageCopy& copy : copies) {
|
||||||
ASSERT(copy.src_offset == zero_offset);
|
ASSERT(copy.src_offset == zero_offset);
|
||||||
ASSERT(copy.dst_offset == zero_offset);
|
ASSERT(copy.dst_offset == zero_offset);
|
||||||
|
const u32 num_src_layers = static_cast<u32>(copy.src_subresource.num_layers);
|
||||||
|
const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp;
|
||||||
|
if (bgr_pbo_size < copy_size) {
|
||||||
|
bgr_pbo.Create();
|
||||||
|
bgr_pbo_size = copy_size;
|
||||||
|
glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
|
||||||
|
}
|
||||||
// Copy from source to PBO
|
// Copy from source to PBO
|
||||||
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
||||||
glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
|
glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle);
|
||||||
glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
|
glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
|
||||||
copy.src_subresource.num_layers, src_image.GlFormat(),
|
num_src_layers, src_image.GlFormat(), src_image.GlType(),
|
||||||
src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr);
|
static_cast<GLsizei>(bgr_pbo_size), nullptr);
|
||||||
|
|
||||||
// Copy from PBO to destination in desired GL format
|
// Copy from PBO to destination in desired GL format
|
||||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||||
|
@ -15,6 +15,10 @@
|
|||||||
#include "video_core/texture_cache/image_view_base.h"
|
#include "video_core/texture_cache/image_view_base.h"
|
||||||
#include "video_core/texture_cache/texture_cache_base.h"
|
#include "video_core/texture_cache/texture_cache_base.h"
|
||||||
|
|
||||||
|
namespace Settings {
|
||||||
|
struct ResolutionScalingInfo;
|
||||||
|
}
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
class Device;
|
class Device;
|
||||||
@ -80,7 +84,7 @@ public:
|
|||||||
|
|
||||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,6 +114,12 @@ public:
|
|||||||
|
|
||||||
bool HasNativeASTC() const noexcept;
|
bool HasNativeASTC() const noexcept;
|
||||||
|
|
||||||
|
void TickFrame() {}
|
||||||
|
|
||||||
|
StateTracker& GetStateTracker() {
|
||||||
|
return state_tracker;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct StagingBuffers {
|
struct StagingBuffers {
|
||||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||||
@ -149,6 +159,10 @@ private:
|
|||||||
OGLTextureView null_image_view_cube;
|
OGLTextureView null_image_view_cube;
|
||||||
|
|
||||||
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
|
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
|
||||||
|
|
||||||
|
std::array<OGLFramebuffer, 3> rescale_draw_fbos;
|
||||||
|
std::array<OGLFramebuffer, 3> rescale_read_fbos;
|
||||||
|
const Settings::ResolutionScalingInfo& resolution;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Image : public VideoCommon::ImageBase {
|
class Image : public VideoCommon::ImageBase {
|
||||||
@ -157,6 +171,7 @@ class Image : public VideoCommon::ImageBase {
|
|||||||
public:
|
public:
|
||||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
VAddr cpu_addr);
|
VAddr cpu_addr);
|
||||||
|
explicit Image(const VideoCommon::NullImageParams&);
|
||||||
|
|
||||||
~Image();
|
~Image();
|
||||||
|
|
||||||
@ -174,7 +189,7 @@ public:
|
|||||||
GLuint StorageHandle() noexcept;
|
GLuint StorageHandle() noexcept;
|
||||||
|
|
||||||
GLuint Handle() const noexcept {
|
GLuint Handle() const noexcept {
|
||||||
return texture.handle;
|
return current_texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint GlFormat() const noexcept {
|
GLuint GlFormat() const noexcept {
|
||||||
@ -185,16 +200,25 @@ public:
|
|||||||
return gl_type;
|
return gl_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ScaleUp(bool ignore = false);
|
||||||
|
|
||||||
|
bool ScaleDown(bool ignore = false);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||||
|
|
||||||
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||||
|
|
||||||
|
void Scale(bool up_scale);
|
||||||
|
|
||||||
OGLTexture texture;
|
OGLTexture texture;
|
||||||
|
OGLTexture upscaled_backup;
|
||||||
OGLTextureView store_view;
|
OGLTextureView store_view;
|
||||||
GLenum gl_internal_format = GL_NONE;
|
GLenum gl_internal_format = GL_NONE;
|
||||||
GLenum gl_format = GL_NONE;
|
GLenum gl_format = GL_NONE;
|
||||||
GLenum gl_type = GL_NONE;
|
GLenum gl_type = GL_NONE;
|
||||||
|
TextureCacheRuntime* runtime{};
|
||||||
|
GLuint current_texture{};
|
||||||
};
|
};
|
||||||
|
|
||||||
class ImageView : public VideoCommon::ImageViewBase {
|
class ImageView : public VideoCommon::ImageViewBase {
|
||||||
@ -206,7 +230,7 @@ public:
|
|||||||
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
||||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
||||||
const VideoCommon::ImageViewInfo& view_info);
|
const VideoCommon::ImageViewInfo& view_info);
|
||||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
|
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
|
||||||
|
|
||||||
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
|
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
|
||||||
Shader::ImageFormat image_format);
|
Shader::ImageFormat image_format);
|
||||||
|
@ -21,8 +21,13 @@
|
|||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "core/perf_stats.h"
|
#include "core/perf_stats.h"
|
||||||
#include "core/telemetry_session.h"
|
#include "core/telemetry_session.h"
|
||||||
|
#include "video_core/host_shaders/fxaa_frag.h"
|
||||||
|
#include "video_core/host_shaders/fxaa_vert.h"
|
||||||
#include "video_core/host_shaders/opengl_present_frag.h"
|
#include "video_core/host_shaders/opengl_present_frag.h"
|
||||||
|
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
|
||||||
#include "video_core/host_shaders/opengl_present_vert.h"
|
#include "video_core/host_shaders/opengl_present_vert.h"
|
||||||
|
#include "video_core/host_shaders/present_bicubic_frag.h"
|
||||||
|
#include "video_core/host_shaders/present_gaussian_frag.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||||
@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
|
|||||||
framebuffer_crop_rect = framebuffer.crop_rect;
|
framebuffer_crop_rect = framebuffer.crop_rect;
|
||||||
|
|
||||||
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
|
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
|
||||||
if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
|
screen_info.was_accelerated =
|
||||||
|
rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride);
|
||||||
|
if (screen_info.was_accelerated) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
|
|||||||
|
|
||||||
void RendererOpenGL::InitOpenGLObjects() {
|
void RendererOpenGL::InitOpenGLObjects() {
|
||||||
// Create shader programs
|
// Create shader programs
|
||||||
|
fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
|
||||||
|
fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
|
||||||
present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
|
present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
|
||||||
present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
|
present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
|
||||||
|
present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER);
|
||||||
|
present_gaussian_fragment =
|
||||||
|
CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
|
||||||
|
present_scaleforce_fragment =
|
||||||
|
CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
|
||||||
|
GL_FRAGMENT_SHADER);
|
||||||
|
|
||||||
// Generate presentation sampler
|
// Generate presentation sampler
|
||||||
present_sampler.Create();
|
present_sampler.Create();
|
||||||
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||||
|
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||||
|
|
||||||
|
present_sampler_nn.Create();
|
||||||
|
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||||
|
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||||
|
|
||||||
// Generate VBO handle for drawing
|
// Generate VBO handle for drawing
|
||||||
vertex_buffer.Create();
|
vertex_buffer.Create();
|
||||||
@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||||||
|
|
||||||
// Clear screen to black
|
// Clear screen to black
|
||||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||||
|
|
||||||
|
fxaa_framebuffer.Create();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererOpenGL::AddTelemetryFields() {
|
void RendererOpenGL::AddTelemetryFields() {
|
||||||
@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
|||||||
texture.resource.Release();
|
texture.resource.Release();
|
||||||
texture.resource.Create(GL_TEXTURE_2D);
|
texture.resource.Create(GL_TEXTURE_2D);
|
||||||
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
|
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
|
||||||
|
fxaa_texture.Release();
|
||||||
|
fxaa_texture.Create(GL_TEXTURE_2D);
|
||||||
|
glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F,
|
||||||
|
Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
|
||||||
|
Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
|
||||||
|
glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle,
|
||||||
|
0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||||
|
// TODO: Signal state tracker about these changes
|
||||||
|
state_tracker.NotifyScreenDrawVertexArray();
|
||||||
|
state_tracker.NotifyPolygonModes();
|
||||||
|
state_tracker.NotifyViewport0();
|
||||||
|
state_tracker.NotifyScissor0();
|
||||||
|
state_tracker.NotifyColorMask(0);
|
||||||
|
state_tracker.NotifyBlend0();
|
||||||
|
state_tracker.NotifyFramebuffer();
|
||||||
|
state_tracker.NotifyFrontFace();
|
||||||
|
state_tracker.NotifyCullTest();
|
||||||
|
state_tracker.NotifyDepthTest();
|
||||||
|
state_tracker.NotifyStencilTest();
|
||||||
|
state_tracker.NotifyPolygonOffset();
|
||||||
|
state_tracker.NotifyRasterizeEnable();
|
||||||
|
state_tracker.NotifyFramebufferSRGB();
|
||||||
|
state_tracker.NotifyLogicOp();
|
||||||
|
state_tracker.NotifyClipControl();
|
||||||
|
state_tracker.NotifyAlphaTest();
|
||||||
|
|
||||||
|
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||||
|
|
||||||
// Update background color before drawing
|
// Update background color before drawing
|
||||||
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
|
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
|
||||||
Settings::values.bg_green.GetValue() / 255.0f,
|
Settings::values.bg_green.GetValue() / 255.0f,
|
||||||
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
|
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
|
||||||
|
|
||||||
|
glEnable(GL_CULL_FACE);
|
||||||
|
glDisable(GL_COLOR_LOGIC_OP);
|
||||||
|
glDisable(GL_DEPTH_TEST);
|
||||||
|
glDisable(GL_STENCIL_TEST);
|
||||||
|
glDisable(GL_POLYGON_OFFSET_FILL);
|
||||||
|
glDisable(GL_RASTERIZER_DISCARD);
|
||||||
|
glDisable(GL_ALPHA_TEST);
|
||||||
|
glDisablei(GL_BLEND, 0);
|
||||||
|
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||||
|
glCullFace(GL_BACK);
|
||||||
|
glFrontFace(GL_CW);
|
||||||
|
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||||
|
|
||||||
|
glBindTextureUnit(0, screen_info.display_texture);
|
||||||
|
|
||||||
|
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) {
|
||||||
|
program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle);
|
||||||
|
|
||||||
|
glEnablei(GL_SCISSOR_TEST, 0);
|
||||||
|
auto viewport_width = screen_info.texture.width;
|
||||||
|
auto scissor_width = framebuffer_crop_rect.GetWidth();
|
||||||
|
if (scissor_width <= 0) {
|
||||||
|
scissor_width = viewport_width;
|
||||||
|
}
|
||||||
|
auto viewport_height = screen_info.texture.height;
|
||||||
|
auto scissor_height = framebuffer_crop_rect.GetHeight();
|
||||||
|
if (scissor_height <= 0) {
|
||||||
|
scissor_height = viewport_height;
|
||||||
|
}
|
||||||
|
if (screen_info.was_accelerated) {
|
||||||
|
viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width);
|
||||||
|
scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width);
|
||||||
|
viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height);
|
||||||
|
scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height);
|
||||||
|
}
|
||||||
|
glScissorIndexed(0, 0, 0, scissor_width, scissor_height);
|
||||||
|
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width),
|
||||||
|
static_cast<GLfloat>(viewport_height));
|
||||||
|
glDepthRangeIndexed(0, 0.0, 0.0);
|
||||||
|
|
||||||
|
glBindSampler(0, present_sampler.handle);
|
||||||
|
GLint old_read_fb;
|
||||||
|
GLint old_draw_fb;
|
||||||
|
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
|
||||||
|
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
|
||||||
|
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle);
|
||||||
|
|
||||||
|
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||||
|
|
||||||
|
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||||
|
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||||
|
|
||||||
|
glBindTextureUnit(0, fxaa_texture.handle);
|
||||||
|
}
|
||||||
|
|
||||||
// Set projection matrix
|
// Set projection matrix
|
||||||
const std::array ortho_matrix =
|
const std::array ortho_matrix =
|
||||||
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
|
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
|
||||||
program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
|
|
||||||
|
GLuint fragment_handle;
|
||||||
|
const auto filter = Settings::values.scaling_filter.GetValue();
|
||||||
|
switch (filter) {
|
||||||
|
case Settings::ScalingFilter::NearestNeighbor:
|
||||||
|
fragment_handle = present_bilinear_fragment.handle;
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Bilinear:
|
||||||
|
fragment_handle = present_bilinear_fragment.handle;
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Bicubic:
|
||||||
|
fragment_handle = present_bicubic_fragment.handle;
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Gaussian:
|
||||||
|
fragment_handle = present_gaussian_fragment.handle;
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::ScaleForce:
|
||||||
|
fragment_handle = present_scaleforce_fragment.handle;
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Fsr:
|
||||||
|
LOG_WARNING(
|
||||||
|
Render_OpenGL,
|
||||||
|
"FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce");
|
||||||
|
fragment_handle = present_scaleforce_fragment.handle;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fragment_handle = present_bilinear_fragment.handle;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle);
|
||||||
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
|
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
|
||||||
ortho_matrix.data());
|
ortho_matrix.data());
|
||||||
|
|
||||||
@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||||||
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
|
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
|
||||||
static_cast<f32>(screen_info.texture.height);
|
static_cast<f32>(screen_info.texture.height);
|
||||||
}
|
}
|
||||||
|
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
|
||||||
|
!screen_info.was_accelerated) {
|
||||||
|
scale_u /= Settings::values.resolution_info.up_factor;
|
||||||
|
scale_v /= Settings::values.resolution_info.up_factor;
|
||||||
|
}
|
||||||
|
|
||||||
const auto& screen = layout.screen;
|
const auto& screen = layout.screen;
|
||||||
const std::array vertices = {
|
const std::array vertices = {
|
||||||
@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||||||
};
|
};
|
||||||
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
|
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
|
||||||
|
|
||||||
// TODO: Signal state tracker about these changes
|
|
||||||
state_tracker.NotifyScreenDrawVertexArray();
|
|
||||||
state_tracker.NotifyPolygonModes();
|
|
||||||
state_tracker.NotifyViewport0();
|
|
||||||
state_tracker.NotifyScissor0();
|
|
||||||
state_tracker.NotifyColorMask(0);
|
|
||||||
state_tracker.NotifyBlend0();
|
|
||||||
state_tracker.NotifyFramebuffer();
|
|
||||||
state_tracker.NotifyFrontFace();
|
|
||||||
state_tracker.NotifyCullTest();
|
|
||||||
state_tracker.NotifyDepthTest();
|
|
||||||
state_tracker.NotifyStencilTest();
|
|
||||||
state_tracker.NotifyPolygonOffset();
|
|
||||||
state_tracker.NotifyRasterizeEnable();
|
|
||||||
state_tracker.NotifyFramebufferSRGB();
|
|
||||||
state_tracker.NotifyLogicOp();
|
|
||||||
state_tracker.NotifyClipControl();
|
|
||||||
state_tracker.NotifyAlphaTest();
|
|
||||||
|
|
||||||
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
|
||||||
glEnable(GL_CULL_FACE);
|
|
||||||
if (screen_info.display_srgb) {
|
if (screen_info.display_srgb) {
|
||||||
glEnable(GL_FRAMEBUFFER_SRGB);
|
glEnable(GL_FRAMEBUFFER_SRGB);
|
||||||
} else {
|
} else {
|
||||||
glDisable(GL_FRAMEBUFFER_SRGB);
|
glDisable(GL_FRAMEBUFFER_SRGB);
|
||||||
}
|
}
|
||||||
glDisable(GL_COLOR_LOGIC_OP);
|
|
||||||
glDisable(GL_DEPTH_TEST);
|
|
||||||
glDisable(GL_STENCIL_TEST);
|
|
||||||
glDisable(GL_POLYGON_OFFSET_FILL);
|
|
||||||
glDisable(GL_RASTERIZER_DISCARD);
|
|
||||||
glDisable(GL_ALPHA_TEST);
|
|
||||||
glDisablei(GL_BLEND, 0);
|
|
||||||
glDisablei(GL_SCISSOR_TEST, 0);
|
glDisablei(GL_SCISSOR_TEST, 0);
|
||||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
|
||||||
glCullFace(GL_BACK);
|
|
||||||
glFrontFace(GL_CW);
|
|
||||||
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
|
||||||
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
|
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
|
||||||
static_cast<GLfloat>(layout.height));
|
static_cast<GLfloat>(layout.height));
|
||||||
glDepthRangeIndexed(0, 0.0, 0.0);
|
|
||||||
|
|
||||||
glEnableVertexAttribArray(PositionLocation);
|
glEnableVertexAttribArray(PositionLocation);
|
||||||
glEnableVertexAttribArray(TexCoordLocation);
|
glEnableVertexAttribArray(TexCoordLocation);
|
||||||
@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||||||
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
||||||
}
|
}
|
||||||
|
|
||||||
glBindTextureUnit(0, screen_info.display_texture);
|
if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
|
||||||
glBindSampler(0, present_sampler.handle);
|
glBindSampler(0, present_sampler.handle);
|
||||||
|
} else {
|
||||||
|
glBindSampler(0, present_sampler_nn.handle);
|
||||||
|
}
|
||||||
|
|
||||||
glClear(GL_COLOR_BUFFER_BIT);
|
glClear(GL_COLOR_BUFFER_BIT);
|
||||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||||
|
@ -50,6 +50,7 @@ struct TextureInfo {
|
|||||||
/// Structure used for storing information about the display target for the Switch screen
|
/// Structure used for storing information about the display target for the Switch screen
|
||||||
struct ScreenInfo {
|
struct ScreenInfo {
|
||||||
GLuint display_texture{};
|
GLuint display_texture{};
|
||||||
|
bool was_accelerated = false;
|
||||||
bool display_srgb{};
|
bool display_srgb{};
|
||||||
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
|
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
|
||||||
TextureInfo texture;
|
TextureInfo texture;
|
||||||
@ -109,9 +110,15 @@ private:
|
|||||||
|
|
||||||
// OpenGL object IDs
|
// OpenGL object IDs
|
||||||
OGLSampler present_sampler;
|
OGLSampler present_sampler;
|
||||||
|
OGLSampler present_sampler_nn;
|
||||||
OGLBuffer vertex_buffer;
|
OGLBuffer vertex_buffer;
|
||||||
|
OGLProgram fxaa_vertex;
|
||||||
|
OGLProgram fxaa_fragment;
|
||||||
OGLProgram present_vertex;
|
OGLProgram present_vertex;
|
||||||
OGLProgram present_fragment;
|
OGLProgram present_bilinear_fragment;
|
||||||
|
OGLProgram present_bicubic_fragment;
|
||||||
|
OGLProgram present_gaussian_fragment;
|
||||||
|
OGLProgram present_scaleforce_fragment;
|
||||||
OGLFramebuffer screenshot_framebuffer;
|
OGLFramebuffer screenshot_framebuffer;
|
||||||
|
|
||||||
// GPU address of the vertex buffer
|
// GPU address of the vertex buffer
|
||||||
@ -119,6 +126,8 @@ private:
|
|||||||
|
|
||||||
/// Display information for Switch screen
|
/// Display information for Switch screen
|
||||||
ScreenInfo screen_info;
|
ScreenInfo screen_info;
|
||||||
|
OGLTexture fxaa_texture;
|
||||||
|
OGLFramebuffer fxaa_framebuffer;
|
||||||
|
|
||||||
/// OpenGL framebuffer data
|
/// OpenGL framebuffer data
|
||||||
std::vector<u8> gl_framebuffer_data;
|
std::vector<u8> gl_framebuffer_data;
|
||||||
|
@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
|
|||||||
|
|
||||||
BlitImageHelper::~BlitImageHelper() = default;
|
BlitImageHelper::~BlitImageHelper() = default;
|
||||||
|
|
||||||
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
|
||||||
const Region2D& dst_region, const Region2D& src_region,
|
const Region2D& dst_region, const Region2D& src_region,
|
||||||
Tegra::Engines::Fermi2D::Filter filter,
|
Tegra::Engines::Fermi2D::Filter filter,
|
||||||
Tegra::Engines::Fermi2D::Operation operation) {
|
Tegra::Engines::Fermi2D::Operation operation) {
|
||||||
@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
|
|||||||
.operation = operation,
|
.operation = operation,
|
||||||
};
|
};
|
||||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||||
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
|
|
||||||
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
|
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
|
||||||
const VkPipeline pipeline = FindOrEmplacePipeline(key);
|
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
|
||||||
scheduler.RequestRenderpass(dst_framebuffer);
|
scheduler.RequestRenderpass(dst_framebuffer);
|
||||||
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
|
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
|
||||||
src_view](vk::CommandBuffer cmdbuf) {
|
src_view](vk::CommandBuffer cmdbuf) {
|
||||||
@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
|
|||||||
Tegra::Engines::Fermi2D::Operation operation) {
|
Tegra::Engines::Fermi2D::Operation operation) {
|
||||||
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
|
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
|
||||||
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
|
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
|
||||||
|
const BlitImagePipelineKey key{
|
||||||
|
.renderpass = dst_framebuffer->RenderPass(),
|
||||||
|
.operation = operation,
|
||||||
|
};
|
||||||
const VkPipelineLayout layout = *two_textures_pipeline_layout;
|
const VkPipelineLayout layout = *two_textures_pipeline_layout;
|
||||||
const VkSampler sampler = *nearest_sampler;
|
const VkSampler sampler = *nearest_sampler;
|
||||||
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
|
const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
|
||||||
scheduler.RequestRenderpass(dst_framebuffer);
|
scheduler.RequestRenderpass(dst_framebuffer);
|
||||||
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
|
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
|
||||||
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
|
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
|
||||||
@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
|
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
|
||||||
const ImageView& src_image_view) {
|
const ImageView& src_image_view, u32 up_scale,
|
||||||
|
u32 down_shift) {
|
||||||
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
|
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
|
||||||
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
|
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
|
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
|
||||||
const ImageView& src_image_view) {
|
const ImageView& src_image_view, u32 up_scale,
|
||||||
|
u32 down_shift) {
|
||||||
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
|
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
|
||||||
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
|
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
|
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
|
||||||
const ImageView& src_image_view) {
|
const ImageView& src_image_view, u32 up_scale,
|
||||||
|
u32 down_shift) {
|
||||||
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
|
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
|
||||||
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
|
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
|
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
|
||||||
const ImageView& src_image_view) {
|
const ImageView& src_image_view, u32 up_scale,
|
||||||
|
u32 down_shift) {
|
||||||
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
|
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
|
||||||
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
|
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||||
const ImageView& src_image_view) {
|
const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
|
||||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||||
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
|
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
|
||||||
const VkSampler sampler = *nearest_sampler;
|
const VkSampler sampler = *nearest_sampler;
|
||||||
const VkExtent2D extent{
|
const VkExtent2D extent{
|
||||||
.width = src_image_view.size.width,
|
.width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
|
||||||
.height = src_image_view.size.height,
|
.height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
|
||||||
};
|
};
|
||||||
scheduler.RequestRenderpass(dst_framebuffer);
|
scheduler.RequestRenderpass(dst_framebuffer);
|
||||||
scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
|
||||||
|
this](vk::CommandBuffer cmdbuf) {
|
||||||
const VkOffset2D offset{
|
const VkOffset2D offset{
|
||||||
.x = 0,
|
.x = 0,
|
||||||
.y = 0,
|
.y = 0,
|
||||||
@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
|
|||||||
scheduler.InvalidateState();
|
scheduler.InvalidateState();
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
|
VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
|
||||||
const auto it = std::ranges::find(blit_color_keys, key);
|
const auto it = std::ranges::find(blit_color_keys, key);
|
||||||
if (it != blit_color_keys.end()) {
|
if (it != blit_color_keys.end()) {
|
||||||
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
|
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
|
||||||
@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke
|
|||||||
return *blit_color_pipelines.back();
|
return *blit_color_pipelines.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
|
VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) {
|
||||||
if (blit_depth_stencil_pipeline) {
|
const auto it = std::ranges::find(blit_depth_stencil_keys, key);
|
||||||
return *blit_depth_stencil_pipeline;
|
if (it != blit_depth_stencil_keys.end()) {
|
||||||
|
return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)];
|
||||||
}
|
}
|
||||||
|
blit_depth_stencil_keys.push_back(key);
|
||||||
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
|
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
|
||||||
blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
|
blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
|
||||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
|
|||||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||||
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||||
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
|
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
|
||||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||||
.layout = *two_textures_pipeline_layout,
|
.layout = *two_textures_pipeline_layout,
|
||||||
.renderPass = renderpass,
|
.renderPass = key.renderpass,
|
||||||
.subpass = 0,
|
.subpass = 0,
|
||||||
.basePipelineHandle = VK_NULL_HANDLE,
|
.basePipelineHandle = VK_NULL_HANDLE,
|
||||||
.basePipelineIndex = 0,
|
.basePipelineIndex = 0,
|
||||||
});
|
}));
|
||||||
return *blit_depth_stencil_pipeline;
|
return *blit_depth_stencil_pipelines.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
||||||
|
@ -34,7 +34,7 @@ public:
|
|||||||
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
|
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
|
||||||
~BlitImageHelper();
|
~BlitImageHelper();
|
||||||
|
|
||||||
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
|
||||||
const Region2D& dst_region, const Region2D& src_region,
|
const Region2D& dst_region, const Region2D& src_region,
|
||||||
Tegra::Engines::Fermi2D::Filter filter,
|
Tegra::Engines::Fermi2D::Filter filter,
|
||||||
Tegra::Engines::Fermi2D::Operation operation);
|
Tegra::Engines::Fermi2D::Operation operation);
|
||||||
@ -44,21 +44,25 @@ public:
|
|||||||
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
|
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
|
||||||
Tegra::Engines::Fermi2D::Operation operation);
|
Tegra::Engines::Fermi2D::Operation operation);
|
||||||
|
|
||||||
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||||
|
u32 up_scale, u32 down_shift);
|
||||||
|
|
||||||
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||||
|
u32 up_scale, u32 down_shift);
|
||||||
|
|
||||||
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||||
|
u32 up_scale, u32 down_shift);
|
||||||
|
|
||||||
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||||
|
u32 up_scale, u32 down_shift);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||||
const ImageView& src_image_view);
|
const ImageView& src_image_view, u32 up_scale, u32 down_shift);
|
||||||
|
|
||||||
[[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
|
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
|
||||||
|
|
||||||
[[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
|
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
|
||||||
|
|
||||||
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||||
|
|
||||||
@ -84,7 +88,8 @@ private:
|
|||||||
|
|
||||||
std::vector<BlitImagePipelineKey> blit_color_keys;
|
std::vector<BlitImagePipelineKey> blit_color_keys;
|
||||||
std::vector<vk::Pipeline> blit_color_pipelines;
|
std::vector<vk::Pipeline> blit_color_pipelines;
|
||||||
vk::Pipeline blit_depth_stencil_pipeline;
|
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
|
||||||
|
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
|
||||||
vk::Pipeline convert_d32_to_r32_pipeline;
|
vk::Pipeline convert_d32_to_r32_pipeline;
|
||||||
vk::Pipeline convert_r32_to_d32_pipeline;
|
vk::Pipeline convert_r32_to_d32_pipeline;
|
||||||
vk::Pipeline convert_d16_to_r16_pipeline;
|
vk::Pipeline convert_d16_to_r16_pipeline;
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
#include "shader_recompiler/shader_info.h"
|
#include "shader_recompiler/shader_info.h"
|
||||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||||
@ -20,6 +21,8 @@
|
|||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
|
||||||
|
|
||||||
class DescriptorLayoutBuilder {
|
class DescriptorLayoutBuilder {
|
||||||
public:
|
public:
|
||||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||||
@ -68,18 +71,28 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
|
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
|
||||||
|
using Shader::Backend::SPIRV::RescalingLayout;
|
||||||
|
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
|
||||||
|
const VkPushConstantRange range{
|
||||||
|
.stageFlags = static_cast<VkShaderStageFlags>(
|
||||||
|
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
|
||||||
|
.offset = 0,
|
||||||
|
.size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
|
||||||
|
};
|
||||||
return device->GetLogical().CreatePipelineLayout({
|
return device->GetLogical().CreatePipelineLayout({
|
||||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
|
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
|
||||||
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
|
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
|
||||||
.pushConstantRangeCount = 0,
|
.pushConstantRangeCount = 1,
|
||||||
.pPushConstantRanges = nullptr,
|
.pPushConstantRanges = &range,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
|
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
|
||||||
|
is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
|
||||||
|
|
||||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
|
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
|
||||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
|
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
|
||||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
|
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
|
||||||
@ -115,6 +128,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
const Device* device{};
|
const Device* device{};
|
||||||
|
bool is_compute{};
|
||||||
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
|
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
|
||||||
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
|
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
@ -122,31 +136,68 @@ private:
|
|||||||
size_t offset{};
|
size_t offset{};
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
|
class RescalingPushConstant {
|
||||||
const ImageId*& image_view_ids, TextureCache& texture_cache,
|
public:
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue) {
|
explicit RescalingPushConstant() noexcept {}
|
||||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
|
||||||
image_view_ids += desc.count;
|
void PushTexture(bool is_rescaled) noexcept {
|
||||||
|
*texture_ptr |= is_rescaled ? texture_bit : 0u;
|
||||||
|
texture_bit <<= 1u;
|
||||||
|
if (texture_bit == 0u) {
|
||||||
|
texture_bit = 1u;
|
||||||
|
++texture_ptr;
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.image_buffer_descriptors) {
|
|
||||||
image_view_ids += desc.count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PushImage(bool is_rescaled) noexcept {
|
||||||
|
*image_ptr |= is_rescaled ? image_bit : 0u;
|
||||||
|
image_bit <<= 1u;
|
||||||
|
if (image_bit == 0u) {
|
||||||
|
image_bit = 1u;
|
||||||
|
++image_ptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
|
||||||
|
return words;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
|
||||||
|
u32* texture_ptr{words.data()};
|
||||||
|
u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
|
||||||
|
u32 texture_bit{1u};
|
||||||
|
u32 image_bit{1u};
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void PushImageDescriptors(TextureCache& texture_cache,
|
||||||
|
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||||
|
const Shader::Info& info, RescalingPushConstant& rescaling,
|
||||||
|
const VkSampler*& samplers,
|
||||||
|
const VideoCommon::ImageViewInOut*& views) {
|
||||||
|
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||||
|
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||||
|
views += num_texture_buffers;
|
||||||
|
views += num_image_buffers;
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
|
const VideoCommon::ImageViewId image_view_id{(views++)->id};
|
||||||
const VkSampler sampler{*(samplers++)};
|
const VkSampler sampler{*(samplers++)};
|
||||||
ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
|
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
|
||||||
const VkImageView vk_image_view{image_view.Handle(desc.type)};
|
const VkImageView vk_image_view{image_view.Handle(desc.type)};
|
||||||
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
|
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
|
||||||
|
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.image_descriptors) {
|
for (const auto& desc : info.image_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
|
ImageView& image_view{texture_cache.GetImageView((views++)->id)};
|
||||||
if (desc.is_written) {
|
if (desc.is_written) {
|
||||||
texture_cache.MarkModification(image_view.image_id);
|
texture_cache.MarkModification(image_view.image_id);
|
||||||
}
|
}
|
||||||
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
|
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
|
||||||
update_descriptor_queue.AddImage(vk_image_view);
|
update_descriptor_queue.AddImage(vk_image_view);
|
||||||
|
rescaling.PushImage(texture_cache.IsRescaling(image_view));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,14 +12,22 @@
|
|||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/math_util.h"
|
#include "common/math_util.h"
|
||||||
|
#include "common/settings.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/frontend/emu_window.h"
|
#include "core/frontend/emu_window.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host_shaders/fxaa_frag_spv.h"
|
||||||
|
#include "video_core/host_shaders/fxaa_vert_spv.h"
|
||||||
|
#include "video_core/host_shaders/present_bicubic_frag_spv.h"
|
||||||
|
#include "video_core/host_shaders/present_gaussian_frag_spv.h"
|
||||||
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
|
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
|
||||||
|
#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
|
||||||
|
#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
|
||||||
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
|
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
|
||||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_fsr.h"
|
||||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||||
@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
|||||||
scheduler.Wait(resource_ticks[image_index]);
|
scheduler.Wait(resource_ticks[image_index]);
|
||||||
resource_ticks[image_index] = scheduler.CurrentTick();
|
resource_ticks[image_index] = scheduler.CurrentTick();
|
||||||
|
|
||||||
UpdateDescriptorSet(image_index,
|
VkImageView source_image_view =
|
||||||
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
|
use_accelerated ? screen_info.image_view : *raw_image_views[image_index];
|
||||||
|
|
||||||
BufferData data;
|
BufferData data;
|
||||||
SetUniformData(data, layout);
|
SetUniformData(data, layout);
|
||||||
@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
|||||||
read_barrier);
|
read_barrier);
|
||||||
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
|
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
0, write_barrier);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
|
||||||
|
if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) {
|
||||||
|
UpdateAADescriptorSet(image_index, source_image_view, false);
|
||||||
|
const u32 up_scale = Settings::values.resolution_info.up_scale;
|
||||||
|
const u32 down_shift = Settings::values.resolution_info.down_shift;
|
||||||
|
VkExtent2D size{
|
||||||
|
.width = (up_scale * framebuffer.width) >> down_shift,
|
||||||
|
.height = (up_scale * framebuffer.height) >> down_shift,
|
||||||
|
};
|
||||||
|
scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
|
||||||
|
const VkImageMemoryBarrier base_barrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = 0,
|
||||||
|
.dstAccessMask = 0,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = {},
|
||||||
|
.subresourceRange =
|
||||||
|
{
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = 1,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = 1,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
VkImageMemoryBarrier fsr_write_barrier = base_barrier;
|
||||||
|
fsr_write_barrier.image = *aa_image;
|
||||||
|
fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||||
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier);
|
||||||
|
}
|
||||||
|
|
||||||
|
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
||||||
|
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
|
||||||
|
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
|
||||||
|
const VkClearValue clear_color{
|
||||||
|
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
|
||||||
|
};
|
||||||
|
const VkRenderPassBeginInfo renderpass_bi{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.renderPass = *aa_renderpass,
|
||||||
|
.framebuffer = *aa_framebuffer,
|
||||||
|
.renderArea =
|
||||||
|
{
|
||||||
|
.offset = {0, 0},
|
||||||
|
.extent = size,
|
||||||
|
},
|
||||||
|
.clearValueCount = 1,
|
||||||
|
.pClearValues = &clear_color,
|
||||||
|
};
|
||||||
|
const VkViewport viewport{
|
||||||
|
.x = 0.0f,
|
||||||
|
.y = 0.0f,
|
||||||
|
.width = static_cast<float>(size.width),
|
||||||
|
.height = static_cast<float>(size.height),
|
||||||
|
.minDepth = 0.0f,
|
||||||
|
.maxDepth = 1.0f,
|
||||||
|
};
|
||||||
|
const VkRect2D scissor{
|
||||||
|
.offset = {0, 0},
|
||||||
|
.extent = size,
|
||||||
|
};
|
||||||
|
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||||
|
switch (anti_alias_pass) {
|
||||||
|
case Settings::AntiAliasing::Fxaa:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cmdbuf.SetViewport(0, viewport);
|
||||||
|
cmdbuf.SetScissor(0, scissor);
|
||||||
|
|
||||||
|
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
|
||||||
|
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
|
||||||
|
aa_descriptor_sets[image_index], {});
|
||||||
|
cmdbuf.Draw(4, 1, 0, 0);
|
||||||
|
cmdbuf.EndRenderPass();
|
||||||
|
|
||||||
|
{
|
||||||
|
VkImageMemoryBarrier blit_read_barrier = base_barrier;
|
||||||
|
blit_read_barrier.image = *aa_image;
|
||||||
|
blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||||
|
blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||||
|
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||||
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
source_image_view = *aa_image_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fsr) {
|
||||||
|
auto crop_rect = framebuffer.crop_rect;
|
||||||
|
if (crop_rect.GetWidth() == 0) {
|
||||||
|
crop_rect.right = framebuffer.width;
|
||||||
|
}
|
||||||
|
if (crop_rect.GetHeight() == 0) {
|
||||||
|
crop_rect.bottom = framebuffer.height;
|
||||||
|
}
|
||||||
|
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
|
||||||
|
VkExtent2D fsr_input_size{
|
||||||
|
.width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
|
||||||
|
.height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
|
||||||
|
};
|
||||||
|
VkImageView fsr_image_view =
|
||||||
|
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
|
||||||
|
UpdateDescriptorSet(image_index, fsr_image_view, true);
|
||||||
|
} else {
|
||||||
|
const bool is_nn =
|
||||||
|
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
|
||||||
|
UpdateDescriptorSet(image_index, source_image_view, is_nn);
|
||||||
|
}
|
||||||
|
|
||||||
scheduler.Record(
|
scheduler.Record(
|
||||||
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
|
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
|
||||||
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
||||||
@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
|||||||
.offset = {0, 0},
|
.offset = {0, 0},
|
||||||
.extent = size,
|
.extent = size,
|
||||||
};
|
};
|
||||||
|
const auto filter = Settings::values.scaling_filter.GetValue();
|
||||||
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
switch (filter) {
|
||||||
|
case Settings::ScalingFilter::NearestNeighbor:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Bilinear:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Bicubic:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline);
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::Gaussian:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline);
|
||||||
|
break;
|
||||||
|
case Settings::ScalingFilter::ScaleForce:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
|
||||||
|
break;
|
||||||
|
}
|
||||||
cmdbuf.SetViewport(0, viewport);
|
cmdbuf.SetViewport(0, viewport);
|
||||||
cmdbuf.SetScissor(0, scissor);
|
cmdbuf.SetScissor(0, scissor);
|
||||||
|
|
||||||
@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb
|
|||||||
}
|
}
|
||||||
|
|
||||||
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
|
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
|
||||||
|
return CreateFramebuffer(image_view, extent, renderpass);
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
|
||||||
|
vk::RenderPass& rd) {
|
||||||
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
|
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
|
||||||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.renderPass = *renderpass,
|
.renderPass = *rd,
|
||||||
.attachmentCount = 1,
|
.attachmentCount = 1,
|
||||||
.pAttachments = &image_view,
|
.pAttachments = &image_view,
|
||||||
.width = extent.width,
|
.width = extent.width,
|
||||||
@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() {
|
|||||||
CreateRenderPass();
|
CreateRenderPass();
|
||||||
CreateFramebuffers();
|
CreateFramebuffers();
|
||||||
CreateGraphicsPipeline();
|
CreateGraphicsPipeline();
|
||||||
|
fsr.reset();
|
||||||
|
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
|
||||||
|
CreateFSR();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
|
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
|
||||||
|
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
|
||||||
|
if (!fsr) {
|
||||||
|
CreateFSR();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fsr.reset();
|
||||||
|
}
|
||||||
|
|
||||||
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
|
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
|
|||||||
|
|
||||||
void VKBlitScreen::CreateShaders() {
|
void VKBlitScreen::CreateShaders() {
|
||||||
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
|
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
|
||||||
fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
|
fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV);
|
||||||
|
fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV);
|
||||||
|
bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
|
||||||
|
bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
|
||||||
|
gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
|
||||||
|
if (device.IsFloat16Supported()) {
|
||||||
|
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
|
||||||
|
} else {
|
||||||
|
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateSemaphores() {
|
void VKBlitScreen::CreateSemaphores() {
|
||||||
@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() {
|
|||||||
},
|
},
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{
|
||||||
|
{
|
||||||
|
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.descriptorCount = static_cast<u32>(image_count * 2),
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
const VkDescriptorPoolCreateInfo ci{
|
const VkDescriptorPoolCreateInfo ci{
|
||||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() {
|
|||||||
.pPoolSizes = pool_sizes.data(),
|
.pPoolSizes = pool_sizes.data(),
|
||||||
};
|
};
|
||||||
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
|
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
|
||||||
|
|
||||||
|
const VkDescriptorPoolCreateInfo ci_aa{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||||
|
.maxSets = static_cast<u32>(image_count),
|
||||||
|
.poolSizeCount = static_cast<u32>(pool_sizes_aa.size()),
|
||||||
|
.pPoolSizes = pool_sizes_aa.data(),
|
||||||
|
};
|
||||||
|
aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateRenderPass() {
|
void VKBlitScreen::CreateRenderPass() {
|
||||||
|
renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
|
||||||
const VkAttachmentDescription color_attachment{
|
const VkAttachmentDescription color_attachment{
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.format = swapchain.GetImageViewFormat(),
|
.format = format,
|
||||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
|
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
|
||||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||||
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
||||||
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
|
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
|
||||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||||
.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
.finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
|
||||||
};
|
};
|
||||||
|
|
||||||
const VkAttachmentReference color_attachment_ref{
|
const VkAttachmentReference color_attachment_ref{
|
||||||
@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() {
|
|||||||
.pDependencies = &dependency,
|
.pDependencies = &dependency,
|
||||||
};
|
};
|
||||||
|
|
||||||
renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
|
return device.GetLogical().CreateRenderPass(renderpass_ci);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateDescriptorSetLayout() {
|
void VKBlitScreen::CreateDescriptorSetLayout() {
|
||||||
@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
|
|||||||
},
|
},
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{
|
||||||
|
{
|
||||||
|
.binding = 0,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
|
||||||
|
.pImmutableSamplers = nullptr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.binding = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
|
.pImmutableSamplers = nullptr,
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
const VkDescriptorSetLayoutCreateInfo ci{
|
const VkDescriptorSetLayoutCreateInfo ci{
|
||||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
|
|||||||
.pBindings = layout_bindings.data(),
|
.pBindings = layout_bindings.data(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const VkDescriptorSetLayoutCreateInfo ci_aa{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.bindingCount = static_cast<u32>(layout_bindings_aa.size()),
|
||||||
|
.pBindings = layout_bindings_aa.data(),
|
||||||
|
};
|
||||||
|
|
||||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
|
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
|
||||||
|
aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateDescriptorSets() {
|
void VKBlitScreen::CreateDescriptorSets() {
|
||||||
const std::vector layouts(image_count, *descriptor_set_layout);
|
const std::vector layouts(image_count, *descriptor_set_layout);
|
||||||
|
const std::vector layouts_aa(image_count, *aa_descriptor_set_layout);
|
||||||
|
|
||||||
const VkDescriptorSetAllocateInfo ai{
|
const VkDescriptorSetAllocateInfo ai{
|
||||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||||
@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() {
|
|||||||
.pSetLayouts = layouts.data(),
|
.pSetLayouts = layouts.data(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const VkDescriptorSetAllocateInfo ai_aa{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.descriptorPool = *aa_descriptor_pool,
|
||||||
|
.descriptorSetCount = static_cast<u32>(image_count),
|
||||||
|
.pSetLayouts = layouts_aa.data(),
|
||||||
|
};
|
||||||
|
|
||||||
descriptor_sets = descriptor_pool.Allocate(ai);
|
descriptor_sets = descriptor_pool.Allocate(ai);
|
||||||
|
aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreatePipelineLayout() {
|
void VKBlitScreen::CreatePipelineLayout() {
|
||||||
@ -464,11 +700,21 @@ void VKBlitScreen::CreatePipelineLayout() {
|
|||||||
.pushConstantRangeCount = 0,
|
.pushConstantRangeCount = 0,
|
||||||
.pPushConstantRanges = nullptr,
|
.pPushConstantRanges = nullptr,
|
||||||
};
|
};
|
||||||
|
const VkPipelineLayoutCreateInfo ci_aa{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.setLayoutCount = 1,
|
||||||
|
.pSetLayouts = aa_descriptor_set_layout.address(),
|
||||||
|
.pushConstantRangeCount = 0,
|
||||||
|
.pPushConstantRanges = nullptr,
|
||||||
|
};
|
||||||
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
|
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
|
||||||
|
aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateGraphicsPipeline() {
|
void VKBlitScreen::CreateGraphicsPipeline() {
|
||||||
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
|
const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{
|
||||||
{
|
{
|
||||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
@ -483,7 +729,70 @@ void VKBlitScreen::CreateGraphicsPipeline() {
|
|||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
.module = *fragment_shader,
|
.module = *bilinear_fragment_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
|
const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_VERTEX_BIT,
|
||||||
|
.module = *vertex_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
|
.module = *bicubic_fragment_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
|
const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_VERTEX_BIT,
|
||||||
|
.module = *vertex_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
|
.module = *gaussian_fragment_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
|
const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_VERTEX_BIT,
|
||||||
|
.module = *vertex_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
|
.module = *scaleforce_fragment_shader,
|
||||||
.pName = "main",
|
.pName = "main",
|
||||||
.pSpecializationInfo = nullptr,
|
.pSpecializationInfo = nullptr,
|
||||||
},
|
},
|
||||||
@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() {
|
|||||||
.pDynamicStates = dynamic_states.data(),
|
.pDynamicStates = dynamic_states.data(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const VkGraphicsPipelineCreateInfo pipeline_ci{
|
const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{
|
||||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.stageCount = static_cast<u32>(shader_stages.size()),
|
.stageCount = static_cast<u32>(bilinear_shader_stages.size()),
|
||||||
.pStages = shader_stages.data(),
|
.pStages = bilinear_shader_stages.data(),
|
||||||
.pVertexInputState = &vertex_input_ci,
|
.pVertexInputState = &vertex_input_ci,
|
||||||
.pInputAssemblyState = &input_assembly_ci,
|
.pInputAssemblyState = &input_assembly_ci,
|
||||||
.pTessellationState = nullptr,
|
.pTessellationState = nullptr,
|
||||||
@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() {
|
|||||||
.basePipelineIndex = 0,
|
.basePipelineIndex = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
|
const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stageCount = static_cast<u32>(bicubic_shader_stages.size()),
|
||||||
|
.pStages = bicubic_shader_stages.data(),
|
||||||
|
.pVertexInputState = &vertex_input_ci,
|
||||||
|
.pInputAssemblyState = &input_assembly_ci,
|
||||||
|
.pTessellationState = nullptr,
|
||||||
|
.pViewportState = &viewport_state_ci,
|
||||||
|
.pRasterizationState = &rasterization_ci,
|
||||||
|
.pMultisampleState = &multisampling_ci,
|
||||||
|
.pDepthStencilState = nullptr,
|
||||||
|
.pColorBlendState = &color_blend_ci,
|
||||||
|
.pDynamicState = &dynamic_state_ci,
|
||||||
|
.layout = *pipeline_layout,
|
||||||
|
.renderPass = *renderpass,
|
||||||
|
.subpass = 0,
|
||||||
|
.basePipelineHandle = 0,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stageCount = static_cast<u32>(gaussian_shader_stages.size()),
|
||||||
|
.pStages = gaussian_shader_stages.data(),
|
||||||
|
.pVertexInputState = &vertex_input_ci,
|
||||||
|
.pInputAssemblyState = &input_assembly_ci,
|
||||||
|
.pTessellationState = nullptr,
|
||||||
|
.pViewportState = &viewport_state_ci,
|
||||||
|
.pRasterizationState = &rasterization_ci,
|
||||||
|
.pMultisampleState = &multisampling_ci,
|
||||||
|
.pDepthStencilState = nullptr,
|
||||||
|
.pColorBlendState = &color_blend_ci,
|
||||||
|
.pDynamicState = &dynamic_state_ci,
|
||||||
|
.layout = *pipeline_layout,
|
||||||
|
.renderPass = *renderpass,
|
||||||
|
.subpass = 0,
|
||||||
|
.basePipelineHandle = 0,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stageCount = static_cast<u32>(scaleforce_shader_stages.size()),
|
||||||
|
.pStages = scaleforce_shader_stages.data(),
|
||||||
|
.pVertexInputState = &vertex_input_ci,
|
||||||
|
.pInputAssemblyState = &input_assembly_ci,
|
||||||
|
.pTessellationState = nullptr,
|
||||||
|
.pViewportState = &viewport_state_ci,
|
||||||
|
.pRasterizationState = &rasterization_ci,
|
||||||
|
.pMultisampleState = &multisampling_ci,
|
||||||
|
.pDepthStencilState = nullptr,
|
||||||
|
.pColorBlendState = &color_blend_ci,
|
||||||
|
.pDynamicState = &dynamic_state_ci,
|
||||||
|
.layout = *pipeline_layout,
|
||||||
|
.renderPass = *renderpass,
|
||||||
|
.subpass = 0,
|
||||||
|
.basePipelineHandle = 0,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci);
|
||||||
|
bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci);
|
||||||
|
gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci);
|
||||||
|
scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateSampler() {
|
void VKBlitScreen::CreateSampler() {
|
||||||
@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() {
|
|||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.magFilter = VK_FILTER_LINEAR,
|
.magFilter = VK_FILTER_LINEAR,
|
||||||
|
.minFilter = VK_FILTER_LINEAR,
|
||||||
|
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
|
||||||
|
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||||
|
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||||
|
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||||
|
.mipLodBias = 0.0f,
|
||||||
|
.anisotropyEnable = VK_FALSE,
|
||||||
|
.maxAnisotropy = 0.0f,
|
||||||
|
.compareEnable = VK_FALSE,
|
||||||
|
.compareOp = VK_COMPARE_OP_NEVER,
|
||||||
|
.minLod = 0.0f,
|
||||||
|
.maxLod = 0.0f,
|
||||||
|
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
|
||||||
|
.unnormalizedCoordinates = VK_FALSE,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkSamplerCreateInfo ci_nn{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.magFilter = VK_FILTER_NEAREST,
|
||||||
.minFilter = VK_FILTER_NEAREST,
|
.minFilter = VK_FILTER_NEAREST,
|
||||||
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
|
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
|
||||||
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||||
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||||
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||||
@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
sampler = device.GetLogical().CreateSampler(ci);
|
sampler = device.GetLogical().CreateSampler(ci);
|
||||||
|
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateFramebuffers() {
|
void VKBlitScreen::CreateFramebuffers() {
|
||||||
@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() {
|
|||||||
|
|
||||||
for (std::size_t i = 0; i < image_count; ++i) {
|
for (std::size_t i = 0; i < image_count; ++i) {
|
||||||
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
|
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
|
||||||
framebuffers[i] = CreateFramebuffer(image_view, size);
|
framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() {
|
|||||||
}
|
}
|
||||||
raw_images.clear();
|
raw_images.clear();
|
||||||
raw_buffer_commits.clear();
|
raw_buffer_commits.clear();
|
||||||
|
|
||||||
|
aa_image_view.reset();
|
||||||
|
aa_image.reset();
|
||||||
|
aa_commit = MemoryCommit{};
|
||||||
|
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
buffer_commit = MemoryCommit{};
|
buffer_commit = MemoryCommit{};
|
||||||
}
|
}
|
||||||
@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
|
|||||||
raw_image_views.resize(image_count);
|
raw_image_views.resize(image_count);
|
||||||
raw_buffer_commits.resize(image_count);
|
raw_buffer_commits.resize(image_count);
|
||||||
|
|
||||||
for (size_t i = 0; i < image_count; ++i) {
|
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
|
||||||
raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
|
u32 down_shift = 0) {
|
||||||
|
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
|
||||||
|
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
||||||
|
return device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
|
|||||||
.format = GetFormat(framebuffer),
|
.format = GetFormat(framebuffer),
|
||||||
.extent =
|
.extent =
|
||||||
{
|
{
|
||||||
.width = framebuffer.width,
|
.width = (up_scale * framebuffer.width) >> down_shift,
|
||||||
.height = framebuffer.height,
|
.height = (up_scale * framebuffer.height) >> down_shift,
|
||||||
.depth = 1,
|
.depth = 1,
|
||||||
},
|
},
|
||||||
.mipLevels = 1,
|
.mipLevels = 1,
|
||||||
.arrayLayers = 1,
|
.arrayLayers = 1,
|
||||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
.tiling = VK_IMAGE_TILING_LINEAR,
|
.tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
|
||||||
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages,
|
||||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||||
.queueFamilyIndexCount = 0,
|
.queueFamilyIndexCount = 0,
|
||||||
.pQueueFamilyIndices = nullptr,
|
.pQueueFamilyIndices = nullptr,
|
||||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||||
});
|
});
|
||||||
raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
|
};
|
||||||
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
const auto create_commit = [&](vk::Image& image) {
|
||||||
|
return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
|
||||||
|
};
|
||||||
|
const auto create_image_view = [&](vk::Image& image) {
|
||||||
|
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.image = *raw_images[i],
|
.image = *image,
|
||||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||||
.format = GetFormat(framebuffer),
|
.format = GetFormat(framebuffer),
|
||||||
.components =
|
.components =
|
||||||
@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
|
|||||||
.layerCount = 1,
|
.layerCount = 1,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
};
|
||||||
|
|
||||||
|
for (size_t i = 0; i < image_count; ++i) {
|
||||||
|
raw_images[i] = create_image();
|
||||||
|
raw_buffer_commits[i] = create_commit(raw_images[i]);
|
||||||
|
raw_image_views[i] = create_image_view(raw_images[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
|
// AA Resources
|
||||||
|
const u32 up_scale = Settings::values.resolution_info.up_scale;
|
||||||
|
const u32 down_shift = Settings::values.resolution_info.down_shift;
|
||||||
|
aa_image = create_image(true, up_scale, down_shift);
|
||||||
|
aa_commit = create_commit(aa_image);
|
||||||
|
aa_image_view = create_image_view(aa_image);
|
||||||
|
VkExtent2D size{
|
||||||
|
.width = (up_scale * framebuffer.width) >> down_shift,
|
||||||
|
.height = (up_scale * framebuffer.height) >> down_shift,
|
||||||
|
};
|
||||||
|
if (aa_renderpass) {
|
||||||
|
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
|
||||||
|
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
|
||||||
|
|
||||||
|
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_VERTEX_BIT,
|
||||||
|
.module = *fxaa_vertex_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||||
|
.module = *fxaa_fragment_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
|
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
|
||||||
|
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
|
||||||
|
|
||||||
|
const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.vertexBindingDescriptionCount = 1,
|
||||||
|
.pVertexBindingDescriptions = &vertex_binding_description,
|
||||||
|
.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()},
|
||||||
|
.pVertexAttributeDescriptions = vertex_attrs_description.data(),
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
|
||||||
|
.primitiveRestartEnable = VK_FALSE,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkPipelineViewportStateCreateInfo viewport_state_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.viewportCount = 1,
|
||||||
|
.pViewports = nullptr,
|
||||||
|
.scissorCount = 1,
|
||||||
|
.pScissors = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkPipelineRasterizationStateCreateInfo rasterization_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.depthClampEnable = VK_FALSE,
|
||||||
|
.rasterizerDiscardEnable = VK_FALSE,
|
||||||
|
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||||
|
.cullMode = VK_CULL_MODE_NONE,
|
||||||
|
.frontFace = VK_FRONT_FACE_CLOCKWISE,
|
||||||
|
.depthBiasEnable = VK_FALSE,
|
||||||
|
.depthBiasConstantFactor = 0.0f,
|
||||||
|
.depthBiasClamp = 0.0f,
|
||||||
|
.depthBiasSlopeFactor = 0.0f,
|
||||||
|
.lineWidth = 1.0f,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkPipelineMultisampleStateCreateInfo multisampling_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
|
.sampleShadingEnable = VK_FALSE,
|
||||||
|
.minSampleShading = 0.0f,
|
||||||
|
.pSampleMask = nullptr,
|
||||||
|
.alphaToCoverageEnable = VK_FALSE,
|
||||||
|
.alphaToOneEnable = VK_FALSE,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkPipelineColorBlendAttachmentState color_blend_attachment{
|
||||||
|
.blendEnable = VK_FALSE,
|
||||||
|
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||||
|
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||||
|
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||||
|
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||||
|
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||||
|
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||||
|
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||||
|
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.logicOpEnable = VK_FALSE,
|
||||||
|
.logicOp = VK_LOGIC_OP_COPY,
|
||||||
|
.attachmentCount = 1,
|
||||||
|
.pAttachments = &color_blend_attachment,
|
||||||
|
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr std::array dynamic_states{
|
||||||
|
VK_DYNAMIC_STATE_VIEWPORT,
|
||||||
|
VK_DYNAMIC_STATE_SCISSOR,
|
||||||
|
};
|
||||||
|
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
||||||
|
.pDynamicStates = dynamic_states.data(),
|
||||||
|
};
|
||||||
|
|
||||||
|
const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stageCount = static_cast<u32>(fxaa_shader_stages.size()),
|
||||||
|
.pStages = fxaa_shader_stages.data(),
|
||||||
|
.pVertexInputState = &vertex_input_ci,
|
||||||
|
.pInputAssemblyState = &input_assembly_ci,
|
||||||
|
.pTessellationState = nullptr,
|
||||||
|
.pViewportState = &viewport_state_ci,
|
||||||
|
.pRasterizationState = &rasterization_ci,
|
||||||
|
.pMultisampleState = &multisampling_ci,
|
||||||
|
.pDepthStencilState = nullptr,
|
||||||
|
.pColorBlendState = &color_blend_ci,
|
||||||
|
.pDynamicState = &dynamic_state_ci,
|
||||||
|
.layout = *aa_pipeline_layout,
|
||||||
|
.renderPass = *aa_renderpass,
|
||||||
|
.subpass = 0,
|
||||||
|
.basePipelineHandle = 0,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
// AA
|
||||||
|
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Points bindings 0 and 1 of aa_descriptor_sets[image_index] at the same image view.
//
// image_index selects the entry of aa_descriptor_sets that is updated.
// image_view  is the view written into both bindings.
// nn          selects nn_sampler when true and sampler otherwise
//             (presumably "nearest neighbor" filtering; confirm where nn_sampler is created).
void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
|
||||||
|
bool nn) const {
|
||||||
|
// One image/sampler pair is shared by both descriptor writes below.
const VkDescriptorImageInfo image_info{
|
||||||
|
.sampler = nn ? *nn_sampler : *sampler,
|
||||||
|
.imageView = image_view,
|
||||||
|
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Combined image sampler write for binding 0.
const VkWriteDescriptorSet sampler_write{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.dstSet = aa_descriptor_sets[image_index],
|
||||||
|
.dstBinding = 0,
|
||||||
|
.dstArrayElement = 0,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.pImageInfo = &image_info,
|
||||||
|
.pBufferInfo = nullptr,
|
||||||
|
.pTexelBufferView = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Identical to sampler_write except that it targets binding 1.
const VkWriteDescriptorSet sampler_write_2{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.dstSet = aa_descriptor_sets[image_index],
|
||||||
|
.dstBinding = 1,
|
||||||
|
.dstArrayElement = 0,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.pImageInfo = &image_info,
|
||||||
|
.pBufferInfo = nullptr,
|
||||||
|
.pTexelBufferView = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Submit both writes in one call; the trailing {} passes an empty second argument.
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
|
||||||
|
bool nn) const {
|
||||||
const VkDescriptorBufferInfo buffer_info{
|
const VkDescriptorBufferInfo buffer_info{
|
||||||
.buffer = *buffer,
|
.buffer = *buffer,
|
||||||
.offset = offsetof(BufferData, uniform),
|
.offset = offsetof(BufferData, uniform),
|
||||||
@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
|
|||||||
};
|
};
|
||||||
|
|
||||||
const VkDescriptorImageInfo image_info{
|
const VkDescriptorImageInfo image_info{
|
||||||
.sampler = *sampler,
|
.sampler = nn ? *nn_sampler : *sampler,
|
||||||
.imageView = image_view,
|
.imageView = image_view,
|
||||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
};
|
};
|
||||||
@ -798,10 +1411,11 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
|
|||||||
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
|
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
|
||||||
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
|
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
|
||||||
|
|
||||||
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
|
|
||||||
// (e.g. handheld mode) on a 1920x1080 framebuffer.
|
|
||||||
f32 scale_u = 1.0f;
|
f32 scale_u = 1.0f;
|
||||||
f32 scale_v = 1.0f;
|
f32 scale_v = 1.0f;
|
||||||
|
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
|
||||||
|
// (e.g. handheld mode) on a 1920x1080 framebuffer.
|
||||||
|
if (!fsr) {
|
||||||
if (framebuffer_crop_rect.GetWidth() > 0) {
|
if (framebuffer_crop_rect.GetWidth() > 0) {
|
||||||
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
|
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
|
||||||
static_cast<f32>(screen_info.width);
|
static_cast<f32>(screen_info.width);
|
||||||
@ -810,6 +1424,7 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
|
|||||||
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
|
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
|
||||||
static_cast<f32>(screen_info.height);
|
static_cast<f32>(screen_info.height);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const auto& screen = layout.screen;
|
const auto& screen = layout.screen;
|
||||||
const auto x = static_cast<f32>(screen.left);
|
const auto x = static_cast<f32>(screen.left);
|
||||||
@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
|
|||||||
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
|
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a new FSR object and stores it in the fsr member, replacing whatever it held.
// The object is constructed with the device, the memory allocator, image_count and an
// extent taken from the render window's current framebuffer layout.
void VKBlitScreen::CreateFSR() {
|
||||||
|
const auto& layout = render_window.GetFramebufferLayout();
|
||||||
|
// Size comes from the layout's screen rectangle.
const VkExtent2D fsr_size{
|
||||||
|
.width = layout.screen.GetWidth(),
|
||||||
|
.height = layout.screen.GetHeight(),
|
||||||
|
};
|
||||||
|
fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size);
|
||||||
|
}
|
||||||
|
|
||||||
// Returns sizeof(BufferData) plus GetSizeInBytes(framebuffer) multiplied by image_count,
// i.e. one BufferData header followed by one framebuffer-sized region per image.
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
|
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
|
||||||
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
|
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
|
||||||
}
|
}
|
||||||
|
@ -34,6 +34,7 @@ namespace Vulkan {
|
|||||||
struct ScreenInfo;
|
struct ScreenInfo;
|
||||||
|
|
||||||
class Device;
|
class Device;
|
||||||
|
class FSR;
|
||||||
class RasterizerVulkan;
|
class RasterizerVulkan;
|
||||||
class VKScheduler;
|
class VKScheduler;
|
||||||
class VKSwapchain;
|
class VKSwapchain;
|
||||||
@ -66,6 +67,9 @@ public:
|
|||||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||||
VkExtent2D extent);
|
VkExtent2D extent);
|
||||||
|
|
||||||
|
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||||
|
VkExtent2D extent, vk::RenderPass& rd);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct BufferData;
|
struct BufferData;
|
||||||
|
|
||||||
@ -74,6 +78,7 @@ private:
|
|||||||
void CreateSemaphores();
|
void CreateSemaphores();
|
||||||
void CreateDescriptorPool();
|
void CreateDescriptorPool();
|
||||||
void CreateRenderPass();
|
void CreateRenderPass();
|
||||||
|
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
|
||||||
void CreateDescriptorSetLayout();
|
void CreateDescriptorSetLayout();
|
||||||
void CreateDescriptorSets();
|
void CreateDescriptorSets();
|
||||||
void CreatePipelineLayout();
|
void CreatePipelineLayout();
|
||||||
@ -88,11 +93,14 @@ private:
|
|||||||
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
|
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
|
||||||
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
||||||
|
|
||||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
|
||||||
|
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
|
||||||
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||||
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||||
const Layout::FramebufferLayout layout) const;
|
const Layout::FramebufferLayout layout) const;
|
||||||
|
|
||||||
|
void CreateFSR();
|
||||||
|
|
||||||
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
||||||
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
||||||
std::size_t image_index) const;
|
std::size_t image_index) const;
|
||||||
@ -107,14 +115,24 @@ private:
|
|||||||
const VKScreenInfo& screen_info;
|
const VKScreenInfo& screen_info;
|
||||||
|
|
||||||
vk::ShaderModule vertex_shader;
|
vk::ShaderModule vertex_shader;
|
||||||
vk::ShaderModule fragment_shader;
|
vk::ShaderModule fxaa_vertex_shader;
|
||||||
|
vk::ShaderModule fxaa_fragment_shader;
|
||||||
|
vk::ShaderModule bilinear_fragment_shader;
|
||||||
|
vk::ShaderModule bicubic_fragment_shader;
|
||||||
|
vk::ShaderModule gaussian_fragment_shader;
|
||||||
|
vk::ShaderModule scaleforce_fragment_shader;
|
||||||
vk::DescriptorPool descriptor_pool;
|
vk::DescriptorPool descriptor_pool;
|
||||||
vk::DescriptorSetLayout descriptor_set_layout;
|
vk::DescriptorSetLayout descriptor_set_layout;
|
||||||
vk::PipelineLayout pipeline_layout;
|
vk::PipelineLayout pipeline_layout;
|
||||||
vk::Pipeline pipeline;
|
vk::Pipeline nearest_neightbor_pipeline;
|
||||||
|
vk::Pipeline bilinear_pipeline;
|
||||||
|
vk::Pipeline bicubic_pipeline;
|
||||||
|
vk::Pipeline gaussian_pipeline;
|
||||||
|
vk::Pipeline scaleforce_pipeline;
|
||||||
vk::RenderPass renderpass;
|
vk::RenderPass renderpass;
|
||||||
std::vector<vk::Framebuffer> framebuffers;
|
std::vector<vk::Framebuffer> framebuffers;
|
||||||
vk::DescriptorSets descriptor_sets;
|
vk::DescriptorSets descriptor_sets;
|
||||||
|
vk::Sampler nn_sampler;
|
||||||
vk::Sampler sampler;
|
vk::Sampler sampler;
|
||||||
|
|
||||||
vk::Buffer buffer;
|
vk::Buffer buffer;
|
||||||
@ -126,8 +144,22 @@ private:
|
|||||||
std::vector<vk::Image> raw_images;
|
std::vector<vk::Image> raw_images;
|
||||||
std::vector<vk::ImageView> raw_image_views;
|
std::vector<vk::ImageView> raw_image_views;
|
||||||
std::vector<MemoryCommit> raw_buffer_commits;
|
std::vector<MemoryCommit> raw_buffer_commits;
|
||||||
|
|
||||||
|
vk::DescriptorPool aa_descriptor_pool;
|
||||||
|
vk::DescriptorSetLayout aa_descriptor_set_layout;
|
||||||
|
vk::PipelineLayout aa_pipeline_layout;
|
||||||
|
vk::Pipeline aa_pipeline;
|
||||||
|
vk::RenderPass aa_renderpass;
|
||||||
|
vk::Framebuffer aa_framebuffer;
|
||||||
|
vk::DescriptorSets aa_descriptor_sets;
|
||||||
|
vk::Image aa_image;
|
||||||
|
vk::ImageView aa_image_view;
|
||||||
|
MemoryCommit aa_commit;
|
||||||
|
|
||||||
u32 raw_width = 0;
|
u32 raw_width = 0;
|
||||||
u32 raw_height = 0;
|
u32 raw_height = 0;
|
||||||
|
|
||||||
|
std::unique_ptr<FSR> fsr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||||
std::span<const VideoCommon::BufferCopy> copies) {
|
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
|||||||
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
||||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||||
|
if (barrier) {
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||||
|
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
|
||||||
|
}
|
||||||
|
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||||
|
if (barrier) {
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void BufferCacheRuntime::PreCopyBarrier() {
|
||||||
|
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
|
};
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
0, READ_BARRIER);
|
0, READ_BARRIER);
|
||||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void BufferCacheRuntime::PostCopyBarrier() {
|
||||||
|
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
};
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||||
0, WRITE_BARRIER);
|
0, WRITE_BARRIER);
|
||||||
});
|
});
|
||||||
|
@ -69,8 +69,12 @@ public:
|
|||||||
|
|
||||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||||
|
|
||||||
|
void PreCopyBarrier();
|
||||||
|
|
||||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||||
std::span<const VideoCommon::BufferCopy> copies);
|
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||||
|
|
||||||
|
void PostCopyBarrier();
|
||||||
|
|
||||||
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
|
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using Shader::ImageBufferDescriptor;
|
using Shader::ImageBufferDescriptor;
|
||||||
|
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
|
||||||
using Tegra::Texture::TexturePair;
|
using Tegra::Texture::TexturePair;
|
||||||
|
|
||||||
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
|
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
|
||||||
@ -108,8 +109,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||||||
texture_cache.SynchronizeComputeDescriptors();
|
texture_cache.SynchronizeComputeDescriptors();
|
||||||
|
|
||||||
static constexpr size_t max_elements = 64;
|
static constexpr size_t max_elements = 64;
|
||||||
std::array<ImageId, max_elements> image_view_ids;
|
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
|
||||||
boost::container::static_vector<u32, max_elements> image_view_indices;
|
|
||||||
boost::container::static_vector<VkSampler, max_elements> samplers;
|
boost::container::static_vector<VkSampler, max_elements> samplers;
|
||||||
|
|
||||||
const auto& qmd{kepler_compute.launch_description};
|
const auto& qmd{kepler_compute.launch_description};
|
||||||
@ -134,30 +134,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||||||
}
|
}
|
||||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||||
}};
|
}};
|
||||||
const auto add_image{[&](const auto& desc) {
|
const auto add_image{[&](const auto& desc, bool blacklist) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices.push_back(handle.first);
|
views.push_back({
|
||||||
|
.index = handle.first,
|
||||||
|
.blacklist = blacklist,
|
||||||
|
.id = {},
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
std::ranges::for_each(info.texture_buffer_descriptors, add_image);
|
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||||
std::ranges::for_each(info.image_buffer_descriptors, add_image);
|
add_image(desc, false);
|
||||||
|
}
|
||||||
|
for (const auto& desc : info.image_buffer_descriptors) {
|
||||||
|
add_image(desc, false);
|
||||||
|
}
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices.push_back(handle.first);
|
views.push_back({handle.first});
|
||||||
|
|
||||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||||
samplers.push_back(sampler->Handle());
|
samplers.push_back(sampler->Handle());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::ranges::for_each(info.image_descriptors, add_image);
|
for (const auto& desc : info.image_descriptors) {
|
||||||
|
add_image(desc, desc.is_written);
|
||||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
}
|
||||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
|
||||||
|
|
||||||
buffer_cache.UnbindComputeTextureBuffers();
|
buffer_cache.UnbindComputeTextureBuffers();
|
||||||
ImageId* texture_buffer_ids{image_view_ids.data()};
|
|
||||||
size_t index{};
|
size_t index{};
|
||||||
const auto add_buffer{[&](const auto& desc) {
|
const auto add_buffer{[&](const auto& desc) {
|
||||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||||
@ -166,11 +173,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||||||
if constexpr (is_image) {
|
if constexpr (is_image) {
|
||||||
is_written = desc.is_written;
|
is_written = desc.is_written;
|
||||||
}
|
}
|
||||||
ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
|
ImageView& image_view = texture_cache.GetImageView(views[index].id);
|
||||||
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
|
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
|
||||||
image_view.BufferSize(), image_view.format,
|
image_view.BufferSize(), image_view.format,
|
||||||
is_written, is_image);
|
is_written, is_image);
|
||||||
++texture_buffer_ids;
|
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
@ -180,9 +186,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||||||
buffer_cache.UpdateComputeBuffers();
|
buffer_cache.UpdateComputeBuffers();
|
||||||
buffer_cache.BindHostComputeBuffers();
|
buffer_cache.BindHostComputeBuffers();
|
||||||
|
|
||||||
|
RescalingPushConstant rescaling;
|
||||||
const VkSampler* samplers_it{samplers.data()};
|
const VkSampler* samplers_it{samplers.data()};
|
||||||
const ImageId* views_it{image_view_ids.data()};
|
const VideoCommon::ImageViewInOut* views_it{views.data()};
|
||||||
PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
|
PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
|
||||||
|
views_it);
|
||||||
|
|
||||||
if (!is_built.load(std::memory_order::relaxed)) {
|
if (!is_built.load(std::memory_order::relaxed)) {
|
||||||
// Wait for the pipeline to be built
|
// Wait for the pipeline to be built
|
||||||
@ -192,11 +200,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||||
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
|
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
|
||||||
|
scheduler.Record([this, descriptor_data, is_rescaling,
|
||||||
|
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||||
if (!descriptor_set_layout) {
|
if (!descriptor_set_layout) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (is_rescaling) {
|
||||||
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||||
|
rescaling_data.data());
|
||||||
|
}
|
||||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||||
const vk::Device& dev{device.GetLogical()};
|
const vk::Device& dev{device.GetLogical()};
|
||||||
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
||||||
|
553
src/video_core/renderer_vulkan/vk_fsr.cpp
Executable file
553
src/video_core/renderer_vulkan/vk_fsr.cpp
Executable file
@ -0,0 +1,553 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include "common/bit_cast.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/div_ceil.h"
|
||||||
|
|
||||||
|
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
|
||||||
|
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
|
||||||
|
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
|
||||||
|
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_fsr.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||||
|
#include "video_core/vulkan_common/vulkan_device.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
namespace {
|
||||||
|
// Reimplementations of the constant generating functions in ffx_fsr1.h
|
||||||
|
// GCC generated a lot of warnings when using the official header.
|
||||||
|
u32 AU1_AH1_AF1(f32 f) {
|
||||||
|
static constexpr u32 base[512]{
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
|
||||||
|
0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
|
||||||
|
0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
|
||||||
|
0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||||
|
0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||||
|
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
|
||||||
|
0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
|
||||||
|
0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
|
||||||
|
0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
|
||||||
|
0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||||
|
};
|
||||||
|
static constexpr s8 shift[512]{
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
|
||||||
|
0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||||
|
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||||
|
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
|
||||||
|
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||||
|
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||||
|
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||||
|
0x18, 0x18,
|
||||||
|
};
|
||||||
|
const u32 u = Common::BitCast<u32>(f);
|
||||||
|
const u32 i = u >> 23;
|
||||||
|
return base[i] + ((u & 0x7fffff) >> shift[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Packs two floats into a single 32-bit word: a[0] converted by AU1_AH1_AF1 occupies the low
/// 16 bits and a[1] converted by AU1_AH1_AF1 is shifted into the high 16 bits.
u32 AU1_AH2_AF2(f32 a[2]) {
    const auto low_half = AU1_AH1_AF1(a[0]);
    const auto high_half = AU1_AH1_AF1(a[1]);
    return low_half + (high_half << 16);
}
|
||||||
|
|
||||||
|
/// Fills the four 4-word EASU constant blocks with the raw bit patterns of the floats below.
/// @param con0 Receives viewport/output scale (x, y) and the half-scale minus 0.5 bias (x, y).
/// @param con1 Receives (1/w, 1/h, 1/w, -1/h) of the input size.
/// @param con2 Receives (-1/w, 2/h, 1/w, 2/h) of the input size.
/// @param con3 Receives (0/w, 4/h) of the input size followed by two zero words.
void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
                f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
                f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
    // Every constant is stored as the bit pattern of a float, not as a converted integer.
    const auto as_bits = [](f32 value) { return Common::BitCast<u32>(value); };

    const f32 scale_x = inputViewportInPixelsX / outputSizeInPixelsX;
    const f32 scale_y = inputViewportInPixelsY / outputSizeInPixelsY;
    const f32 bias_x = 0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f;
    const f32 bias_y = 0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f;
    con0[0] = as_bits(scale_x);
    con0[1] = as_bits(scale_y);
    con0[2] = as_bits(bias_x);
    con0[3] = as_bits(bias_y);

    con1[0] = as_bits(1.0f / inputSizeInPixelsX);
    con1[1] = as_bits(1.0f / inputSizeInPixelsY);
    con1[2] = as_bits(1.0f / inputSizeInPixelsX);
    con1[3] = as_bits(-1.0f / inputSizeInPixelsY);

    con2[0] = as_bits(-1.0f / inputSizeInPixelsX);
    con2[1] = as_bits(2.0f / inputSizeInPixelsY);
    con2[2] = as_bits(1.0f / inputSizeInPixelsX);
    con2[3] = as_bits(2.0f / inputSizeInPixelsY);

    con3[0] = as_bits(0.0f / inputSizeInPixelsX);
    con3[1] = as_bits(4.0f / inputSizeInPixelsY);
    con3[3] = 0;
    con3[2] = 0;
}
|
||||||
|
|
||||||
|
/// Same as FsrEasuCon, but the two bias terms in con0[2] and con0[3] additionally include the
/// given input offset (in pixels) so sampling can start away from the input origin.
void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
                      f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
                      f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
                      f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
    // Start from the offset-free constants, then overwrite only the two bias words.
    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
               inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);

    const f32 offset_bias_x =
        0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + inputOffsetInPixelsX;
    const f32 offset_bias_y =
        0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + inputOffsetInPixelsY;

    con0[2] = Common::BitCast<u32>(offset_bias_x);
    con0[3] = Common::BitCast<u32>(offset_bias_y);
}
|
||||||
|
|
||||||
|
/// Fills the 4-word RCAS constant block.
/// @param con       Output; must point to at least four writable words.
/// @param sharpness Converted to 2^(-sharpness) before being stored, so larger values yield a
///                  smaller stored factor.
void FsrRcasCon(u32* con, f32 sharpness) {
    const f32 factor = std::exp2f(-sharpness);
    f32 factor_pair[2]{factor, factor};

    con[0] = Common::BitCast<u32>(factor); // full-precision float bits
    con[1] = AU1_AH2_AF2(factor_pair);     // same factor packed twice as 16-bit values
    con[2] = 0;
    con[3] = 0;
}
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
/// Stores the construction parameters and builds every Vulkan object used by the FSR passes.
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
         VkExtent2D output_size_)
    : device(device_), memory_allocator(memory_allocator_), image_count(image_count_),
      output_size(output_size_) {
    // The order below matters: later steps consume handles created by earlier ones.
    CreateImages();
    CreateSampler(); // referenced by the descriptor set layout
    CreateShaders(); // referenced by the pipelines
    CreateDescriptorPool();
    CreateDescriptorSetLayout();
    CreateDescriptorSets(); // needs both the pool and the set layout
    CreatePipelineLayout(); // needs the set layout
    CreatePipeline();       // needs the shaders and the pipeline layout
}
|
||||||
|
|
||||||
|
/// Records the two FSR compute passes (EASU, then RCAS) for one image slot.
/// @param scheduler          Scheduler that receives the recorded commands.
/// @param image_index        Slot selecting which internal images and descriptor sets are used.
/// @param image_view         View of the source image sampled by the first pass.
/// @param input_image_extent Full size of the source image in pixels.
/// @param crop_rect          Region of the source image that is scaled to the output size.
/// @return View of the second-pass output image for this slot.
VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
                      VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
    UpdateDescriptorSet(image_index, image_view);

    scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
        // Template shared by every barrier below; image, access masks and old layout are
        // patched per use.
        const VkImageMemoryBarrier base_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = 0,
            .dstAccessMask = 0,
            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = {},
            .subresourceRange =
                {
                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                    .baseMipLevel = 0,
                    .levelCount = 1,
                    .baseArrayLayer = 0,
                    .layerCount = 1,
                },
        };

        // ---- Pass 1: EASU, source image -> images[image_index] ----
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);

        // Four blocks of four words (con0..con3); FsrEasuConOffset writes all sixteen.
        std::array<u32, 4 * 4> push_constants;
        FsrEasuConOffset(
            push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
            push_constants.data() + 12,

            static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
            static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
            static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
            static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);

        {
            // Previous contents are not needed, so transition from UNDEFINED.
            VkImageMemoryBarrier fsr_write_barrier = base_barrier;
            fsr_write_barrier.image = *images[image_index];
            fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            // The layout transition must be visible to the shader writes that follow it.
            fsr_write_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier);
        }

        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
                                  descriptor_sets[image_index * 2], {});
        cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
                        Common::DivCeil(output_size.height, 16u), 1);

        // ---- Pass 2: RCAS, images[image_index] -> images[image_count + image_index] ----
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);

        // Only the first four words are rewritten; the remaining words keep the EASU values.
        FsrRcasCon(push_constants.data(), 0.25f);
        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);

        {
            std::array<VkImageMemoryBarrier, 2> barriers;
            auto& fsr_read_barrier = barriers[0];
            auto& blit_write_barrier = barriers[1];

            // Make the first pass' writes visible to the second pass' reads.
            fsr_read_barrier = base_barrier;
            fsr_read_barrier.image = *images[image_index];
            fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

            // Prepare the final output image; its previous contents are discarded.
            blit_write_barrier = base_barrier;
            blit_write_barrier.image = *images[image_count + image_index];
            blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
            // The layout transition must be visible to the shader writes that follow it.
            blit_write_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers);
        }

        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
                                  descriptor_sets[image_index * 2 + 1], {});
        cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
                        Common::DivCeil(output_size.height, 16u), 1);

        {
            // Make the final image readable by a later fragment shader.
            std::array<VkImageMemoryBarrier, 1> barriers;
            auto& blit_read_barrier = barriers[0];

            blit_read_barrier = base_barrier;
            blit_read_barrier.image = *images[image_count + image_index];
            blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers);
        }
    });

    return *image_views[image_count + image_index];
}
|
||||||
|
|
||||||
|
void FSR::CreateDescriptorPool() {
|
||||||
|
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
|
||||||
|
{
|
||||||
|
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.descriptorCount = static_cast<u32>(image_count * 2),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
|
.descriptorCount = static_cast<u32>(image_count * 2),
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
|
const VkDescriptorPoolCreateInfo ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||||
|
.maxSets = static_cast<u32>(image_count * 2),
|
||||||
|
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
|
||||||
|
.pPoolSizes = pool_sizes.data(),
|
||||||
|
};
|
||||||
|
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreateDescriptorSetLayout() {
|
||||||
|
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
|
||||||
|
{
|
||||||
|
.binding = 0,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
.pImmutableSamplers = sampler.address(),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.binding = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
.pImmutableSamplers = sampler.address(),
|
||||||
|
},
|
||||||
|
}};
|
||||||
|
|
||||||
|
const VkDescriptorSetLayoutCreateInfo ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.bindingCount = static_cast<u32>(layout_bindings.size()),
|
||||||
|
.pBindings = layout_bindings.data(),
|
||||||
|
};
|
||||||
|
|
||||||
|
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreateDescriptorSets() {
|
||||||
|
const u32 sets = static_cast<u32>(image_count * 2);
|
||||||
|
const std::vector layouts(sets, *descriptor_set_layout);
|
||||||
|
|
||||||
|
const VkDescriptorSetAllocateInfo ai{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.descriptorPool = *descriptor_pool,
|
||||||
|
.descriptorSetCount = sets,
|
||||||
|
.pSetLayouts = layouts.data(),
|
||||||
|
};
|
||||||
|
|
||||||
|
descriptor_sets = descriptor_pool.Allocate(ai);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreateImages() {
|
||||||
|
images.resize(image_count * 2);
|
||||||
|
image_views.resize(image_count * 2);
|
||||||
|
buffer_commits.resize(image_count * 2);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < image_count * 2; ++i) {
|
||||||
|
images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.imageType = VK_IMAGE_TYPE_2D,
|
||||||
|
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||||
|
.extent =
|
||||||
|
{
|
||||||
|
.width = output_size.width,
|
||||||
|
.height = output_size.height,
|
||||||
|
.depth = 1,
|
||||||
|
},
|
||||||
|
.mipLevels = 1,
|
||||||
|
.arrayLayers = 1,
|
||||||
|
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
|
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||||
|
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
|
||||||
|
VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||||
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||||
|
.queueFamilyIndexCount = 0,
|
||||||
|
.pQueueFamilyIndices = nullptr,
|
||||||
|
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||||
|
});
|
||||||
|
buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
|
||||||
|
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.image = *images[i],
|
||||||
|
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||||
|
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||||
|
.components =
|
||||||
|
{
|
||||||
|
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
},
|
||||||
|
.subresourceRange =
|
||||||
|
{
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = 1,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = 1,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreatePipelineLayout() {
|
||||||
|
VkPushConstantRange push_const{
|
||||||
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
.offset = 0,
|
||||||
|
.size = sizeof(std::array<u32, 4 * 4>),
|
||||||
|
};
|
||||||
|
VkPipelineLayoutCreateInfo ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.setLayoutCount = 1,
|
||||||
|
.pSetLayouts = descriptor_set_layout.address(),
|
||||||
|
.pushConstantRangeCount = 1,
|
||||||
|
.pPushConstantRanges = &push_const,
|
||||||
|
};
|
||||||
|
|
||||||
|
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
|
||||||
|
const auto fsr_image_view = *image_views[image_index];
|
||||||
|
const auto blit_image_view = *image_views[image_count + image_index];
|
||||||
|
|
||||||
|
const VkDescriptorImageInfo image_info{
|
||||||
|
.sampler = VK_NULL_HANDLE,
|
||||||
|
.imageView = image_view,
|
||||||
|
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
};
|
||||||
|
const VkDescriptorImageInfo fsr_image_info{
|
||||||
|
.sampler = VK_NULL_HANDLE,
|
||||||
|
.imageView = fsr_image_view,
|
||||||
|
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
};
|
||||||
|
const VkDescriptorImageInfo blit_image_info{
|
||||||
|
.sampler = VK_NULL_HANDLE,
|
||||||
|
.imageView = blit_image_view,
|
||||||
|
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkWriteDescriptorSet sampler_write{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.dstSet = descriptor_sets[image_index * 2],
|
||||||
|
.dstBinding = 0,
|
||||||
|
.dstArrayElement = 0,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||||
|
.pImageInfo = &image_info,
|
||||||
|
.pBufferInfo = nullptr,
|
||||||
|
.pTexelBufferView = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkWriteDescriptorSet output_write{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.dstSet = descriptor_sets[image_index * 2],
|
||||||
|
.dstBinding = 1,
|
||||||
|
.dstArrayElement = 0,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
|
.pImageInfo = &fsr_image_info,
|
||||||
|
.pBufferInfo = nullptr,
|
||||||
|
.pTexelBufferView = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
|
||||||
|
|
||||||
|
sampler_write.dstSet = descriptor_sets[image_index * 2 + 1];
|
||||||
|
sampler_write.pImageInfo = &fsr_image_info;
|
||||||
|
output_write.dstSet = descriptor_sets[image_index * 2 + 1];
|
||||||
|
output_write.pImageInfo = &blit_image_info;
|
||||||
|
|
||||||
|
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreateSampler() {
|
||||||
|
const VkSamplerCreateInfo ci{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.magFilter = VK_FILTER_LINEAR,
|
||||||
|
.minFilter = VK_FILTER_LINEAR,
|
||||||
|
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
|
||||||
|
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||||
|
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||||
|
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||||
|
.mipLodBias = 0.0f,
|
||||||
|
.anisotropyEnable = VK_FALSE,
|
||||||
|
.maxAnisotropy = 0.0f,
|
||||||
|
.compareEnable = VK_FALSE,
|
||||||
|
.compareOp = VK_COMPARE_OP_NEVER,
|
||||||
|
.minLod = 0.0f,
|
||||||
|
.maxLod = 0.0f,
|
||||||
|
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
|
||||||
|
.unnormalizedCoordinates = VK_FALSE,
|
||||||
|
};
|
||||||
|
|
||||||
|
sampler = device.GetLogical().CreateSampler(ci);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreateShaders() {
|
||||||
|
if (device.IsFloat16Supported()) {
|
||||||
|
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
|
||||||
|
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
|
||||||
|
} else {
|
||||||
|
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
|
||||||
|
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void FSR::CreatePipeline() {
|
||||||
|
VkPipelineShaderStageCreateInfo shader_stage_easu{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
.module = *easu_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkPipelineShaderStageCreateInfo shader_stage_rcas{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
.module = *rcas_shader,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkComputePipelineCreateInfo pipeline_ci_easu{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = shader_stage_easu,
|
||||||
|
.layout = *pipeline_layout,
|
||||||
|
.basePipelineHandle = VK_NULL_HANDLE,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkComputePipelineCreateInfo pipeline_ci_rcas{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = shader_stage_rcas,
|
||||||
|
.layout = *pipeline_layout,
|
||||||
|
.basePipelineHandle = VK_NULL_HANDLE,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu);
|
||||||
|
rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
54
src/video_core/renderer_vulkan/vk_fsr.h
Executable file
54
src/video_core/renderer_vulkan/vk_fsr.h
Executable file
@ -0,0 +1,54 @@
|
|||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/math_util.h"
|
||||||
|
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||||
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
class VKScheduler;
|
||||||
|
|
||||||
|
class FSR {
|
||||||
|
public:
|
||||||
|
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
|
||||||
|
VkExtent2D output_size);
|
||||||
|
VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||||
|
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void CreateDescriptorPool();
|
||||||
|
void CreateDescriptorSetLayout();
|
||||||
|
void CreateDescriptorSets();
|
||||||
|
void CreateImages();
|
||||||
|
void CreateSampler();
|
||||||
|
void CreateShaders();
|
||||||
|
void CreatePipeline();
|
||||||
|
void CreatePipelineLayout();
|
||||||
|
|
||||||
|
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
||||||
|
|
||||||
|
const Device& device;
|
||||||
|
MemoryAllocator& memory_allocator;
|
||||||
|
size_t image_count;
|
||||||
|
VkExtent2D output_size;
|
||||||
|
|
||||||
|
vk::DescriptorPool descriptor_pool;
|
||||||
|
vk::DescriptorSetLayout descriptor_set_layout;
|
||||||
|
vk::DescriptorSets descriptor_sets;
|
||||||
|
vk::PipelineLayout pipeline_layout;
|
||||||
|
vk::ShaderModule easu_shader;
|
||||||
|
vk::ShaderModule rcas_shader;
|
||||||
|
vk::Pipeline easu_pipeline;
|
||||||
|
vk::Pipeline rcas_pipeline;
|
||||||
|
vk::Sampler sampler;
|
||||||
|
std::vector<vk::Image> images;
|
||||||
|
std::vector<vk::ImageView> image_views;
|
||||||
|
std::vector<MemoryCommit> buffer_commits;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
@ -32,6 +32,8 @@ namespace {
|
|||||||
using boost::container::small_vector;
|
using boost::container::small_vector;
|
||||||
using boost::container::static_vector;
|
using boost::container::static_vector;
|
||||||
using Shader::ImageBufferDescriptor;
|
using Shader::ImageBufferDescriptor;
|
||||||
|
using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET;
|
||||||
|
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
|
||||||
using Tegra::Texture::TexturePair;
|
using Tegra::Texture::TexturePair;
|
||||||
using VideoCore::Surface::PixelFormat;
|
using VideoCore::Surface::PixelFormat;
|
||||||
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
||||||
@ -235,6 +237,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
stage_infos[stage] = *info;
|
stage_infos[stage] = *info;
|
||||||
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
|
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
|
||||||
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||||
|
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||||
}
|
}
|
||||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||||
@ -277,11 +280,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
|
|||||||
|
|
||||||
template <typename Spec>
|
template <typename Spec>
|
||||||
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||||
std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
|
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
|
||||||
std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
|
|
||||||
std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
|
std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
|
||||||
size_t sampler_index{};
|
size_t sampler_index{};
|
||||||
size_t image_index{};
|
size_t view_index{};
|
||||||
|
|
||||||
texture_cache.SynchronizeGraphicsDescriptors();
|
texture_cache.SynchronizeGraphicsDescriptors();
|
||||||
|
|
||||||
@ -322,26 +324,30 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
}
|
}
|
||||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||||
}};
|
}};
|
||||||
const auto add_image{[&](const auto& desc) {
|
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices[image_index++] = handle.first;
|
views[view_index++] = {
|
||||||
|
.index = handle.first,
|
||||||
|
.blacklist = blacklist,
|
||||||
|
.id = {},
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
if constexpr (Spec::has_texture_buffers) {
|
if constexpr (Spec::has_texture_buffers) {
|
||||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||||
add_image(desc);
|
add_image(desc, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr (Spec::has_image_buffers) {
|
if constexpr (Spec::has_image_buffers) {
|
||||||
for (const auto& desc : info.image_buffer_descriptors) {
|
for (const auto& desc : info.image_buffer_descriptors) {
|
||||||
add_image(desc);
|
add_image(desc, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
for (const auto& desc : info.texture_descriptors) {
|
||||||
for (u32 index = 0; index < desc.count; ++index) {
|
for (u32 index = 0; index < desc.count; ++index) {
|
||||||
const auto handle{read_handle(desc, index)};
|
const auto handle{read_handle(desc, index)};
|
||||||
image_view_indices[image_index++] = handle.first;
|
views[view_index++] = {handle.first};
|
||||||
|
|
||||||
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
||||||
samplers[sampler_index++] = sampler->Handle();
|
samplers[sampler_index++] = sampler->Handle();
|
||||||
@ -349,7 +355,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
}
|
}
|
||||||
if constexpr (Spec::has_images) {
|
if constexpr (Spec::has_images) {
|
||||||
for (const auto& desc : info.image_descriptors) {
|
for (const auto& desc : info.image_descriptors) {
|
||||||
add_image(desc);
|
add_image(desc, desc.is_written);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
@ -368,10 +374,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
if constexpr (Spec::enabled_stages[4]) {
|
if constexpr (Spec::enabled_stages[4]) {
|
||||||
config_stage(4);
|
config_stage(4);
|
||||||
}
|
}
|
||||||
const std::span indices_span(image_view_indices.data(), image_index);
|
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index));
|
||||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
|
||||||
|
|
||||||
ImageId* texture_buffer_index{image_view_ids.data()};
|
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
|
||||||
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||||
size_t index{};
|
size_t index{};
|
||||||
const auto add_buffer{[&](const auto& desc) {
|
const auto add_buffer{[&](const auto& desc) {
|
||||||
@ -381,12 +386,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
if constexpr (is_image) {
|
if constexpr (is_image) {
|
||||||
is_written = desc.is_written;
|
is_written = desc.is_written;
|
||||||
}
|
}
|
||||||
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
|
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
|
||||||
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
||||||
image_view.BufferSize(), image_view.format,
|
image_view.BufferSize(), image_view.format,
|
||||||
is_written, is_image);
|
is_written, is_image);
|
||||||
++index;
|
++index;
|
||||||
++texture_buffer_index;
|
++texture_buffer_it;
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
buffer_cache.UnbindGraphicsTextureBuffers(stage);
|
buffer_cache.UnbindGraphicsTextureBuffers(stage);
|
||||||
@ -402,13 +407,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
add_buffer(desc);
|
add_buffer(desc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
|
||||||
texture_buffer_index += desc.count;
|
|
||||||
}
|
|
||||||
if constexpr (Spec::has_images) {
|
if constexpr (Spec::has_images) {
|
||||||
for (const auto& desc : info.image_descriptors) {
|
texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
|
||||||
texture_buffer_index += desc.count;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
if constexpr (Spec::enabled_stages[0]) {
|
if constexpr (Spec::enabled_stages[0]) {
|
||||||
@ -432,12 +433,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
|
|
||||||
update_descriptor_queue.Acquire();
|
update_descriptor_queue.Acquire();
|
||||||
|
|
||||||
|
RescalingPushConstant rescaling;
|
||||||
const VkSampler* samplers_it{samplers.data()};
|
const VkSampler* samplers_it{samplers.data()};
|
||||||
const ImageId* views_it{image_view_ids.data()};
|
const VideoCommon::ImageViewInOut* views_it{views.data()};
|
||||||
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
|
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||||
buffer_cache.BindHostStageBuffers(stage);
|
buffer_cache.BindHostStageBuffers(stage);
|
||||||
PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
|
PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
|
||||||
update_descriptor_queue);
|
samplers_it, views_it);
|
||||||
}};
|
}};
|
||||||
if constexpr (Spec::enabled_stages[0]) {
|
if constexpr (Spec::enabled_stages[0]) {
|
||||||
prepare_stage(0);
|
prepare_stage(0);
|
||||||
@ -454,10 +456,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||||||
if constexpr (Spec::enabled_stages[4]) {
|
if constexpr (Spec::enabled_stages[4]) {
|
||||||
prepare_stage(4);
|
prepare_stage(4);
|
||||||
}
|
}
|
||||||
ConfigureDraw();
|
ConfigureDraw(rescaling);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GraphicsPipeline::ConfigureDraw() {
|
void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
|
||||||
texture_cache.UpdateRenderTargets(false);
|
texture_cache.UpdateRenderTargets(false);
|
||||||
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
|
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
|
||||||
|
|
||||||
@ -468,12 +470,25 @@ void GraphicsPipeline::ConfigureDraw() {
|
|||||||
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
|
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||||
|
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
|
||||||
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
||||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||||
scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
|
||||||
|
is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) {
|
||||||
if (bind_pipeline) {
|
if (bind_pipeline) {
|
||||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||||
}
|
}
|
||||||
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||||
|
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||||
|
rescaling_data.data());
|
||||||
|
if (update_rescaling) {
|
||||||
|
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
|
||||||
|
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
|
||||||
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||||
|
RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor),
|
||||||
|
&scale_down_factor);
|
||||||
|
}
|
||||||
if (!descriptor_set_layout) {
|
if (!descriptor_set_layout) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -826,18 +841,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
|||||||
void GraphicsPipeline::Validate() {
|
void GraphicsPipeline::Validate() {
|
||||||
size_t num_images{};
|
size_t num_images{};
|
||||||
for (const auto& info : stage_infos) {
|
for (const auto& info : stage_infos) {
|
||||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
num_images += Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||||
num_images += desc.count;
|
num_images += Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||||
}
|
num_images += Shader::NumDescriptors(info.texture_descriptors);
|
||||||
for (const auto& desc : info.image_buffer_descriptors) {
|
num_images += Shader::NumDescriptors(info.image_descriptors);
|
||||||
num_images += desc.count;
|
|
||||||
}
|
|
||||||
for (const auto& desc : info.texture_descriptors) {
|
|
||||||
num_images += desc.count;
|
|
||||||
}
|
|
||||||
for (const auto& desc : info.image_descriptors) {
|
|
||||||
num_images += desc.count;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
|
ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,7 @@ namespace Vulkan {
|
|||||||
class Device;
|
class Device;
|
||||||
class PipelineStatistics;
|
class PipelineStatistics;
|
||||||
class RenderPassCache;
|
class RenderPassCache;
|
||||||
|
class RescalingPushConstant;
|
||||||
class VKScheduler;
|
class VKScheduler;
|
||||||
class VKUpdateDescriptorQueue;
|
class VKUpdateDescriptorQueue;
|
||||||
|
|
||||||
@ -113,7 +114,7 @@ private:
|
|||||||
template <typename Spec>
|
template <typename Spec>
|
||||||
void ConfigureImpl(bool is_indexed);
|
void ConfigureImpl(bool is_indexed);
|
||||||
|
|
||||||
void ConfigureDraw();
|
void ConfigureDraw(const RescalingPushConstant& rescaling);
|
||||||
|
|
||||||
void MakePipeline(VkRenderPass render_pass);
|
void MakePipeline(VkRenderPass render_pass);
|
||||||
|
|
||||||
@ -138,6 +139,7 @@ private:
|
|||||||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||||
|
u32 num_textures{};
|
||||||
|
|
||||||
vk::DescriptorSetLayout descriptor_set_layout;
|
vk::DescriptorSetLayout descriptor_set_layout;
|
||||||
DescriptorAllocator descriptor_allocator;
|
DescriptorAllocator descriptor_allocator;
|
||||||
|
@ -70,7 +70,9 @@ public:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// If none of the above is hit, fallback to a regular wait
|
// If none of the above is hit, fallback to a regular wait
|
||||||
semaphore.Wait(tick);
|
while (!semaphore.Wait(tick)) {
|
||||||
|
}
|
||||||
|
Refresh();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -58,18 +58,28 @@ struct DrawParams {
|
|||||||
bool is_indexed;
|
bool is_indexed;
|
||||||
};
|
};
|
||||||
|
|
||||||
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
|
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
|
||||||
const auto& src = regs.viewport_transform[index];
|
const auto& src = regs.viewport_transform[index];
|
||||||
const float width = src.scale_x * 2.0f;
|
const auto conv = [scale](float value) {
|
||||||
float y = src.translate_y - src.scale_y;
|
float new_value = value * scale;
|
||||||
float height = src.scale_y * 2.0f;
|
if (scale < 1.0f) {
|
||||||
|
const bool sign = std::signbit(value);
|
||||||
|
new_value = std::round(std::abs(new_value));
|
||||||
|
new_value = sign ? -new_value : new_value;
|
||||||
|
}
|
||||||
|
return new_value;
|
||||||
|
};
|
||||||
|
const float x = conv(src.translate_x - src.scale_x);
|
||||||
|
const float width = conv(src.scale_x * 2.0f);
|
||||||
|
float y = conv(src.translate_y - src.scale_y);
|
||||||
|
float height = conv(src.scale_y * 2.0f);
|
||||||
if (regs.screen_y_control.y_negate) {
|
if (regs.screen_y_control.y_negate) {
|
||||||
y += height;
|
y += height;
|
||||||
height = -height;
|
height = -height;
|
||||||
}
|
}
|
||||||
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
|
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
|
||||||
VkViewport viewport{
|
VkViewport viewport{
|
||||||
.x = src.translate_x - src.scale_x,
|
.x = x,
|
||||||
.y = y,
|
.y = y,
|
||||||
.width = width != 0.0f ? width : 1.0f,
|
.width = width != 0.0f ? width : 1.0f,
|
||||||
.height = height != 0.0f ? height : 1.0f,
|
.height = height != 0.0f ? height : 1.0f,
|
||||||
@ -83,14 +93,27 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
|
|||||||
return viewport;
|
return viewport;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
|
VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) {
|
||||||
const auto& src = regs.scissor_test[index];
|
const auto& src = regs.scissor_test[index];
|
||||||
VkRect2D scissor;
|
VkRect2D scissor;
|
||||||
|
const auto scale_up = [&](s32 value) -> s32 {
|
||||||
|
if (value == 0) {
|
||||||
|
return 0U;
|
||||||
|
}
|
||||||
|
const s32 upset = value * up_scale;
|
||||||
|
s32 acumm = 0;
|
||||||
|
if ((up_scale >> down_shift) == 0) {
|
||||||
|
acumm = upset % 2;
|
||||||
|
}
|
||||||
|
const s32 converted_value = (value * up_scale) >> down_shift;
|
||||||
|
return value < 0 ? std::min<s32>(converted_value - acumm, -1)
|
||||||
|
: std::max<s32>(converted_value + acumm, 1);
|
||||||
|
};
|
||||||
if (src.enable) {
|
if (src.enable) {
|
||||||
scissor.offset.x = static_cast<s32>(src.min_x);
|
scissor.offset.x = scale_up(static_cast<s32>(src.min_x));
|
||||||
scissor.offset.y = static_cast<s32>(src.min_y);
|
scissor.offset.y = scale_up(static_cast<s32>(src.min_y));
|
||||||
scissor.extent.width = src.max_x - src.min_x;
|
scissor.extent.width = scale_up(src.max_x - src.min_x);
|
||||||
scissor.extent.height = src.max_y - src.min_y;
|
scissor.extent.height = scale_up(src.max_y - src.min_y);
|
||||||
} else {
|
} else {
|
||||||
scissor.offset.x = 0;
|
scissor.offset.x = 0;
|
||||||
scissor.offset.y = 0;
|
scissor.offset.y = 0;
|
||||||
@ -199,7 +222,7 @@ void RasterizerVulkan::Clear() {
|
|||||||
|
|
||||||
query_cache.UpdateCounters();
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& regs = maxwell3d.regs;
|
auto& regs = maxwell3d.regs;
|
||||||
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
|
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
|
||||||
regs.clear_buffers.A;
|
regs.clear_buffers.A;
|
||||||
const bool use_depth = regs.clear_buffers.Z;
|
const bool use_depth = regs.clear_buffers.Z;
|
||||||
@ -214,8 +237,16 @@ void RasterizerVulkan::Clear() {
|
|||||||
const VkExtent2D render_area = framebuffer->RenderArea();
|
const VkExtent2D render_area = framebuffer->RenderArea();
|
||||||
scheduler.RequestRenderpass(framebuffer);
|
scheduler.RequestRenderpass(framebuffer);
|
||||||
|
|
||||||
|
u32 up_scale = 1;
|
||||||
|
u32 down_shift = 0;
|
||||||
|
if (texture_cache.IsRescaling()) {
|
||||||
|
up_scale = Settings::values.resolution_info.up_scale;
|
||||||
|
down_shift = Settings::values.resolution_info.down_shift;
|
||||||
|
}
|
||||||
|
UpdateViewportsState(regs);
|
||||||
|
|
||||||
VkClearRect clear_rect{
|
VkClearRect clear_rect{
|
||||||
.rect = GetScissorState(regs, 0),
|
.rect = GetScissorState(regs, 0, up_scale, down_shift),
|
||||||
.baseArrayLayer = regs.clear_buffers.layer,
|
.baseArrayLayer = regs.clear_buffers.layer,
|
||||||
.layerCount = 1,
|
.layerCount = 1,
|
||||||
};
|
};
|
||||||
@ -230,7 +261,38 @@ void RasterizerVulkan::Clear() {
|
|||||||
const u32 color_attachment = regs.clear_buffers.RT;
|
const u32 color_attachment = regs.clear_buffers.RT;
|
||||||
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
|
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
|
||||||
VkClearValue clear_value;
|
VkClearValue clear_value;
|
||||||
|
bool is_integer = false;
|
||||||
|
bool is_signed = false;
|
||||||
|
size_t int_size = 8;
|
||||||
|
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) {
|
||||||
|
const auto& this_rt = regs.rt[i];
|
||||||
|
if (this_rt.Address() == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (this_rt.format == Tegra::RenderTargetFormat::NONE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto format =
|
||||||
|
VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format);
|
||||||
|
is_integer = IsPixelFormatInteger(format);
|
||||||
|
is_signed = IsPixelFormatSignedInteger(format);
|
||||||
|
int_size = PixelComponentSizeBitsInteger(format);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!is_integer) {
|
||||||
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
|
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
|
||||||
|
} else if (!is_signed) {
|
||||||
|
for (size_t i = 0; i < 4; i++) {
|
||||||
|
clear_value.color.uint32[i] = static_cast<u32>(
|
||||||
|
static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < 4; i++) {
|
||||||
|
clear_value.color.int32[i] =
|
||||||
|
static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
|
||||||
|
(regs.clear_color[i] - 0.5f));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
|
||||||
const VkClearAttachment attachment{
|
const VkClearAttachment attachment{
|
||||||
@ -595,15 +657,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
|
|||||||
if (!state_tracker.TouchViewports()) {
|
if (!state_tracker.TouchViewports()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||||
|
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
|
||||||
const std::array viewports{
|
const std::array viewports{
|
||||||
GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
|
GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale),
|
||||||
GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
|
GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale),
|
||||||
GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
|
GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale),
|
||||||
GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
|
GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale),
|
||||||
GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
|
GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale),
|
||||||
GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
|
GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale),
|
||||||
GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
|
GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
|
||||||
GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
|
GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
|
||||||
};
|
};
|
||||||
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
|
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
|
||||||
}
|
}
|
||||||
@ -612,13 +676,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
|
|||||||
if (!state_tracker.TouchScissors()) {
|
if (!state_tracker.TouchScissors()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
u32 up_scale = 1;
|
||||||
|
u32 down_shift = 0;
|
||||||
|
if (texture_cache.IsRescaling()) {
|
||||||
|
up_scale = Settings::values.resolution_info.up_scale;
|
||||||
|
down_shift = Settings::values.resolution_info.down_shift;
|
||||||
|
}
|
||||||
const std::array scissors{
|
const std::array scissors{
|
||||||
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
|
GetScissorState(regs, 0, up_scale, down_shift),
|
||||||
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
|
GetScissorState(regs, 1, up_scale, down_shift),
|
||||||
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
|
GetScissorState(regs, 2, up_scale, down_shift),
|
||||||
GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
|
GetScissorState(regs, 3, up_scale, down_shift),
|
||||||
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
|
GetScissorState(regs, 4, up_scale, down_shift),
|
||||||
GetScissorState(regs, 15),
|
GetScissorState(regs, 5, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 6, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 7, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 8, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 9, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 10, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 11, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 12, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 13, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 14, up_scale, down_shift),
|
||||||
|
GetScissorState(regs, 15, up_scale, down_shift),
|
||||||
};
|
};
|
||||||
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
|
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
|
||||||
}
|
}
|
||||||
|
@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VKScheduler::UpdateRescaling(bool is_rescaling) {
|
||||||
|
if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
state.rescaling_defined = true;
|
||||||
|
state.is_rescaling = is_rescaling;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void VKScheduler::WorkerThread(std::stop_token stop_token) {
|
void VKScheduler::WorkerThread(std::stop_token stop_token) {
|
||||||
Common::SetCurrentThreadName("yuzu:VulkanWorker");
|
Common::SetCurrentThreadName("yuzu:VulkanWorker");
|
||||||
do {
|
do {
|
||||||
@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() {
|
|||||||
|
|
||||||
void VKScheduler::InvalidateState() {
|
void VKScheduler::InvalidateState() {
|
||||||
state.graphics_pipeline = nullptr;
|
state.graphics_pipeline = nullptr;
|
||||||
|
state.rescaling_defined = false;
|
||||||
state_tracker.InvalidateCommandBufferState();
|
state_tracker.InvalidateCommandBufferState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,6 +56,9 @@ public:
|
|||||||
/// Update the pipeline to the current execution context.
|
/// Update the pipeline to the current execution context.
|
||||||
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
|
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
|
||||||
|
|
||||||
|
/// Update the rescaling state. Returns true if the state has to be updated.
|
||||||
|
bool UpdateRescaling(bool is_rescaling);
|
||||||
|
|
||||||
/// Invalidates current command buffer state except for render passes
|
/// Invalidates current command buffer state except for render passes
|
||||||
void InvalidateState();
|
void InvalidateState();
|
||||||
|
|
||||||
@ -185,6 +188,8 @@ private:
|
|||||||
VkFramebuffer framebuffer = nullptr;
|
VkFramebuffer framebuffer = nullptr;
|
||||||
VkExtent2D render_area = {0, 0};
|
VkExtent2D render_area = {0, 0};
|
||||||
GraphicsPipeline* graphics_pipeline = nullptr;
|
GraphicsPipeline* graphics_pipeline = nullptr;
|
||||||
|
bool is_rescaling = false;
|
||||||
|
bool rescaling_defined = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
void WorkerThread(std::stop_token stop_token);
|
void WorkerThread(std::stop_token stop_token);
|
||||||
|
@ -71,11 +71,15 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool TouchViewports() {
|
bool TouchViewports() {
|
||||||
return Exchange(Dirty::Viewports, false);
|
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
|
||||||
|
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
|
||||||
|
return dirty_viewports || rescale_viewports;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TouchScissors() {
|
bool TouchScissors() {
|
||||||
return Exchange(Dirty::Scissors, false);
|
const bool dirty_scissors = Exchange(Dirty::Scissors, false);
|
||||||
|
const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
|
||||||
|
return dirty_scissors || rescale_scissors;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TouchDepthBias() {
|
bool TouchDepthBias() {
|
||||||
|
@ -32,10 +32,12 @@ using Tegra::Engines::Fermi2D;
|
|||||||
using Tegra::Texture::SwizzleSource;
|
using Tegra::Texture::SwizzleSource;
|
||||||
using Tegra::Texture::TextureMipmapFilter;
|
using Tegra::Texture::TextureMipmapFilter;
|
||||||
using VideoCommon::BufferImageCopy;
|
using VideoCommon::BufferImageCopy;
|
||||||
|
using VideoCommon::ImageFlagBits;
|
||||||
using VideoCommon::ImageInfo;
|
using VideoCommon::ImageInfo;
|
||||||
using VideoCommon::ImageType;
|
using VideoCommon::ImageType;
|
||||||
using VideoCommon::SubresourceRange;
|
using VideoCommon::SubresourceRange;
|
||||||
using VideoCore::Surface::IsPixelFormatASTC;
|
using VideoCore::Surface::IsPixelFormatASTC;
|
||||||
|
using VideoCore::Surface::IsPixelFormatInteger;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||||
@ -588,8 +590,158 @@ struct RangedBarrierRange {
|
|||||||
UNREACHABLE_MSG("Invalid image format={}", format);
|
UNREACHABLE_MSG("Invalid image format={}", format);
|
||||||
return VK_FORMAT_R32_UINT;
|
return VK_FORMAT_R32_UINT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
|
||||||
|
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
|
||||||
|
bool up_scaling = true) {
|
||||||
|
const bool is_2d = info.type == ImageType::e2D;
|
||||||
|
const auto resources = info.resources;
|
||||||
|
const VkExtent2D extent{
|
||||||
|
.width = info.size.width,
|
||||||
|
.height = info.size.height,
|
||||||
|
};
|
||||||
|
// Depth and integer formats must use NEAREST filter for blits.
|
||||||
|
const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
|
||||||
|
const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
|
||||||
|
const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
|
||||||
|
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
|
||||||
|
vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
|
||||||
|
const VkOffset2D src_size{
|
||||||
|
.x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
|
||||||
|
.y = static_cast<s32>(is_2d && up_scaling ? extent.height
|
||||||
|
: resolution.ScaleUp(extent.height)),
|
||||||
|
};
|
||||||
|
const VkOffset2D dst_size{
|
||||||
|
.x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
|
||||||
|
.y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
|
||||||
|
: extent.height),
|
||||||
|
};
|
||||||
|
boost::container::small_vector<VkImageBlit, 4> regions;
|
||||||
|
regions.reserve(resources.levels);
|
||||||
|
for (s32 level = 0; level < resources.levels; level++) {
|
||||||
|
regions.push_back({
|
||||||
|
.srcSubresource{
|
||||||
|
.aspectMask = aspect_mask,
|
||||||
|
.mipLevel = static_cast<u32>(level),
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = static_cast<u32>(resources.layers),
|
||||||
|
},
|
||||||
|
.srcOffsets{
|
||||||
|
{
|
||||||
|
.x = 0,
|
||||||
|
.y = 0,
|
||||||
|
.z = 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.x = std::max(1, src_size.x >> level),
|
||||||
|
.y = std::max(1, src_size.y >> level),
|
||||||
|
.z = 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
.dstSubresource{
|
||||||
|
.aspectMask = aspect_mask,
|
||||||
|
.mipLevel = static_cast<u32>(level),
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = static_cast<u32>(resources.layers),
|
||||||
|
},
|
||||||
|
.dstOffsets{
|
||||||
|
{
|
||||||
|
.x = 0,
|
||||||
|
.y = 0,
|
||||||
|
.z = 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.x = std::max(1, dst_size.x >> level),
|
||||||
|
.y = std::max(1, dst_size.y >> level),
|
||||||
|
.z = 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
const VkImageSubresourceRange subresource_range{
|
||||||
|
.aspectMask = aspect_mask,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
};
|
||||||
|
const std::array read_barriers{
|
||||||
|
VkImageMemoryBarrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = src_image,
|
||||||
|
.subresourceRange = subresource_range,
|
||||||
|
},
|
||||||
|
VkImageMemoryBarrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
|
||||||
|
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||||
|
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = dst_image,
|
||||||
|
.subresourceRange = subresource_range,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const std::array write_barriers{
|
||||||
|
VkImageMemoryBarrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = 0,
|
||||||
|
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = src_image,
|
||||||
|
.subresourceRange = subresource_range,
|
||||||
|
},
|
||||||
|
VkImageMemoryBarrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = dst_image,
|
||||||
|
.subresourceRange = subresource_range,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
0, nullptr, nullptr, read_barriers);
|
||||||
|
cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
|
||||||
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter);
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||||
|
0, nullptr, nullptr, write_barriers);
|
||||||
|
});
|
||||||
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
|
||||||
|
MemoryAllocator& memory_allocator_,
|
||||||
|
StagingBufferPool& staging_buffer_pool_,
|
||||||
|
BlitImageHelper& blit_image_helper_,
|
||||||
|
ASTCDecoderPass& astc_decoder_pass_,
|
||||||
|
RenderPassCache& render_pass_cache_)
|
||||||
|
: device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
|
||||||
|
staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
|
||||||
|
astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_},
|
||||||
|
resolution{Settings::values.resolution_info} {}
|
||||||
|
|
||||||
void TextureCacheRuntime::Finish() {
|
void TextureCacheRuntime::Finish() {
|
||||||
scheduler.Finish();
|
scheduler.Finish();
|
||||||
}
|
}
|
||||||
@ -614,8 +766,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
|
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
|
||||||
blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
|
blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D),
|
||||||
operation);
|
dst_region, src_region, filter, operation);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||||
@ -719,26 +871,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view,
|
||||||
|
bool rescaled) {
|
||||||
|
const u32 up_scale = rescaled ? resolution.up_scale : 1;
|
||||||
|
const u32 down_shift = rescaled ? resolution.down_shift : 0;
|
||||||
switch (dst_view.format) {
|
switch (dst_view.format) {
|
||||||
case PixelFormat::R16_UNORM:
|
case PixelFormat::R16_UNORM:
|
||||||
if (src_view.format == PixelFormat::D16_UNORM) {
|
if (src_view.format == PixelFormat::D16_UNORM) {
|
||||||
return blit_image_helper.ConvertD16ToR16(dst, src_view);
|
return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case PixelFormat::R32_FLOAT:
|
case PixelFormat::R32_FLOAT:
|
||||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||||
return blit_image_helper.ConvertD32ToR32(dst, src_view);
|
return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case PixelFormat::D16_UNORM:
|
case PixelFormat::D16_UNORM:
|
||||||
if (src_view.format == PixelFormat::R16_UNORM) {
|
if (src_view.format == PixelFormat::R16_UNORM) {
|
||||||
return blit_image_helper.ConvertR16ToD16(dst, src_view);
|
return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case PixelFormat::D32_FLOAT:
|
case PixelFormat::D32_FLOAT:
|
||||||
if (src_view.format == PixelFormat::R32_FLOAT) {
|
if (src_view.format == PixelFormat::R32_FLOAT) {
|
||||||
return blit_image_helper.ConvertR32ToD32(dst, src_view);
|
return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -840,36 +995,39 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
|
|||||||
return device.GetDeviceLocalMemory();
|
return device.GetDeviceLocalMemory();
|
||||||
}
|
}
|
||||||
|
|
||||||
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
/// Frame tick entry point of the Vulkan texture cache runtime.
/// The body is empty, so this implementation performs no work when called.
/// NOTE(review): the name suggests it is invoked once per frame - confirm against the caller.
void TextureCacheRuntime::TickFrame() {}
|
||||||
|
|
||||||
|
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||||
VAddr cpu_addr_)
|
VAddr cpu_addr_)
|
||||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
|
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
|
||||||
image(MakeImage(runtime.device, info)),
|
runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
|
||||||
commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
|
commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
|
||||||
aspect_mask(ImageAspectMask(info.format)) {
|
aspect_mask(ImageAspectMask(info.format)) {
|
||||||
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
|
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
|
||||||
if (Settings::values.accelerate_astc.GetValue()) {
|
if (Settings::values.accelerate_astc.GetValue()) {
|
||||||
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
|
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
|
||||||
} else {
|
} else {
|
||||||
flags |= VideoCommon::ImageFlagBits::Converted;
|
flags |= VideoCommon::ImageFlagBits::Converted;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (runtime.device.HasDebuggingToolAttached()) {
|
if (runtime->device.HasDebuggingToolAttached()) {
|
||||||
image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
|
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
|
||||||
}
|
}
|
||||||
static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
|
static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
|
||||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
|
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
|
||||||
};
|
};
|
||||||
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
|
current_image = *original_image;
|
||||||
const auto& device = runtime.device.GetLogical();
|
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
|
||||||
|
const auto& device = runtime->device.GetLogical();
|
||||||
storage_image_views.reserve(info.resources.levels);
|
storage_image_views.reserve(info.resources.levels);
|
||||||
for (s32 level = 0; level < info.resources.levels; ++level) {
|
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||||
storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
|
storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
|
||||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||||
.pNext = &storage_image_view_usage_create_info,
|
.pNext = &storage_image_view_usage_create_info,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.image = *image,
|
.image = *original_image,
|
||||||
.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
|
.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
|
||||||
.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||||
.components{
|
.components{
|
||||||
@ -890,26 +1048,39 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Constructs an Image from NullImageParams by forwarding the tag to the ImageBase constructor.
/// No other member is initialized in this constructor, so the remaining members keep whatever
/// default initializers the class declares.
Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
|
||||||
|
|
||||||
Image::~Image() = default;
|
Image::~Image() = default;
|
||||||
|
|
||||||
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
||||||
// TODO: Move this to another API
|
// TODO: Move this to another API
|
||||||
|
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleDown(true);
|
||||||
|
}
|
||||||
scheduler->RequestOutsideRenderPassOperationContext();
|
scheduler->RequestOutsideRenderPassOperationContext();
|
||||||
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
||||||
const VkBuffer src_buffer = map.buffer;
|
const VkBuffer src_buffer = map.buffer;
|
||||||
const VkImage vk_image = *image;
|
const VkImage vk_image = *original_image;
|
||||||
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
||||||
const bool is_initialized = std::exchange(initialized, true);
|
const bool is_initialized = std::exchange(initialized, true);
|
||||||
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
|
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
|
||||||
vk_copies](vk::CommandBuffer cmdbuf) {
|
vk_copies](vk::CommandBuffer cmdbuf) {
|
||||||
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
|
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
|
||||||
});
|
});
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleUp();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
||||||
|
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleDown();
|
||||||
|
}
|
||||||
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
||||||
scheduler->RequestOutsideRenderPassOperationContext();
|
scheduler->RequestOutsideRenderPassOperationContext();
|
||||||
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
|
scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask,
|
||||||
vk_copies](vk::CommandBuffer cmdbuf) {
|
vk_copies](vk::CommandBuffer cmdbuf) {
|
||||||
const VkImageMemoryBarrier read_barrier{
|
const VkImageMemoryBarrier read_barrier{
|
||||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
@ -959,6 +1130,146 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
|
|||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||||
0, memory_write_barrier, nullptr, image_write_barrier);
|
0, memory_write_barrier, nullptr, image_write_barrier);
|
||||||
});
|
});
|
||||||
|
if (is_rescaled) {
|
||||||
|
ScaleUp(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Image::ScaleUp(bool ignore) {
|
||||||
|
if (True(flags & ImageFlagBits::Rescaled)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ASSERT(info.type != ImageType::Linear);
|
||||||
|
flags |= ImageFlagBits::Rescaled;
|
||||||
|
const auto& resolution = runtime->resolution;
|
||||||
|
if (!resolution.active) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
has_scaled = true;
|
||||||
|
const auto& device = runtime->device;
|
||||||
|
if (!scaled_image) {
|
||||||
|
const bool is_2d = info.type == ImageType::e2D;
|
||||||
|
const u32 scaled_width = resolution.ScaleUp(info.size.width);
|
||||||
|
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
|
||||||
|
auto scaled_info = info;
|
||||||
|
scaled_info.size.width = scaled_width;
|
||||||
|
scaled_info.size.height = scaled_height;
|
||||||
|
scaled_image = MakeImage(device, scaled_info);
|
||||||
|
auto& allocator = runtime->memory_allocator;
|
||||||
|
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
|
||||||
|
ignore = false;
|
||||||
|
}
|
||||||
|
current_image = *scaled_image;
|
||||||
|
if (ignore) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aspect_mask == 0) {
|
||||||
|
aspect_mask = ImageAspectMask(info.format);
|
||||||
|
}
|
||||||
|
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
|
||||||
|
const PixelFormat format = StorageFormat(info.format);
|
||||||
|
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
|
||||||
|
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
|
||||||
|
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
|
||||||
|
BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
|
||||||
|
} else {
|
||||||
|
return BlitScaleHelper(true);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Image::ScaleDown(bool ignore) {
|
||||||
|
if (False(flags & ImageFlagBits::Rescaled)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ASSERT(info.type != ImageType::Linear);
|
||||||
|
flags &= ~ImageFlagBits::Rescaled;
|
||||||
|
const auto& resolution = runtime->resolution;
|
||||||
|
if (!resolution.active) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
current_image = *original_image;
|
||||||
|
if (ignore) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (aspect_mask == 0) {
|
||||||
|
aspect_mask = ImageAspectMask(info.format);
|
||||||
|
}
|
||||||
|
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
|
||||||
|
const PixelFormat format = StorageFormat(info.format);
|
||||||
|
const auto& device = runtime->device;
|
||||||
|
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
|
||||||
|
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
|
||||||
|
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
|
||||||
|
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
|
||||||
|
} else {
|
||||||
|
return BlitScaleHelper(false);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Image::BlitScaleHelper(bool scale_up) {
|
||||||
|
using namespace VideoCommon;
|
||||||
|
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
|
||||||
|
const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
|
||||||
|
const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
|
||||||
|
const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear
|
||||||
|
: Tegra::Engines::Fermi2D::Filter::Point;
|
||||||
|
|
||||||
|
const bool is_2d = info.type == ImageType::e2D;
|
||||||
|
const auto& resolution = runtime->resolution;
|
||||||
|
const u32 scaled_width = resolution.ScaleUp(info.size.width);
|
||||||
|
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
|
||||||
|
std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view;
|
||||||
|
std::unique_ptr<Framebuffer>& blit_framebuffer =
|
||||||
|
scale_up ? scale_framebuffer : normal_framebuffer;
|
||||||
|
if (!blit_view) {
|
||||||
|
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
|
||||||
|
blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 src_width = scale_up ? info.size.width : scaled_width;
|
||||||
|
const u32 src_height = scale_up ? info.size.height : scaled_height;
|
||||||
|
const u32 dst_width = scale_up ? scaled_width : info.size.width;
|
||||||
|
const u32 dst_height = scale_up ? scaled_height : info.size.height;
|
||||||
|
const Region2D src_region{
|
||||||
|
.start = {0, 0},
|
||||||
|
.end = {static_cast<s32>(src_width), static_cast<s32>(src_height)},
|
||||||
|
};
|
||||||
|
const Region2D dst_region{
|
||||||
|
.start = {0, 0},
|
||||||
|
.end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)},
|
||||||
|
};
|
||||||
|
const VkExtent2D extent{
|
||||||
|
.width = std::max(scaled_width, info.size.width),
|
||||||
|
.height = std::max(scaled_height, info.size.width),
|
||||||
|
};
|
||||||
|
|
||||||
|
auto* view_ptr = blit_view.get();
|
||||||
|
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||||
|
if (!blit_framebuffer) {
|
||||||
|
blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
|
||||||
|
}
|
||||||
|
const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);
|
||||||
|
|
||||||
|
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
|
||||||
|
src_region, operation, BLIT_OPERATION);
|
||||||
|
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
|
||||||
|
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||||
|
if (!blit_framebuffer) {
|
||||||
|
blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
|
||||||
|
}
|
||||||
|
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
|
||||||
|
blit_view->StencilView(), dst_region,
|
||||||
|
src_region, operation, BLIT_OPERATION);
|
||||||
|
} else {
|
||||||
|
// TODO: Use helper blits where applicable
|
||||||
|
flags &= ~ImageFlagBits::Rescaled;
|
||||||
|
LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
|
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
|
||||||
@ -1052,7 +1363,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
|||||||
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
|
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
|
||||||
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
|
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
|
||||||
|
|
||||||
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
|
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
|
||||||
: VideoCommon::ImageViewBase{params} {}
|
: VideoCommon::ImageViewBase{params} {}
|
||||||
|
|
||||||
VkImageView ImageView::DepthView() {
|
VkImageView ImageView::DepthView() {
|
||||||
@ -1162,7 +1473,27 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
|
|||||||
}
|
}
|
||||||
|
|
||||||
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
|
ImageView* depth_buffer, const VideoCommon::RenderTargets& key)
|
||||||
|
: render_area{VkExtent2D{
|
||||||
|
.width = key.size.width,
|
||||||
|
.height = key.size.height,
|
||||||
|
}} {
|
||||||
|
CreateFramebuffer(runtime, color_buffers, depth_buffer);
|
||||||
|
if (runtime.device.HasDebuggingToolAttached()) {
|
||||||
|
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds a framebuffer with at most one color attachment and an explicit render area.
///
/// @param runtime      Texture cache runtime forwarded to CreateFramebuffer.
/// @param color_buffer View placed in the first color slot; the remaining slots are null.
/// @param depth_buffer Depth view forwarded to CreateFramebuffer.
/// @param extent       Stored as the render area before the framebuffer is created.
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
                         ImageView* depth_buffer, VkExtent2D extent)
    : render_area{extent} {
    // Value-initialize every slot to null, then fill in the single color attachment.
    std::array<ImageView*, NUM_RT> single_color{};
    single_color[0] = color_buffer;
    CreateFramebuffer(runtime, single_color, depth_buffer);
}
|
||||||
|
|
||||||
|
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||||
|
std::span<ImageView*, NUM_RT> color_buffers,
|
||||||
|
ImageView* depth_buffer) {
|
||||||
std::vector<VkImageView> attachments;
|
std::vector<VkImageView> attachments;
|
||||||
RenderPassKey renderpass_key{};
|
RenderPassKey renderpass_key{};
|
||||||
s32 num_layers = 1;
|
s32 num_layers = 1;
|
||||||
@ -1200,10 +1531,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||||||
|
|
||||||
renderpass = runtime.render_pass_cache.Get(renderpass_key);
|
renderpass = runtime.render_pass_cache.Get(renderpass_key);
|
||||||
|
|
||||||
render_area = VkExtent2D{
|
|
||||||
.width = key.size.width,
|
|
||||||
.height = key.size.height,
|
|
||||||
};
|
|
||||||
num_color_buffers = static_cast<u32>(num_colors);
|
num_color_buffers = static_cast<u32>(num_colors);
|
||||||
framebuffer = runtime.device.GetLogical().CreateFramebuffer({
|
framebuffer = runtime.device.GetLogical().CreateFramebuffer({
|
||||||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
||||||
@ -1212,13 +1539,10 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||||||
.renderPass = renderpass,
|
.renderPass = renderpass,
|
||||||
.attachmentCount = static_cast<u32>(attachments.size()),
|
.attachmentCount = static_cast<u32>(attachments.size()),
|
||||||
.pAttachments = attachments.data(),
|
.pAttachments = attachments.data(),
|
||||||
.width = key.size.width,
|
.width = render_area.width,
|
||||||
.height = key.size.height,
|
.height = render_area.height,
|
||||||
.layers = static_cast<u32>(std::max(num_layers, 1)),
|
.layers = static_cast<u32>(std::max(num_layers, 1)),
|
||||||
});
|
});
|
||||||
if (runtime.device.HasDebuggingToolAttached()) {
|
|
||||||
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCacheRuntime::AccelerateImageUpload(
|
void TextureCacheRuntime::AccelerateImageUpload(
|
||||||
|
@ -13,6 +13,10 @@
|
|||||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
|
namespace Settings {
|
||||||
|
struct ResolutionScalingInfo;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using VideoCommon::ImageId;
|
using VideoCommon::ImageId;
|
||||||
@ -31,14 +35,14 @@ class RenderPassCache;
|
|||||||
class StagingBufferPool;
|
class StagingBufferPool;
|
||||||
class VKScheduler;
|
class VKScheduler;
|
||||||
|
|
||||||
struct TextureCacheRuntime {
|
class TextureCacheRuntime {
|
||||||
const Device& device;
|
public:
|
||||||
VKScheduler& scheduler;
|
explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
|
||||||
MemoryAllocator& memory_allocator;
|
MemoryAllocator& memory_allocator_,
|
||||||
StagingBufferPool& staging_buffer_pool;
|
StagingBufferPool& staging_buffer_pool_,
|
||||||
BlitImageHelper& blit_image_helper;
|
BlitImageHelper& blit_image_helper_,
|
||||||
ASTCDecoderPass& astc_decoder_pass;
|
ASTCDecoderPass& astc_decoder_pass_,
|
||||||
RenderPassCache& render_pass_cache;
|
RenderPassCache& render_pass_cache_);
|
||||||
|
|
||||||
void Finish();
|
void Finish();
|
||||||
|
|
||||||
@ -46,6 +50,10 @@ struct TextureCacheRuntime {
|
|||||||
|
|
||||||
StagingBufferRef DownloadStagingBuffer(size_t size);
|
StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||||
|
|
||||||
|
void TickFrame();
|
||||||
|
|
||||||
|
u64 GetDeviceLocalMemory() const;
|
||||||
|
|
||||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||||
const Region2D& dst_region, const Region2D& src_region,
|
const Region2D& dst_region, const Region2D& src_region,
|
||||||
Tegra::Engines::Fermi2D::Filter filter,
|
Tegra::Engines::Fermi2D::Filter filter,
|
||||||
@ -53,7 +61,7 @@ struct TextureCacheRuntime {
|
|||||||
|
|
||||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
|
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
|
||||||
|
|
||||||
bool CanAccelerateImageUpload(Image&) const noexcept {
|
bool CanAccelerateImageUpload(Image&) const noexcept {
|
||||||
return false;
|
return false;
|
||||||
@ -74,13 +82,21 @@ struct TextureCacheRuntime {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GetDeviceLocalMemory() const;
|
const Device& device;
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
MemoryAllocator& memory_allocator;
|
||||||
|
StagingBufferPool& staging_buffer_pool;
|
||||||
|
BlitImageHelper& blit_image_helper;
|
||||||
|
ASTCDecoderPass& astc_decoder_pass;
|
||||||
|
RenderPassCache& render_pass_cache;
|
||||||
|
const Settings::ResolutionScalingInfo& resolution;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Image : public VideoCommon::ImageBase {
|
class Image : public VideoCommon::ImageBase {
|
||||||
public:
|
public:
|
||||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
VAddr cpu_addr);
|
VAddr cpu_addr);
|
||||||
|
explicit Image(const VideoCommon::NullImageParams&);
|
||||||
|
|
||||||
~Image();
|
~Image();
|
||||||
|
|
||||||
@ -97,7 +113,7 @@ public:
|
|||||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||||
|
|
||||||
[[nodiscard]] VkImage Handle() const noexcept {
|
[[nodiscard]] VkImage Handle() const noexcept {
|
||||||
return *image;
|
return current_image;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
|
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
|
||||||
@ -113,14 +129,30 @@ public:
|
|||||||
return std::exchange(initialized, true);
|
return std::exchange(initialized, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ScaleUp(bool ignore = false);
|
||||||
|
|
||||||
|
bool ScaleDown(bool ignore = false);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VKScheduler* scheduler;
|
bool BlitScaleHelper(bool scale_up);
|
||||||
vk::Image image;
|
|
||||||
|
VKScheduler* scheduler{};
|
||||||
|
TextureCacheRuntime* runtime{};
|
||||||
|
|
||||||
|
vk::Image original_image;
|
||||||
MemoryCommit commit;
|
MemoryCommit commit;
|
||||||
vk::ImageView image_view;
|
|
||||||
std::vector<vk::ImageView> storage_image_views;
|
std::vector<vk::ImageView> storage_image_views;
|
||||||
VkImageAspectFlags aspect_mask = 0;
|
VkImageAspectFlags aspect_mask = 0;
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
|
vk::Image scaled_image{};
|
||||||
|
MemoryCommit scaled_commit{};
|
||||||
|
VkImage current_image{};
|
||||||
|
|
||||||
|
std::unique_ptr<Framebuffer> scale_framebuffer;
|
||||||
|
std::unique_ptr<ImageView> scale_view;
|
||||||
|
|
||||||
|
std::unique_ptr<Framebuffer> normal_framebuffer;
|
||||||
|
std::unique_ptr<ImageView> normal_view;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ImageView : public VideoCommon::ImageViewBase {
|
class ImageView : public VideoCommon::ImageViewBase {
|
||||||
@ -128,7 +160,7 @@ public:
|
|||||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
|
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
|
||||||
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
||||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
|
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
|
||||||
|
|
||||||
[[nodiscard]] VkImageView DepthView();
|
[[nodiscard]] VkImageView DepthView();
|
||||||
|
|
||||||
@ -197,9 +229,15 @@ private:
|
|||||||
|
|
||||||
class Framebuffer {
|
class Framebuffer {
|
||||||
public:
|
public:
|
||||||
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
|
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||||
|
|
||||||
|
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
|
||||||
|
ImageView* depth_buffer, VkExtent2D extent);
|
||||||
|
|
||||||
|
void CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||||
|
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer);
|
||||||
|
|
||||||
[[nodiscard]] VkFramebuffer Handle() const noexcept {
|
[[nodiscard]] VkFramebuffer Handle() const noexcept {
|
||||||
return *framebuffer;
|
return *framebuffer;
|
||||||
}
|
}
|
||||||
|
@ -279,6 +279,80 @@ bool IsPixelFormatSRGB(PixelFormat format) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when the format is one of the listed signed or unsigned integer pixel formats.
bool IsPixelFormatInteger(PixelFormat format) {
    switch (format) {
    // 8-bit components
    case PixelFormat::R8_SINT:
    case PixelFormat::R8_UINT:
    case PixelFormat::R8G8_SINT:
    case PixelFormat::R8G8_UINT:
    case PixelFormat::A8B8G8R8_SINT:
    case PixelFormat::A8B8G8R8_UINT:
    // 10-bit components
    case PixelFormat::A2B10G10R10_UINT:
    // 16-bit components
    case PixelFormat::R16_SINT:
    case PixelFormat::R16_UINT:
    case PixelFormat::R16G16_SINT:
    case PixelFormat::R16G16_UINT:
    case PixelFormat::R16G16B16A16_SINT:
    case PixelFormat::R16G16B16A16_UINT:
    // 32-bit components
    case PixelFormat::R32_SINT:
    case PixelFormat::R32_UINT:
    case PixelFormat::R32G32_SINT:
    case PixelFormat::R32G32_UINT:
    case PixelFormat::R32G32B32A32_SINT:
    case PixelFormat::R32G32B32A32_UINT:
        return true;
    default:
        break;
    }
    return false;
}
|
||||||
|
|
||||||
|
/// Returns true when the format is one of the listed signed integer (SINT) pixel formats.
bool IsPixelFormatSignedInteger(PixelFormat format) {
    switch (format) {
    // 8-bit components
    case PixelFormat::R8_SINT:
    case PixelFormat::R8G8_SINT:
    case PixelFormat::A8B8G8R8_SINT:
    // 16-bit components
    case PixelFormat::R16_SINT:
    case PixelFormat::R16G16_SINT:
    case PixelFormat::R16G16B16A16_SINT:
    // 32-bit components
    case PixelFormat::R32_SINT:
    case PixelFormat::R32G32_SINT:
    case PixelFormat::R32G32B32A32_SINT:
        return true;
    default:
        break;
    }
    return false;
}
|
||||||
|
|
||||||
|
/// Returns the per-component bit width (8, 10, 16 or 32) for the listed integer pixel formats,
/// and 0 for every other format.
size_t PixelComponentSizeBitsInteger(PixelFormat format) {
    size_t component_bits = 0;
    switch (format) {
    case PixelFormat::R8_SINT:
    case PixelFormat::R8_UINT:
    case PixelFormat::R8G8_SINT:
    case PixelFormat::R8G8_UINT:
    case PixelFormat::A8B8G8R8_SINT:
    case PixelFormat::A8B8G8R8_UINT:
        component_bits = 8;
        break;
    case PixelFormat::A2B10G10R10_UINT:
        component_bits = 10;
        break;
    case PixelFormat::R16_SINT:
    case PixelFormat::R16_UINT:
    case PixelFormat::R16G16_SINT:
    case PixelFormat::R16G16_UINT:
    case PixelFormat::R16G16B16A16_SINT:
    case PixelFormat::R16G16B16A16_UINT:
        component_bits = 16;
        break;
    case PixelFormat::R32_SINT:
    case PixelFormat::R32_UINT:
    case PixelFormat::R32G32_SINT:
    case PixelFormat::R32G32_UINT:
    case PixelFormat::R32G32B32A32_SINT:
    case PixelFormat::R32G32B32A32_UINT:
        component_bits = 32;
        break;
    default:
        // Not one of the listed integer formats: report 0 bits.
        break;
    }
    return component_bits;
}
|
||||||
|
|
||||||
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
|
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
|
||||||
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
|
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
|
||||||
}
|
}
|
||||||
|
@ -460,6 +460,12 @@ bool IsPixelFormatASTC(PixelFormat format);
|
|||||||
|
|
||||||
bool IsPixelFormatSRGB(PixelFormat format);
|
bool IsPixelFormatSRGB(PixelFormat format);
|
||||||
|
|
||||||
|
bool IsPixelFormatInteger(PixelFormat format);
|
||||||
|
|
||||||
|
bool IsPixelFormatSignedInteger(PixelFormat format);
|
||||||
|
|
||||||
|
size_t PixelComponentSizeBitsInteger(PixelFormat format);
|
||||||
|
|
||||||
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
|
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
|
||||||
|
|
||||||
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
|
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
|
||||||
|
@ -60,15 +60,17 @@ namespace {
|
|||||||
ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
|
ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
|
||||||
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
|
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
|
||||||
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
|
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
|
||||||
converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
|
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
|
||||||
cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
|
has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
|
||||||
mip_level_offsets{CalculateMipLevelOffsets(info)} {
|
cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
|
||||||
if (info.type == ImageType::e3D) {
|
if (info.type == ImageType::e3D) {
|
||||||
slice_offsets = CalculateSliceOffsets(info);
|
slice_offsets = CalculateSliceOffsets(info);
|
||||||
slice_subresources = CalculateSliceSubresources(info);
|
slice_subresources = CalculateSliceSubresources(info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tag constructor taking NullImageParams: the parameter is unused and the body is empty, so no
/// member is assigned here and each one keeps its in-class default initializer.
ImageBase::ImageBase(const NullImageParams&) {}
|
||||||
|
|
||||||
ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
|
ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
|
||||||
: gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
|
: gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
|
||||||
|
|
||||||
@ -254,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
|
|||||||
}
|
}
|
||||||
lhs.aliased_images.push_back(std::move(lhs_alias));
|
lhs.aliased_images.push_back(std::move(lhs_alias));
|
||||||
rhs.aliased_images.push_back(std::move(rhs_alias));
|
rhs.aliased_images.push_back(std::move(rhs_alias));
|
||||||
|
lhs.flags &= ~ImageFlagBits::IsRescalable;
|
||||||
|
rhs.flags &= ~ImageFlagBits::IsRescalable;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
@ -33,6 +33,12 @@ enum class ImageFlagBits : u32 {
|
|||||||
///< garbage collection priority
|
///< garbage collection priority
|
||||||
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
|
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
|
||||||
///< collection
|
///< collection
|
||||||
|
|
||||||
|
// Rescaler
|
||||||
|
Rescaled = 1 << 12,
|
||||||
|
CheckingRescalable = 1 << 13,
|
||||||
|
IsRescalable = 1 << 14,
|
||||||
|
Blacklisted = 1 << 15,
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||||
|
|
||||||
@ -43,8 +49,11 @@ struct AliasedImage {
|
|||||||
ImageId id;
|
ImageId id;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct NullImageParams {};
|
||||||
|
|
||||||
struct ImageBase {
|
struct ImageBase {
|
||||||
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
|
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
|
||||||
|
explicit ImageBase(const NullImageParams&);
|
||||||
|
|
||||||
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
|
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
|
||||||
|
|
||||||
@ -68,11 +77,18 @@ struct ImageBase {
|
|||||||
void CheckBadOverlapState();
|
void CheckBadOverlapState();
|
||||||
void CheckAliasState();
|
void CheckAliasState();
|
||||||
|
|
||||||
|
bool HasScaled() {
|
||||||
|
return has_scaled;
|
||||||
|
}
|
||||||
|
|
||||||
ImageInfo info;
|
ImageInfo info;
|
||||||
|
|
||||||
u32 guest_size_bytes = 0;
|
u32 guest_size_bytes = 0;
|
||||||
u32 unswizzled_size_bytes = 0;
|
u32 unswizzled_size_bytes = 0;
|
||||||
u32 converted_size_bytes = 0;
|
u32 converted_size_bytes = 0;
|
||||||
|
u32 scale_rating = 0;
|
||||||
|
u64 scale_tick = 0;
|
||||||
|
bool has_scaled = false;
|
||||||
ImageFlagBits flags = ImageFlagBits::CpuModified;
|
ImageFlagBits flags = ImageFlagBits::CpuModified;
|
||||||
|
|
||||||
GPUVAddr gpu_addr = 0;
|
GPUVAddr gpu_addr = 0;
|
||||||
|
@ -31,6 +31,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
|
|||||||
.depth = config.block_depth,
|
.depth = config.block_depth,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
rescaleable = false;
|
||||||
tile_width_spacing = config.tile_width_spacing;
|
tile_width_spacing = config.tile_width_spacing;
|
||||||
if (config.texture_type != TextureType::Texture2D &&
|
if (config.texture_type != TextureType::Texture2D &&
|
||||||
config.texture_type != TextureType::Texture2DNoMipmap) {
|
config.texture_type != TextureType::Texture2DNoMipmap) {
|
||||||
@ -41,6 +42,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
|
|||||||
ASSERT(config.BaseLayer() == 0);
|
ASSERT(config.BaseLayer() == 0);
|
||||||
type = ImageType::e1D;
|
type = ImageType::e1D;
|
||||||
size.width = config.Width();
|
size.width = config.Width();
|
||||||
|
resources.layers = 1;
|
||||||
break;
|
break;
|
||||||
case TextureType::Texture1DArray:
|
case TextureType::Texture1DArray:
|
||||||
UNIMPLEMENTED_IF(config.BaseLayer() != 0);
|
UNIMPLEMENTED_IF(config.BaseLayer() != 0);
|
||||||
@ -52,12 +54,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
|
|||||||
case TextureType::Texture2DNoMipmap:
|
case TextureType::Texture2DNoMipmap:
|
||||||
ASSERT(config.Depth() == 1);
|
ASSERT(config.Depth() == 1);
|
||||||
type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
|
type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
|
||||||
|
rescaleable = !config.IsPitchLinear();
|
||||||
size.width = config.Width();
|
size.width = config.Width();
|
||||||
size.height = config.Height();
|
size.height = config.Height();
|
||||||
resources.layers = config.BaseLayer() + 1;
|
resources.layers = config.BaseLayer() + 1;
|
||||||
break;
|
break;
|
||||||
case TextureType::Texture2DArray:
|
case TextureType::Texture2DArray:
|
||||||
type = ImageType::e2D;
|
type = ImageType::e2D;
|
||||||
|
rescaleable = true;
|
||||||
size.width = config.Width();
|
size.width = config.Width();
|
||||||
size.height = config.Height();
|
size.height = config.Height();
|
||||||
resources.layers = config.BaseLayer() + config.Depth();
|
resources.layers = config.BaseLayer() + config.Depth();
|
||||||
@ -82,10 +86,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
|
|||||||
size.width = config.Width();
|
size.width = config.Width();
|
||||||
size.height = config.Height();
|
size.height = config.Height();
|
||||||
size.depth = config.Depth();
|
size.depth = config.Depth();
|
||||||
|
resources.layers = 1;
|
||||||
break;
|
break;
|
||||||
case TextureType::Texture1DBuffer:
|
case TextureType::Texture1DBuffer:
|
||||||
type = ImageType::Buffer;
|
type = ImageType::Buffer;
|
||||||
size.width = config.Width();
|
size.width = config.Width();
|
||||||
|
resources.layers = 1;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
|
UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
|
||||||
@ -95,12 +101,15 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
|
|||||||
// FIXME: Call this without passing *this
|
// FIXME: Call this without passing *this
|
||||||
layer_stride = CalculateLayerStride(*this);
|
layer_stride = CalculateLayerStride(*this);
|
||||||
maybe_unaligned_layer_stride = CalculateLayerSize(*this);
|
maybe_unaligned_layer_stride = CalculateLayerSize(*this);
|
||||||
|
rescaleable &= (block.depth == 0) && resources.levels == 1;
|
||||||
|
downscaleable = size.height > 512;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
|
ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
|
||||||
const auto& rt = regs.rt[index];
|
const auto& rt = regs.rt[index];
|
||||||
format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
|
format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
|
||||||
|
rescaleable = false;
|
||||||
if (rt.tile_mode.is_pitch_linear) {
|
if (rt.tile_mode.is_pitch_linear) {
|
||||||
ASSERT(rt.tile_mode.is_3d == 0);
|
ASSERT(rt.tile_mode.is_3d == 0);
|
||||||
type = ImageType::Linear;
|
type = ImageType::Linear;
|
||||||
@ -126,6 +135,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index)
|
|||||||
type = ImageType::e3D;
|
type = ImageType::e3D;
|
||||||
size.depth = rt.depth;
|
size.depth = rt.depth;
|
||||||
} else {
|
} else {
|
||||||
|
rescaleable = block.depth == 0 && size.height > 256;
|
||||||
|
downscaleable = size.height > 512;
|
||||||
type = ImageType::e2D;
|
type = ImageType::e2D;
|
||||||
resources.layers = rt.depth;
|
resources.layers = rt.depth;
|
||||||
}
|
}
|
||||||
@ -135,6 +146,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
|
|||||||
format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
|
format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
|
||||||
size.width = regs.zeta_width;
|
size.width = regs.zeta_width;
|
||||||
size.height = regs.zeta_height;
|
size.height = regs.zeta_height;
|
||||||
|
rescaleable = false;
|
||||||
resources.levels = 1;
|
resources.levels = 1;
|
||||||
layer_stride = regs.zeta.layer_stride * 4;
|
layer_stride = regs.zeta.layer_stride * 4;
|
||||||
maybe_unaligned_layer_stride = layer_stride;
|
maybe_unaligned_layer_stride = layer_stride;
|
||||||
@ -153,6 +165,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
|
|||||||
type = ImageType::e3D;
|
type = ImageType::e3D;
|
||||||
size.depth = regs.zeta_depth;
|
size.depth = regs.zeta_depth;
|
||||||
} else {
|
} else {
|
||||||
|
rescaleable = block.depth == 0 && size.height > 256;
|
||||||
|
downscaleable = size.height > 512;
|
||||||
type = ImageType::e2D;
|
type = ImageType::e2D;
|
||||||
resources.layers = regs.zeta_depth;
|
resources.layers = regs.zeta_depth;
|
||||||
}
|
}
|
||||||
@ -161,6 +175,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
|
|||||||
ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
|
ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
|
||||||
UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
|
UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
|
||||||
format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
|
format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
|
||||||
|
rescaleable = false;
|
||||||
if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
|
if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
|
||||||
type = ImageType::Linear;
|
type = ImageType::Linear;
|
||||||
size = Extent3D{
|
size = Extent3D{
|
||||||
@ -171,6 +186,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
|
|||||||
pitch = config.pitch;
|
pitch = config.pitch;
|
||||||
} else {
|
} else {
|
||||||
type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
|
type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
|
||||||
|
|
||||||
block = Extent3D{
|
block = Extent3D{
|
||||||
.width = config.block_width,
|
.width = config.block_width,
|
||||||
.height = config.block_height,
|
.height = config.block_height,
|
||||||
@ -183,6 +199,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
|
|||||||
.height = config.height,
|
.height = config.height,
|
||||||
.depth = 1,
|
.depth = 1,
|
||||||
};
|
};
|
||||||
|
rescaleable = block.depth == 0 && size.height > 256;
|
||||||
|
downscaleable = size.height > 512;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry;
|
|||||||
using VideoCore::Surface::PixelFormat;
|
using VideoCore::Surface::PixelFormat;
|
||||||
|
|
||||||
struct ImageInfo {
|
struct ImageInfo {
|
||||||
explicit ImageInfo() = default;
|
ImageInfo() = default;
|
||||||
explicit ImageInfo(const TICEntry& config) noexcept;
|
explicit ImageInfo(const TICEntry& config) noexcept;
|
||||||
explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
|
explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
|
||||||
explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
|
explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
|
||||||
@ -33,6 +33,8 @@ struct ImageInfo {
|
|||||||
u32 maybe_unaligned_layer_stride = 0;
|
u32 maybe_unaligned_layer_stride = 0;
|
||||||
u32 num_samples = 1;
|
u32 num_samples = 1;
|
||||||
u32 tile_width_spacing = 0;
|
u32 tile_width_spacing = 0;
|
||||||
|
bool rescaleable = false;
|
||||||
|
bool downscaleable = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
@ -37,7 +37,8 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
|
|||||||
}
|
}
|
||||||
|
|
||||||
ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
|
ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
|
||||||
: format{info.format}, type{ImageViewType::Buffer}, size{
|
: image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer},
|
||||||
|
size{
|
||||||
.width = info.size.width,
|
.width = info.size.width,
|
||||||
.height = 1,
|
.height = 1,
|
||||||
.depth = 1,
|
.depth = 1,
|
||||||
@ -45,6 +46,6 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in
|
|||||||
ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
|
ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageViewBase::ImageViewBase(const NullImageParams&) {}
|
ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat;
|
|||||||
struct ImageViewInfo;
|
struct ImageViewInfo;
|
||||||
struct ImageInfo;
|
struct ImageInfo;
|
||||||
|
|
||||||
struct NullImageParams {};
|
struct NullImageViewParams {};
|
||||||
|
|
||||||
enum class ImageViewFlagBits : u16 {
|
enum class ImageViewFlagBits : u16 {
|
||||||
PreemtiveDownload = 1 << 0,
|
PreemtiveDownload = 1 << 0,
|
||||||
@ -28,7 +28,7 @@ struct ImageViewBase {
|
|||||||
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
|
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
|
||||||
ImageId image_id);
|
ImageId image_id);
|
||||||
explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
|
explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
|
||||||
explicit ImageViewBase(const NullImageParams&);
|
explicit ImageViewBase(const NullImageViewParams&);
|
||||||
|
|
||||||
[[nodiscard]] bool IsBuffer() const noexcept {
|
[[nodiscard]] bool IsBuffer() const noexcept {
|
||||||
return type == ImageViewType::Buffer;
|
return type == ImageViewType::Buffer;
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
|
#include "common/settings.h"
|
||||||
#include "video_core/dirty_flags.h"
|
#include "video_core/dirty_flags.h"
|
||||||
#include "video_core/engines/kepler_compute.h"
|
#include "video_core/engines/kepler_compute.h"
|
||||||
#include "video_core/texture_cache/image_view_base.h"
|
#include "video_core/texture_cache/image_view_base.h"
|
||||||
@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
|||||||
|
|
||||||
// Make sure the first index is reserved for the null resources
|
// Make sure the first index is reserved for the null resources
|
||||||
// This way the null resource becomes a compile time constant
|
// This way the null resource becomes a compile time constant
|
||||||
void(slot_image_views.insert(runtime, NullImageParams{}));
|
void(slot_images.insert(NullImageParams{}));
|
||||||
|
void(slot_image_views.insert(runtime, NullImageViewParams{}));
|
||||||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||||
|
|
||||||
if constexpr (HAS_DEVICE_MEMORY_INFO) {
|
if constexpr (HAS_DEVICE_MEMORY_INFO) {
|
||||||
const auto device_memory = runtime.GetDeviceLocalMemory();
|
const auto device_memory = runtime.GetDeviceLocalMemory();
|
||||||
const u64 possible_expected_memory = (device_memory * 3) / 10;
|
const u64 possible_expected_memory = (device_memory * 4) / 10;
|
||||||
const u64 possible_critical_memory = (device_memory * 6) / 10;
|
const u64 possible_critical_memory = (device_memory * 7) / 10;
|
||||||
expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
|
expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
|
||||||
critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
|
critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
|
||||||
minimum_memory = 0;
|
minimum_memory = 0;
|
||||||
} else {
|
} else {
|
||||||
// on OGL we can be more conservatives as the driver takes care.
|
// On OpenGL we can be more conservatives as the driver takes care.
|
||||||
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
|
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
|
||||||
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
|
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
|
||||||
minimum_memory = expected_memory;
|
minimum_memory = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||||||
const bool high_priority_mode = total_used_memory >= expected_memory;
|
const bool high_priority_mode = total_used_memory >= expected_memory;
|
||||||
const bool aggressive_mode = total_used_memory >= critical_memory;
|
const bool aggressive_mode = total_used_memory >= critical_memory;
|
||||||
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
|
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
|
||||||
size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
|
size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10);
|
||||||
const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
|
const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
|
||||||
if (num_iterations == 0) {
|
if (num_iterations == 0) {
|
||||||
return true;
|
return true;
|
||||||
@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||||||
UntrackImage(image, image_id);
|
UntrackImage(image, image_id);
|
||||||
}
|
}
|
||||||
UnregisterImage(image_id);
|
UnregisterImage(image_id);
|
||||||
DeleteImage(image_id);
|
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
||||||
@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() {
|
|||||||
sentenced_images.Tick();
|
sentenced_images.Tick();
|
||||||
sentenced_framebuffers.Tick();
|
sentenced_framebuffers.Tick();
|
||||||
sentenced_image_view.Tick();
|
sentenced_image_view.Tick();
|
||||||
|
runtime.TickFrame();
|
||||||
++frame_tick;
|
++frame_tick;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
|
template <bool has_blacklists>
|
||||||
std::span<ImageViewId> image_view_ids) {
|
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
|
||||||
FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
|
FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
|
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
|
||||||
std::span<ImageViewId> image_view_ids) {
|
FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
|
||||||
FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
@ -202,24 +204,109 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||||||
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 scale_rating = 0;
|
||||||
|
bool rescaled = false;
|
||||||
|
std::array<ImageId, NUM_RT> tmp_color_images{};
|
||||||
|
ImageId tmp_depth_image{};
|
||||||
|
do {
|
||||||
flags[Dirty::RenderTargets] = false;
|
flags[Dirty::RenderTargets] = false;
|
||||||
|
|
||||||
// Render target control is used on all render targets, so force look ups when this one is up
|
has_deleted_images = false;
|
||||||
|
// Render target control is used on all render targets, so force look ups when this one is
|
||||||
|
// up
|
||||||
const bool force = flags[Dirty::RenderTargetControl];
|
const bool force = flags[Dirty::RenderTargetControl];
|
||||||
flags[Dirty::RenderTargetControl] = false;
|
flags[Dirty::RenderTargetControl] = false;
|
||||||
|
|
||||||
|
scale_rating = 0;
|
||||||
|
bool any_rescaled = false;
|
||||||
|
bool can_rescale = true;
|
||||||
|
const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) {
|
||||||
|
if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) {
|
||||||
|
const auto& view = slot_image_views[view_id];
|
||||||
|
const auto image_id = view.image_id;
|
||||||
|
id_save = image_id;
|
||||||
|
auto& image = slot_images[image_id];
|
||||||
|
can_rescale &= ImageCanRescale(image);
|
||||||
|
any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) ||
|
||||||
|
GetFormatType(image.info.format) != SurfaceType::ColorTexture;
|
||||||
|
scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick
|
||||||
|
? image.scale_rating + 1U
|
||||||
|
: image.scale_rating);
|
||||||
|
} else {
|
||||||
|
id_save = CORRUPT_ID;
|
||||||
|
}
|
||||||
|
};
|
||||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
|
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
|
||||||
if (flags[Dirty::ColorBuffer0 + index] || force) {
|
if (flags[Dirty::ColorBuffer0 + index] || force) {
|
||||||
flags[Dirty::ColorBuffer0 + index] = false;
|
flags[Dirty::ColorBuffer0 + index] = false;
|
||||||
BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
|
BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
|
||||||
}
|
}
|
||||||
PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
|
check_rescale(color_buffer_id, tmp_color_images[index]);
|
||||||
}
|
}
|
||||||
if (flags[Dirty::ZetaBuffer] || force) {
|
if (flags[Dirty::ZetaBuffer] || force) {
|
||||||
flags[Dirty::ZetaBuffer] = false;
|
flags[Dirty::ZetaBuffer] = false;
|
||||||
BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
|
BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
|
||||||
}
|
}
|
||||||
|
check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
|
||||||
|
|
||||||
|
if (can_rescale) {
|
||||||
|
rescaled = any_rescaled || scale_rating >= 2;
|
||||||
|
const auto scale_up = [this](ImageId image_id) {
|
||||||
|
if (image_id != CORRUPT_ID) {
|
||||||
|
Image& image = slot_images[image_id];
|
||||||
|
ScaleUp(image);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if (rescaled) {
|
||||||
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
|
scale_up(tmp_color_images[index]);
|
||||||
|
}
|
||||||
|
scale_up(tmp_depth_image);
|
||||||
|
scale_rating = 2;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rescaled = false;
|
||||||
|
const auto scale_down = [this](ImageId image_id) {
|
||||||
|
if (image_id != CORRUPT_ID) {
|
||||||
|
Image& image = slot_images[image_id];
|
||||||
|
ScaleDown(image);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
|
scale_down(tmp_color_images[index]);
|
||||||
|
}
|
||||||
|
scale_down(tmp_depth_image);
|
||||||
|
scale_rating = 1;
|
||||||
|
}
|
||||||
|
} while (has_deleted_images);
|
||||||
|
// Rescale End
|
||||||
|
|
||||||
|
const auto set_rating = [this, scale_rating](ImageId image_id) {
|
||||||
|
if (image_id != CORRUPT_ID) {
|
||||||
|
Image& image = slot_images[image_id];
|
||||||
|
image.scale_rating = scale_rating;
|
||||||
|
if (image.scale_tick <= frame_tick) {
|
||||||
|
image.scale_tick = frame_tick + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
|
set_rating(tmp_color_images[index]);
|
||||||
|
}
|
||||||
|
set_rating(tmp_depth_image);
|
||||||
|
|
||||||
|
if (is_rescaling != rescaled) {
|
||||||
|
flags[Dirty::RescaleViewports] = true;
|
||||||
|
flags[Dirty::RescaleScissors] = true;
|
||||||
|
is_rescaling = rescaled;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
|
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
|
||||||
|
PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
|
||||||
|
}
|
||||||
const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
|
const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
|
||||||
|
|
||||||
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
||||||
@ -227,9 +314,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
||||||
}
|
}
|
||||||
|
u32 up_scale = 1;
|
||||||
|
u32 down_shift = 0;
|
||||||
|
if (is_rescaling) {
|
||||||
|
up_scale = Settings::values.resolution_info.up_scale;
|
||||||
|
down_shift = Settings::values.resolution_info.down_shift;
|
||||||
|
}
|
||||||
render_targets.size = Extent2D{
|
render_targets.size = Extent2D{
|
||||||
maxwell3d.regs.render_area.width,
|
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
|
||||||
maxwell3d.regs.render_area.height,
|
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
|
||||||
};
|
};
|
||||||
|
|
||||||
flags[Dirty::DepthBiasGlobal] = true;
|
flags[Dirty::DepthBiasGlobal] = true;
|
||||||
@ -241,17 +334,29 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
template <bool has_blacklists>
|
||||||
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
|
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
|
||||||
std::span<ImageViewId> cached_image_view_ids,
|
std::span<ImageViewId> cached_image_view_ids,
|
||||||
std::span<const u32> indices,
|
std::span<ImageViewInOut> views) {
|
||||||
std::span<ImageViewId> image_view_ids) {
|
bool has_blacklisted;
|
||||||
ASSERT(indices.size() <= image_view_ids.size());
|
|
||||||
do {
|
do {
|
||||||
has_deleted_images = false;
|
has_deleted_images = false;
|
||||||
std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
|
if constexpr (has_blacklists) {
|
||||||
return VisitImageView(table, cached_image_view_ids, index);
|
has_blacklisted = false;
|
||||||
});
|
}
|
||||||
} while (has_deleted_images);
|
for (ImageViewInOut& view : views) {
|
||||||
|
view.id = VisitImageView(table, cached_image_view_ids, view.index);
|
||||||
|
if constexpr (has_blacklists) {
|
||||||
|
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
|
||||||
|
const ImageViewBase& image_view{slot_image_views[view.id]};
|
||||||
|
auto& image = slot_images[image_view.image_id];
|
||||||
|
image.flags |= ImageFlagBits::Blacklisted;
|
||||||
|
has_blacklisted |= ScaleDown(image);
|
||||||
|
image.scale_rating = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (has_deleted_images || (has_blacklists && has_blacklisted));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
@ -369,8 +474,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
|||||||
PrepareImage(src_id, false, false);
|
PrepareImage(src_id, false, false);
|
||||||
PrepareImage(dst_id, true, false);
|
PrepareImage(dst_id, true, false);
|
||||||
|
|
||||||
ImageBase& dst_image = slot_images[dst_id];
|
Image& dst_image = slot_images[dst_id];
|
||||||
const ImageBase& src_image = slot_images[src_id];
|
Image& src_image = slot_images[src_id];
|
||||||
|
bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
|
||||||
|
const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1;
|
||||||
|
if (is_src_rescaled != is_dst_rescaled) {
|
||||||
|
if (ImageCanRescale(src_image)) {
|
||||||
|
ScaleUp(src_image);
|
||||||
|
is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
if (is_resolve) {
|
||||||
|
dst_image.info.rescaleable = true;
|
||||||
|
for (const auto& alias : dst_image.aliased_images) {
|
||||||
|
Image& other_image = slot_images[alias.id];
|
||||||
|
other_image.info.rescaleable = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ImageCanRescale(dst_image)) {
|
||||||
|
ScaleUp(dst_image);
|
||||||
|
is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (is_resolve && (is_src_rescaled != is_dst_rescaled)) {
|
||||||
|
// A resolve requires both images to be the same dimensions. Resize down if needed.
|
||||||
|
ScaleDown(src_image);
|
||||||
|
ScaleDown(dst_image);
|
||||||
|
is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
}
|
||||||
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
|
const auto scale_region = [&](Region2D& region) {
|
||||||
|
region.start.x = resolution.ScaleUp(region.start.x);
|
||||||
|
region.start.y = resolution.ScaleUp(region.start.y);
|
||||||
|
region.end.x = resolution.ScaleUp(region.end.x);
|
||||||
|
region.end.y = resolution.ScaleUp(region.end.y);
|
||||||
|
};
|
||||||
|
|
||||||
// TODO: Deduplicate
|
// TODO: Deduplicate
|
||||||
const std::optional src_base = src_image.TryFindBase(src.Address());
|
const std::optional src_base = src_image.TryFindBase(src.Address());
|
||||||
@ -378,20 +518,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
|||||||
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
|
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
|
||||||
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
|
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
|
||||||
const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
|
const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
|
||||||
const Region2D src_region{
|
Region2D src_region{
|
||||||
Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
|
Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
|
||||||
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
|
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
|
||||||
};
|
};
|
||||||
|
if (is_src_rescaled) {
|
||||||
|
scale_region(src_region);
|
||||||
|
}
|
||||||
|
|
||||||
const std::optional dst_base = dst_image.TryFindBase(dst.Address());
|
const std::optional dst_base = dst_image.TryFindBase(dst.Address());
|
||||||
const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
|
const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
|
||||||
const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
|
const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
|
||||||
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
|
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
|
||||||
const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
|
const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
|
||||||
const Region2D dst_region{
|
Region2D dst_region{
|
||||||
Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
|
Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
|
||||||
Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
|
Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
|
||||||
};
|
};
|
||||||
|
if (is_dst_rescaled) {
|
||||||
|
scale_region(dst_region);
|
||||||
|
}
|
||||||
|
|
||||||
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
|
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
|
||||||
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
|
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
|
||||||
@ -486,6 +632,20 @@ void TextureCache<P>::PopAsyncFlushes() {
|
|||||||
committed_downloads.pop();
|
committed_downloads.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool TextureCache<P>::IsRescaling() const noexcept {
|
||||||
|
return is_rescaling;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept {
|
||||||
|
if (image_view.type == ImageViewType::Buffer) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const ImageBase& image = slot_images[image_view.image_id];
|
||||||
|
return True(image.flags & ImageFlagBits::Rescaled);
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
||||||
bool is_modified = false;
|
bool is_modified = false;
|
||||||
@ -623,6 +783,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
|||||||
return image_id;
|
return image_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool TextureCache<P>::ImageCanRescale(ImageBase& image) {
|
||||||
|
if (!image.info.rescaleable) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (Settings::values.resolution_info.downscale && !image.info.downscaleable) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (True(image.flags & ImageFlagBits::IsRescalable)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
image.flags |= ImageFlagBits::CheckingRescalable;
|
||||||
|
for (const auto& alias : image.aliased_images) {
|
||||||
|
Image& other_image = slot_images[alias.id];
|
||||||
|
if (!ImageCanRescale(other_image)) {
|
||||||
|
image.flags &= ~ImageFlagBits::CheckingRescalable;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
image.flags &= ~ImageFlagBits::CheckingRescalable;
|
||||||
|
image.flags |= ImageFlagBits::IsRescalable;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::InvalidateScale(Image& image) {
|
||||||
|
if (image.scale_tick <= frame_tick) {
|
||||||
|
image.scale_tick = frame_tick + 1;
|
||||||
|
}
|
||||||
|
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
|
||||||
|
auto& dirty = maxwell3d.dirty.flags;
|
||||||
|
dirty[Dirty::RenderTargets] = true;
|
||||||
|
dirty[Dirty::ZetaBuffer] = true;
|
||||||
|
for (size_t rt = 0; rt < NUM_RT; ++rt) {
|
||||||
|
dirty[Dirty::ColorBuffer0 + rt] = true;
|
||||||
|
}
|
||||||
|
for (const ImageViewId image_view_id : image_view_ids) {
|
||||||
|
std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
|
||||||
|
if (render_targets.depth_buffer_id == image_view_id) {
|
||||||
|
render_targets.depth_buffer_id = ImageViewId{};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RemoveImageViewReferences(image_view_ids);
|
||||||
|
RemoveFramebuffers(image_view_ids);
|
||||||
|
for (const ImageViewId image_view_id : image_view_ids) {
|
||||||
|
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
||||||
|
slot_image_views.erase(image_view_id);
|
||||||
|
}
|
||||||
|
image.image_view_ids.clear();
|
||||||
|
image.image_view_infos.clear();
|
||||||
|
if constexpr (ENABLE_VALIDATION) {
|
||||||
|
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
|
||||||
|
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
|
||||||
|
}
|
||||||
|
graphics_image_table.Invalidate();
|
||||||
|
compute_image_table.Invalidate();
|
||||||
|
has_deleted_images = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
|
||||||
|
const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale *
|
||||||
|
Settings::values.resolution_info.up_scale);
|
||||||
|
const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift +
|
||||||
|
Settings::values.resolution_info.down_shift);
|
||||||
|
const u64 image_size_bytes =
|
||||||
|
static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes));
|
||||||
|
const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift;
|
||||||
|
const u64 fitted_size = Common::AlignUp(tentative_size, 1024);
|
||||||
|
return fitted_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool TextureCache<P>::ScaleUp(Image& image) {
|
||||||
|
const bool has_copy = image.HasScaled();
|
||||||
|
const bool rescaled = image.ScaleUp();
|
||||||
|
if (!rescaled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!has_copy) {
|
||||||
|
total_used_memory += GetScaledImageSizeBytes(image);
|
||||||
|
}
|
||||||
|
InvalidateScale(image);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool TextureCache<P>::ScaleDown(Image& image) {
|
||||||
|
const bool rescaled = image.ScaleDown();
|
||||||
|
if (!rescaled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
InvalidateScale(image);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
RelaxedOptions options) {
|
RelaxedOptions options) {
|
||||||
@ -660,12 +919,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||||||
std::vector<ImageId> right_aliased_ids;
|
std::vector<ImageId> right_aliased_ids;
|
||||||
std::unordered_set<ImageId> ignore_textures;
|
std::unordered_set<ImageId> ignore_textures;
|
||||||
std::vector<ImageId> bad_overlap_ids;
|
std::vector<ImageId> bad_overlap_ids;
|
||||||
|
std::vector<ImageId> all_siblings;
|
||||||
|
const bool this_is_linear = info.type == ImageType::Linear;
|
||||||
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
|
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
||||||
ignore_textures.insert(overlap_id);
|
ignore_textures.insert(overlap_id);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (info.type == ImageType::Linear) {
|
const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
|
||||||
|
if (this_is_linear != overlap_is_linear) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (this_is_linear && overlap_is_linear) {
|
||||||
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
||||||
// Alias linear images with the same pitch
|
// Alias linear images with the same pitch
|
||||||
left_aliased_ids.push_back(overlap_id);
|
left_aliased_ids.push_back(overlap_id);
|
||||||
@ -681,6 +946,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||||||
cpu_addr = solution->cpu_addr;
|
cpu_addr = solution->cpu_addr;
|
||||||
new_info.resources = solution->resources;
|
new_info.resources = solution->resources;
|
||||||
overlap_ids.push_back(overlap_id);
|
overlap_ids.push_back(overlap_id);
|
||||||
|
all_siblings.push_back(overlap_id);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
|
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
|
||||||
@ -688,10 +954,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||||||
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
||||||
left_aliased_ids.push_back(overlap_id);
|
left_aliased_ids.push_back(overlap_id);
|
||||||
overlap.flags |= ImageFlagBits::Alias;
|
overlap.flags |= ImageFlagBits::Alias;
|
||||||
|
all_siblings.push_back(overlap_id);
|
||||||
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
||||||
broken_views, native_bgr)) {
|
broken_views, native_bgr)) {
|
||||||
right_aliased_ids.push_back(overlap_id);
|
right_aliased_ids.push_back(overlap_id);
|
||||||
overlap.flags |= ImageFlagBits::Alias;
|
overlap.flags |= ImageFlagBits::Alias;
|
||||||
|
all_siblings.push_back(overlap_id);
|
||||||
} else {
|
} else {
|
||||||
bad_overlap_ids.push_back(overlap_id);
|
bad_overlap_ids.push_back(overlap_id);
|
||||||
overlap.flags |= ImageFlagBits::BadOverlap;
|
overlap.flags |= ImageFlagBits::BadOverlap;
|
||||||
@ -709,6 +977,37 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
|
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
|
||||||
|
|
||||||
|
bool can_rescale = info.rescaleable;
|
||||||
|
bool any_rescaled = false;
|
||||||
|
bool any_blacklisted = false;
|
||||||
|
for (const ImageId sibling_id : all_siblings) {
|
||||||
|
if (!can_rescale) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Image& sibling = slot_images[sibling_id];
|
||||||
|
can_rescale &= ImageCanRescale(sibling);
|
||||||
|
any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
|
||||||
|
any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted);
|
||||||
|
}
|
||||||
|
|
||||||
|
can_rescale &= any_rescaled;
|
||||||
|
|
||||||
|
if (can_rescale) {
|
||||||
|
for (const ImageId sibling_id : all_siblings) {
|
||||||
|
Image& sibling = slot_images[sibling_id];
|
||||||
|
ScaleUp(sibling);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (const ImageId sibling_id : all_siblings) {
|
||||||
|
Image& sibling = slot_images[sibling_id];
|
||||||
|
ScaleDown(sibling);
|
||||||
|
if (any_blacklisted) {
|
||||||
|
sibling.flags |= ImageFlagBits::Blacklisted;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||||
Image& new_image = slot_images[new_image_id];
|
Image& new_image = slot_images[new_image_id];
|
||||||
|
|
||||||
@ -731,14 +1030,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||||||
// TODO: Only upload what we need
|
// TODO: Only upload what we need
|
||||||
RefreshContents(new_image, new_image_id);
|
RefreshContents(new_image, new_image_id);
|
||||||
|
|
||||||
|
if (can_rescale) {
|
||||||
|
ScaleUp(new_image);
|
||||||
|
} else {
|
||||||
|
ScaleDown(new_image);
|
||||||
|
}
|
||||||
|
|
||||||
for (const ImageId overlap_id : overlap_ids) {
|
for (const ImageId overlap_id : overlap_ids) {
|
||||||
Image& overlap = slot_images[overlap_id];
|
Image& overlap = slot_images[overlap_id];
|
||||||
if (overlap.info.num_samples != new_image.info.num_samples) {
|
if (overlap.info.num_samples != new_image.info.num_samples) {
|
||||||
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
|
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
|
||||||
} else {
|
} else {
|
||||||
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
|
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
|
||||||
const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
|
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
|
||||||
runtime.CopyImage(new_image, overlap, copies);
|
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
|
||||||
|
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
|
||||||
|
runtime.CopyImage(new_image, overlap, std::move(copies));
|
||||||
}
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(overlap, overlap_id);
|
UntrackImage(overlap, overlap_id);
|
||||||
@ -1083,13 +1391,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
|||||||
"Trying to unregister an already registered image");
|
"Trying to unregister an already registered image");
|
||||||
image.flags &= ~ImageFlagBits::Registered;
|
image.flags &= ~ImageFlagBits::Registered;
|
||||||
image.flags &= ~ImageFlagBits::BadOverlap;
|
image.flags &= ~ImageFlagBits::BadOverlap;
|
||||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
|
||||||
if ((IsPixelFormatASTC(image.info.format) &&
|
|
||||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
|
||||||
True(image.flags & ImageFlagBits::Converted)) {
|
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
|
||||||
}
|
|
||||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
|
||||||
lru_cache.Free(image.lru_index);
|
lru_cache.Free(image.lru_index);
|
||||||
const auto& clear_page_table =
|
const auto& clear_page_table =
|
||||||
[this, image_id](
|
[this, image_id](
|
||||||
@ -1213,8 +1514,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::DeleteImage(ImageId image_id) {
|
void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
||||||
ImageBase& image = slot_images[image_id];
|
ImageBase& image = slot_images[image_id];
|
||||||
|
if (image.HasScaled()) {
|
||||||
|
total_used_memory -= GetScaledImageSizeBytes(image);
|
||||||
|
}
|
||||||
|
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||||
|
if ((IsPixelFormatASTC(image.info.format) &&
|
||||||
|
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||||
|
True(image.flags & ImageFlagBits::Converted)) {
|
||||||
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
|
}
|
||||||
|
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||||
const GPUVAddr gpu_addr = image.gpu_addr;
|
const GPUVAddr gpu_addr = image.gpu_addr;
|
||||||
const auto alloc_it = image_allocs_table.find(gpu_addr);
|
const auto alloc_it = image_allocs_table.find(gpu_addr);
|
||||||
if (alloc_it == image_allocs_table.end()) {
|
if (alloc_it == image_allocs_table.end()) {
|
||||||
@ -1269,10 +1580,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
|
|||||||
num_removed_overlaps);
|
num_removed_overlaps);
|
||||||
}
|
}
|
||||||
for (const ImageViewId image_view_id : image_view_ids) {
|
for (const ImageViewId image_view_id : image_view_ids) {
|
||||||
|
if (!immediate_delete) {
|
||||||
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
||||||
|
}
|
||||||
slot_image_views.erase(image_view_id);
|
slot_image_views.erase(image_view_id);
|
||||||
}
|
}
|
||||||
|
if (!immediate_delete) {
|
||||||
sentenced_images.Push(std::move(slot_images[image_id]));
|
sentenced_images.Push(std::move(slot_images[image_id]));
|
||||||
|
}
|
||||||
slot_images.erase(image_id);
|
slot_images.erase(image_id);
|
||||||
|
|
||||||
alloc_images.erase(alloc_image_it);
|
alloc_images.erase(alloc_image_it);
|
||||||
@ -1322,26 +1637,68 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
|
|||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||||
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
|
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
|
||||||
ImageBase& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
|
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
|
||||||
|
bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted);
|
||||||
u64 most_recent_tick = image.modification_tick;
|
u64 most_recent_tick = image.modification_tick;
|
||||||
for (const AliasedImage& aliased : image.aliased_images) {
|
for (const AliasedImage& aliased : image.aliased_images) {
|
||||||
ImageBase& aliased_image = slot_images[aliased.id];
|
ImageBase& aliased_image = slot_images[aliased.id];
|
||||||
if (image.modification_tick < aliased_image.modification_tick) {
|
if (image.modification_tick < aliased_image.modification_tick) {
|
||||||
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
|
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
|
||||||
aliased_images.push_back(&aliased);
|
aliased_images.push_back(&aliased);
|
||||||
|
any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
|
||||||
|
any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (aliased_images.empty()) {
|
if (aliased_images.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const bool can_rescale = ImageCanRescale(image);
|
||||||
|
if (any_rescaled) {
|
||||||
|
if (can_rescale) {
|
||||||
|
ScaleUp(image);
|
||||||
|
} else {
|
||||||
|
ScaleDown(image);
|
||||||
|
if (any_blacklisted) {
|
||||||
|
image.flags |= ImageFlagBits::Blacklisted;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
image.modification_tick = most_recent_tick;
|
image.modification_tick = most_recent_tick;
|
||||||
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
|
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
|
||||||
const ImageBase& lhs_image = slot_images[lhs->id];
|
const ImageBase& lhs_image = slot_images[lhs->id];
|
||||||
const ImageBase& rhs_image = slot_images[rhs->id];
|
const ImageBase& rhs_image = slot_images[rhs->id];
|
||||||
return lhs_image.modification_tick < rhs_image.modification_tick;
|
return lhs_image.modification_tick < rhs_image.modification_tick;
|
||||||
});
|
});
|
||||||
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
for (const AliasedImage* const aliased : aliased_images) {
|
for (const AliasedImage* const aliased : aliased_images) {
|
||||||
|
if (!resolution.active | !any_rescaled) {
|
||||||
CopyImage(image_id, aliased->id, aliased->copies);
|
CopyImage(image_id, aliased->id, aliased->copies);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Image& aliased_image = slot_images[aliased->id];
|
||||||
|
if (!can_rescale) {
|
||||||
|
ScaleDown(aliased_image);
|
||||||
|
if (any_blacklisted) {
|
||||||
|
aliased_image.flags |= ImageFlagBits::Blacklisted;
|
||||||
|
}
|
||||||
|
CopyImage(image_id, aliased->id, aliased->copies);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
ScaleUp(aliased_image);
|
||||||
|
|
||||||
|
const bool both_2d{image.info.type == ImageType::e2D &&
|
||||||
|
aliased_image.info.type == ImageType::e2D};
|
||||||
|
auto copies = aliased->copies;
|
||||||
|
for (auto copy : copies) {
|
||||||
|
copy.extent.width = std::max<u32>(
|
||||||
|
(copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1);
|
||||||
|
if (both_2d) {
|
||||||
|
copy.extent.height = std::max<u32>(
|
||||||
|
(copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CopyImage(image_id, aliased->id, copies);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1377,9 +1734,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
|
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
|
||||||
Image& dst = slot_images[dst_id];
|
Image& dst = slot_images[dst_id];
|
||||||
Image& src = slot_images[src_id];
|
Image& src = slot_images[src_id];
|
||||||
|
const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
|
||||||
|
if (is_rescaled) {
|
||||||
|
ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
|
||||||
|
const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D};
|
||||||
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
|
for (auto& copy : copies) {
|
||||||
|
copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x);
|
||||||
|
copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x);
|
||||||
|
copy.extent.width = resolution.ScaleUp(copy.extent.width);
|
||||||
|
if (both_2d) {
|
||||||
|
copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y);
|
||||||
|
copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y);
|
||||||
|
copy.extent.height = resolution.ScaleUp(copy.extent.height);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
const auto dst_format_type = GetFormatType(dst.info.format);
|
const auto dst_format_type = GetFormatType(dst.info.format);
|
||||||
const auto src_format_type = GetFormatType(src.info.format);
|
const auto src_format_type = GetFormatType(src.info.format);
|
||||||
if (src_format_type == dst_format_type) {
|
if (src_format_type == dst_format_type) {
|
||||||
@ -1424,7 +1797,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const
|
|||||||
};
|
};
|
||||||
UNIMPLEMENTED_IF(copy.extent != expected_size);
|
UNIMPLEMENTED_IF(copy.extent != expected_size);
|
||||||
|
|
||||||
runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
|
runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1433,8 +1806,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
|
|||||||
if (*old_id == new_id) {
|
if (*old_id == new_id) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (*old_id) {
|
if (new_id) {
|
||||||
const ImageViewBase& old_view = slot_image_views[*old_id];
|
const ImageViewBase& old_view = slot_image_views[new_id];
|
||||||
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
|
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
|
||||||
uncommitted_downloads.push_back(old_view.image_id);
|
uncommitted_downloads.push_back(old_view.image_id);
|
||||||
}
|
}
|
||||||
@ -1447,10 +1820,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
|
|||||||
ImageId image_id, const ImageViewInfo& view_info) {
|
ImageId image_id, const ImageViewInfo& view_info) {
|
||||||
const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
|
const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
|
||||||
const ImageBase& image = slot_images[image_id];
|
const ImageBase& image = slot_images[image_id];
|
||||||
|
const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled);
|
||||||
const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
|
const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
|
||||||
const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
|
const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
|
||||||
const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
|
const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
|
||||||
const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
|
Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
|
||||||
|
if (is_rescaled) {
|
||||||
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
|
extent.width = resolution.ScaleUp(extent.width);
|
||||||
|
if (image.info.type == ImageType::e2D) {
|
||||||
|
extent.height = resolution.ScaleUp(extent.height);
|
||||||
|
}
|
||||||
|
}
|
||||||
const u32 num_samples = image.info.num_samples;
|
const u32 num_samples = image.info.num_samples;
|
||||||
const auto [samples_x, samples_y] = SamplesLog2(num_samples);
|
const auto [samples_x, samples_y] = SamplesLog2(num_samples);
|
||||||
const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
|
const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "video_core/texture_cache/descriptor_table.h"
|
#include "video_core/texture_cache/descriptor_table.h"
|
||||||
#include "video_core/texture_cache/image_base.h"
|
#include "video_core/texture_cache/image_base.h"
|
||||||
#include "video_core/texture_cache/image_info.h"
|
#include "video_core/texture_cache/image_info.h"
|
||||||
|
#include "video_core/texture_cache/image_view_base.h"
|
||||||
#include "video_core/texture_cache/image_view_info.h"
|
#include "video_core/texture_cache/image_view_info.h"
|
||||||
#include "video_core/texture_cache/render_targets.h"
|
#include "video_core/texture_cache/render_targets.h"
|
||||||
#include "video_core/texture_cache/slot_vector.h"
|
#include "video_core/texture_cache/slot_vector.h"
|
||||||
@ -39,6 +40,12 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
|
|||||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||||
using namespace Common::Literals;
|
using namespace Common::Literals;
|
||||||
|
|
||||||
|
struct ImageViewInOut {
|
||||||
|
u32 index{};
|
||||||
|
bool blacklist{};
|
||||||
|
ImageViewId id{};
|
||||||
|
};
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
class TextureCache {
|
class TextureCache {
|
||||||
/// Address shift for caching images into a hash table
|
/// Address shift for caching images into a hash table
|
||||||
@ -53,11 +60,6 @@ class TextureCache {
|
|||||||
/// True when the API can provide info about the memory of the device.
|
/// True when the API can provide info about the memory of the device.
|
||||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||||
|
|
||||||
/// Image view ID for null descriptors
|
|
||||||
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
|
|
||||||
/// Sampler ID for bugged sampler ids
|
|
||||||
static constexpr SamplerId NULL_SAMPLER_ID{0};
|
|
||||||
|
|
||||||
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
|
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
|
||||||
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
|
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
|
||||||
|
|
||||||
@ -99,11 +101,11 @@ public:
|
|||||||
void MarkModification(ImageId id) noexcept;
|
void MarkModification(ImageId id) noexcept;
|
||||||
|
|
||||||
/// Fill image_view_ids with the graphics images in indices
|
/// Fill image_view_ids with the graphics images in indices
|
||||||
void FillGraphicsImageViews(std::span<const u32> indices,
|
template <bool has_blacklists>
|
||||||
std::span<ImageViewId> image_view_ids);
|
void FillGraphicsImageViews(std::span<ImageViewInOut> views);
|
||||||
|
|
||||||
/// Fill image_view_ids with the compute images in indices
|
/// Fill image_view_ids with the compute images in indices
|
||||||
void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
|
void FillComputeImageViews(std::span<ImageViewInOut> views);
|
||||||
|
|
||||||
/// Get the sampler from the graphics descriptor table in the specified index
|
/// Get the sampler from the graphics descriptor table in the specified index
|
||||||
Sampler* GetGraphicsSampler(u32 index);
|
Sampler* GetGraphicsSampler(u32 index);
|
||||||
@ -160,6 +162,10 @@ public:
|
|||||||
/// Return true when a CPU region is modified from the GPU
|
/// Return true when a CPU region is modified from the GPU
|
||||||
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsRescaling() const noexcept;
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
|
||||||
|
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -198,9 +204,10 @@ private:
|
|||||||
void RunGarbageCollector();
|
void RunGarbageCollector();
|
||||||
|
|
||||||
/// Fills image_view_ids in the image views in indices
|
/// Fills image_view_ids in the image views in indices
|
||||||
|
template <bool has_blacklists>
|
||||||
void FillImageViews(DescriptorTable<TICEntry>& table,
|
void FillImageViews(DescriptorTable<TICEntry>& table,
|
||||||
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
|
std::span<ImageViewId> cached_image_view_ids,
|
||||||
std::span<ImageViewId> image_view_ids);
|
std::span<ImageViewInOut> views);
|
||||||
|
|
||||||
/// Find or create an image view in the guest descriptor table
|
/// Find or create an image view in the guest descriptor table
|
||||||
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
|
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
|
||||||
@ -285,7 +292,7 @@ private:
|
|||||||
void UntrackImage(ImageBase& image, ImageId image_id);
|
void UntrackImage(ImageBase& image, ImageId image_id);
|
||||||
|
|
||||||
/// Delete image from the cache
|
/// Delete image from the cache
|
||||||
void DeleteImage(ImageId image);
|
void DeleteImage(ImageId image, bool immediate_delete = false);
|
||||||
|
|
||||||
/// Remove image views references from the cache
|
/// Remove image views references from the cache
|
||||||
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
|
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
|
||||||
@ -306,7 +313,7 @@ private:
|
|||||||
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
|
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
|
||||||
|
|
||||||
/// Execute copies from one image to the other, even if they are incompatible
|
/// Execute copies from one image to the other, even if they are incompatible
|
||||||
void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
|
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
|
||||||
|
|
||||||
/// Bind an image view as render target, downloading resources preemptively if needed
|
/// Bind an image view as render target, downloading resources preemptively if needed
|
||||||
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
|
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
|
||||||
@ -318,6 +325,12 @@ private:
|
|||||||
/// Returns true if the current clear parameters clear the whole image of a given image view
|
/// Returns true if the current clear parameters clear the whole image of a given image view
|
||||||
[[nodiscard]] bool IsFullClear(ImageViewId id);
|
[[nodiscard]] bool IsFullClear(ImageViewId id);
|
||||||
|
|
||||||
|
bool ImageCanRescale(ImageBase& image);
|
||||||
|
void InvalidateScale(Image& image);
|
||||||
|
bool ScaleUp(Image& image);
|
||||||
|
bool ScaleDown(Image& image);
|
||||||
|
u64 GetScaledImageSizeBytes(ImageBase& image);
|
||||||
|
|
||||||
Runtime& runtime;
|
Runtime& runtime;
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||||
@ -349,6 +362,7 @@ private:
|
|||||||
VAddr virtual_invalid_space{};
|
VAddr virtual_invalid_space{};
|
||||||
|
|
||||||
bool has_deleted_images = false;
|
bool has_deleted_images = false;
|
||||||
|
bool is_rescaling = false;
|
||||||
u64 total_used_memory = 0;
|
u64 total_used_memory = 0;
|
||||||
u64 minimum_memory;
|
u64 minimum_memory;
|
||||||
u64 expected_memory;
|
u64 expected_memory;
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user