early-access version 3290
This commit is contained in:
@@ -85,6 +85,7 @@ add_library(video_core STATIC
|
||||
gpu.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
invalidation_accumulator.h
|
||||
memory_manager.cpp
|
||||
memory_manager.h
|
||||
precompiled_headers.h
|
||||
@@ -99,6 +100,8 @@ add_library(video_core STATIC
|
||||
renderer_null/null_rasterizer.h
|
||||
renderer_null/renderer_null.cpp
|
||||
renderer_null/renderer_null.h
|
||||
renderer_opengl/blit_image.cpp
|
||||
renderer_opengl/blit_image.h
|
||||
renderer_opengl/gl_buffer_cache.cpp
|
||||
renderer_opengl/gl_buffer_cache.h
|
||||
renderer_opengl/gl_compute_pipeline.cpp
|
||||
@@ -190,6 +193,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_texture_cache.cpp
|
||||
renderer_vulkan/vk_texture_cache.h
|
||||
renderer_vulkan/vk_texture_cache_base.cpp
|
||||
renderer_vulkan/vk_turbo_mode.cpp
|
||||
renderer_vulkan/vk_turbo_mode.h
|
||||
renderer_vulkan/vk_update_descriptor.cpp
|
||||
renderer_vulkan/vk_update_descriptor.h
|
||||
shader_cache.cpp
|
||||
|
@@ -430,7 +430,7 @@ private:
|
||||
if (query_begin >= SizeBytes() || size < 0) {
|
||||
return;
|
||||
}
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const state_words = Array<type>();
|
||||
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
||||
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
||||
@@ -483,7 +483,7 @@ private:
|
||||
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
||||
}
|
||||
// Exclude CPU modified pages when visiting GPU pages
|
||||
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
|
||||
const u64 word = current_word;
|
||||
u64 page = page_begin;
|
||||
page_begin = 0;
|
||||
|
||||
@@ -531,7 +531,7 @@ private:
|
||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
@@ -539,8 +539,7 @@ private:
|
||||
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
||||
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
@@ -564,7 +563,7 @@ private:
|
||||
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
@@ -574,8 +573,7 @@ private:
|
||||
u64 begin = std::numeric_limits<u64>::max();
|
||||
u64 end = 0;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
||||
bool is_written) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
const u32 alignment = runtime.GetStorageBufferAlignment();
|
||||
|
||||
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
|
||||
const u32 aligned_size =
|
||||
Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
|
||||
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
|
||||
if (!cpu_addr || size == 0) {
|
||||
return NULL_BINDING;
|
||||
}
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
|
||||
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
|
||||
const Binding binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.buffer_id = BufferId{},
|
||||
};
|
||||
return binding;
|
||||
|
@@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
|
||||
LOG_WARNING(HW_GPU, "(STUBBED) called");
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
|
||||
DrawTexture();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) {
|
||||
ProcessDraw(true, 1);
|
||||
}
|
||||
|
||||
/// Translates the raw draw_texture registers into the floating-point
/// draw_texture_state and then asks the rasterizer to perform the draw.
void DrawManager::DrawTexture() {
|
||||
const auto& regs{maxwell3d->regs};
|
||||
// Destination origin and extent are stored as integers and scaled down by 4096.
// NOTE(review): presumably fixed point with 12 fractional bits - confirm.
draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f;
|
||||
draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f;
|
||||
const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f;
|
||||
const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f;
|
||||
// Any window origin mode other than UpperLeft is treated as lower-left.
const bool lower_left{regs.window_origin.mode !=
|
||||
Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft};
|
||||
if (lower_left) {
|
||||
// Shift the starting row down by one rectangle height before deriving dst_y1.
draw_texture_state.dst_y0 -= dst_height;
|
||||
}
|
||||
// Second destination corner = first corner + extent.
draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
|
||||
draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
|
||||
// Source origin uses the same /4096 scaling as the destination.
draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
|
||||
draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
|
||||
// Source end = per-destination-unit step (dx_du / dy_dv, scaled down) times the
// destination extent, offset by the source origin.
// NOTE(review): the divisor suggests 32 fractional bits in dx_du/dy_dv - confirm.
draw_texture_state.src_x1 =
|
||||
(static_cast<float>(regs.draw_texture.dx_du) / 4294967295.f) * dst_width +
|
||||
draw_texture_state.src_x0;
|
||||
draw_texture_state.src_y1 =
|
||||
(static_cast<float>(regs.draw_texture.dy_dv) / 4294967295.f) * dst_height +
|
||||
draw_texture_state.src_y0;
|
||||
// Sampler and texture selectors are forwarded unchanged.
draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
|
||||
draw_texture_state.src_texture = regs.draw_texture.src_texture;
|
||||
|
||||
// The state is complete; hand over to the rasterizer.
maxwell3d->rasterizer->DrawTexture();
|
||||
}
|
||||
|
||||
void DrawManager::UpdateTopology() {
|
||||
const auto& regs{maxwell3d->regs};
|
||||
switch (regs.primitive_topology_control) {
|
||||
|
@@ -32,6 +32,19 @@ public:
|
||||
std::vector<u8> inline_index_draw_indexes;
|
||||
};
|
||||
|
||||
/// Floating-point parameters of a texture draw, as computed by
/// DrawManager::DrawTexture() from the draw_texture registers.
struct DrawTextureState {
|
||||
// First corner of the destination rectangle.
f32 dst_x0;
|
||||
f32 dst_y0;
|
||||
// Second corner of the destination rectangle (first corner + extent).
f32 dst_x1;
|
||||
f32 dst_y1;
|
||||
// First corner of the source rectangle.
f32 src_x0;
|
||||
f32 src_y0;
|
||||
// Second corner of the source rectangle.
f32 src_x1;
|
||||
f32 src_y1;
|
||||
// Raw sampler / texture register values, copied without conversion.
// NOTE(review): presumably indices or handles - confirm against the consumer.
u32 src_sampler;
|
||||
u32 src_texture;
|
||||
};
|
||||
|
||||
struct IndirectParams {
|
||||
bool is_indexed;
|
||||
bool include_count;
|
||||
@@ -64,6 +77,10 @@ public:
|
||||
return draw_state;
|
||||
}
|
||||
|
||||
/// Returns a read-only reference to the stored texture-draw state.
const DrawTextureState& GetDrawTextureState() const {
|
||||
return draw_texture_state;
|
||||
}
|
||||
|
||||
IndirectParams& GetIndirectParams() {
|
||||
return indirect_state;
|
||||
}
|
||||
@@ -81,6 +98,8 @@ private:
|
||||
|
||||
void DrawIndexSmall(u32 argument);
|
||||
|
||||
void DrawTexture();
|
||||
|
||||
void UpdateTopology();
|
||||
|
||||
void ProcessDraw(bool draw_indexed, u32 instance_count);
|
||||
@@ -89,6 +108,7 @@ private:
|
||||
|
||||
Maxwell3D* maxwell3d{};
|
||||
State draw_state{};
|
||||
DrawTextureState draw_texture_state{};
|
||||
IndirectParams indirect_state{};
|
||||
};
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
|
||||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||
regs.dest.BlockDepth(), regs.line_length_in);
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -6,6 +6,7 @@
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
@@ -20,8 +21,8 @@ namespace Tegra::Engines {
|
||||
|
||||
using namespace Texture;
|
||||
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
@@ -104,6 +105,7 @@ void Fermi2D::Blit() {
|
||||
config.src_x0 = 0;
|
||||
}
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
sw_blitter->Blit(src, regs.dst, config);
|
||||
}
|
||||
|
@@ -305,6 +305,7 @@ public:
|
||||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
|
@@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) {
|
||||
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
|
||||
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
|
||||
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(draw_texture.src_y0):
|
||||
case MAXWELL3D_REG_INDEX(wait_for_idle):
|
||||
case MAXWELL3D_REG_INDEX(shadow_ram_control):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
|
||||
@@ -485,11 +486,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
|
||||
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
|
||||
}
|
||||
|
||||
switch (regs.report_semaphore.query.operation) {
|
||||
case Regs::ReportSemaphore::Operation::Release:
|
||||
if (regs.report_semaphore.query.short_query != 0) {
|
||||
@@ -649,7 +645,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
|
||||
|
||||
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
|
||||
const size_t copy_size = amount * sizeof(u32);
|
||||
memory_manager.WriteBlock(address, start_base, copy_size);
|
||||
memory_manager.WriteBlockCached(address, start_base, copy_size);
|
||||
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.offset += static_cast<u32>(copy_size);
|
||||
|
@@ -1599,6 +1599,20 @@ public:
|
||||
};
|
||||
static_assert(sizeof(TIRModulationCoeff) == 0x4);
|
||||
|
||||
/// Register layout of the draw-texture method group.
/// Field order and widths define the layout; the size is checked below.
struct DrawTexture {
|
||||
// Destination origin and extent (raw register values).
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_width;
|
||||
s32 dst_height;
|
||||
// 64-bit source step values.
// NOTE(review): presumably source-coordinate deltas per destination unit - confirm.
s64 dx_du;
|
||||
s64 dy_dv;
|
||||
// Sampler and texture selectors.
u32 src_sampler;
|
||||
u32 src_texture;
|
||||
// Source origin (raw register values).
s32 src_x0;
|
||||
s32 src_y0;
|
||||
};
|
||||
// 4 * 4 + 2 * 8 + 4 * 4 bytes = 0x30.
static_assert(sizeof(DrawTexture) == 0x30);
|
||||
|
||||
struct ReduceColorThreshold {
|
||||
union {
|
||||
BitField<0, 8, u32> all_hit_once;
|
||||
@@ -2751,7 +2765,7 @@ public:
|
||||
u32 reserved_sw_method2; ///< 0x102C
|
||||
std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030
|
||||
std::array<u32, 15> spare_nop; ///< 0x1044
|
||||
INSERT_PADDING_BYTES_NOINIT(0x30);
|
||||
DrawTexture draw_texture; ///< 0x1080
|
||||
std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0
|
||||
ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC
|
||||
std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0
|
||||
|
@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
|
||||
if (launch.multi_line_enable) {
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
CopyBlockLinearToBlockLinear();
|
||||
@@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
|
||||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.FlushCaching();
|
||||
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||
@@ -121,8 +122,8 @@ void MaxwellDMA::Launch() {
|
||||
memory_manager.ReadBlockUnsafe(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
}
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||
@@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
|
||||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlock(
|
||||
memory_manager.WriteBlockCached(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
}
|
||||
@@ -141,8 +142,8 @@ void MaxwellDMA::Launch() {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
@@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_in);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
@@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||
@@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
|
||||
dst.block_size.height, dst.block_size.depth, pitch);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::ReleaseSemaphore() {
|
||||
|
@@ -11,6 +11,7 @@ set(GLSL_INCLUDES
|
||||
|
||||
set(SHADER_FILES
|
||||
astc_decoder.comp
|
||||
blit_color_float.frag
|
||||
block_linear_unswizzle_2d.comp
|
||||
block_linear_unswizzle_3d.comp
|
||||
convert_abgr8_to_d24s8.frag
|
||||
@@ -36,7 +37,6 @@ set(SHADER_FILES
|
||||
smaa_blending_weight_calculation.frag
|
||||
smaa_neighborhood_blending.vert
|
||||
smaa_neighborhood_blending.frag
|
||||
vulkan_blit_color_float.frag
|
||||
vulkan_blit_depth_stencil.frag
|
||||
vulkan_fidelityfx_fsr_easu_fp16.comp
|
||||
vulkan_fidelityfx_fsr_easu_fp32.comp
|
||||
@@ -47,6 +47,7 @@ set(SHADER_FILES
|
||||
vulkan_present_scaleforce_fp16.frag
|
||||
vulkan_present_scaleforce_fp32.frag
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_turbo_mode.comp
|
||||
vulkan_uint8.comp
|
||||
)
|
||||
|
||||
|
@@ -4,13 +4,20 @@
|
||||
#version 450
|
||||
|
||||
#ifdef VULKAN
|
||||
#define VERTEX_ID gl_VertexIndex
|
||||
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
|
||||
#define END_PUSH_CONSTANTS };
|
||||
#define UNIFORM(n)
|
||||
#define FLIPY 1
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
#define VERTEX_ID gl_VertexID
|
||||
#define BEGIN_PUSH_CONSTANTS
|
||||
#define END_PUSH_CONSTANTS
|
||||
#define FLIPY -1
|
||||
#define UNIFORM(n) layout (location = n) uniform
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
};
|
||||
#endif
|
||||
|
||||
BEGIN_PUSH_CONSTANTS
|
||||
@@ -21,8 +28,8 @@ END_PUSH_CONSTANTS
|
||||
layout(location = 0) out vec2 texcoord;
|
||||
|
||||
void main() {
|
||||
float x = float((gl_VertexIndex & 1) << 2);
|
||||
float y = float((gl_VertexIndex & 2) << 1);
|
||||
gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
|
||||
float x = float((VERTEX_ID & 1) << 2);
|
||||
float y = float((VERTEX_ID & 2) << 1);
|
||||
gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0);
|
||||
texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
|
||||
}
|
||||
}
|
@@ -50,38 +50,6 @@ protected:
|
||||
Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
/// HLE macro implementation that issues a single non-indexed draw.
class HLE_DrawArrays final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
/// Refreshes the macro parameters, then calls DrawArray using parameters[0..2].
/// The `method` argument is unused.
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
|
||||
// parameters[0] is reinterpreted as the primitive topology.
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
// NOTE(review): parameters[1] / parameters[2] are presumably first vertex and
// vertex count - confirm against DrawArray's signature. The trailing 1 is
// presumably the instance count.
maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2],
|
||||
maxwell3d.regs.global_base_instance_index, 1);
|
||||
}
|
||||
};
|
||||
|
||||
/// HLE macro implementation that programs the index buffer registers and
/// issues a single indexed draw.
class HLE_DrawIndexed final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
/// Refreshes the macro parameters, writes parameters[1..3] into the index
/// buffer registers, and calls DrawIndex. The `method` argument is unused.
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
// Index buffer start address, high word then low word.
maxwell3d.regs.index_buffer.start_addr_high = parameters[1];
|
||||
maxwell3d.regs.index_buffer.start_addr_low = parameters[2];
|
||||
// parameters[3] is reinterpreted as the index format.
maxwell3d.regs.index_buffer.format =
|
||||
static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]);
|
||||
// The registers were written directly, so flag the index buffer as dirty.
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
|
||||
// parameters[0] is reinterpreted as the primitive topology.
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
// NOTE(review): the literal 0 and parameters[4] are presumably first index and
// index count, and the trailing 1 the instance count - confirm against DrawIndex.
maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4],
|
||||
maxwell3d.regs.global_base_vertex_index,
|
||||
maxwell3d.regs.global_base_instance_index, 1);
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
* also assigning the base vertex/instance.
|
||||
@@ -497,11 +465,6 @@ public:
|
||||
} // Anonymous namespace
|
||||
|
||||
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||
builders.emplace(0xDD6A7FA92A7D2674ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawArrays>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x0D61FC9FAAC9FCADULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
@@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x2DB33AADB741839CULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawIndexed>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x771BB18C62444DA0ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
|
@@ -6,11 +6,13 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "core/device_memory.h"
|
||||
#include "core/hle/kernel/k_page_table.h"
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/invalidation_accumulator.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
@@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
|
||||
page_bits != big_page_bits ? page_bits : 0},
|
||||
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
|
||||
1, std::memory_order_acq_rel)} {
|
||||
1, std::memory_order_acq_rel)},
|
||||
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
|
||||
address_space_size = 1ULL << address_space_bits;
|
||||
page_size = 1ULL << page_bits;
|
||||
page_mask = page_size - 1ULL;
|
||||
@@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||
big_page_table_cpu.resize(big_page_table_size);
|
||||
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
||||
entries.resize(page_table_size / 32, 0);
|
||||
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
|
||||
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
|
||||
} else {
|
||||
fastmem_arena = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryManager::~MemoryManager() = default;
|
||||
@@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
||||
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
|
||||
|
||||
for (const auto& [map_addr, map_size] : submapped_ranges) {
|
||||
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
||||
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
||||
for (const auto& [map_addr, map_size] : page_stash) {
|
||||
rasterizer->UnmapMemory(map_addr, map_size);
|
||||
}
|
||||
page_stash.clear();
|
||||
|
||||
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||
@@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
|
||||
}
|
||||
}
|
||||
|
||||
template <bool is_safe>
|
||||
template <bool is_safe, bool use_fastmem>
|
||||
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
[[maybe_unused]] VideoCommon::CacheType which) const {
|
||||
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
|
||||
@@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
|
||||
if constexpr (is_safe) {
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
||||
}
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
if constexpr (use_fastmem) {
|
||||
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
|
||||
} else {
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
}
|
||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||
};
|
||||
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
@@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
|
||||
if constexpr (is_safe) {
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
||||
}
|
||||
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
||||
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
|
||||
if constexpr (use_fastmem) {
|
||||
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
|
||||
} else {
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
||||
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
|
||||
} else {
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
}
|
||||
}
|
||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||
};
|
||||
@@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
|
||||
|
||||
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which) const {
|
||||
ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
|
||||
if (fastmem_arena) [[likely]] {
|
||||
ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
|
||||
return;
|
||||
}
|
||||
ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
|
||||
}
|
||||
|
||||
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||
const std::size_t size) const {
|
||||
ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
||||
if (fastmem_arena) [[likely]] {
|
||||
ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
||||
return;
|
||||
}
|
||||
ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
||||
}
|
||||
|
||||
template <bool is_safe>
|
||||
@@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
|
||||
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
||||
}
|
||||
|
||||
/// Writes `size` bytes from `src_buffer` to GPU address `gpu_dest_addr` and
/// records the written range in the invalidation accumulator.
/// The recorded ranges are consumed later by FlushCaching().
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
||||
std::size_t size) {
|
||||
// Same write path as WriteBlockUnsafe (WriteBlockImpl<false>, CacheType::None).
// NOTE(review): presumably this skips immediate rasterizer invalidation - confirm
// in WriteBlockImpl.
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
||||
// Remember the range so it can be invalidated in bulk later.
accumulator->Add(gpu_dest_addr, size);
|
||||
}
|
||||
|
||||
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which) const {
|
||||
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
||||
@@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
GPUVAddr gpu_addr, std::size_t size) const {
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
||||
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
|
||||
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <bool is_gpu_address>
|
||||
void MemoryManager::GetSubmappedRangeImpl(
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||
result) const {
|
||||
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
|
||||
last_segment{};
|
||||
std::optional<VAddr> old_page_addr{};
|
||||
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset,
|
||||
@@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
}
|
||||
old_page_addr = {cpu_addr_base + copy_amount};
|
||||
if (!last_segment) {
|
||||
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
if constexpr (is_gpu_address) {
|
||||
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
} else {
|
||||
last_segment = {cpu_addr_base, copy_amount};
|
||||
}
|
||||
} else {
|
||||
last_segment->second += copy_amount;
|
||||
}
|
||||
@@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
}
|
||||
old_page_addr = {cpu_addr_base + copy_amount};
|
||||
if (!last_segment) {
|
||||
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
if constexpr (is_gpu_address) {
|
||||
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
} else {
|
||||
last_segment = {cpu_addr_base, copy_amount};
|
||||
}
|
||||
} else {
|
||||
last_segment->second += copy_amount;
|
||||
}
|
||||
@@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
};
|
||||
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
|
||||
split(0, 0, 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Hands every range recorded by WriteBlockCached() to the rasterizer for
/// invalidation, then resets the accumulator. Does nothing if no range is pending.
void MemoryManager::FlushCaching() {
|
||||
// Nothing was written through the cached path since the last flush.
if (!accumulator->AnyAccumulated()) {
|
||||
return;
|
||||
}
|
||||
// Resolve each accumulated GPU range into CPU-address sub-ranges
// (is_gpu_address = false) and append them to page_stash.
accumulator->Callback([this](GPUVAddr addr, size_t size) {
|
||||
GetSubmappedRangeImpl<false>(addr, size, page_stash);
|
||||
});
|
||||
// One call covers all collected ranges.
rasterizer->InnerInvalidation(page_stash);
|
||||
// page_stash is a reused member; leave it empty for the next user.
page_stash.clear();
|
||||
accumulator->Clear();
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
@@ -19,6 +19,10 @@ namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
// Forward declaration only: MemoryManager stores the accumulator through a
// std::unique_ptr, and the full definition is included by the .cpp file.
namespace VideoCommon {
|
||||
class InvalidationAccumulator;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class DeviceMemory;
|
||||
namespace Memory {
|
||||
@@ -80,6 +84,7 @@ public:
|
||||
*/
|
||||
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
|
||||
/**
|
||||
* Checks if a gpu region can be simply read with a pointer.
|
||||
@@ -129,12 +134,14 @@ public:
|
||||
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
|
||||
size_t max_size = std::numeric_limits<size_t>::max()) const;
|
||||
|
||||
void FlushCaching();
|
||||
|
||||
private:
|
||||
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
||||
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
|
||||
FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
|
||||
|
||||
template <bool is_safe>
|
||||
template <bool is_safe, bool use_fastmem>
|
||||
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which) const;
|
||||
|
||||
@@ -154,6 +161,12 @@ private:
|
||||
inline bool IsBigPageContinous(size_t big_page_index) const;
|
||||
inline void SetBigPageContinous(size_t big_page_index, bool value);
|
||||
|
||||
template <bool is_gpu_address>
|
||||
void GetSubmappedRangeImpl(
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||
result) const;
|
||||
|
||||
Core::System& system;
|
||||
Core::Memory::Memory& memory;
|
||||
Core::DeviceMemory& device_memory;
|
||||
@@ -201,10 +214,13 @@ private:
|
||||
Common::VirtualBuffer<u32> big_page_table_cpu;
|
||||
|
||||
std::vector<u64> big_page_continous;
|
||||
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
|
||||
u8* fastmem_arena{};
|
||||
|
||||
constexpr static size_t continous_bits = 64;
|
||||
|
||||
const size_t unique_identifier;
|
||||
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
|
||||
|
||||
static std::atomic<size_t> unique_identifier_generator;
|
||||
};
|
||||
|
@@ -6,6 +6,7 @@
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include "common/common_types.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "video_core/cache_types.h"
|
||||
@@ -46,6 +47,9 @@ public:
|
||||
/// Dispatches an indirect draw invocation
|
||||
virtual void DrawIndirect() {}
|
||||
|
||||
/// Dispatches a draw texture invocation
|
||||
virtual void DrawTexture() = 0;
|
||||
|
||||
/// Clear the current framebuffer
|
||||
virtual void Clear(u32 layer_count) = 0;
|
||||
|
||||
@@ -95,6 +99,12 @@ public:
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||
|
||||
/// Invalidates every (address, size) pair in `sequences`.
/// This default body forwards each pair to InvalidateRegion without passing a cache
/// type, so InvalidateRegion's default `which` argument applies.
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
||||
for (const auto& [cpu_addr, size] : sequences) {
|
||||
InvalidateRegion(cpu_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region are out of sync with the guest
|
||||
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||
|
||||
|
@@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp
|
||||
RasterizerNull::~RasterizerNull() = default;
|
||||
|
||||
void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
|
||||
// Empty body: the null rasterizer does nothing for draw-texture requests.
void RasterizerNull::DrawTexture() {}
|
||||
void RasterizerNull::Clear(u32 layer_count) {}
|
||||
void RasterizerNull::DispatchCompute() {}
|
||||
void RasterizerNull::ResetCounter(VideoCore::QueryType type) {}
|
||||
|
@@ -31,6 +31,7 @@ public:
|
||||
~RasterizerNull() override;
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void DrawTexture() override;
|
||||
void Clear(u32 layer_count) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
|
@@ -160,6 +160,10 @@ public:
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
/// Returns the device's shader storage buffer alignment, narrowed to u32.
u32 GetStorageBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::array PABO_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
@@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
|
||||
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
|
||||
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
|
||||
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
|
||||
has_draw_texture = GLAD_GL_NV_draw_texture;
|
||||
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
|
||||
need_fastmath_off = is_nvidia;
|
||||
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
|
||||
|
@@ -4,6 +4,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
@@ -146,6 +148,10 @@ public:
|
||||
return has_sparse_texture_2;
|
||||
}
|
||||
|
||||
/// Returns the cached has_draw_texture flag (set from GLAD_GL_NV_draw_texture in the
/// Device constructor).
bool HasDrawTexture() const {
|
||||
return has_draw_texture;
|
||||
}
|
||||
|
||||
bool IsWarpSizePotentiallyLargerThanGuest() const {
|
||||
return warp_size_potentially_larger_than_guest;
|
||||
}
|
||||
@@ -216,6 +222,7 @@ private:
|
||||
bool has_shader_int64{};
|
||||
bool has_amd_shader_half_float{};
|
||||
bool has_sparse_texture_2{};
|
||||
bool has_draw_texture{};
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
bool need_fastmath_off{};
|
||||
bool has_cbuf_ftou_bug{};
|
||||
|
@@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
|
||||
state_tracker, gpu.ShaderNotify()),
|
||||
query_cache(*this), accelerate_dma(buffer_cache),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||
blit_image(program_manager_) {}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
@@ -318,6 +319,47 @@ void RasterizerOpenGL::DrawIndirect() {
|
||||
buffer_cache.SetDrawIndirect(nullptr);
|
||||
}
|
||||
|
||||
// Draws the source texture described by the draw manager's draw-texture state.
// When the device reports HasDrawTexture() the draw is issued with glDrawTextureNV;
// otherwise it falls back to blit_image.BlitColor with integer regions.
void RasterizerOpenGL::DrawTexture() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
|
||||
// gpu.TickWork() runs when this scope exits, on every return path.
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
SyncState();
|
||||
|
||||
// Look up the sampler and image view by the indices stored in the draw-texture state.
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
|
||||
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
|
||||
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
|
||||
|
||||
if (device.HasDrawTexture()) {
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
|
||||
// Destination coordinates are passed as-is with z = 0; source coordinates are
// divided by the texture's width/height before being passed.
glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0,
|
||||
draw_texture_state.dst_y0, draw_texture_state.dst_x1,
|
||||
draw_texture_state.dst_y1, 0,
|
||||
draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
|
||||
draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
|
||||
draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
|
||||
draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
|
||||
} else {
|
||||
// Fallback path: coordinates are cast to s32 to build the blit regions.
Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
|
||||
.y = static_cast<s32>(draw_texture_state.dst_y0)},
|
||||
Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
|
||||
.y = static_cast<s32>(draw_texture_state.dst_y1)}};
|
||||
Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
|
||||
.y = static_cast<s32>(draw_texture_state.src_y0)},
|
||||
Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
|
||||
.y = static_cast<s32>(draw_texture_state.src_y1)}};
|
||||
blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
|
||||
sampler->Handle(), dst_region, src_region, texture.size);
|
||||
}
|
||||
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute() {
|
||||
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
|
||||
if (!pipeline) {
|
||||
|
@@ -16,6 +16,7 @@
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/blit_image.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||
@@ -70,6 +71,7 @@ public:
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void DrawIndirect() override;
|
||||
void DrawTexture() override;
|
||||
void Clear(u32 layer_count) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
@@ -224,6 +226,8 @@ private:
|
||||
AccelerateDMA accelerate_dma;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
BlitImageHelper blit_image;
|
||||
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
|
@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
.support_snorm_render_buffer = false,
|
||||
.support_viewport_index_layer = device.HasVertexViewportLayer(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
} {
|
||||
if (use_asynchronous_shaders) {
|
||||
workers = CreateWorkers();
|
||||
|
@@ -1,2 +1,123 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
// NV assembly program targets in the order vertex, tessellation control, tessellation
// evaluation, geometry, fragment. BindAssemblyPrograms indexes this array by stage and
// BindPresentPrograms iterates it to disable every target.
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
|
||||
// Creates the program pipeline object and, when the device uses assembly shaders,
// enables GL_COMPUTE_PROGRAM_NV.
ProgramManager::ProgramManager(const Device& device) {
|
||||
glCreateProgramPipelines(1, &pipeline.handle);
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
}
|
||||
|
||||
// Makes `program` current with glUseProgram and records that a compute program is bound,
// so UnbindCompute knows it has to reset the current program later.
void ProgramManager::BindComputeProgram(GLuint program) {
|
||||
glUseProgram(program);
|
||||
is_compute_bound = true;
|
||||
}
|
||||
|
||||
// Binds `program` to GL_COMPUTE_PROGRAM_NV, skipping the GL call when it matches the
// cached handle, and then unbinds the program pipeline.
void ProgramManager::BindComputeAssemblyProgram(GLuint program) {
|
||||
if (current_assembly_compute_program != program) {
|
||||
current_assembly_compute_program = program;
|
||||
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
|
||||
}
|
||||
UnbindPipeline();
|
||||
}
|
||||
|
||||
// Attaches the given per-stage programs to the pipeline and binds the pipeline.
// glUseProgramStages is only issued for stages whose handle differs from the one cached
// in current_programs.
void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
|
||||
// Stage bits in the same order as the entries of `programs`.
static constexpr std::array<GLenum, 5> stage_enums{
|
||||
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
|
||||
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
|
||||
};
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (current_programs[stage] != programs[stage].handle) {
|
||||
current_programs[stage] = programs[stage].handle;
|
||||
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
|
||||
}
|
||||
}
|
||||
BindPipeline();
|
||||
}
|
||||
|
||||
// Sets up the pipeline with only a vertex and a fragment program and binds it.
// The tessellation and geometry stages are cleared, and any enabled assembly program
// targets are disabled.
void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) {
|
||||
// Slot 0 of current_programs caches the vertex program, slot 4 the fragment program.
if (current_programs[0] != vertex) {
|
||||
current_programs[0] = vertex;
|
||||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
|
||||
}
|
||||
if (current_programs[4] != fragment) {
|
||||
current_programs[4] = fragment;
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
|
||||
}
|
||||
// Unconditionally detach the three middle stages and mirror that in the cache.
glUseProgramStages(
|
||||
pipeline.handle,
|
||||
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
|
||||
current_programs[1] = 0;
|
||||
current_programs[2] = 0;
|
||||
current_programs[3] = 0;
|
||||
|
||||
// A non-zero mask means assembly targets were enabled; disable all of them.
if (current_stage_mask != 0) {
|
||||
current_stage_mask = 0;
|
||||
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
|
||||
glDisable(program_type);
|
||||
}
|
||||
}
|
||||
BindPipeline();
|
||||
}
|
||||
|
||||
// Binds the given per-stage assembly programs and unbinds the program pipeline.
// Bit `stage` of `stage_mask` selects whether that stage's assembly target is enabled;
// only targets whose bit changed since the previous call are toggled.
void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
|
||||
u32 stage_mask) {
|
||||
// XOR against the cached mask leaves a 1 for every stage whose state differs.
const u32 changed_mask = current_stage_mask ^ stage_mask;
|
||||
current_stage_mask = stage_mask;
|
||||
|
||||
if (changed_mask != 0) {
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (((changed_mask >> stage) & 1) != 0) {
|
||||
if (((stage_mask >> stage) & 1) != 0) {
|
||||
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
|
||||
} else {
|
||||
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Rebind only the stages whose program handle differs from the cached one.
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (current_programs[stage] != programs[stage].handle) {
|
||||
current_programs[stage] = programs[stage].handle;
|
||||
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
|
||||
}
|
||||
}
|
||||
UnbindPipeline();
|
||||
}
|
||||
|
||||
// Empty body: this implementation performs no work.
void ProgramManager::RestoreGuestCompute() {}
|
||||
|
||||
// Binds the program pipeline if it is not already bound, then clears any compute
// program that was made current with glUseProgram.
void ProgramManager::BindPipeline() {
|
||||
if (!is_pipeline_bound) {
|
||||
is_pipeline_bound = true;
|
||||
glBindProgramPipeline(pipeline.handle);
|
||||
}
|
||||
UnbindCompute();
|
||||
}
|
||||
|
||||
// Unbinds the program pipeline if it is currently bound, then clears any compute
// program that was made current with glUseProgram.
void ProgramManager::UnbindPipeline() {
|
||||
if (is_pipeline_bound) {
|
||||
is_pipeline_bound = false;
|
||||
glBindProgramPipeline(0);
|
||||
}
|
||||
UnbindCompute();
|
||||
}
|
||||
|
||||
// Resets the current program to 0, but only when BindComputeProgram flagged a compute
// program as bound; otherwise no GL call is made.
void ProgramManager::UnbindCompute() {
|
||||
if (is_compute_bound) {
|
||||
is_compute_bound = false;
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
} // namespace OpenGL
|
||||
|
@@ -6,8 +6,6 @@
|
||||
#include <array>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
@@ -16,121 +14,28 @@ namespace OpenGL {
|
||||
class ProgramManager {
|
||||
static constexpr size_t NUM_STAGES = 5;
|
||||
|
||||
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
|
||||
public:
|
||||
explicit ProgramManager(const Device& device) {
|
||||
glCreateProgramPipelines(1, &pipeline.handle);
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
}
|
||||
explicit ProgramManager(const Device& device);
|
||||
|
||||
void BindComputeProgram(GLuint program) {
|
||||
glUseProgram(program);
|
||||
is_compute_bound = true;
|
||||
}
|
||||
void BindComputeProgram(GLuint program);
|
||||
|
||||
void BindComputeAssemblyProgram(GLuint program) {
|
||||
if (current_assembly_compute_program != program) {
|
||||
current_assembly_compute_program = program;
|
||||
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
|
||||
}
|
||||
UnbindPipeline();
|
||||
}
|
||||
void BindComputeAssemblyProgram(GLuint program);
|
||||
|
||||
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
|
||||
static constexpr std::array<GLenum, 5> stage_enums{
|
||||
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
|
||||
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
|
||||
};
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (current_programs[stage] != programs[stage].handle) {
|
||||
current_programs[stage] = programs[stage].handle;
|
||||
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
|
||||
}
|
||||
}
|
||||
BindPipeline();
|
||||
}
|
||||
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs);
|
||||
|
||||
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
|
||||
if (current_programs[0] != vertex) {
|
||||
current_programs[0] = vertex;
|
||||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
|
||||
}
|
||||
if (current_programs[4] != fragment) {
|
||||
current_programs[4] = fragment;
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
|
||||
}
|
||||
glUseProgramStages(
|
||||
pipeline.handle,
|
||||
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
|
||||
current_programs[1] = 0;
|
||||
current_programs[2] = 0;
|
||||
current_programs[3] = 0;
|
||||
|
||||
if (current_stage_mask != 0) {
|
||||
current_stage_mask = 0;
|
||||
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
|
||||
glDisable(program_type);
|
||||
}
|
||||
}
|
||||
BindPipeline();
|
||||
}
|
||||
void BindPresentPrograms(GLuint vertex, GLuint fragment);
|
||||
|
||||
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
|
||||
u32 stage_mask) {
|
||||
const u32 changed_mask = current_stage_mask ^ stage_mask;
|
||||
current_stage_mask = stage_mask;
|
||||
u32 stage_mask);
|
||||
|
||||
if (changed_mask != 0) {
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (((changed_mask >> stage) & 1) != 0) {
|
||||
if (((stage_mask >> stage) & 1) != 0) {
|
||||
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
|
||||
} else {
|
||||
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
|
||||
if (current_programs[stage] != programs[stage].handle) {
|
||||
current_programs[stage] = programs[stage].handle;
|
||||
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
|
||||
}
|
||||
}
|
||||
UnbindPipeline();
|
||||
}
|
||||
|
||||
void RestoreGuestCompute() {}
|
||||
void RestoreGuestCompute();
|
||||
|
||||
private:
|
||||
void BindPipeline() {
|
||||
if (!is_pipeline_bound) {
|
||||
is_pipeline_bound = true;
|
||||
glBindProgramPipeline(pipeline.handle);
|
||||
}
|
||||
UnbindCompute();
|
||||
}
|
||||
void BindPipeline();
|
||||
|
||||
void UnbindPipeline() {
|
||||
if (is_pipeline_bound) {
|
||||
is_pipeline_bound = false;
|
||||
glBindProgramPipeline(0);
|
||||
}
|
||||
UnbindCompute();
|
||||
}
|
||||
void UnbindPipeline();
|
||||
|
||||
void UnbindCompute() {
|
||||
if (is_compute_bound) {
|
||||
is_compute_bound = false;
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
void UnbindCompute();
|
||||
|
||||
OGLPipeline pipeline;
|
||||
bool is_pipeline_bound{};
|
||||
|
@@ -4,13 +4,13 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "common/settings.h"
|
||||
#include "video_core/host_shaders/blit_color_float_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
|
||||
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
@@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
|
||||
}
|
||||
|
||||
void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
|
||||
const Region2D& src_region) {
|
||||
const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
|
||||
const VkOffset2D offset{
|
||||
.x = std::min(dst_region.start.x, dst_region.end.x),
|
||||
.y = std::min(dst_region.start.y, dst_region.end.y),
|
||||
@@ -325,12 +325,15 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
|
||||
.offset = offset,
|
||||
.extent = extent,
|
||||
};
|
||||
const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
|
||||
const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
|
||||
const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) /
|
||||
static_cast<float>(src_size.width);
|
||||
const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) /
|
||||
static_cast<float>(src_size.height);
|
||||
const PushConstants push_constants{
|
||||
.tex_scale = {scale_x, scale_y},
|
||||
.tex_offset = {static_cast<float>(src_region.start.x),
|
||||
static_cast<float>(src_region.start.y)},
|
||||
.tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
|
||||
static_cast<float>(src_region.start.y) /
|
||||
static_cast<float>(src_size.height)},
|
||||
};
|
||||
cmdbuf.SetViewport(0, viewport);
|
||||
cmdbuf.SetScissor(0, scissor);
|
||||
@@ -365,7 +368,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
|
||||
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
|
||||
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
|
||||
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
|
||||
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
|
||||
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
|
||||
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
|
||||
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
|
||||
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
|
||||
@@ -404,6 +407,30 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
// BlitColor overload that additionally takes the source image size.
// It requests the destination render pass and records a 3-vertex draw that samples
// `src_image_view` through `src_sampler`; `src_size` is forwarded to BindBlitState
// together with both regions.
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
|
||||
VkSampler src_sampler, const Region2D& dst_region,
|
||||
const Region2D& src_region, const Extent3D& src_size) {
|
||||
// The pipeline is keyed on the destination render pass with a fixed SrcCopy operation.
const BlitImagePipelineKey key{
|
||||
.renderpass = dst_framebuffer->RenderPass(),
|
||||
.operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
|
||||
};
|
||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
|
||||
scheduler.RequestRenderpass(dst_framebuffer);
|
||||
// Regions, size and handles are captured by value; only `this` is captured as a pointer.
scheduler.Record([this, dst_region, src_region, src_size, pipeline, layout, src_sampler,
|
||||
src_image_view](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
|
||||
UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
|
||||
nullptr);
|
||||
BindBlitState(cmdbuf, layout, dst_region, src_region, src_size);
|
||||
cmdbuf.Draw(3, 1, 0, 0);
|
||||
});
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
|
||||
VkImageView src_depth_view, VkImageView src_stencil_view,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
|
@@ -10,6 +10,8 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::Extent3D;
|
||||
using VideoCommon::Offset2D;
|
||||
using VideoCommon::Region2D;
|
||||
|
||||
class Device;
|
||||
@@ -36,6 +38,10 @@ public:
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
|
||||
VkSampler src_sampler, const Region2D& dst_region, const Region2D& src_region,
|
||||
const Extent3D& src_size);
|
||||
|
||||
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
|
||||
VkImageView src_stencil_view, const Region2D& dst_region,
|
||||
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
|
||||
|
@@ -78,6 +78,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
|
||||
return separated_extensions;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
|
||||
VkSurfaceKHR surface) {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
@@ -89,7 +91,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
|
||||
const vk::PhysicalDevice physical_device(devices[device_index], dld);
|
||||
return Device(*instance, physical_device, surface, dld);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
@@ -109,6 +110,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
|
||||
state_tracker, scheduler) {
|
||||
if (Settings::values.renderer_force_max_clock.GetValue()) {
|
||||
turbo_mode.emplace(instance, dld);
|
||||
}
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
@@ -31,6 +32,9 @@ class GPU;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
|
||||
VkSurfaceKHR surface);
|
||||
|
||||
class RendererVulkan final : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
|
||||
@@ -74,6 +78,7 @@ private:
|
||||
Swapchain swapchain;
|
||||
BlitScreen blit_screen;
|
||||
RasterizerVulkan rasterizer;
|
||||
std::optional<TurboMode> turbo_mode;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@@ -330,6 +330,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
// Returns the device's storage buffer alignment, narrowed to u32.
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetStorageBufferAlignment());
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
@@ -73,6 +73,8 @@ public:
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
u32 GetStorageBufferAlignment() const;
|
||||
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
@@ -331,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
||||
.need_declared_frag_colors = false,
|
||||
|
||||
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
|
||||
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
|
||||
.has_broken_unsigned_image_offsets = false,
|
||||
.has_broken_signed_operations = false,
|
||||
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
|
||||
@@ -343,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
||||
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
|
||||
.support_snorm_render_buffer = true,
|
||||
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
|
||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
};
|
||||
|
||||
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
||||
|
@@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
FlushWork();
|
||||
gpu_memory->FlushCaching();
|
||||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
@@ -265,6 +266,34 @@ void RasterizerVulkan::DrawIndirect() {
|
||||
buffer_cache.SetDrawIndirect(nullptr);
|
||||
}
|
||||
|
||||
// Draws the source texture described by the draw manager's draw-texture state by
// blitting it into the current framebuffer with blit_image.BlitColor.
// NOTE(review): PrepareDraw and DispatchCompute call gpu_memory->FlushCaching() right
// after FlushWork(); this function does not - confirm that is intentional.
void RasterizerVulkan::DrawTexture() {
|
||||
MICROPROFILE_SCOPE(Vulkan_Drawing);
|
||||
|
||||
// gpu.TickWork() runs when this scope exits, on every return path.
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
FlushWork();
|
||||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
UpdateDynamicStates();
|
||||
|
||||
// Look up the sampler and image view by the indices stored in the draw-texture state.
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
|
||||
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
|
||||
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
|
||||
// Coordinates are cast to s32 to build the destination and source regions.
Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
|
||||
.y = static_cast<s32>(draw_texture_state.dst_y0)},
|
||||
Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
|
||||
.y = static_cast<s32>(draw_texture_state.dst_y1)}};
|
||||
Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
|
||||
.y = static_cast<s32>(draw_texture_state.src_y0)},
|
||||
Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
|
||||
.y = static_cast<s32>(draw_texture_state.src_y1)}};
|
||||
blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), sampler->Handle(),
|
||||
dst_region, src_region, texture.size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Clear(u32 layer_count) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Clearing);
|
||||
|
||||
@@ -393,6 +422,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
||||
|
||||
void RasterizerVulkan::DispatchCompute() {
|
||||
FlushWork();
|
||||
gpu_memory->FlushCaching();
|
||||
|
||||
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
|
||||
if (!pipeline) {
|
||||
@@ -481,6 +511,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
|
||||
}
|
||||
}
|
||||
|
||||
// Batched invalidation of several (address, size) ranges.
// Each cache walks the whole list in one pass, so the texture cache and buffer cache
// mutexes are each taken once per call.
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
||||
{
|
||||
// Texture cache pass, under texture_cache.mutex.
std::scoped_lock lock{texture_cache.mutex};
|
||||
for (const auto& [addr, size] : sequences) {
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
}
|
||||
{
|
||||
// Buffer cache pass, under buffer_cache.mutex.
std::scoped_lock lock{buffer_cache.mutex};
|
||||
for (const auto& [addr, size] : sequences) {
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
}
|
||||
{
|
||||
// Query and pipeline caches are invalidated without taking a lock in this function.
for (const auto& [addr, size] : sequences) {
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
pipeline_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
|
@@ -66,6 +66,7 @@ public:
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void DrawIndirect() override;
|
||||
void DrawTexture() override;
|
||||
void Clear(u32 layer_count) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
@@ -79,6 +80,7 @@ public:
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
|
@@ -148,6 +148,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
|
||||
return slot_image_views[id];
|
||||
}
|
||||
|
||||
// Overload of GetImageView taking a u32 index instead of an ImageViewId.
// The index is resolved through VisitImageView, using the current channel
// state's graphics image table and graphics image view id list, and the
// resulting id selects the entry returned from slot_image_views.
// NOTE(review): the id returned by VisitImageView is used without any check
// in this function; presumably it is always a valid slot - confirm.
template <class P>
|
||||
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
|
||||
const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
|
||||
channel_state->graphics_image_view_ids, index);
|
||||
return slot_image_views[image_view_id];
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::MarkModification(ImageId id) noexcept {
|
||||
MarkModification(slot_images[id]);
|
||||
|
@@ -129,6 +129,9 @@ public:
|
||||
/// Return a reference to the given image view id
|
||||
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
|
||||
|
||||
/// Get the imageview from the graphics descriptor table in the specified index
|
||||
[[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
|
||||
|
||||
/// Mark an image as modified from the GPU
|
||||
void MarkModification(ImageId id) noexcept;
|
||||
|
||||
|
@@ -1472,7 +1472,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
||||
is_patch_list_restart_supported =
|
||||
primitive_topology_list_restart.primitiveTopologyPatchListRestart;
|
||||
}
|
||||
if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
|
||||
if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) {
|
||||
extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
|
||||
khr_swapchain_mutable_format = true;
|
||||
|
Reference in New Issue
Block a user