early-access version 3289

This commit is contained in:
pineappleEA
2023-01-06 01:55:16 +01:00
parent 0258245bbf
commit 69c608cf26
142 changed files with 885 additions and 1871 deletions

View File

@@ -85,7 +85,6 @@ add_library(video_core STATIC
gpu.h
gpu_thread.cpp
gpu_thread.h
invalidation_accumulator.h
memory_manager.cpp
memory_manager.h
precompiled_headers.h
@@ -100,8 +99,6 @@ add_library(video_core STATIC
renderer_null/null_rasterizer.h
renderer_null/renderer_null.cpp
renderer_null/renderer_null.h
renderer_opengl/blit_image.cpp
renderer_opengl/blit_image.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_compute_pipeline.cpp
@@ -193,8 +190,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_turbo_mode.cpp
renderer_vulkan/vk_turbo_mode.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp

View File

@@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
const u64 word = current_word;
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
u64 page = page_begin;
page_begin = 0;
@@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,7 +539,8 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
const u64 word = state_words[word_index];
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
@@ -563,7 +564,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -573,7 +574,8 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
const u64 word = state_words[word_index];
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}

View File

@@ -51,10 +51,6 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
LOG_WARNING(HW_GPU, "(STUBBED) called");
break;
}
case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
DrawTexture();
break;
}
default:
break;
}
@@ -183,33 +179,6 @@ void DrawManager::DrawIndexSmall(u32 argument) {
ProcessDraw(true, 1);
}
void DrawManager::DrawTexture() {
const auto& regs{maxwell3d->regs};
draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f;
draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f;
const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f;
const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f;
const bool lower_left{regs.window_origin.mode !=
Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft};
if (lower_left) {
draw_texture_state.dst_y0 -= dst_height;
}
draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
draw_texture_state.src_x1 =
(static_cast<float>(regs.draw_texture.dx_du) / 4294967295.f) * dst_width +
draw_texture_state.src_x0;
draw_texture_state.src_y1 =
(static_cast<float>(regs.draw_texture.dy_dv) / 4294967295.f) * dst_height +
draw_texture_state.src_y0;
draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
draw_texture_state.src_texture = regs.draw_texture.src_texture;
maxwell3d->rasterizer->DrawTexture();
}
void DrawManager::UpdateTopology() {
const auto& regs{maxwell3d->regs};
switch (regs.primitive_topology_control) {

View File

@@ -32,19 +32,6 @@ public:
std::vector<u8> inline_index_draw_indexes;
};
struct DrawTextureState {
f32 dst_x0;
f32 dst_y0;
f32 dst_x1;
f32 dst_y1;
f32 src_x0;
f32 src_y0;
f32 src_x1;
f32 src_y1;
u32 src_sampler;
u32 src_texture;
};
struct IndirectParams {
bool is_indexed;
bool include_count;
@@ -77,10 +64,6 @@ public:
return draw_state;
}
const DrawTextureState& GetDrawTextureState() const {
return draw_texture_state;
}
IndirectParams& GetIndirectParams() {
return indirect_state;
}
@@ -98,8 +81,6 @@ private:
void DrawIndexSmall(u32 argument);
void DrawTexture();
void UpdateTopology();
void ProcessDraw(bool draw_indexed, u32 instance_count);
@@ -108,7 +89,6 @@ private:
Maxwell3D* maxwell3d{};
State draw_state{};
DrawTextureState draw_texture_state{};
IndirectParams indirect_state{};
};
} // namespace Tegra::Engines

View File

@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
x_elements, regs.line_count, regs.dest.BlockHeight(),
regs.dest.BlockDepth(), regs.line_length_in);
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
}

View File

@@ -6,7 +6,6 @@
#include "common/microprofile.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/sw_blitter/blitter.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@@ -21,8 +20,8 @@ namespace Tegra::Engines {
using namespace Texture;
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
@@ -105,7 +104,6 @@ void Fermi2D::Blit() {
config.src_x0 = 0;
}
memory_manager.FlushCaching();
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
sw_blitter->Blit(src, regs.dst, config);
}

View File

@@ -305,7 +305,6 @@ public:
private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.

View File

@@ -149,7 +149,6 @@ bool Maxwell3D::IsMethodExecutable(u32 method) {
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
case MAXWELL3D_REG_INDEX(draw_texture.src_y0):
case MAXWELL3D_REG_INDEX(wait_for_idle):
case MAXWELL3D_REG_INDEX(shadow_ram_control):
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
@@ -486,6 +485,11 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
}
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
}
switch (regs.report_semaphore.query.operation) {
case Regs::ReportSemaphore::Operation::Release:
if (regs.report_semaphore.query.short_query != 0) {
@@ -645,7 +649,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
const size_t copy_size = amount * sizeof(u32);
memory_manager.WriteBlockCached(address, start_base, copy_size);
memory_manager.WriteBlock(address, start_base, copy_size);
// Increment the current buffer position.
regs.const_buffer.offset += static_cast<u32>(copy_size);

View File

@@ -1599,20 +1599,6 @@ public:
};
static_assert(sizeof(TIRModulationCoeff) == 0x4);
struct DrawTexture {
s32 dst_x0;
s32 dst_y0;
s32 dst_width;
s32 dst_height;
s64 dx_du;
s64 dy_dv;
u32 src_sampler;
u32 src_texture;
s32 src_x0;
s32 src_y0;
};
static_assert(sizeof(DrawTexture) == 0x30);
struct ReduceColorThreshold {
union {
BitField<0, 8, u32> all_hit_once;
@@ -2765,7 +2751,7 @@ public:
u32 reserved_sw_method2; ///< 0x102C
std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030
std::array<u32, 15> spare_nop; ///< 0x1044
DrawTexture draw_texture; ///< 0x1080
INSERT_PADDING_BYTES_NOINIT(0x30);
std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0
ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC
std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0

View File

@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
if (launch.multi_line_enable) {
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
memory_manager.FlushCaching();
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
CopyBlockLinearToBlockLinear();
@@ -104,7 +104,6 @@ void MaxwellDMA::Launch() {
reinterpret_cast<u8*>(tmp_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
memory_manager.FlushCaching();
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
((address & 0x180) >> 1) | ((address & 0x20) << 3);
@@ -122,8 +121,8 @@ void MaxwellDMA::Launch() {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_buffer.data(), tmp_buffer.size());
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -133,7 +132,7 @@ void MaxwellDMA::Launch() {
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
tmp_buffer.size());
memory_manager.WriteBlockCached(
memory_manager.WriteBlock(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
tmp_buffer.data(), tmp_buffer.size());
}
@@ -142,8 +141,8 @@ void MaxwellDMA::Launch() {
std::vector<u8> tmp_buffer(regs.line_length_in);
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
}
}
}
@@ -205,7 +204,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_out);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -257,7 +256,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_in);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
@@ -288,7 +287,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
regs.src_params.block_size.height, regs.src_params.block_size.depth,
regs.pitch_out);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -348,7 +347,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
dst.block_size.height, dst.block_size.depth, pitch);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {

View File

@@ -11,7 +11,6 @@ set(GLSL_INCLUDES
set(SHADER_FILES
astc_decoder.comp
blit_color_float.frag
block_linear_unswizzle_2d.comp
block_linear_unswizzle_3d.comp
convert_abgr8_to_d24s8.frag
@@ -37,6 +36,7 @@ set(SHADER_FILES
smaa_blending_weight_calculation.frag
smaa_neighborhood_blending.vert
smaa_neighborhood_blending.frag
vulkan_blit_color_float.frag
vulkan_blit_depth_stencil.frag
vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_easu_fp32.comp
@@ -47,7 +47,6 @@ set(SHADER_FILES
vulkan_present_scaleforce_fp16.frag
vulkan_present_scaleforce_fp32.frag
vulkan_quad_indexed.comp
vulkan_turbo_mode.comp
vulkan_uint8.comp
)

View File

@@ -4,20 +4,13 @@
#version 450
#ifdef VULKAN
#define VERTEX_ID gl_VertexIndex
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define FLIPY 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define VERTEX_ID gl_VertexID
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define FLIPY -1
#define UNIFORM(n) layout (location = n) uniform
out gl_PerVertex {
vec4 gl_Position;
};
#endif
BEGIN_PUSH_CONSTANTS
@@ -28,8 +21,8 @@ END_PUSH_CONSTANTS
layout(location = 0) out vec2 texcoord;
void main() {
float x = float((VERTEX_ID & 1) << 2);
float y = float((VERTEX_ID & 2) << 1);
gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0);
float x = float((gl_VertexIndex & 1) << 2);
float y = float((gl_VertexIndex & 2) << 1);
gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
}
}

View File

@@ -6,13 +6,11 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"
#include "video_core/invalidation_accumulator.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -28,8 +26,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
page_bits != big_page_bits ? page_bits : 0},
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
1, std::memory_order_acq_rel)},
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
1, std::memory_order_acq_rel)} {
address_space_size = 1ULL << address_space_bits;
page_size = 1ULL << page_bits;
page_mask = page_size - 1ULL;
@@ -46,11 +43,6 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
entries.resize(page_table_size / 32, 0);
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
} else {
fastmem_arena = nullptr;
}
}
MemoryManager::~MemoryManager() = default;
@@ -193,12 +185,15 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
if (size == 0) {
return;
}
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
for (const auto& [map_addr, map_size] : page_stash) {
rasterizer->UnmapMemory(map_addr, map_size);
for (const auto& [map_addr, map_size] : submapped_ranges) {
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
ASSERT(cpu_addr);
rasterizer->UnmapMemory(*cpu_addr, map_size);
}
page_stash.clear();
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
@@ -360,7 +355,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
}
}
template <bool is_safe, bool use_fastmem>
template <bool is_safe>
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
[[maybe_unused]] VideoCommon::CacheType which) const {
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
@@ -374,12 +369,8 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
if constexpr (use_fastmem) {
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
}
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -388,15 +379,11 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
if constexpr (use_fastmem) {
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
if (!IsBigPageContinous(page_index)) [[unlikely]] {
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
} else {
if (!IsBigPageContinous(page_index)) [[unlikely]] {
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
}
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
}
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
@@ -410,20 +397,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const {
if (fastmem_arena) [[likely]] {
ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
return;
}
ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
}
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
if (fastmem_arena) [[likely]] {
ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
return;
}
ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
}
template <bool is_safe>
@@ -475,12 +454,6 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
}
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
std::size_t size) {
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
accumulator->Add(gpu_dest_addr, size);
}
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
VideoCommon::CacheType which) const {
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
@@ -690,17 +663,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
GPUVAddr gpu_addr, std::size_t size) const {
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
return result;
}
template <bool is_gpu_address>
void MemoryManager::GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
result) const {
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
last_segment{};
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
std::optional<VAddr> old_page_addr{};
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
[[maybe_unused]] std::size_t offset,
@@ -722,12 +685,8 @@ void MemoryManager::GetSubmappedRangeImpl(
}
old_page_addr = {cpu_addr_base + copy_amount};
if (!last_segment) {
if constexpr (is_gpu_address) {
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
} else {
last_segment = {cpu_addr_base, copy_amount};
}
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
} else {
last_segment->second += copy_amount;
}
@@ -744,12 +703,8 @@ void MemoryManager::GetSubmappedRangeImpl(
}
old_page_addr = {cpu_addr_base + copy_amount};
if (!last_segment) {
if constexpr (is_gpu_address) {
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
} else {
last_segment = {cpu_addr_base, copy_amount};
}
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
} else {
last_segment->second += copy_amount;
}
@@ -760,18 +715,7 @@ void MemoryManager::GetSubmappedRangeImpl(
};
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
split(0, 0, 0);
}
void MemoryManager::FlushCaching() {
if (!accumulator->AnyAccumulated()) {
return;
}
accumulator->Callback([this](GPUVAddr addr, size_t size) {
GetSubmappedRangeImpl<false>(addr, size, page_stash);
});
rasterizer->InnerInvalidation(page_stash);
page_stash.clear();
accumulator->Clear();
return result;
}
} // namespace Tegra

View File

@@ -19,10 +19,6 @@ namespace VideoCore {
class RasterizerInterface;
}
namespace VideoCommon {
class InvalidationAccumulator;
}
namespace Core {
class DeviceMemory;
namespace Memory {
@@ -84,7 +80,6 @@ public:
*/
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
/**
* Checks if a gpu region can be simply read with a pointer.
@@ -134,14 +129,12 @@ public:
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
size_t max_size = std::numeric_limits<size_t>::max()) const;
void FlushCaching();
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
template <bool is_safe, bool use_fastmem>
template <bool is_safe>
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const;
@@ -161,12 +154,6 @@ private:
inline bool IsBigPageContinous(size_t big_page_index) const;
inline void SetBigPageContinous(size_t big_page_index, bool value);
template <bool is_gpu_address>
void GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
result) const;
Core::System& system;
Core::Memory::Memory& memory;
Core::DeviceMemory& device_memory;
@@ -214,13 +201,10 @@ private:
Common::VirtualBuffer<u32> big_page_table_cpu;
std::vector<u64> big_page_continous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
u8* fastmem_arena{};
constexpr static size_t continous_bits = 64;
const size_t unique_identifier;
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
static std::atomic<size_t> unique_identifier_generator;
};

View File

@@ -6,7 +6,6 @@
#include <functional>
#include <optional>
#include <span>
#include <utility>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/cache_types.h"
@@ -47,9 +46,6 @@ public:
/// Dispatches an indirect draw invocation
virtual void DrawIndirect() {}
/// Dispatches an draw texture invocation
virtual void DrawTexture() = 0;
/// Clear the current framebuffer
virtual void Clear(u32 layer_count) = 0;
@@ -99,12 +95,6 @@ public:
virtual void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
for (const auto& [cpu_addr, size] : sequences) {
InvalidateRegion(cpu_addr, size);
}
}
/// Notify rasterizer that any caches of the specified region are desync with guest
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;

View File

@@ -21,7 +21,6 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp
RasterizerNull::~RasterizerNull() = default;
void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
void RasterizerNull::DrawTexture() {}
void RasterizerNull::Clear(u32 layer_count) {}
void RasterizerNull::DispatchCompute() {}
void RasterizerNull::ResetCounter(VideoCore::QueryType type) {}

View File

@@ -31,7 +31,6 @@ public:
~RasterizerNull() override;
void Draw(bool is_indexed, u32 instance_count) override;
void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;

View File

@@ -166,7 +166,6 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
has_draw_texture = GLAD_GL_NV_draw_texture;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;

View File

@@ -4,8 +4,6 @@
#pragma once
#include <cstddef>
#include <string>
#include "common/common_types.h"
#include "core/frontend/emu_window.h"
#include "shader_recompiler/stage.h"
@@ -148,10 +146,6 @@ public:
return has_sparse_texture_2;
}
bool HasDrawTexture() const {
return has_draw_texture;
}
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
@@ -222,7 +216,6 @@ private:
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool has_draw_texture{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};

View File

@@ -64,8 +64,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
blit_image(program_manager_) {}
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -319,47 +318,6 @@ void RasterizerOpenGL::DrawIndirect() {
buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerOpenGL::DrawTexture() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
query_cache.UpdateCounters();
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
SyncState();
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
if (device.HasDrawTexture()) {
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0,
draw_texture_state.dst_y0, draw_texture_state.dst_x1,
draw_texture_state.dst_y1, 0,
draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
} else {
Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
.y = static_cast<s32>(draw_texture_state.dst_y0)},
Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
.y = static_cast<s32>(draw_texture_state.dst_y1)}};
Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
.y = static_cast<s32>(draw_texture_state.src_y0)},
Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
.y = static_cast<s32>(draw_texture_state.src_y1)}};
blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
sampler->Handle(), dst_region, src_region, texture.size);
}
++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute() {
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
if (!pipeline) {

View File

@@ -16,7 +16,6 @@
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/blit_image.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
@@ -71,7 +70,6 @@ public:
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -226,8 +224,6 @@ private:
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
BlitImageHelper blit_image;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;

View File

@@ -1,123 +1,2 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL {
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
ProgramManager::ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
}
void ProgramManager::BindComputeProgram(GLuint program) {
glUseProgram(program);
is_compute_bound = true;
}
void ProgramManager::BindComputeAssemblyProgram(GLuint program) {
if (current_assembly_compute_program != program) {
current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
}
UnbindPipeline();
}
void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
static constexpr std::array<GLenum, 5> stage_enums{
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
};
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
}
}
BindPipeline();
}
void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) {
if (current_programs[0] != vertex) {
current_programs[0] = vertex;
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
}
if (current_programs[4] != fragment) {
current_programs[4] = fragment;
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
}
glUseProgramStages(
pipeline.handle,
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
current_programs[1] = 0;
current_programs[2] = 0;
current_programs[3] = 0;
if (current_stage_mask != 0) {
current_stage_mask = 0;
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
glDisable(program_type);
}
}
BindPipeline();
}
void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask) {
const u32 changed_mask = current_stage_mask ^ stage_mask;
current_stage_mask = stage_mask;
if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (((changed_mask >> stage) & 1) != 0) {
if (((stage_mask >> stage) & 1) != 0) {
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
} else {
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
}
}
}
}
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
}
}
UnbindPipeline();
}
void ProgramManager::RestoreGuestCompute() {}
void ProgramManager::BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
glBindProgramPipeline(pipeline.handle);
}
UnbindCompute();
}
void ProgramManager::UnbindPipeline() {
if (is_pipeline_bound) {
is_pipeline_bound = false;
glBindProgramPipeline(0);
}
UnbindCompute();
}
void ProgramManager::UnbindCompute() {
if (is_compute_bound) {
is_compute_bound = false;
glUseProgram(0);
}
}
} // namespace OpenGL

View File

@@ -6,6 +6,8 @@
#include <array>
#include <span>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -14,28 +16,121 @@ namespace OpenGL {
class ProgramManager {
static constexpr size_t NUM_STAGES = 5;
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
public:
explicit ProgramManager(const Device& device);
explicit ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
}
void BindComputeProgram(GLuint program);
void BindComputeProgram(GLuint program) {
glUseProgram(program);
is_compute_bound = true;
}
void BindComputeAssemblyProgram(GLuint program);
void BindComputeAssemblyProgram(GLuint program) {
if (current_assembly_compute_program != program) {
current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
}
UnbindPipeline();
}
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs);
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
static constexpr std::array<GLenum, 5> stage_enums{
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
};
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
}
}
BindPipeline();
}
void BindPresentPrograms(GLuint vertex, GLuint fragment);
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
if (current_programs[0] != vertex) {
current_programs[0] = vertex;
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
}
if (current_programs[4] != fragment) {
current_programs[4] = fragment;
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
}
glUseProgramStages(
pipeline.handle,
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
current_programs[1] = 0;
current_programs[2] = 0;
current_programs[3] = 0;
if (current_stage_mask != 0) {
current_stage_mask = 0;
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
glDisable(program_type);
}
}
BindPipeline();
}
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask);
u32 stage_mask) {
const u32 changed_mask = current_stage_mask ^ stage_mask;
current_stage_mask = stage_mask;
void RestoreGuestCompute();
if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (((changed_mask >> stage) & 1) != 0) {
if (((stage_mask >> stage) & 1) != 0) {
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
} else {
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
}
}
}
}
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
}
}
UnbindPipeline();
}
void RestoreGuestCompute() {}
private:
void BindPipeline();
void BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
glBindProgramPipeline(pipeline.handle);
}
UnbindCompute();
}
void UnbindPipeline();
void UnbindPipeline() {
if (is_pipeline_bound) {
is_pipeline_bound = false;
glBindProgramPipeline(0);
}
UnbindCompute();
}
void UnbindCompute();
void UnbindCompute() {
if (is_compute_bound) {
is_compute_bound = false;
glUseProgram(0);
}
}
OGLPipeline pipeline;
bool is_pipeline_bound{};

View File

@@ -4,13 +4,13 @@
#include <algorithm>
#include "common/settings.h"
#include "video_core/host_shaders/blit_color_float_frag_spv.h"
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
@@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
}
void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
const Region2D& src_region) {
const VkOffset2D offset{
.x = std::min(dst_region.start.x, dst_region.end.x),
.y = std::min(dst_region.start.y, dst_region.end.y),
@@ -325,15 +325,12 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
.offset = offset,
.extent = extent,
};
const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) /
static_cast<float>(src_size.width);
const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) /
static_cast<float>(src_size.height);
const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
const PushConstants push_constants{
.tex_scale = {scale_x, scale_y},
.tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
static_cast<float>(src_region.start.y) /
static_cast<float>(src_size.height)},
.tex_offset = {static_cast<float>(src_region.start.x),
static_cast<float>(src_region.start.y)},
};
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
@@ -368,7 +365,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
@@ -407,30 +404,6 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
scheduler.InvalidateState();
}
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
VkSampler src_sampler, const Region2D& dst_region,
const Region2D& src_region, const Extent3D& src_size) {
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([this, dst_region, src_region, src_size, pipeline, layout, src_sampler,
src_image_view](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
BindBlitState(cmdbuf, layout, dst_region, src_region, src_size);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view,
const Region2D& dst_region, const Region2D& src_region,

View File

@@ -10,8 +10,6 @@
namespace Vulkan {
using VideoCommon::Extent3D;
using VideoCommon::Offset2D;
using VideoCommon::Region2D;
class Device;
@@ -38,10 +36,6 @@ public:
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
VkSampler src_sampler, const Region2D& dst_region, const Region2D& src_region,
const Extent3D& src_size);
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,

View File

@@ -78,8 +78,6 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
return separated_extensions;
}
} // Anonymous namespace
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface) {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
@@ -91,6 +89,7 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
const vk::PhysicalDevice physical_device(devices[device_index], dld);
return Device(*instance, physical_device, surface, dld);
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
@@ -99,7 +98,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug.GetValue())),
Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
@@ -110,9 +109,6 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue()) {
turbo_mode.emplace(instance, dld);
}
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());

View File

@@ -13,7 +13,6 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -32,9 +31,6 @@ class GPU;
namespace Vulkan {
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface);
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -78,7 +74,6 @@ private:
Swapchain swapchain;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
};
} // namespace Vulkan

View File

@@ -336,6 +336,9 @@ void BufferCacheRuntime::Finish() {
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
return;
}
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -394,6 +397,9 @@ void BufferCacheRuntime::PostCopyBarrier() {
}
void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
if (dest_buffer == VK_NULL_HANDLE) {
return;
}
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -473,6 +479,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
});
} else {
if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
ReserveNullBuffer();
buffer = *null_buffer;
offset = 0;
}
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffer(index, buffer, offset);
});

View File

@@ -186,7 +186,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
gpu_memory->FlushCaching();
query_cache.UpdateCounters();
@@ -266,34 +265,6 @@ void RasterizerVulkan::DrawIndirect() {
buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerVulkan::DrawTexture() {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
query_cache.UpdateCounters();
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
UpdateDynamicStates();
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
.y = static_cast<s32>(draw_texture_state.dst_y0)},
Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
.y = static_cast<s32>(draw_texture_state.dst_y1)}};
Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
.y = static_cast<s32>(draw_texture_state.src_y0)},
Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
.y = static_cast<s32>(draw_texture_state.src_y1)}};
blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), sampler->Handle(),
dst_region, src_region, texture.size);
}
void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
@@ -422,7 +393,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
void RasterizerVulkan::DispatchCompute() {
FlushWork();
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
if (!pipeline) {
@@ -511,27 +481,6 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
}
}
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
{
std::scoped_lock lock{texture_cache.mutex};
for (const auto& [addr, size] : sequences) {
texture_cache.WriteMemory(addr, size);
}
}
{
std::scoped_lock lock{buffer_cache.mutex};
for (const auto& [addr, size] : sequences) {
buffer_cache.WriteMemory(addr, size);
}
}
{
for (const auto& [addr, size] : sequences) {
query_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
}
}
}
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;

View File

@@ -66,7 +66,6 @@ public:
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -80,7 +79,6 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;

View File

@@ -148,13 +148,6 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
return slot_image_views[id];
}
template <class P>
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
channel_state->graphics_image_view_ids, index);
return slot_image_views[image_view_id];
}
template <class P>
void TextureCache<P>::MarkModification(ImageId id) noexcept {
MarkModification(slot_images[id]);

View File

@@ -129,9 +129,6 @@ public:
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Get the imageview from the graphics descriptor table in the specified index
[[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;

View File

@@ -1472,7 +1472,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
is_patch_list_restart_supported =
primitive_topology_list_restart.primitiveTopologyPatchListRestart;
}
if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) {
if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
khr_swapchain_mutable_format = true;
@@ -1487,6 +1487,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
max_push_descriptors = push_descriptor.maxPushDescriptors;
}
has_null_descriptor = true;
return extensions;
}

View File

@@ -397,6 +397,10 @@ public:
return must_emulate_bgr565;
}
bool HasNullDescriptor() const {
return has_null_descriptor;
}
u32 GetMaxVertexInputAttributes() const {
return max_vertex_input_attributes;
}
@@ -511,6 +515,7 @@ private:
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool has_null_descriptor{}; ///< Has support for null descriptors.
u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline
u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline

View File

@@ -32,7 +32,7 @@
namespace Vulkan {
namespace {
[[nodiscard]] std::vector<const char*> RequiredExtensions(
Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) {
Core::Frontend::WindowSystemType window_type, bool enable_validation) {
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
@@ -65,7 +65,7 @@ namespace {
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
if (enable_validation) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -95,9 +95,9 @@ namespace {
return true;
}
[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) {
[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) {
std::vector<const char*> layers;
if (enable_layers) {
if (enable_validation) {
layers.push_back("VK_LAYER_KHRONOS_validation");
}
return layers;
@@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const
vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
u32 required_version, Core::Frontend::WindowSystemType window_type,
bool enable_debug_utils, bool enable_layers) {
bool enable_validation) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
@@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils);
const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation);
if (!AreExtensionsSupported(dld, extensions)) {
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
std::vector<const char*> layers = Layers(enable_layers);
std::vector<const char*> layers = Layers(enable_validation);
RemoveUnavailableLayers(dld, layers);
const u32 available_version = vk::AvailableVersion(dld);

View File

@@ -17,8 +17,7 @@ namespace Vulkan {
* @param dld Dispatch table to load function pointers into
* @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1)
* @param window_type Window system type's enabled extension
* @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not
* @param enable_layers Whether to enable Vulkan validation layers or not
* @param enable_validation Whether to enable Vulkan validation layers or not
*
* @return A new Vulkan instance
* @throw vk::Exception on failure
@@ -26,6 +25,6 @@ namespace Vulkan {
[[nodiscard]] vk::Instance CreateInstance(
const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
bool enable_debug_utils = false, bool enable_layers = false);
bool enable_validation = false);
} // namespace Vulkan