early-access version 3252

This commit is contained in:
pineappleEA
2022-12-25 07:12:57 +01:00
parent eb371309f1
commit 88a4a2d4aa
70 changed files with 2515 additions and 508 deletions

View File

@@ -91,6 +91,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
ProcessDraw(true, num_instances);
}
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
draw_state.topology = topology;
ProcessDrawIndirect(true);
}
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
u32 index_count) {
const auto& regs{maxwell3d->regs};
draw_state.topology = topology;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count;
ProcessDrawIndirect(true);
}
void DrawManager::SetInlineIndexBuffer(u32 index) {
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
@@ -198,4 +215,15 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
}
}
void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
UpdateTopology();
if (maxwell3d->ShouldExecute()) {
maxwell3d->rasterizer->DrawIndirect();
}
}
} // namespace Tegra::Engines

View File

@@ -32,6 +32,16 @@ public:
std::vector<u8> inline_index_draw_indexes;
};
struct IndirectParams {
bool is_indexed;
bool include_count;
GPUVAddr count_start_address;
GPUVAddr indirect_start_address;
size_t buffer_size;
size_t max_draw_counts;
size_t stride;
};
explicit DrawManager(Maxwell3D* maxwell_3d);
void ProcessMethodCall(u32 method, u32 argument);
@@ -46,10 +56,22 @@ public:
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);
void DrawArrayIndirect(PrimitiveTopology topology);
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
const State& GetDrawState() const {
return draw_state;
}
IndirectParams& GetIndirectParams() {
return indirect_state;
}
const IndirectParams& GetIndirectParams() const {
return indirect_state;
}
private:
void SetInlineIndexBuffer(u32 index);
@@ -63,7 +85,10 @@ private:
void ProcessDraw(bool draw_indexed, u32 instance_count);
void ProcessDrawIndirect(bool draw_indexed);
Maxwell3D* maxwell3d{};
State draw_state{};
IndirectParams indirect_state{};
};
} // namespace Tegra::Engines

View File

@@ -3,6 +3,10 @@
#pragma once
#include <bitset>
#include <limits>
#include <vector>
#include "common/common_types.h"
namespace Tegra::Engines {
@@ -17,6 +21,26 @@ public:
/// Write multiple values to the register identified by method.
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) = 0;
void ConsumeSink() {
if (method_sink.empty()) {
return;
}
ConsumeSinkImpl();
}
std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
std::vector<std::pair<u32, u32>> method_sink{};
bool current_dirty{};
GPUVAddr current_dma_segment;
protected:
virtual void ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
CallMethod(method, value, true);
}
method_sink.clear();
}
};
} // namespace Tegra::Engines

View File

@@ -25,6 +25,9 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
execution_mask.reset();
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
}
Fermi2D::~Fermi2D() = default;
@@ -49,6 +52,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
}
}
void Fermi2D::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void Fermi2D::Blit() {
MICROPROFILE_SCOPE(GPU_BlitEngine);
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",

View File

@@ -309,6 +309,8 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
void Blit();
void ConsumeSinkImpl() override;
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@@ -14,7 +14,12 @@
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
execution_mask.reset();
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
}
KeplerCompute::~KeplerCompute() = default;
@@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
upload_state.BindRasterizer(rasterizer);
}
void KeplerCompute::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");

View File

@@ -204,6 +204,8 @@ public:
private:
void ProcessLaunch();
void ConsumeSinkImpl() override;
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;

View File

@@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
upload_state.BindRasterizer(rasterizer_);
execution_mask.reset();
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
}
void KeplerMemory::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {

View File

@@ -73,6 +73,8 @@ public:
} regs{};
private:
void ConsumeSinkImpl() override;
Core::System& system;
Upload::State upload_state;
};

View File

@@ -4,6 +4,8 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
@@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
execution_mask.reset();
for (size_t i = 0; i < execution_mask.size(); i++) {
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
}
}
Maxwell3D::~Maxwell3D() = default;
@@ -121,6 +127,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
shadow_state = regs;
}
bool Maxwell3D::IsMethodExecutable(u32 method) {
if (method >= MacroRegistersStart) {
return true;
}
switch (method) {
case MAXWELL3D_REG_INDEX(draw.end):
case MAXWELL3D_REG_INDEX(draw.begin):
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
case MAXWELL3D_REG_INDEX(index_buffer.first):
case MAXWELL3D_REG_INDEX(index_buffer.count):
case MAXWELL3D_REG_INDEX(draw_inline_index):
case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer32_first):
case MAXWELL3D_REG_INDEX(index_buffer16_first):
case MAXWELL3D_REG_INDEX(index_buffer8_first):
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
case MAXWELL3D_REG_INDEX(wait_for_idle):
case MAXWELL3D_REG_INDEX(shadow_ram_control):
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
case MAXWELL3D_REG_INDEX(load_mme.instruction):
case MAXWELL3D_REG_INDEX(load_mme.start_address):
case MAXWELL3D_REG_INDEX(falcon[4]):
case MAXWELL3D_REG_INDEX(const_buffer.buffer):
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
case MAXWELL3D_REG_INDEX(topology_override):
case MAXWELL3D_REG_INDEX(clear_surface):
case MAXWELL3D_REG_INDEX(report_semaphore.query):
case MAXWELL3D_REG_INDEX(render_enable.mode):
case MAXWELL3D_REG_INDEX(clear_report_value):
case MAXWELL3D_REG_INDEX(sync_info):
case MAXWELL3D_REG_INDEX(launch_dma):
case MAXWELL3D_REG_INDEX(inline_data):
case MAXWELL3D_REG_INDEX(fragment_barrier):
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return true;
default:
return false;
}
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
@@ -130,14 +201,75 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
}
macro_params.insert(macro_params.end(), base_start, base_start + amount);
for (size_t i = 0; i < amount; i++) {
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
}
macro_segments.emplace_back(current_dma_segment, amount);
current_macro_dirty |= current_dirty;
current_dirty = false;
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
ConsumeSink();
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
macro_addresses.clear();
macro_segments.clear();
current_macro_dirty = false;
}
}
void Maxwell3D::RefreshParametersImpl() {
if (!Settings::IsGPULevelHigh()) {
return;
}
size_t current_index = 0;
for (auto& segment : macro_segments) {
if (segment.first == 0) {
current_index += segment.second;
continue;
}
memory_manager.ReadBlock(segment.first, &macro_params[current_index],
sizeof(u32) * segment.second);
current_index += segment.second;
}
}
u32 Maxwell3D::GetMaxCurrentVertices() {
u32 num_vertices = 0;
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
const auto& array = regs.vertex_streams[index];
if (array.enable == 0) {
continue;
}
const auto& attribute = regs.vertex_attrib_format[index];
if (attribute.constant) {
num_vertices = std::max(num_vertices, 1U);
continue;
}
const auto& limit = regs.vertex_stream_limits[index];
const GPUVAddr gpu_addr_begin = array.Address();
const GPUVAddr gpu_addr_end = limit.Address() + 1;
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
num_vertices = std::max(
num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value()));
}
return num_vertices;
}
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
constexpr std::array<size_t, 4> max_sizes = {
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
byte_size,
static_cast<size_t>(end_address - start_address));
}
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;
@@ -152,6 +284,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
return argument;
}
void Maxwell3D::ConsumeSinkImpl() {
SCOPE_EXIT({ method_sink.clear(); });
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
for (auto [method, value] : method_sink) {
shadow_state.reg_array[method] = value;
ProcessDirtyRegisters(method, value);
}
return;
}
if (control == Regs::ShadowRamControl::Replay) {
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
}
return;
}
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, value);
}
}
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;
@@ -263,7 +418,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
@@ -383,13 +537,17 @@ void Maxwell3D::ProcessQueryGet() {
void Maxwell3D::ProcessQueryCondition() {
const GPUVAddr condition_address{regs.render_enable.Address()};
switch (regs.render_enable_override) {
case Regs::RenderEnable::Override::AlwaysRender:
case Regs::RenderEnable::Override::AlwaysRender: {
execute_on = true;
break;
case Regs::RenderEnable::Override::NeverRender:
execute_on = false;
break;
case Regs::RenderEnable::Override::UseRenderEnable:
case Regs::RenderEnable::Override::UseRenderEnable: {
if (rasterizer->AccelerateConditionalRendering()) {
execute_on = true;
return;
}
switch (regs.render_enable.mode) {
case Regs::RenderEnable::Mode::True: {
execute_on = true;
@@ -427,6 +585,8 @@ void Maxwell3D::ProcessQueryCondition() {
}
break;
}
}
}
}
void Maxwell3D::ProcessCounterReset() {
@@ -463,7 +623,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
}
void Maxwell3D::ProcessCBBind(size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
// stage.
const auto& bind_data = regs.bind_groups[stage_index];
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot];
buffer.enabled = bind_data.valid.Value() != 0;
@@ -524,4 +685,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
const u64 key = (static_cast<u64>(bank) << 32) | offset;
replace_table.emplace(key, name);
}
} // namespace Tegra::Engines

View File

@@ -272,6 +272,7 @@ public:
};
union {
u32 raw;
BitField<0, 1, Mode> mode;
BitField<4, 8, u32> pad;
};
@@ -1217,10 +1218,12 @@ public:
struct Window {
union {
u32 raw_1;
BitField<0, 16, u32> x_min;
BitField<16, 16, u32> x_max;
};
union {
u32 raw_2;
BitField<0, 16, u32> y_min;
BitField<16, 16, u32> y_max;
};
@@ -3020,6 +3023,23 @@ public:
/// Store temporary hw register values, used by some calls to restore state after a operation
Regs shadow_state;
// None Engine
enum class EngineHint : u32 {
None = 0x0,
OnHLEMacro = 0x1,
};
EngineHint engine_state{EngineHint::None};
enum class HLEReplaceName : u32 {
BaseVertex = 0x0,
BaseInstance = 0x1,
};
void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
std::unordered_map<u64, HLEReplaceName> replace_table;
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@@ -3067,6 +3087,33 @@ public:
std::unique_ptr<DrawManager> draw_manager;
friend class DrawManager;
std::vector<u8> inline_index_draw_indexes;
GPUVAddr getMacroAddress(size_t index) const {
return macro_addresses[index];
}
void RefreshParameters() {
if (!current_macro_dirty) {
return;
}
RefreshParametersImpl();
}
bool AnyParametersDirty() {
return current_macro_dirty;
}
u32 GetMaxCurrentVertices();
size_t EstimateIndexBufferSize();
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers(u32 layer_count);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
private:
void InitializeRegisterDefaults();
@@ -3076,6 +3123,8 @@ private:
void ProcessDirtyRegisters(u32 method, u32 argument);
void ConsumeSinkImpl() override;
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
/// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -3120,12 +3169,13 @@ private:
void ProcessCBData(u32 value);
void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
void RefreshParametersImpl();
bool IsMethodExecutable(u32 method);
Core::System& system;
MemoryManager& memory_manager;
@@ -3145,6 +3195,15 @@ private:
Upload::State upload_state;
bool execute_on{true};
std::array<bool, Regs::NUM_REGS> draw_command{};
std::vector<u32> deferred_draw_method;
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
DrawMode draw_mode{DrawMode::General};
bool draw_indexed{};
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
std::vector<GPUVAddr> macro_addresses;
bool current_macro_dirty{};
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@@ -21,7 +21,10 @@ namespace Tegra::Engines {
using namespace Texture;
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_} {}
: system{system_}, memory_manager{memory_manager_} {
execution_mask.reset();
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
}
MaxwellDMA::~MaxwellDMA() = default;
@@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void MaxwellDMA::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");

View File

@@ -231,6 +231,8 @@ private:
void ReleaseSemaphore();
void ConsumeSinkImpl() override;
Core::System& system;
MemoryManager& memory_manager;