early-access version 1866

This commit is contained in:
pineappleEA
2021-07-09 23:54:15 +02:00
parent 335eeff822
commit 7d21887d40
469 changed files with 201995 additions and 78488 deletions

View File

@@ -8,7 +8,6 @@
#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
/// Reads one 32-bit word from a constant buffer of the current compute launch.
/// @param stage        Shader stage issuing the read; asserted to be ShaderType::Compute.
/// @param const_buffer Index into launch_description.const_buffer_config.
/// @param offset       Offset added to the selected buffer's address.
/// @return The u32 value read through the memory manager at that address.
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
    ASSERT(stage == ShaderType::Compute);
    const auto& buffer = launch_description.const_buffer_config[const_buffer];
    // Use the typed Read<u32> accessor, as AccessBindlessSampler does, instead of
    // memcpy-ing from the result of GetPointer, which was never checked before use.
    return memory_manager.Read<u32>(buffer.Address() + offset);
}
/// Looks up a sampler in the bound texture constant buffer (regs.tex_cb_index).
/// The slot index is scaled by the size of a texture handle before delegating to
/// AccessBindlessSampler.
SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
    const u64 scaled_offset = offset * sizeof(Texture::TextureHandle);
    return AccessBindlessSampler(stage, regs.tex_cb_index, scaled_offset);
}
/// Reads a 32-bit texture handle from the given launch constant buffer at `offset`
/// and resolves it to a sampler descriptor. Only the compute stage is accepted.
SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                       u64 offset) const {
    ASSERT(stage == ShaderType::Compute);
    const GPUVAddr handle_address =
        launch_description.const_buffer_config[const_buffer].Address() + offset;
    const u32 raw_handle = memory_manager.Read<u32>(handle_address);
    return AccessSampler(raw_handle);
}
/// Builds a sampler descriptor from a raw texture handle: the TIC entry supplies
/// the descriptor, and the TSC entry's depth-compare flag sets is_shadow.
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
    const Texture::TextureHandle decoded{handle};
    const Texture::TICEntry tic_entry = GetTICEntry(decoded.tic_id);
    const Texture::TSCEntry tsc_entry = GetTSCEntry(decoded.tsc_id);

    SamplerDescriptor descriptor = SamplerDescriptor::FromTIC(tic_entry);
    const auto depth_compare = tsc_entry.depth_compare_enabled.Value();
    descriptor.is_shadow.Assign(depth_compare);
    return descriptor;
}
/// Returns the guest driver profile, forwarded from the rasterizer.
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
    VideoCore::GuestDriverProfile& profile = rasterizer->AccessGuestDriverProfile();
    return profile;
}
/// Read-only overload: returns the guest driver profile, forwarded from the rasterizer.
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
    const VideoCore::GuestDriverProfile& profile = rasterizer->AccessGuestDriverProfile();
    return profile;
}
// Handles a compute launch: loads the launch description from GPU memory,
// trace-logs the resulting code address and asks the rasterizer to dispatch.
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
// Fill launch_description with NUM_LAUNCH_PARAMETERS 32-bit words read from launch_desc_loc.
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
// Code address = code base register address + program_start from the launch description.
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
// NOTE(review): two DispatchCompute calls with different arity appear back to back here;
// this looks like the before/after lines of a diff rather than two real calls - confirm.
rasterizer->DispatchCompute(code_addr);
rasterizer->DispatchCompute();
}
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {

View File

@@ -10,10 +10,8 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
@@ -40,7 +38,7 @@ namespace Tegra::Engines {
// Converts a field of KeplerCompute::Regs into a register index: the field's byte
// offset within the struct divided by the size of one u32 word.
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
class KeplerCompute final : public EngineInterface {
public:
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
~KeplerCompute();
@@ -209,23 +207,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
SamplerDescriptor AccessSampler(u32 handle) const override;
/// Returns the index of the bound texture constant buffer (regs.tex_cb_index).
u32 GetBoundBuffer() const override {
    const u32 bound_index = regs.tex_cb_index;
    return bound_index;
}
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
private:
void ProcessLaunch();

View File

@@ -8,7 +8,6 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer->Clear();
}
/// Reads one 32-bit word from a constant buffer of a graphics shader stage.
/// The compute stage is rejected by assertion. `offset` is added to the address of
/// constant buffer `const_buffer` in the stage's state.
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
    ASSERT(stage != ShaderType::Compute);
    const auto stage_index = static_cast<std::size_t>(stage);
    const auto& selected_buffer = state.shader_stages[stage_index].const_buffers[const_buffer];
    const auto value = memory_manager.Read<u32>(selected_buffer.address + offset);
    return value;
}
/// Looks up a sampler in the bound texture constant buffer (regs.tex_cb_index).
/// The slot index is scaled by the size of a texture handle before delegating to
/// AccessBindlessSampler.
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
    const u64 scaled_offset = offset * sizeof(Texture::TextureHandle);
    return AccessBindlessSampler(stage, regs.tex_cb_index, scaled_offset);
}
/// Reads a 32-bit texture handle from constant buffer `const_buffer` of the given
/// graphics stage at `offset` and resolves it to a sampler descriptor. The compute
/// stage is rejected by assertion.
SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                   u64 offset) const {
    ASSERT(stage != ShaderType::Compute);
    const auto stage_index = static_cast<std::size_t>(stage);
    const auto& handle_buffer = state.shader_stages[stage_index].const_buffers[const_buffer];
    const GPUVAddr handle_address = handle_buffer.address + offset;
    const u32 raw_handle = memory_manager.Read<u32>(handle_address);
    return AccessSampler(raw_handle);
}
/// Builds a sampler descriptor from a raw texture handle: the TIC entry supplies
/// the descriptor, and the TSC entry's depth-compare flag sets is_shadow.
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
    const Texture::TextureHandle decoded{handle};
    const Texture::TICEntry tic_entry = GetTICEntry(decoded.tic_id);
    const Texture::TSCEntry tsc_entry = GetTSCEntry(decoded.tsc_id);

    SamplerDescriptor descriptor = SamplerDescriptor::FromTIC(tic_entry);
    const auto depth_compare = tsc_entry.depth_compare_enabled.Value();
    descriptor.is_shadow.Assign(depth_compare);
    return descriptor;
}
/// Returns the guest driver profile, forwarded from the rasterizer.
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
    VideoCore::GuestDriverProfile& profile = rasterizer->AccessGuestDriverProfile();
    return profile;
}
/// Read-only overload: returns the guest driver profile, forwarded from the rasterizer.
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
    const VideoCore::GuestDriverProfile& profile = rasterizer->AccessGuestDriverProfile();
    return profile;
}
} // namespace Tegra::Engines

View File

@@ -17,11 +17,9 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/macro/macro.h"
#include "video_core/textures/texture.h"
@@ -49,7 +47,7 @@ namespace Tegra::Engines {
// Converts a field of Maxwell3D::Regs into a register index: the field's byte
// offset within the struct divided by the size of one u32 word.
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
class Maxwell3D final : public EngineInterface {
public:
explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
~Maxwell3D();
@@ -307,10 +305,6 @@ public:
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
}
/// Reports the value of the `constant` field as a bool.
bool IsConstant() const {
    const bool is_constant = constant;
    return is_constant;
}
/// True unless `size` holds the Size::Invalid marker.
bool IsValid() const {
    const bool has_valid_size = size != Size::Invalid;
    return has_valid_size;
}
@@ -912,7 +906,11 @@ public:
u32 fill_rectangle;
INSERT_PADDING_WORDS_NOINIT(0x8);
INSERT_PADDING_WORDS_NOINIT(0x2);
u32 conservative_raster_enable;
INSERT_PADDING_WORDS_NOINIT(0x5);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
@@ -959,7 +957,11 @@ public:
SamplerIndex sampler_index;
INSERT_PADDING_WORDS_NOINIT(0x25);
INSERT_PADDING_WORDS_NOINIT(0x2);
std::array<u32, 8> gp_passthrough_mask;
INSERT_PADDING_WORDS_NOINIT(0x1B);
u32 depth_test_enable;
@@ -1152,7 +1154,11 @@ public:
u32 index;
} primitive_restart;
INSERT_PADDING_WORDS_NOINIT(0x5F);
INSERT_PADDING_WORDS_NOINIT(0xE);
u32 provoking_vertex_last;
INSERT_PADDING_WORDS_NOINIT(0x50);
struct {
u32 start_addr_high;
@@ -1424,23 +1430,6 @@ public:
void FlushMMEInlineDraw();
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
SamplerDescriptor AccessSampler(u32 handle) const override;
/// Returns the index of the bound texture constant buffer (regs.tex_cb_index).
u32 GetBoundBuffer() const override {
    const u32 bound_index = regs.tex_cb_index;
    return bound_index;
}
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
/// Reports the current value of the execute_on flag.
bool ShouldExecute() const {
    const bool should_run = execute_on;
    return should_run;
}
@@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8);
// Each entry pairs a Regs field with its expected register index (second argument).
// NOTE(review): ASSERT_REG_POSITION is defined outside this view; presumably a
// compile-time layout check - confirm.
// conservative_raster_enable (0x452) sits between fill_rectangle and vertex_attrib_format.
ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(conservative_raster_enable, 0x452);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
@@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a);
// Expected register indices for the zeta dimensions through depth_write_enabled.
// gp_passthrough_mask (0x490) sits between sampler_index and depth_test_enable.
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_depth, 0x48c);
ASSERT_REG_POSITION(sampler_index, 0x48D);
ASSERT_REG_POSITION(gp_passthrough_mask, 0x490);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581);
// Expected register indices for code_address through instanced_arrays.
// provoking_vertex_last (0x5A1) sits between primitive_restart and index_array.
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);

View File

@@ -99,7 +99,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
// Optimized path for micro copies.
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
regs.src_params.height > GOB_SIZE_Y) {
FastCopyBlockLinearToPitch();
return;
}