early-access version 3252

This commit is contained in:
pineappleEA
2022-12-25 07:12:57 +01:00
parent eb371309f1
commit 88a4a2d4aa
70 changed files with 2515 additions and 508 deletions

View File

@@ -40,6 +40,7 @@ struct GraphicsPipelineKey {
BitField<6, 2, Maxwell::Tessellation::DomainType> tessellation_primitive;
BitField<8, 2, Maxwell::Tessellation::Spacing> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
BitField<11, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
};
std::array<u32, 3> padding;
VideoCommon::TransformFeedbackState xfb_state;

View File

@@ -202,7 +202,8 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
++num_queued_commands;
}
void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
template <typename Func>
void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -226,48 +227,97 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
const GLsizei num_instances = static_cast<GLsizei>(instance_count);
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
} else if (base_vertex == 0 && base_instance == 0) {
glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
} else if (base_vertex == 0) {
glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
num_instances, base_instance);
} else if (base_instance == 0) {
glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
num_instances, base_vertex);
} else {
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
offset, num_instances, base_vertex,
base_instance);
}
} else {
const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
} else {
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
num_instances, base_instance);
}
}
draw_func(primitive_mode);
EndTransformFeedback();
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count](GLenum primitive_mode) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
const GLsizei num_instances = static_cast<GLsizei>(instance_count);
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
} else if (base_vertex == 0 && base_instance == 0) {
glDrawElementsInstanced(primitive_mode, num_vertices, format, offset,
num_instances);
} else if (base_vertex == 0) {
glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
num_instances, base_instance);
} else if (base_instance == 0) {
glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
num_instances, base_vertex);
} else {
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
offset, num_instances, base_vertex,
base_instance);
}
} else {
const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
} else {
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
num_instances, base_instance);
}
}
});
}
void RasterizerOpenGL::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
buffer_cache.SetDrawIndirect(&params);
PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) {
const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
const GLvoid* const gl_offset =
reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset));
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer->Handle());
if (params.include_count) {
const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
glBindBuffer(GL_PARAMETER_BUFFER, draw_buffer->Handle());
if (params.is_indexed) {
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
glMultiDrawElementsIndirectCount(primitive_mode, format, gl_offset,
static_cast<GLintptr>(offset_base),
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
} else {
glMultiDrawArraysIndirectCount(primitive_mode, gl_offset,
static_cast<GLintptr>(offset_base),
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
}
return;
}
if (params.is_indexed) {
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
glMultiDrawElementsIndirect(primitive_mode, format, gl_offset,
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
} else {
glMultiDrawArraysIndirect(primitive_mode, gl_offset,
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
}
});
buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerOpenGL::DispatchCompute() {
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
if (!pipeline) {
@@ -302,46 +352,60 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
{
if (bool(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
{
if ((bool(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
query_cache.FlushRegion(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.IsRegionGpuModified(addr, size);
if ((bool(which & VideoCommon::CacheType::QueryCache))) {
query_cache.FlushRegion(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if ((bool(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.IsRegionGpuModified(addr, size)) {
return true;
}
}
if (!Settings::IsGPULevelHigh()) {
return false;
}
if (bool(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
return texture_cache.IsRegionGpuModified(addr, size);
}
return false;
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
{
if (bool(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
{
if (bool(which & VideoCommon::CacheType::BufferCache)) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
if (bool(which & VideoCommon::CacheType::ShaderCache)) {
shader_cache.InvalidateRegion(addr, size);
}
if (bool(which & VideoCommon::CacheType::QueryCache)) {
query_cache.InvalidateRegion(addr, size);
}
}
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
@@ -408,11 +472,12 @@ void RasterizerOpenGL::ReleaseFences() {
fence_manager.WaitPendingFences();
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which) {
if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size);
FlushRegion(addr, size, which);
}
InvalidateRegion(addr, size);
InvalidateRegion(addr, size, which);
}
void RasterizerOpenGL::WaitForIdle() {
@@ -460,6 +525,21 @@ void RasterizerOpenGL::TickFrame() {
}
}
bool RasterizerOpenGL::AccelerateConditionalRendering() {
if (Settings::IsGPULevelHigh()) {
// Reimplement Host conditional rendering.
return false;
}
// Medium / Low Hack: stub any checks on queries writen into the buffer cache.
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
VideoCommon::CacheType::BufferCache)) {
return true;
}
return false;
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -481,7 +561,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
}
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}

View File

@@ -69,6 +69,7 @@ public:
~RasterizerOpenGL() override;
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -76,9 +77,12 @@ public:
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
bool MustFlushRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
@@ -88,12 +92,14 @@ public:
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(
VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateConditionalRendering() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -121,6 +127,9 @@ private:
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
template <typename Func>
void PrepareDraw(bool is_indexed, Func&&);
/// Syncs state to match guest's
void SyncState();

View File

@@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context;
constexpr u32 CACHE_VERSION = 7;
constexpr u32 CACHE_VERSION = 8;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -350,6 +350,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
regs.tessellation.params.output_primitives.Value() ==
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
graphics_key.xfb_enabled.Assign(regs.transform_feedback_enabled != 0 ? 1 : 0);
graphics_key.app_stage.Assign(maxwell3d->engine_state);
if (graphics_key.xfb_enabled) {
SetXfbState(graphics_key.xfb_state, regs);
}