early-access version 4028
This commit is contained in:
@@ -58,6 +58,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
|
||||
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
|
||||
}
|
||||
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
|
||||
if (runtime.has_unified_vertex_buffers) {
|
||||
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
|
||||
}
|
||||
}
|
||||
|
||||
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
|
||||
@@ -109,6 +112,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
|
||||
: device{device_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
has_fast_buffer_sub_data{device.HasFastBufferSubData()},
|
||||
use_assembly_shaders{device.UseAssemblyShaders()},
|
||||
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
|
||||
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
|
||||
GLint gl_max_attributes;
|
||||
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
|
||||
@@ -210,8 +214,14 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
|
||||
index_buffer_offset = offset;
|
||||
if (has_unified_vertex_buffers) {
|
||||
buffer.MakeResident(GL_READ_ONLY);
|
||||
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
|
||||
static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
|
||||
} else {
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
|
||||
index_buffer_offset = offset;
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
|
||||
@@ -219,8 +229,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
|
||||
if (index >= max_attributes) {
|
||||
return;
|
||||
}
|
||||
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizei>(stride));
|
||||
if (has_unified_vertex_buffers) {
|
||||
buffer.MakeResident(GL_READ_ONLY);
|
||||
glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
|
||||
buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizei>(stride));
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
|
||||
@@ -233,9 +250,23 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
|
||||
[](u64 stride) { return static_cast<GLsizei>(stride); });
|
||||
const u32 count =
|
||||
std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index);
|
||||
glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(),
|
||||
reinterpret_cast<const GLintptr*>(bindings.offsets.data()),
|
||||
buffer_strides.data());
|
||||
if (has_unified_vertex_buffers) {
|
||||
for (u32 index = 0; index < count; ++index) {
|
||||
Buffer& buffer = *bindings.buffers[index];
|
||||
buffer.MakeResident(GL_READ_ONLY);
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, bindings.min_index + index,
|
||||
buffer.HostGpuAddr() + bindings.offsets[index],
|
||||
static_cast<GLsizeiptr>(bindings.sizes[index]));
|
||||
}
|
||||
static constexpr std::array<size_t, 32> ZEROS{};
|
||||
glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count),
|
||||
reinterpret_cast<const GLuint*>(ZEROS.data()),
|
||||
reinterpret_cast<const GLintptr*>(ZEROS.data()), buffer_strides.data());
|
||||
} else {
|
||||
glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(),
|
||||
reinterpret_cast<const GLintptr*>(bindings.offsets.data()),
|
||||
buffer_strides.data());
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||
|
||||
@@ -209,6 +209,7 @@ private:
|
||||
|
||||
bool has_fast_buffer_sub_data = false;
|
||||
bool use_assembly_shaders = false;
|
||||
bool has_unified_vertex_buffers = false;
|
||||
|
||||
bool use_storage_buffers = false;
|
||||
|
||||
|
||||
@@ -200,6 +200,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
|
||||
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
has_derivative_control = GLAD_GL_ARB_derivative_control;
|
||||
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
|
||||
has_debugging_tool_attached = IsDebugToolAttached(extensions);
|
||||
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
|
||||
has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
|
||||
|
||||
@@ -72,6 +72,10 @@ public:
|
||||
return has_texture_shadow_lod;
|
||||
}
|
||||
|
||||
bool HasVertexBufferUnifiedMemory() const {
|
||||
return has_vertex_buffer_unified_memory;
|
||||
}
|
||||
|
||||
bool HasASTC() const {
|
||||
return has_astc;
|
||||
}
|
||||
@@ -211,6 +215,7 @@ private:
|
||||
bool has_vertex_viewport_layer{};
|
||||
bool has_image_load_formatted{};
|
||||
bool has_texture_shadow_lod{};
|
||||
bool has_vertex_buffer_unified_memory{};
|
||||
bool has_astc{};
|
||||
bool has_variable_aoffi{};
|
||||
bool has_component_indexing_bug{};
|
||||
|
||||
@@ -18,16 +18,27 @@ namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
||||
|
||||
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||
return QueryTargets[static_cast<std::size_t>(type)];
|
||||
switch (type) {
|
||||
case VideoCore::QueryType::SamplesPassed:
|
||||
return GL_SAMPLES_PASSED;
|
||||
case VideoCore::QueryType::PrimitivesGenerated:
|
||||
return GL_PRIMITIVES_GENERATED;
|
||||
case VideoCore::QueryType::TfbPrimitivesWritten:
|
||||
return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Query type {}", type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
|
||||
: QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
|
||||
: QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {
|
||||
EnableCounters();
|
||||
}
|
||||
|
||||
QueryCache::~QueryCache() = default;
|
||||
|
||||
@@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
|
||||
auto& stream = cache->Stream(type);
|
||||
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
||||
if (slice_counter) {
|
||||
stream.Update(false);
|
||||
stream.Disable();
|
||||
}
|
||||
|
||||
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
||||
|
||||
if (slice_counter) {
|
||||
stream.Update(true);
|
||||
stream.Enable();
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
@@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
void oglEnable(GLenum cap, bool state) {
|
||||
(state ? glEnable : glDisable)(cap);
|
||||
}
|
||||
|
||||
std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
|
||||
switch (type) {
|
||||
case VideoCommon::QueryType::PrimitivesGenerated:
|
||||
case VideoCommon::QueryType::VtgPrimitivesOut:
|
||||
return VideoCore::QueryType::PrimitivesGenerated;
|
||||
case VideoCommon::QueryType::ZPassPixelCount64:
|
||||
return VideoCore::QueryType::SamplesPassed;
|
||||
case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
|
||||
// case VideoCommon::QueryType::StreamingByteCount:
|
||||
// TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
|
||||
return VideoCore::QueryType::TfbPrimitivesWritten;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
@@ -162,14 +178,18 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
|
||||
SyncFramebufferSRGB();
|
||||
}
|
||||
if (regs.clear_surface.Z) {
|
||||
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
|
||||
if (regs.zeta_enable != 0) {
|
||||
LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!");
|
||||
}
|
||||
use_depth = true;
|
||||
|
||||
state_tracker.NotifyDepthMask();
|
||||
glDepthMask(GL_TRUE);
|
||||
}
|
||||
if (regs.clear_surface.S) {
|
||||
ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
|
||||
if (regs.zeta_enable) {
|
||||
LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!");
|
||||
}
|
||||
use_stencil = true;
|
||||
}
|
||||
|
||||
@@ -212,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
gpu_memory->FlushCaching();
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
|
||||
if (!pipeline) {
|
||||
@@ -330,7 +349,6 @@ void RasterizerOpenGL::DrawTexture() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
@@ -397,21 +415,28 @@ void RasterizerOpenGL::DispatchCompute() {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
|
||||
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
|
||||
query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed);
|
||||
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
|
||||
if (!query_cache_type.has_value()) {
|
||||
UNIMPLEMENTED_MSG("Reset query type: {}", type);
|
||||
return;
|
||||
}
|
||||
query_cache.ResetCounter(*query_cache_type);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
||||
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
|
||||
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
|
||||
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
|
||||
} else {
|
||||
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
|
||||
}
|
||||
return;
|
||||
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
|
||||
if (!query_cache_type.has_value()) {
|
||||
return QueryFallback(gpu_addr, type, flags, payload, subreport);
|
||||
}
|
||||
const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
|
||||
const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
|
||||
query_cache.Query(gpu_addr, *query_cache_type, timestamp);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||
VideoCommon::QueryPropertiesFlags flags, u32 payload,
|
||||
u32 subreport) {
|
||||
if (type != VideoCommon::QueryType::Payload) {
|
||||
payload = 1u;
|
||||
}
|
||||
@@ -1294,15 +1319,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum
|
||||
program->ConfigureTransformFeedback();
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry));
|
||||
UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
|
||||
|
||||
// We may have to call BeginTransformFeedbackNV here since they seem to call different
|
||||
// implementations on Nvidia's driver (the pointer is different) but we are using
|
||||
// ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
|
||||
// extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
glBeginTransformFeedback(primitive_mode);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::EndTransformFeedback() {
|
||||
|
||||
@@ -225,6 +225,9 @@ private:
|
||||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
const Device& device;
|
||||
|
||||
@@ -168,6 +168,14 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
|
||||
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
|
||||
}
|
||||
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
|
||||
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
|
||||
&vertex_buffer_address);
|
||||
}
|
||||
}
|
||||
|
||||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
@@ -667,7 +675,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||
offsetof(ScreenRectVertex, tex_coord));
|
||||
glVertexAttribBinding(PositionLocation, 0);
|
||||
glVertexAttribBinding(TexCoordLocation, 0);
|
||||
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
|
||||
sizeof(vertices));
|
||||
} else {
|
||||
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
||||
}
|
||||
|
||||
if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
|
||||
glBindSampler(0, present_sampler.handle);
|
||||
|
||||
Reference in New Issue
Block a user