early-access version 1836

This commit is contained in:
pineappleEA
2021-07-01 04:47:14 +02:00
parent edd40a1d22
commit 9d473deb01
50 changed files with 1068 additions and 837 deletions

View File

@@ -7,6 +7,10 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
using VideoCore::Surface::BytesPerBlock;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
namespace Tegra::Engines {
@@ -49,7 +53,7 @@ void Fermi2D::Blit() {
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
const auto& args = regs.pixels_from_memory;
const Config config{
Config config{
.operation = regs.operation,
.filter = args.sample_mode.filter,
.dst_x0 = args.dst_x0,
@@ -61,7 +65,19 @@ void Fermi2D::Blit() {
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
s32 src_address_offset = 0;
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(regs.src.format));
if (regs.src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
regs.src.width == config.src_x1 &&
config.src_x1 > static_cast<s32>(regs.src.pitch / bytes_per_pixel) && config.src_x0 > 0) {
src_address_offset = config.src_x0 * bytes_per_pixel;
regs.src.width -= config.src_x0;
config.src_x1 -= config.src_x0;
config.src_x0 = 0;
}
if (!rasterizer->AccelerateSurfaceCopy(regs.src, src_address_offset, regs.dst, config)) {
UNIMPLEMENTED();
}
}

View File

@@ -18,7 +18,10 @@ set(SHADER_FILES
vulkan_uint8.comp
)
# Locate the glslangValidator executable; configuration stops with a fatal error when the
# lookup result is GLSLANGVALIDATOR-NOTFOUND.
# NOTE(review): the two find_program lines below (with and without REQUIRED) look like the
# removed/added pair of a diff hunk - confirm which one belongs in the file. The explicit
# NOTFOUND check presumably stands in for REQUIRED on CMake versions older than 3.18.
find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
find_program(GLSLANGVALIDATOR "glslangValidator")
if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND")
message(FATAL_ERROR "Required program `glslangValidator` not found.")
endif()
set(GLSL_FLAGS "")
set(QUIET_FLAG "--quiet")

View File

@@ -117,7 +117,8 @@ public:
/// Attempt to use a faster method to perform a surface copy.
/// Takes the source surface, a signed source address offset, the destination surface and the
/// blit configuration. This default implementation ignores all of them and returns false.
// NOTE(review): two variants of the leading parameter line appear below (src/dst on one line
// versus src plus src_address_offset followed by dst); this looks like the removed and added
// lines of a diff hunk - confirm which one belongs in the file.
[[nodiscard]] virtual bool AccelerateSurfaceCopy(
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
return false;
}

View File

@@ -705,11 +705,12 @@ void RasterizerOpenGL::TickFrame() {
}
/// Performs a Fermi2D surface copy by forwarding it to the texture cache's BlitImage.
/// The texture cache mutex is held for the duration of the call; the function always
/// returns true.
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
s32 src_address_offset,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
// Lock the texture cache before touching it.
std::scoped_lock lock{texture_cache.mutex};
// NOTE(review): the two BlitImage calls below differ only in the src_address_offset argument
// and look like the removed/added pair of a diff hunk - confirm that only one is intended.
texture_cache.BlitImage(dst, src, copy_config);
texture_cache.BlitImage(dst, src, src_address_offset, copy_config);
return true;
}

View File

@@ -92,7 +92,7 @@ public:
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
// Override of AccelerateSurfaceCopy taking the source surface, destination surface and blit
// configuration.
// NOTE(review): the first two lines below are two variants of the same declaration start
// (without and with src_address_offset), apparently the removed/added pair of a diff hunk -
// confirm which one belongs in the file.
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,

View File

@@ -103,7 +103,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
gpu(gpu_),
library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug)),
true, Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)),

View File

@@ -665,10 +665,11 @@ void RasterizerVulkan::TickFrame() {
}
/// Performs a Fermi2D surface copy by forwarding it to the texture cache's BlitImage.
/// The texture cache mutex is held for the duration of the call; the function always
/// returns true.
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
s32 src_address_offset,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
// Lock the texture cache before touching it.
std::scoped_lock lock{texture_cache.mutex};
// NOTE(review): the two BlitImage calls below differ only in the src_address_offset argument
// and look like the removed/added pair of a diff hunk - confirm that only one is intended.
texture_cache.BlitImage(dst, src, copy_config);
texture_cache.BlitImage(dst, src, src_address_offset, copy_config);
return true;
}

View File

@@ -84,7 +84,7 @@ public:
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
// Override of AccelerateSurfaceCopy taking the source surface, destination surface and blit
// configuration.
// NOTE(review): the first two lines below are two variants of the same declaration start
// (without and with src_address_offset), apparently the removed/added pair of a diff hunk -
// confirm which one belongs in the file.
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,

View File

@@ -158,10 +158,8 @@ public:
/// Blit an image with the given parameters
// NOTE(review): two parameter tails are interleaved below - one taking `copy` plus two optional
// Region2D overrides, the other taking `src_address_offset` followed by `copy`. They look like
// the removed and added lines of a diff hunk; confirm which signature belongs in the file.
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy,
std::optional<Region2D> src_region_override = {},
std::optional<Region2D> dst_region_override = {});
const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
const Tegra::Engines::Fermi2D::Config& copy);
/// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations.
@@ -269,7 +267,8 @@ private:
/// Return a blit image pair from the given guest blit parameters
// NOTE(review): the declaration below is closed twice - once after `src` alone and once after
// `src` plus `src_address_offset`. This looks like the removed and added lines of a diff hunk;
// confirm which signature belongs in the file.
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src);
const Tegra::Engines::Fermi2D::Surface& src,
s32 src_address_offset);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
@@ -759,11 +758,9 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
template <class P>
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy,
std::optional<Region2D> src_override,
std::optional<Region2D> dst_override) {
const BlitImages images = GetBlitImages(dst, src);
const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
const Tegra::Engines::Fermi2D::Config& copy) {
const BlitImages images = GetBlitImages(dst, src, src_address_offset);
const ImageId dst_id = images.dst_id;
const ImageId src_id = images.src_id;
PrepareImage(src_id, false, false);
@@ -778,42 +775,20 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
// out of bounds texture blit checking
const bool use_override = src_override.has_value();
const s32 src_x0 = copy.src_x0 >> src_samples_x;
s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
const s32 src_y0 = copy.src_y0 >> src_samples_y;
const s32 src_y1 = copy.src_y1 >> src_samples_y;
const auto src_width = static_cast<s32>(src_image.info.size.width);
const bool width_oob = src_x1 > src_width;
const auto width_diff = width_oob ? src_x1 - src_width : 0;
if (width_oob) {
src_x1 = src_width;
}
const Region2D src_dimensions{
Offset2D{.x = src_x0, .y = src_y0},
Offset2D{.x = src_x1, .y = src_y1},
const Region2D src_region{
Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
};
const auto src_region = use_override ? *src_override : src_dimensions;
const std::optional src_base = src_image.TryFindBase(src.Address());
const std::optional src_base = src_image.TryFindBase(src.Address() + src_address_offset);
const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
const s32 dst_x0 = copy.dst_x0 >> dst_samples_x;
const s32 dst_x1 = copy.dst_x1 >> dst_samples_x;
const s32 dst_y0 = copy.dst_y0 >> dst_samples_y;
const s32 dst_y1 = copy.dst_y1 >> dst_samples_y;
const Region2D dst_dimensions{
Offset2D{.x = dst_x0, .y = dst_y0},
Offset2D{.x = dst_x1 - width_diff, .y = dst_y1},
const Region2D dst_region{
Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
};
const auto dst_region = use_override ? *dst_override : dst_dimensions;
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -830,21 +805,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
copy.operation);
}
if (width_oob) {
// Continue copy of the oob region of the texture on the next row
auto oob_src = src;
oob_src.height++;
const Region2D src_region_override{
Offset2D{.x = 0, .y = src_y0 + 1},
Offset2D{.x = width_diff, .y = src_y1 + 1},
};
const Region2D dst_region_override{
Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
Offset2D{.x = dst_x1, .y = dst_y1},
};
BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
}
}
template <class P>
@@ -1236,10 +1196,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
template <class P>
typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
s32 src_address_offset) {
static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
const GPUVAddr dst_addr = dst.Address();
const GPUVAddr src_addr = src.Address();
const GPUVAddr src_addr = src.Address() + src_address_offset;
ImageInfo dst_info(dst);
ImageInfo src_info(src);
ImageId dst_id;