remove old files
This commit is contained in:
@@ -1,62 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
class BufferBlock {
|
||||
public:
|
||||
[[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
|
||||
return (cpu_addr < end) && (cpu_addr_end > start);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
|
||||
return cpu_addr <= other_start && other_end <= cpu_addr_end;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::size_t Offset(VAddr in_addr) const {
|
||||
return static_cast<std::size_t>(in_addr - cpu_addr);
|
||||
}
|
||||
|
||||
[[nodiscard]] VAddr CpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
[[nodiscard]] VAddr CpuAddrEnd() const {
|
||||
return cpu_addr_end;
|
||||
}
|
||||
|
||||
void SetCpuAddr(VAddr new_addr) {
|
||||
cpu_addr = new_addr;
|
||||
cpu_addr_end = new_addr + size;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::size_t Size() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
[[nodiscard]] u64 Epoch() const {
|
||||
return epoch;
|
||||
}
|
||||
|
||||
void SetEpoch(u64 new_epoch) {
|
||||
epoch = new_epoch;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
|
||||
SetCpuAddr(cpu_addr_);
|
||||
}
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
VAddr cpu_addr_end{};
|
||||
std::size_t size{};
|
||||
u64 epoch{};
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,33 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/buffer_cache/map_interval.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Seeds the free list with every interval stored in the embedded first chunk.
MapIntervalAllocator::MapIntervalAllocator() {
    Chunk& initial_chunk = first_chunk;
    FillFreeList(initial_chunk);
}

/// Chunks are owned through unique_ptr links, so the default destructor releases them.
MapIntervalAllocator::~MapIntervalAllocator() = default;
|
||||
|
||||
void MapIntervalAllocator::AllocateNewChunk() {
|
||||
*new_chunk = std::make_unique<Chunk>();
|
||||
FillFreeList(**new_chunk);
|
||||
new_chunk = &(*new_chunk)->next;
|
||||
}
|
||||
|
||||
/// Appends a pointer to every interval of `chunk` to the free list, in reverse storage order
/// (so that Allocate(), which pops from the back, hands out the lowest index first).
void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
    free_list.reserve(free_list.size() + chunk.data.size());
    for (auto it = chunk.data.rbegin(); it != chunk.data.rend(); ++it) {
        MapInterval& interval = *it;
        free_list.push_back(&interval);
    }
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,93 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/intrusive/set_hook.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// A mapped CPU address interval [start, end) with its GPU address and bookkeeping flags.
/// The type carries both an intrusive base hook and a member hook.
struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
    MapInterval() = default;

    /// Intentionally implicit: builds an interval that only carries a start address.
    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}

    /// Builds a fully described interval.
    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}

    /// Returns true when [other_start, other_end] lies completely within this interval.
    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
        if (other_start < start) {
            return false;
        }
        return other_end <= end;
    }

    /// Returns true when [other_start, other_end) intersects this interval.
    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
        const bool disjoint = other_end <= start || end <= other_start;
        return !disjoint;
    }

    /// Updates the modified flag together with the tick value associated with it.
    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
        ticks = ticks_;
        is_modified = is_modified_;
    }

    boost::intrusive::set_member_hook<> member_hook_;
    VAddr start = 0;
    VAddr end = 0;
    GPUVAddr gpu_addr = 0;
    u64 ticks = 0;
    bool is_written = false;
    bool is_modified = false;
    bool is_registered = false;
    bool is_memory_marked = false;
    bool is_sync_pending = false;
};
|
||||
|
||||
struct MapIntervalCompare {
|
||||
constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
|
||||
return lhs.start < rhs.start;
|
||||
}
|
||||
};
|
||||
|
||||
class MapIntervalAllocator {
|
||||
public:
|
||||
MapIntervalAllocator();
|
||||
~MapIntervalAllocator();
|
||||
|
||||
MapInterval* Allocate() {
|
||||
if (free_list.empty()) {
|
||||
AllocateNewChunk();
|
||||
}
|
||||
MapInterval* const interval = free_list.back();
|
||||
free_list.pop_back();
|
||||
return interval;
|
||||
}
|
||||
|
||||
void Release(MapInterval* interval) {
|
||||
free_list.push_back(interval);
|
||||
}
|
||||
|
||||
private:
|
||||
struct Chunk {
|
||||
std::unique_ptr<Chunk> next;
|
||||
std::array<MapInterval, 0x8000> data;
|
||||
};
|
||||
|
||||
void AllocateNewChunk();
|
||||
|
||||
void FillFreeList(Chunk& chunk);
|
||||
|
||||
std::vector<MapInterval*> free_list;
|
||||
|
||||
Chunk first_chunk;
|
||||
|
||||
std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,103 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
struct SamplerDescriptor {
|
||||
union {
|
||||
u32 raw = 0;
|
||||
BitField<0, 2, Tegra::Shader::TextureType> texture_type;
|
||||
BitField<2, 3, Tegra::Texture::ComponentType> r_type;
|
||||
BitField<5, 1, u32> is_array;
|
||||
BitField<6, 1, u32> is_buffer;
|
||||
BitField<7, 1, u32> is_shadow;
|
||||
BitField<8, 3, Tegra::Texture::ComponentType> g_type;
|
||||
BitField<11, 3, Tegra::Texture::ComponentType> b_type;
|
||||
BitField<14, 3, Tegra::Texture::ComponentType> a_type;
|
||||
BitField<17, 7, Tegra::Texture::TextureFormat> format;
|
||||
};
|
||||
|
||||
bool operator==(const SamplerDescriptor& rhs) const noexcept {
|
||||
return raw == rhs.raw;
|
||||
}
|
||||
|
||||
bool operator!=(const SamplerDescriptor& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
|
||||
using Tegra::Shader::TextureType;
|
||||
SamplerDescriptor result;
|
||||
|
||||
result.format.Assign(tic.format.Value());
|
||||
result.r_type.Assign(tic.r_type.Value());
|
||||
result.g_type.Assign(tic.g_type.Value());
|
||||
result.b_type.Assign(tic.b_type.Value());
|
||||
result.a_type.Assign(tic.a_type.Value());
|
||||
|
||||
switch (tic.texture_type.Value()) {
|
||||
case Tegra::Texture::TextureType::Texture1D:
|
||||
result.texture_type.Assign(TextureType::Texture1D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture2D:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture3D:
|
||||
result.texture_type.Assign(TextureType::Texture3D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::TextureCubemap:
|
||||
result.texture_type.Assign(TextureType::TextureCube);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture1DArray:
|
||||
result.texture_type.Assign(TextureType::Texture1D);
|
||||
result.is_array.Assign(1);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture2DArray:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
result.is_array.Assign(1);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture1DBuffer:
|
||||
result.texture_type.Assign(TextureType::Texture1D);
|
||||
result.is_buffer.Assign(1);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture2DNoMipmap:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::TextureCubeArray:
|
||||
result.texture_type.Assign(TextureType::TextureCube);
|
||||
result.is_array.Assign(1);
|
||||
return result;
|
||||
default:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
|
||||
|
||||
class ConstBufferEngineInterface {
|
||||
public:
|
||||
virtual ~ConstBufferEngineInterface() = default;
|
||||
virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
|
||||
virtual u32 GetBoundBuffer() const = 0;
|
||||
|
||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||
virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,158 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Shader {
|
||||
|
||||
/// Values stored in the 4-bit output_topology field of the shader header.
enum class OutputTopology : u32 {
    PointList = 1,
    LineStrip = 6,
    TriangleStrip = 7,
};
|
||||
|
||||
/// Two-bit per-component input map entry for pixel shaders; Unused marks a component that is
/// not read.
enum class PixelImap : u8 {
    Unused = 0,
    Constant = 1,
    Perspective = 2,
    ScreenLinear = 3,
};
|
||||
|
||||
// Documentation in:
|
||||
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
|
||||
struct Header {
|
||||
union {
|
||||
BitField<0, 5, u32> sph_type;
|
||||
BitField<5, 5, u32> version;
|
||||
BitField<10, 4, u32> shader_type;
|
||||
BitField<14, 1, u32> mrt_enable;
|
||||
BitField<15, 1, u32> kills_pixels;
|
||||
BitField<16, 1, u32> does_global_store;
|
||||
BitField<17, 4, u32> sass_version;
|
||||
BitField<21, 5, u32> reserved;
|
||||
BitField<26, 1, u32> does_load_or_store;
|
||||
BitField<27, 1, u32> does_fp64;
|
||||
BitField<28, 4, u32> stream_out_mask;
|
||||
} common0;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_low_size;
|
||||
BitField<24, 8, u32> per_patch_attribute_count;
|
||||
} common1;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_high_size;
|
||||
BitField<24, 8, u32> threads_per_input_primitive;
|
||||
} common2;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_crs_size;
|
||||
BitField<24, 4, OutputTopology> output_topology;
|
||||
BitField<28, 4, u32> reserved;
|
||||
} common3;
|
||||
|
||||
union {
|
||||
BitField<0, 12, u32> max_output_vertices;
|
||||
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
|
||||
BitField<20, 4, u32> reserved;
|
||||
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
|
||||
} common4;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
|
||||
INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
|
||||
union {
|
||||
BitField<0, 8, u16> clip_distances;
|
||||
BitField<8, 1, u16> point_sprite_s;
|
||||
BitField<9, 1, u16> point_sprite_t;
|
||||
BitField<10, 1, u16> fog_coordinate;
|
||||
BitField<12, 1, u16> tessellation_eval_point_u;
|
||||
BitField<13, 1, u16> tessellation_eval_point_v;
|
||||
BitField<14, 1, u16> instance_id;
|
||||
BitField<15, 1, u16> vertex_id;
|
||||
};
|
||||
INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
|
||||
INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
|
||||
INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
|
||||
INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
|
||||
} vtg;
|
||||
|
||||
struct {
|
||||
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
|
||||
|
||||
union {
|
||||
BitField<0, 2, PixelImap> x;
|
||||
BitField<2, 2, PixelImap> y;
|
||||
BitField<4, 2, PixelImap> z;
|
||||
BitField<6, 2, PixelImap> w;
|
||||
u8 raw;
|
||||
} imap_generic_vector[32];
|
||||
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
|
||||
INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
|
||||
|
||||
struct {
|
||||
u32 target;
|
||||
union {
|
||||
BitField<0, 1, u32> sample_mask;
|
||||
BitField<1, 1, u32> depth;
|
||||
BitField<2, 30, u32> reserved;
|
||||
};
|
||||
} omap;
|
||||
|
||||
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
|
||||
const u32 bit = render_target * 4 + component;
|
||||
return omap.target & (1 << bit);
|
||||
}
|
||||
|
||||
PixelImap GetPixelImap(u32 attribute) const {
|
||||
const auto get_index = [this, attribute](u32 index) {
|
||||
return static_cast<PixelImap>(
|
||||
(imap_generic_vector[attribute].raw >> (index * 2)) & 3);
|
||||
};
|
||||
|
||||
std::optional<PixelImap> result;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
const PixelImap index = get_index(component);
|
||||
if (index == PixelImap::Unused) {
|
||||
continue;
|
||||
}
|
||||
if (result && result != index) {
|
||||
LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
|
||||
}
|
||||
result = index;
|
||||
}
|
||||
return result.value_or(PixelImap::Unused);
|
||||
}
|
||||
} ps;
|
||||
|
||||
std::array<u32, 0xF> raw;
|
||||
};
|
||||
|
||||
u64 GetLocalMemorySize() const {
|
||||
return (common1.shader_local_memory_low_size |
|
||||
(common2.shader_local_memory_high_size << 24));
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
|
||||
|
||||
} // namespace Tegra::Shader
|
||||
@@ -1,21 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/// Shader stages, numbered consecutively from zero.
enum class ShaderType : u32 {
    Vertex = 0,
    TesselationControl = 1,
    TesselationEval = 2,
    Geometry = 3,
    Fragment = 4,
    Compute = 5,
};

/// Number of enumerators in ShaderType (Vertex through Compute).
static constexpr std::size_t MaxShaderTypes = 6;
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
@@ -1,86 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/hardware_interrupt_manager.h"
|
||||
#include "video_core/gpu_asynch.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Constructs the asynchronous GPU and its worker-thread manager.
/// NOTE(review): the literal `true` is presumably the base class' "is asynchronous" flag
/// (GPUSynch passes `false`) — confirm against Tegra::GPU.
GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_)
    : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {}

GPUAsynch::~GPUAsynch() = default;
|
||||
|
||||
/// Starts the GPU thread and makes a shared graphics context current on the calling thread.
void GPUAsynch::Start() {
    // NOTE(review): cdma_pusher is dereferenced unconditionally here, while PushCommandBuffer
    // treats it as nullable (it resets and lazily recreates it) — confirm it is non-null here.
    auto& render_context = renderer->Context();
    gpu_thread.StartThread(*renderer, render_context, *dma_pusher, *cdma_pusher);

    auto& render_window = renderer->GetRenderWindow();
    cpu_context = render_window.CreateSharedContext();
    cpu_context->MakeCurrent();
}
|
||||
|
||||
void GPUAsynch::ObtainContext() {
|
||||
cpu_context->MakeCurrent();
|
||||
}
|
||||
|
||||
void GPUAsynch::ReleaseContext() {
|
||||
cpu_context->DoneCurrent();
|
||||
}
|
||||
|
||||
/// Hands the command list over to the GPU thread; `entries` is moved from.
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
    gpu_thread.SubmitList(std::move(entries));
}
|
||||
|
||||
/// Feeds a channel command buffer to the CDMA pusher and dispatches it on the calling thread.
/// Does nothing when nvdec is disabled. `entries` is moved from when it is processed.
void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
    // First word sent when a video stream ends.
    constexpr auto stream_end_marker = 0xDEADB33F;

    if (!use_nvdec) {
        return;
    }

    // NOTE(review): entries[0] is read without an emptiness check — confirm callers never
    // pass an empty list.
    const bool stream_ended = entries[0].raw == stream_end_marker;
    if (stream_ended) {
        // Drop the pusher so that all intermediary data is cleared.
        cdma_pusher.reset();
        return;
    }

    // The pusher is (re)created lazily on the first buffer after a reset.
    if (cdma_pusher == nullptr) {
        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
    }

    // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
    // TODO(ameerj): RE proper async nvdec operation
    // gpu_thread.SubmitCommandBuffer(std::move(entries));

    auto& pusher = *cdma_pusher;
    pusher.Push(std::move(entries));
    pusher.DispatchCalls();
}
|
||||
|
||||
/// Forwards the buffer swap request for `framebuffer` to the GPU thread.
void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    gpu_thread.SwapBuffers(framebuffer);
}
|
||||
|
||||
/// Forwards a flush request for `size` bytes starting at `addr` to the GPU thread.
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
    gpu_thread.FlushRegion(addr, size);
}
|
||||
|
||||
/// Forwards an invalidation request for `size` bytes starting at `addr` to the GPU thread.
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
    gpu_thread.InvalidateRegion(addr, size);
}
|
||||
|
||||
/// Forwards a combined flush-and-invalidate request for the given region to the GPU thread.
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    gpu_thread.FlushAndInvalidateRegion(addr, size);
}
|
||||
|
||||
void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
|
||||
auto& interrupt_manager = system.InterruptManager();
|
||||
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
|
||||
}
|
||||
|
||||
/// Delegates to the GPU thread manager's WaitIdle.
void GPUAsynch::WaitIdle() const {
    gpu_thread.WaitIdle();
}
|
||||
|
||||
/// Notifies the GPU thread manager that a command list has ended.
void GPUAsynch::OnCommandListEnd() {
    gpu_thread.OnCommandListEnd();
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,47 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
|
||||
namespace Core::Frontend {
|
||||
class GraphicsContext;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Implementation of GPU interface that runs the GPU asynchronously
|
||||
class GPUAsynch final : public Tegra::GPU {
|
||||
public:
|
||||
explicit GPUAsynch(Core::System& system_, bool use_nvdec_);
|
||||
~GPUAsynch() override;
|
||||
|
||||
void Start() override;
|
||||
void ObtainContext() override;
|
||||
void ReleaseContext() override;
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitIdle() const override;
|
||||
|
||||
void OnCommandListEnd() override;
|
||||
|
||||
protected:
|
||||
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
|
||||
|
||||
private:
|
||||
GPUThread::ThreadManager gpu_thread;
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,61 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/gpu_synch.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Constructs the synchronous GPU.
/// NOTE(review): the literal `false` is presumably the base class' "is asynchronous" flag
/// (GPUAsynch passes `true`) — confirm against Tegra::GPU.
GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_)
    : GPU{system_, false, use_nvdec_} {}

GPUSynch::~GPUSynch() = default;
|
||||
|
||||
/// Nothing to start: the synchronous GPU does all of its work on the calling thread.
void GPUSynch::Start() {}
|
||||
|
||||
/// Makes the renderer's graphics context current on the calling thread.
void GPUSynch::ObtainContext() {
    auto& context = renderer->Context();
    context.MakeCurrent();
}
|
||||
|
||||
/// Releases the renderer's graphics context from the calling thread.
void GPUSynch::ReleaseContext() {
    auto& context = renderer->Context();
    context.DoneCurrent();
}
|
||||
|
||||
/// Queues the command list on the DMA pusher and dispatches it immediately on this thread.
/// `entries` is moved from.
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
    auto& pusher = *dma_pusher;
    pusher.Push(std::move(entries));
    pusher.DispatchCalls();
}
|
||||
|
||||
/// Feeds a channel command buffer to the CDMA pusher and dispatches it on the calling thread.
/// Does nothing when nvdec is disabled. `entries` is moved from when it is processed.
void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
    // First word sent when a video stream ends.
    constexpr auto stream_end_marker = 0xDEADB33F;

    if (!use_nvdec) {
        return;
    }

    // NOTE(review): entries[0] is read without an emptiness check — confirm callers never
    // pass an empty list.
    const bool stream_ended = entries[0].raw == stream_end_marker;
    if (stream_ended) {
        // Drop the pusher so that all intermediary data is cleared.
        cdma_pusher.reset();
        return;
    }

    // The pusher is (re)created lazily on the first buffer after a reset.
    if (cdma_pusher == nullptr) {
        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
    }

    auto& pusher = *cdma_pusher;
    pusher.Push(std::move(entries));
    pusher.DispatchCalls();
}
|
||||
|
||||
/// Asks the renderer directly to swap buffers for `framebuffer`.
void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    renderer->SwapBuffers(framebuffer);
}
|
||||
|
||||
/// Flushes `size` bytes starting at `addr` through the renderer's rasterizer.
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.FlushRegion(addr, size);
}
|
||||
|
||||
/// Invalidates `size` bytes starting at `addr` through the renderer's rasterizer.
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.InvalidateRegion(addr, size);
}
|
||||
|
||||
/// Flushes and invalidates the given region through the renderer's rasterizer.
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.FlushAndInvalidateRegion(addr, size);
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,41 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Core::Frontend {
|
||||
class GraphicsContext;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Implementation of GPU interface that runs the GPU synchronously
|
||||
class GPUSynch final : public Tegra::GPU {
|
||||
public:
|
||||
explicit GPUSynch(Core::System& system_, bool use_nvdec_);
|
||||
~GPUSynch() override;
|
||||
|
||||
void Start() override;
|
||||
void ObtainContext() override;
|
||||
void ReleaseContext() override;
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitIdle() const override {}
|
||||
|
||||
protected:
|
||||
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
|
||||
[[maybe_unused]] u32 value) const override {}
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -1,37 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
|
||||
if (texture_handler_size) {
|
||||
return;
|
||||
}
|
||||
const std::size_t size = bound_offsets.size();
|
||||
if (size < 2) {
|
||||
return;
|
||||
}
|
||||
std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
|
||||
u32 min_val = std::numeric_limits<u32>::max();
|
||||
for (std::size_t i = 1; i < size; ++i) {
|
||||
if (bound_offsets[i] == bound_offsets[i - 1]) {
|
||||
continue;
|
||||
}
|
||||
const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
|
||||
min_val = std::min(min_val, new_min);
|
||||
}
|
||||
if (min_val > 2) {
|
||||
return;
|
||||
}
|
||||
texture_handler_size = min_texture_handler_size * min_val;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
@@ -1,46 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/**
|
||||
 * The GuestDriverProfile class is used to learn about the GPU driver's behavior and to collect
 * the information needed by HLE methods that are impossible to avoid, such as shader tracking,
 * as they are Entscheidungsproblems.
|
||||
*/
|
||||
class GuestDriverProfile {
|
||||
public:
|
||||
explicit GuestDriverProfile() = default;
|
||||
explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
|
||||
: texture_handler_size{texture_handler_size_} {}
|
||||
|
||||
void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
|
||||
|
||||
u32 GetTextureHandlerSize() const {
|
||||
return texture_handler_size.value_or(default_texture_handler_size);
|
||||
}
|
||||
|
||||
bool IsTextureHandlerSizeKnown() const {
|
||||
return texture_handler_size.has_value();
|
||||
}
|
||||
|
||||
private:
|
||||
// Minimum size of texture handler any driver can use.
|
||||
static constexpr u32 min_texture_handler_size = 4;
|
||||
|
||||
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
|
||||
// Thus, certain drivers may squish the size.
|
||||
static constexpr u32 default_texture_handler_size = 8;
|
||||
|
||||
std::optional<u32> texture_handler_size = default_texture_handler_size;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
@@ -1,27 +0,0 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_shader_stencil_export : require
|
||||
|
||||
layout(binding = 0) uniform sampler2D color_texture;
|
||||
|
||||
// Packs a 32-bit float into a small float made of the low 5 bits of the source exponent
// followed by the top `mantissa_bits` bits of the source mantissa.
uint conv_from_float(float value_f, uint mantissa_bits) {
    uint bits = floatBitsToUint(value_f);
    uint exponent = (bits >> 23) & 0x1Fu;
    // Move the 23-bit mantissa to the top of the word, then keep its upper mantissa_bits bits.
    uint top_aligned_mantissa = bits << 9u;
    uint mantissa = top_aligned_mantissa >> (32u - mantissa_bits);
    return (exponent << mantissa_bits) | mantissa;
}
|
||||
|
||||
// Packs the RGB texel under this fragment into a 32-bit word (11/11/10 bit small floats) and
// writes its low 24 bits as normalized depth and its high 8 bits as the stencil reference.
void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);
    vec4 color = texelFetch(color_texture, coord, 0).rgba;

    uint red_bits = conv_from_float(color.r, 6u) << 21;
    uint green_bits = conv_from_float(color.g, 6u) << 10;
    uint blue_bits = conv_from_float(color.b, 5u);
    uint depth_stencil_unorm = red_bits | green_bits | blue_bits;

    uint depth_bits = depth_stencil_unorm & 0x00FFFFFFu;
    uint stencil_bits = depth_stencil_unorm >> 24;

    gl_FragDepth = float(depth_bits) / (exp2(24.0) - 1.0f);
    gl_FragStencilRefARB = int(stencil_bits);
}
|
||||
@@ -1,32 +0,0 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D depth_tex;
|
||||
layout(binding = 1) uniform isampler2D stencil_tex;
|
||||
|
||||
layout(location = 0) out vec4 color;
|
||||
|
||||
// Expands a small float (5-bit exponent above `mantissa_bits` mantissa bits) into a 32-bit
// float bit pattern: the exponent is placed at bit 23 and the mantissa is left-aligned below.
float conv_to_float(uint value, uint mantissa_bits) {
    uint exponent = (value >> mantissa_bits) & 0x1Fu;
    // Shifting up and back down clears everything above the mantissa.
    uint discard = 32u - mantissa_bits;
    uint mantissa = (value << discard) >> discard;
    uint float_bits = (exponent << 23) | (mantissa << (23 - mantissa_bits));
    return uintBitsToFloat(float_bits);
}
|
||||
|
||||
// Combines depth and stencil into one 32-bit word (stencil in the top 8 bits, upper depth
// bits below) and unpacks it as 11/11/10 bit small floats into the RGB output.
void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);

    float depth_sample = textureLod(depth_tex, coord, 0).r;
    uint depth = uint(depth_sample * (exp2(32.0) - 1.0f));
    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
    uint depth_stencil = (stencil << 24) | (depth >> 8);

    uint red_int = (depth_stencil >> 21) & 0x07FF;
    uint green_int = (depth_stencil >> 10) & 0x07FF;
    uint blue_int = depth_stencil & 0x03FF;

    float red = conv_to_float(red_int, 6u);
    float green = conv_to_float(green_int, 6u);
    float blue = conv_to_float(blue_int, 5u);
    color = vec4(red, green, blue, 1.0f);
}
|
||||
@@ -1,22 +0,0 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D depth_tex;
|
||||
layout(binding = 1) uniform isampler2D stencil_tex;
|
||||
|
||||
layout(location = 0) out vec4 color;
|
||||
|
||||
// Combines depth and stencil into one 32-bit word and writes its low and high 16-bit halves
// as normalized red and green; blue is 0 and alpha is 1.
void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);

    float depth_sample = textureLod(depth_tex, coord, 0).r;
    uint depth = uint(depth_sample * (exp2(32.0) - 1.0f));
    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
    uint depth_stencil = (stencil << 24) | (depth >> 8);

    uint low_half = depth_stencil & 0x0000FFFFu;
    uint high_half = depth_stencil >> 16;

    color.r = float(low_half) / (exp2(16) - 1.0);
    color.g = float(high_half) / (exp2(16) - 1.0);
    color.b = 0.0f;
    color.a = 1.0f;
}
|
||||
@@ -1,19 +0,0 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_shader_stencil_export : require
|
||||
|
||||
layout(binding = 0) uniform sampler2D color_texture;
|
||||
|
||||
// Rebuilds a 32-bit word from the normalized red (low 16 bits) and green (high 16 bits)
// channels of the texel under this fragment, then writes its low 24 bits as normalized depth
// and its high 8 bits as the stencil reference.
void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);
    vec4 color = texelFetch(color_texture, coord, 0).rgba;
    // The previously computed `bytes` vector was never read and has been removed.
    uint depth_stencil_unorm =
        uint(color.r * (exp2(16) - 1.0f)) | (uint(color.g * (exp2(16) - 1.0f)) << 16);

    gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f);
    gl_FragStencilRefARB = int(depth_stencil_unorm >> 24);
}
|
||||
@@ -1,24 +0,0 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430 core
|
||||
|
||||
layout (local_size_x = 1, local_size_y = 1) in;
|
||||
|
||||
layout(binding = 0) buffer BgrImage {
|
||||
uint bgr_copy[];
|
||||
};
|
||||
|
||||
// Swaps the 5-bit fields at bits 0-4 and 11-15 of one buffer word in place; the 6-bit field
// at bits 5-10 stays where it is and all bits above bit 15 end up cleared.
void main() {
    const uint row_offset = gl_GlobalInvocationID.y * gl_NumWorkGroups.x;
    const uint index = row_offset + gl_GlobalInvocationID.x;
    const uint packed_bits = bgr_copy[index];

    // R5 G6 B5
    // RRRRRGGG GGGBBBBB
    const uint low_field = bitfieldExtract(packed_bits, 0, 5);
    const uint middle_field = bitfieldExtract(packed_bits, 5, 6);
    const uint high_field = bitfieldExtract(packed_bits, 11, 5);

    bgr_copy[index] = (low_field << 11) | (middle_field << 5) | high_field;
}
|
||||
@@ -1,15 +0,0 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430 core
|
||||
|
||||
layout (local_size_x = 4, local_size_y = 4) in;
|
||||
|
||||
layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
|
||||
layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;
|
||||
|
||||
// Copies one texel from bgr_input to bgr_output with the red and blue channels exchanged.
void main() {
    const ivec3 texel = ivec3(gl_GlobalInvocationID);
    const vec4 source = imageLoad(bgr_input, texel);
    imageStore(bgr_output, texel, source.bgra);
}
|
||||
@@ -1,28 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (local_size_x = 1024) in;
|
||||
|
||||
layout (std430, set = 0, binding = 0) buffer OutputBuffer {
|
||||
uint output_indexes[];
|
||||
};
|
||||
|
||||
layout (push_constant) uniform PushConstants {
|
||||
uint first;
|
||||
};
|
||||
|
||||
// Emits the six triangle-list indices (two triangles) for one quad per invocation.
void main() {
    const uint quad = gl_GlobalInvocationID.x;
    const uint output_base = quad * 6;
    // Invocations past the end of the output buffer have nothing to write
    if (output_base >= output_indexes.length()) {
        return;
    }

    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
    const uint vertex_base = first + quad * 4;
    for (uint corner = 0; corner < 6; ++corner) {
        output_indexes[output_base + corner] = vertex_base + quad_map[corner];
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,29 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
enum class ShaderType : u32;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
class Registry;
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier);
|
||||
|
||||
} // namespace OpenGL
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,69 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using ImageEntry = VideoCommon::Shader::ImageEntry;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
|
||||
: ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
|
||||
|
||||
u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 index = 0;
|
||||
};
|
||||
|
||||
struct GlobalMemoryEntry {
|
||||
constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
|
||||
bool is_written_)
|
||||
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
|
||||
is_written_} {}
|
||||
|
||||
u32 cbuf_index = 0;
|
||||
u32 cbuf_offset = 0;
|
||||
bool is_read = false;
|
||||
bool is_written = false;
|
||||
};
|
||||
|
||||
struct ShaderEntries {
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
std::size_t shader_length{};
|
||||
u32 clip_distances{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage);
|
||||
|
||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier,
|
||||
std::string_view suffix = {});
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -1,482 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/fs/file.h"
|
||||
#include "common/fs/fs.h"
|
||||
#include "common/fs/path_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scm_rev.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/zstd_compression.h"
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
using VideoCommon::Shader::BindlessSamplerMap;
|
||||
using VideoCommon::Shader::BoundSamplerMap;
|
||||
using VideoCommon::Shader::KeyMap;
|
||||
using VideoCommon::Shader::SeparateSamplerKey;
|
||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||
|
||||
struct ConstBufferKey {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
u32 value = 0;
|
||||
};
|
||||
|
||||
struct BoundSamplerEntry {
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct SeparateSamplerEntry {
|
||||
u32 cbuf1 = 0;
|
||||
u32 cbuf2 = 0;
|
||||
u32 offset1 = 0;
|
||||
u32 offset2 = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerEntry {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr u32 NativeVersion = 21;
|
||||
|
||||
/// Builds the fixed-size version tag stored at the start of the precompiled cache: the
/// shader cache version string truncated to the array size and zero-padded.
ShaderCacheVersionHash GetShaderCacheVersionHash() {
    ShaderCacheVersionHash version_hash{};
    const std::size_t version_length = std::strlen(Common::g_shader_cache_version);
    const std::size_t copy_length = std::min(version_length, version_hash.size());
    std::memcpy(version_hash.data(), Common::g_shader_cache_version, copy_length);
    return version_hash;
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
|
||||
|
||||
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
|
||||
|
||||
/// Deserializes one transferable entry from the current position of `file`.
/// Returns false as soon as any read comes up short; the entry may then be partially filled.
bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
    if (!file.ReadObject(type)) {
        return false;
    }

    // Both program sizes precede the program code
    u32 size_a;
    u32 size_b;
    if (!file.ReadObject(size_a)) {
        return false;
    }
    if (!file.ReadObject(size_b)) {
        return false;
    }
    code.resize(size_a);
    code_b.resize(size_b);
    if (file.Read(code) != size_a) {
        return false;
    }
    // The second program is only read when both programs are non-empty (mirrors Save)
    if (HasProgramA()) {
        if (file.Read(code_b) != size_b) {
            return false;
        }
    }

    u8 has_texture_handler_size;
    u32 stored_texture_handler_size;
    u32 key_count;
    u32 bound_sampler_count;
    u32 separate_sampler_count;
    u32 bindless_sampler_count;
    const bool header_read =
        file.ReadObject(unique_identifier) && file.ReadObject(bound_buffer) &&
        file.ReadObject(has_texture_handler_size) &&
        file.ReadObject(stored_texture_handler_size) && file.ReadObject(graphics_info) &&
        file.ReadObject(compute_info) && file.ReadObject(key_count) &&
        file.ReadObject(bound_sampler_count) && file.ReadObject(separate_sampler_count) &&
        file.ReadObject(bindless_sampler_count);
    if (!header_read) {
        return false;
    }
    // The stored value is always present on disk but only meaningful when flagged as known
    if (has_texture_handler_size != 0) {
        texture_handler_size = stored_texture_handler_size;
    }

    // Read the flat tables, then rebuild the maps from them
    std::vector<ConstBufferKey> key_table(key_count);
    std::vector<BoundSamplerEntry> bound_table(bound_sampler_count);
    std::vector<SeparateSamplerEntry> separate_table(separate_sampler_count);
    std::vector<BindlessSamplerEntry> bindless_table(bindless_sampler_count);
    const bool tables_read = file.Read(key_table) == key_table.size() &&
                             file.Read(bound_table) == bound_table.size() &&
                             file.Read(separate_table) == separate_table.size() &&
                             file.Read(bindless_table) == bindless_table.size();
    if (!tables_read) {
        return false;
    }

    for (const auto& record : key_table) {
        keys.insert({{record.cbuf, record.offset}, record.value});
    }
    for (const auto& record : bound_table) {
        bound_samplers.emplace(record.offset, record.sampler);
    }
    for (const auto& record : separate_table) {
        SeparateSamplerKey sampler_key;
        sampler_key.buffers = {record.cbuf1, record.cbuf2};
        sampler_key.offsets = {record.offset1, record.offset2};
        separate_samplers.emplace(sampler_key, record.sampler);
    }
    for (const auto& record : bindless_table) {
        bindless_samplers.insert({{record.cbuf, record.offset}, record.sampler});
    }

    return true;
}
|
||||
|
||||
/// Serializes this entry to `file` in the layout expected by Load.
/// Returns false as soon as any write comes up short.
bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
    const bool sizes_written = file.WriteObject(static_cast<u32>(type)) &&
                               file.WriteObject(static_cast<u32>(code.size())) &&
                               file.WriteObject(static_cast<u32>(code_b.size()));
    if (!sizes_written) {
        return false;
    }
    if (file.Write(code) != code.size()) {
        return false;
    }
    // The second program is only written when both programs are non-empty
    if (HasProgramA()) {
        if (file.Write(code_b) != code_b.size()) {
            return false;
        }
    }

    const bool header_written =
        file.WriteObject(unique_identifier) && file.WriteObject(bound_buffer) &&
        file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) &&
        file.WriteObject(texture_handler_size.value_or(0)) && file.WriteObject(graphics_info) &&
        file.WriteObject(compute_info) && file.WriteObject(static_cast<u32>(keys.size())) &&
        file.WriteObject(static_cast<u32>(bound_samplers.size())) &&
        file.WriteObject(static_cast<u32>(separate_samplers.size())) &&
        file.WriteObject(static_cast<u32>(bindless_samplers.size()));
    if (!header_written) {
        return false;
    }

    // Flatten every map into a contiguous table so each can be written with a single call
    std::vector<ConstBufferKey> key_table;
    key_table.reserve(keys.size());
    for (const auto& element : keys) {
        const auto& location = element.first;
        key_table.push_back(ConstBufferKey{location.first, location.second, element.second});
    }

    std::vector<BoundSamplerEntry> bound_table;
    bound_table.reserve(bound_samplers.size());
    for (const auto& element : bound_samplers) {
        bound_table.push_back(BoundSamplerEntry{element.first, element.second});
    }

    std::vector<SeparateSamplerEntry> separate_table;
    separate_table.reserve(separate_samplers.size());
    for (const auto& element : separate_samplers) {
        SeparateSamplerEntry record;
        std::tie(record.cbuf1, record.cbuf2) = element.first.buffers;
        std::tie(record.offset1, record.offset2) = element.first.offsets;
        record.sampler = element.second;
        separate_table.push_back(record);
    }

    std::vector<BindlessSamplerEntry> bindless_table;
    bindless_table.reserve(bindless_samplers.size());
    for (const auto& element : bindless_samplers) {
        const auto& location = element.first;
        bindless_table.push_back(
            BindlessSamplerEntry{location.first, location.second, element.second});
    }

    if (file.Write(key_table) != key_table.size()) {
        return false;
    }
    if (file.Write(bound_table) != bound_table.size()) {
        return false;
    }
    if (file.Write(separate_table) != separate_table.size()) {
        return false;
    }
    return file.Write(bindless_table) == bindless_table.size();
}
|
||||
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
|
||||
|
||||
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
|
||||
|
||||
/// Remembers the title ID used by the path getters and by LoadTransferable.
void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
    this->title_id = title_id_;
}
|
||||
|
||||
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||
// Skip games without title id
|
||||
const bool has_title_id = title_id != 0;
|
||||
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
if (!file.IsOpen()) {
|
||||
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
|
||||
is_usable = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u32 version{};
|
||||
if (!file.ReadObject(version)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (version < NativeVersion) {
|
||||
LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
|
||||
file.Close();
|
||||
InvalidateTransferable();
|
||||
is_usable = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
if (version > NativeVersion) {
|
||||
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
|
||||
"of the emulator, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Version is valid, load the shaders
|
||||
std::vector<ShaderDiskCacheEntry> entries;
|
||||
while (static_cast<u64>(file.Tell()) < file.GetSize()) {
|
||||
ShaderDiskCacheEntry& entry = entries.emplace_back();
|
||||
if (!entry.Load(file)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
is_usable = true;
|
||||
return {std::move(entries)};
|
||||
}
|
||||
|
||||
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
||||
if (!is_usable) {
|
||||
return {};
|
||||
}
|
||||
|
||||
Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
if (!file.IsOpen()) {
|
||||
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
|
||||
return {};
|
||||
}
|
||||
|
||||
if (const auto result = LoadPrecompiledFile(file)) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
|
||||
file.Close();
|
||||
InvalidatePrecompiled();
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Decompresses `file` into the virtual precompiled cache file and parses its entries.
/// Returns nullopt when the file cannot be read, the version hash differs, or an entry is
/// truncated.
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
    Common::FS::IOFile& file) {
    // Read compressed file from disk and decompress to virtual precompiled cache file
    std::vector<u8> compressed_data(file.GetSize());
    if (file.Read(compressed_data) != file.GetSize()) {
        return std::nullopt;
    }
    const std::vector<u8> raw_data = Common::Compression::DecompressDataZSTD(compressed_data);
    SaveArrayToPrecompiled(raw_data.data(), raw_data.size());
    // Rewind so the parsing below starts at the beginning of the virtual file
    precompiled_cache_virtual_file_offset = 0;

    ShaderCacheVersionHash stored_hash{};
    const bool hash_read = LoadArrayFromPrecompiled(stored_hash.data(), stored_hash.size());
    if (!hash_read) {
        precompiled_cache_virtual_file_offset = 0;
        return std::nullopt;
    }
    if (GetShaderCacheVersionHash() != stored_hash) {
        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
        precompiled_cache_virtual_file_offset = 0;
        return std::nullopt;
    }

    std::vector<ShaderDiskCachePrecompiled> loaded;
    while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
        u32 stored_binary_size;
        ShaderDiskCachePrecompiled& current = loaded.emplace_back();
        const bool header_read = LoadObjectFromPrecompiled(current.unique_identifier) &&
                                 LoadObjectFromPrecompiled(current.binary_format) &&
                                 LoadObjectFromPrecompiled(stored_binary_size);
        if (!header_read) {
            return std::nullopt;
        }

        current.binary.resize(stored_binary_size);
        if (!LoadArrayFromPrecompiled(current.binary.data(), current.binary.size())) {
            return std::nullopt;
        }
    }
    return loaded;
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
|
||||
if (!Common::FS::RemoveFile(GetTransferablePath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
|
||||
Common::FS::PathToUTF8String(GetTransferablePath()));
|
||||
}
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
|
||||
// Clear virtaul precompiled cache file
|
||||
precompiled_cache_virtual_file.Resize(0);
|
||||
|
||||
if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
|
||||
Common::FS::PathToUTF8String(GetPrecompiledPath()));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
|
||||
if (!is_usable) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 id = entry.unique_identifier;
|
||||
if (stored_transferable.contains(id)) {
|
||||
// The shader already exists
|
||||
return;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file = AppendTransferableFile();
|
||||
if (!file.IsOpen()) {
|
||||
return;
|
||||
}
|
||||
if (!entry.Save(file)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
|
||||
file.Close();
|
||||
InvalidateTransferable();
|
||||
return;
|
||||
}
|
||||
|
||||
stored_transferable.insert(id);
|
||||
}
|
||||
|
||||
/// Appends the driver binary of `program` to the virtual precompiled cache file.
/// Programs for which the driver reports no binary are skipped; a failed write invalidates
/// the precompiled cache.
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
    if (!is_usable) {
        return;
    }

    // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
    // when writing the dump. This should be done the moment I get access to write to the virtual
    // file.
    if (precompiled_cache_virtual_file.GetSize() == 0) {
        SavePrecompiledHeaderToVirtualPrecompiledCache();
    }

    // Zero-initialize so a failed query cannot leave an indeterminate length behind
    GLint binary_length = 0;
    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
    if (binary_length <= 0) {
        // A non-positive length would yield an empty entry or an absurd allocation size
        LOG_ERROR(Render_OpenGL, "No program binary available for shader={:016X}, skipping",
                  unique_identifier);
        return;
    }

    GLenum binary_format = 0;
    std::vector<u8> binary(static_cast<std::size_t>(binary_length));
    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());

    if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
        !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
        !SaveArrayToPrecompiled(binary.data(), binary.size())) {
        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
                  unique_identifier);
        InvalidatePrecompiled();
    }
}
|
||||
|
||||
/// Opens the current title's transferable file for appending, writing the version header
/// first when the file is new or empty. Returns a default-constructed file on failure.
Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
    if (!EnsureDirectories()) {
        return {};
    }

    const auto path{GetTransferablePath()};
    // Must be queried before the file is opened below
    const bool already_present = Common::FS::Exists(path);

    Common::FS::IOFile output{path, Common::FS::FileAccessMode::Append,
                              Common::FS::FileType::BinaryFile};
    if (!output.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
                  Common::FS::PathToUTF8String(path));
        return {};
    }

    // If the file didn't exist (or is empty), write its version
    const bool needs_header = !already_present || output.GetSize() == 0;
    if (needs_header && !output.WriteObject(NativeVersion)) {
        LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
                  Common::FS::PathToUTF8String(path));
        return {};
    }
    return output;
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
|
||||
const auto hash{GetShaderCacheVersionHash()};
|
||||
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
|
||||
LOG_ERROR(
|
||||
Render_OpenGL,
|
||||
"Failed to write precompiled cache version hash to virtual precompiled cache file");
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
|
||||
const std::vector<u8> compressed =
|
||||
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
|
||||
|
||||
const auto precompiled_path = GetPrecompiledPath();
|
||||
Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
|
||||
Common::FS::FileType::BinaryFile};
|
||||
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
|
||||
Common::FS::PathToUTF8String(precompiled_path));
|
||||
return;
|
||||
}
|
||||
if (file.Write(compressed) != compressed.size()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
|
||||
Common::FS::PathToUTF8String(precompiled_path));
|
||||
}
|
||||
}
|
||||
|
||||
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
|
||||
const auto CreateDir = [](const std::filesystem::path& dir) {
|
||||
if (!Common::FS::CreateDir(dir)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
|
||||
Common::FS::PathToUTF8String(dir));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
|
||||
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
|
||||
CreateDir(GetPrecompiledDir());
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
|
||||
return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
|
||||
return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
|
||||
return GetBaseDir() / "transferable";
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
|
||||
return GetBaseDir() / "precompiled";
|
||||
}
|
||||
|
||||
std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
|
||||
return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
|
||||
return fmt::format("{:016X}", title_id);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -1,176 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/file_sys/vfs_vector.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
|
||||
namespace Common::FS {
|
||||
class IOFile;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using ProgramCode = std::vector<u64>;
|
||||
|
||||
/// Describes a shader and how it's used by the guest GPU
|
||||
struct ShaderDiskCacheEntry {
|
||||
ShaderDiskCacheEntry();
|
||||
~ShaderDiskCacheEntry();
|
||||
|
||||
bool Load(Common::FS::IOFile& file);
|
||||
|
||||
bool Save(Common::FS::IOFile& file) const;
|
||||
|
||||
bool HasProgramA() const {
|
||||
return !code.empty() && !code_b.empty();
|
||||
}
|
||||
|
||||
Tegra::Engines::ShaderType type{};
|
||||
ProgramCode code;
|
||||
ProgramCode code_b;
|
||||
|
||||
u64 unique_identifier = 0;
|
||||
std::optional<u32> texture_handler_size;
|
||||
u32 bound_buffer = 0;
|
||||
VideoCommon::Shader::GraphicsInfo graphics_info;
|
||||
VideoCommon::Shader::ComputeInfo compute_info;
|
||||
VideoCommon::Shader::KeyMap keys;
|
||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||
};
|
||||
|
||||
/// Contains an OpenGL dumped binary program
|
||||
struct ShaderDiskCachePrecompiled {
|
||||
u64 unique_identifier = 0;
|
||||
GLenum binary_format = 0;
|
||||
std::vector<u8> binary;
|
||||
};
|
||||
|
||||
class ShaderDiskCacheOpenGL {
|
||||
public:
|
||||
explicit ShaderDiskCacheOpenGL();
|
||||
~ShaderDiskCacheOpenGL();
|
||||
|
||||
/// Binds a title ID for all future operations.
|
||||
void BindTitleID(u64 title_id);
|
||||
|
||||
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
|
||||
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
|
||||
|
||||
/// Loads current game's precompiled cache. Invalidates on failure.
|
||||
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
|
||||
|
||||
/// Removes the transferable (and precompiled) cache file.
|
||||
void InvalidateTransferable();
|
||||
|
||||
/// Removes the precompiled cache file and clears virtual precompiled cache file.
|
||||
void InvalidatePrecompiled();
|
||||
|
||||
/// Saves a raw dump to the transferable file. Checks for collisions.
|
||||
void SaveEntry(const ShaderDiskCacheEntry& entry);
|
||||
|
||||
/// Saves a dump entry to the precompiled file. Does not check for collisions.
|
||||
void SavePrecompiled(u64 unique_identifier, GLuint program);
|
||||
|
||||
/// Serializes virtual precompiled shader cache file to real file
|
||||
void SaveVirtualPrecompiledFile();
|
||||
|
||||
private:
|
||||
/// Loads the transferable cache. Returns empty on failure.
|
||||
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
|
||||
Common::FS::IOFile& file);
|
||||
|
||||
/// Opens current game's transferable file and write it's header if it doesn't exist
|
||||
Common::FS::IOFile AppendTransferableFile() const;
|
||||
|
||||
/// Save precompiled header to precompiled_cache_in_memory
|
||||
void SavePrecompiledHeaderToVirtualPrecompiledCache();
|
||||
|
||||
/// Create shader disk cache directories. Returns true on success.
|
||||
bool EnsureDirectories() const;
|
||||
|
||||
/// Gets current game's transferable file path
|
||||
std::filesystem::path GetTransferablePath() const;
|
||||
|
||||
/// Gets current game's precompiled file path
|
||||
std::filesystem::path GetPrecompiledPath() const;
|
||||
|
||||
/// Get user's transferable directory path
|
||||
std::filesystem::path GetTransferableDir() const;
|
||||
|
||||
/// Get user's precompiled directory path
|
||||
std::filesystem::path GetPrecompiledDir() const;
|
||||
|
||||
/// Get user's shader directory path
|
||||
std::filesystem::path GetBaseDir() const;
|
||||
|
||||
/// Get current game's title id
|
||||
std::string GetTitleID() const;
|
||||
|
||||
template <typename T>
|
||||
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
|
||||
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
precompiled_cache_virtual_file_offset += write_length;
|
||||
return write_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
|
||||
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
precompiled_cache_virtual_file_offset += read_length;
|
||||
return read_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SaveObjectToPrecompiled(const T& object) {
|
||||
return SaveArrayToPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
bool SaveObjectToPrecompiled(bool object) {
|
||||
const auto value = static_cast<u8>(object);
|
||||
return SaveArrayToPrecompiled(&value, 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadObjectFromPrecompiled(T& object) {
|
||||
return LoadArrayFromPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
|
||||
// file
|
||||
FileSys::VectorVfsFile precompiled_cache_virtual_file;
|
||||
// Stores the current offset of the precompiled cache file for IO purposes
|
||||
std::size_t precompiled_cache_virtual_file_offset = 0;
|
||||
|
||||
// Stored transferable shaders
|
||||
std::unordered_set<u64> stored_transferable;
|
||||
|
||||
/// Title ID to operate on
|
||||
u64 title_id = 0;
|
||||
|
||||
// The cache has been loaded at boot
|
||||
bool is_usable = false;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -1,220 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#ifdef HAS_NSIGHT_AFTERMATH
|
||||
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#define VK_NO_PROTOTYPES
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include <GFSDK_Aftermath.h>
|
||||
#include <GFSDK_Aftermath_Defines.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDump.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
|
||||
|
||||
#include "common/common_paths.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
|
||||
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
|
||||
|
||||
NsightAftermathTracker::NsightAftermathTracker() = default;
|
||||
|
||||
/// Disables GPU crash dumps, but only when Initialize completed successfully.
NsightAftermathTracker::~NsightAftermathTracker() {
    if (!initialized) {
        return;
    }
    // The result is deliberately ignored
    (void)GFSDK_Aftermath_DisableGpuCrashDumps();
}
|
||||
|
||||
bool NsightAftermathTracker::Initialize() {
|
||||
if (!dl.Open(AFTERMATH_LIB_NAME)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
|
||||
&GFSDK_Aftermath_DisableGpuCrashDumps) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
|
||||
&GFSDK_Aftermath_EnableGpuCrashDumps) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier",
|
||||
&GFSDK_Aftermath_GetShaderDebugInfoIdentifier) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder",
|
||||
&GFSDK_Aftermath_GpuCrashDump_CreateDecoder) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder",
|
||||
&GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON",
|
||||
&GFSDK_Aftermath_GpuCrashDump_GenerateJSON) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
|
||||
&GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
|
||||
return false;
|
||||
}
|
||||
|
||||
dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
|
||||
|
||||
(void)Common::FS::DeleteDirRecursively(dump_dir);
|
||||
if (!Common::FS::CreateDir(dump_dir)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
|
||||
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
|
||||
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
|
||||
ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
|
||||
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
|
||||
|
||||
initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
|
||||
if (!initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<u32> spirv_copy = spirv;
|
||||
GFSDK_Aftermath_SpirvCode shader;
|
||||
shader.pData = spirv_copy.data();
|
||||
shader.size = static_cast<u32>(spirv_copy.size() * 4);
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
GFSDK_Aftermath_ShaderHash hash;
|
||||
if (!GFSDK_Aftermath_SUCCEED(
|
||||
GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
|
||||
return;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
|
||||
return;
|
||||
}
|
||||
if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
|
||||
u32 gpu_crash_dump_size) {
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
LOG_CRITICAL(Render_Vulkan, "called");
|
||||
|
||||
GFSDK_Aftermath_GpuCrashDump_Decoder decoder;
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(
|
||||
GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create decoder");
|
||||
return;
|
||||
}
|
||||
SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); });
|
||||
|
||||
u32 json_size = 0;
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
|
||||
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
|
||||
GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
|
||||
this, &json_size))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
|
||||
return;
|
||||
}
|
||||
std::vector<char> json(json_size);
|
||||
if (!GFSDK_Aftermath_SUCCEED(
|
||||
GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to query JSON");
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string base_name = [this] {
|
||||
const int id = dump_id++;
|
||||
if (id == 0) {
|
||||
return fmt::format("{}/crash.nv-gpudmp", dump_dir);
|
||||
} else {
|
||||
return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id);
|
||||
}
|
||||
}();
|
||||
|
||||
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
|
||||
if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
|
||||
return;
|
||||
}
|
||||
const std::string_view json_view(json.data(), json.size());
|
||||
if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info,
|
||||
u32 shader_debug_info_size) {
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier;
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(
|
||||
GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) {
|
||||
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed");
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string path =
|
||||
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
|
||||
Common::FS::IOFile file(path, "wb");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
|
||||
return;
|
||||
}
|
||||
if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) !=
|
||||
shader_debug_info_size) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write file {}", path);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// Supplies the crash dump description by registering the application name through the
/// provided callback.
/// @param add_description Function used to attach a key/value description to the dump.
void NsightAftermathTracker::OnCrashDumpDescriptionCallback(
    PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) {
    const char* const application_name = "yuzu";
    add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, application_name);
}
|
||||
|
||||
void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump,
|
||||
u32 gpu_crash_dump_size, void* user_data) {
|
||||
static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump,
|
||||
gpu_crash_dump_size);
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info,
|
||||
u32 shader_debug_info_size, void* user_data) {
|
||||
static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback(
|
||||
shader_debug_info, shader_debug_info_size);
|
||||
}
|
||||
|
||||
/// Static trampoline: treats user_data as the tracker instance and forwards the description
/// callback to its OnCrashDumpDescriptionCallback member.
void NsightAftermathTracker::CrashDumpDescriptionCallback(
    PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) {
    auto* const tracker = static_cast<NsightAftermathTracker*>(user_data);
    tracker->OnCrashDumpDescriptionCallback(add_description);
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
#endif // HAS_NSIGHT_AFTERMATH
|
||||
@@ -1,87 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define VK_NO_PROTOTYPES
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#ifdef HAS_NSIGHT_AFTERMATH
|
||||
#include <GFSDK_Aftermath_Defines.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDump.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
|
||||
#endif
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/dynamic_library.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class NsightAftermathTracker {
|
||||
public:
|
||||
NsightAftermathTracker();
|
||||
~NsightAftermathTracker();
|
||||
|
||||
NsightAftermathTracker(const NsightAftermathTracker&) = delete;
|
||||
NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete;
|
||||
|
||||
// Delete move semantics because Aftermath initialization uses a pointer to this.
|
||||
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
|
||||
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
|
||||
|
||||
bool Initialize();
|
||||
|
||||
void SaveShader(const std::vector<u32>& spirv) const;
|
||||
|
||||
private:
|
||||
#ifdef HAS_NSIGHT_AFTERMATH
|
||||
static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size,
|
||||
void* user_data);
|
||||
|
||||
static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size,
|
||||
void* user_data);
|
||||
|
||||
static void CrashDumpDescriptionCallback(
|
||||
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data);
|
||||
|
||||
void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size);
|
||||
|
||||
void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size);
|
||||
|
||||
void OnCrashDumpDescriptionCallback(
|
||||
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description);
|
||||
|
||||
mutable std::mutex mutex;
|
||||
|
||||
std::string dump_dir;
|
||||
int dump_id = 0;
|
||||
|
||||
bool initialized = false;
|
||||
|
||||
Common::DynamicLibrary dl;
|
||||
PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
|
||||
PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
|
||||
PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
|
||||
PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifndef HAS_NSIGHT_AFTERMATH
|
||||
inline NsightAftermathTracker::NsightAftermathTracker() = default;
|
||||
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
|
||||
inline bool NsightAftermathTracker::Initialize() {
|
||||
return false;
|
||||
}
|
||||
inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
|
||||
#endif
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,883 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace Alternatives {
|
||||
|
||||
constexpr std::array Depth24UnormS8_UINT{
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VkFormat{},
|
||||
};
|
||||
|
||||
constexpr std::array Depth16UnormS8_UINT{
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VkFormat{},
|
||||
};
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
// Device extensions that must be present. CheckSuitability throws when any of them is missing
// and LoadExtensions always requests all of them.
constexpr std::array REQUIRED_EXTENSIONS{
    // KHR extensions
    VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    VK_KHR_MAINTENANCE1_EXTENSION_NAME,
    VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
    VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
    VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
    VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
    VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
    VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
    VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
    VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
    // EXT extensions
    VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
    VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
    VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
    VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};
|
||||
|
||||
/// Appends a structure to a pNext chain.
/// @param next In: address of the pNext slot at the current end of the chain.
///             Out: address of the appended structure's own pNext slot.
/// @param data Structure to link in; it must expose a pNext member.
template <typename Struct>
void SetNext(void**& next, Struct& data) {
    void** const tail_slot = next;
    // Link the new structure into the current tail, then make its pNext the new tail.
    *tail_slot = &data;
    next = &data.pNext;
}
|
||||
|
||||
/// Looks up the fallback list for a format.
/// @param format Format that needs a replacement.
/// @return Pointer to the first entry of the matching list in Alternatives, or nullptr when no
///         list is defined for the format.
constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
    if (format == VK_FORMAT_D24_UNORM_S8_UINT) {
        return Alternatives::Depth24UnormS8_UINT.data();
    }
    if (format == VK_FORMAT_D16_UNORM_S8_UINT) {
        return Alternatives::Depth16UnormS8_UINT.data();
    }
    return nullptr;
}
|
||||
|
||||
/// Selects the feature flags of a format that apply to the given kind of usage.
/// @param properties  Format properties reported for the format.
/// @param format_type Which feature set to read: linear tiling, optimal tiling or buffer.
/// @return The selected flags, or empty flags for an unrecognized format_type.
VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) {
    if (format_type == FormatType::Linear) {
        return properties.linearTilingFeatures;
    }
    if (format_type == FormatType::Optimal) {
        return properties.optimalTilingFeatures;
    }
    if (format_type == FormatType::Buffer) {
        return properties.bufferFeatures;
    }
    return VkFormatFeatureFlags{};
}
|
||||
|
||||
[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) {
|
||||
static constexpr std::array RDNA_DEVICES{
|
||||
"5700",
|
||||
"5600",
|
||||
"5500",
|
||||
"5300",
|
||||
};
|
||||
if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) {
|
||||
return false;
|
||||
}
|
||||
return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) {
|
||||
return device_name.find(name) != std::string_view::npos;
|
||||
});
|
||||
}
|
||||
|
||||
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
||||
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
|
||||
static constexpr std::array formats{
|
||||
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_UINT_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SINT_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
|
||||
VK_FORMAT_B5G6R5_UNORM_PACK16,
|
||||
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
|
||||
VK_FORMAT_A2B10G10R10_UINT_PACK32,
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||
VK_FORMAT_R32G32B32A32_SINT,
|
||||
VK_FORMAT_R32G32B32A32_UINT,
|
||||
VK_FORMAT_R32G32_SFLOAT,
|
||||
VK_FORMAT_R32G32_SINT,
|
||||
VK_FORMAT_R32G32_UINT,
|
||||
VK_FORMAT_R16G16B16A16_SINT,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16_UNORM,
|
||||
VK_FORMAT_R16G16_SNORM,
|
||||
VK_FORMAT_R16G16_SFLOAT,
|
||||
VK_FORMAT_R16G16_SINT,
|
||||
VK_FORMAT_R16_UNORM,
|
||||
VK_FORMAT_R16_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SRGB,
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
VK_FORMAT_R8G8_SNORM,
|
||||
VK_FORMAT_R8G8_SINT,
|
||||
VK_FORMAT_R8G8_UINT,
|
||||
VK_FORMAT_R8_UNORM,
|
||||
VK_FORMAT_R8_SNORM,
|
||||
VK_FORMAT_R8_SINT,
|
||||
VK_FORMAT_R8_UINT,
|
||||
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
|
||||
VK_FORMAT_R32_SFLOAT,
|
||||
VK_FORMAT_R32_UINT,
|
||||
VK_FORMAT_R32_SINT,
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_B8G8R8A8_SRGB,
|
||||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||
VK_FORMAT_D32_SFLOAT,
|
||||
VK_FORMAT_D16_UNORM,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
|
||||
VK_FORMAT_BC2_UNORM_BLOCK,
|
||||
VK_FORMAT_BC3_UNORM_BLOCK,
|
||||
VK_FORMAT_BC4_UNORM_BLOCK,
|
||||
VK_FORMAT_BC4_SNORM_BLOCK,
|
||||
VK_FORMAT_BC5_UNORM_BLOCK,
|
||||
VK_FORMAT_BC5_SNORM_BLOCK,
|
||||
VK_FORMAT_BC7_UNORM_BLOCK,
|
||||
VK_FORMAT_BC6H_UFLOAT_BLOCK,
|
||||
VK_FORMAT_BC6H_SFLOAT_BLOCK,
|
||||
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
|
||||
VK_FORMAT_BC2_SRGB_BLOCK,
|
||||
VK_FORMAT_BC3_SRGB_BLOCK,
|
||||
VK_FORMAT_BC7_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
|
||||
};
|
||||
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
|
||||
for (const auto format : formats) {
|
||||
format_properties.emplace(format, physical.GetFormatProperties(format));
|
||||
}
|
||||
return format_properties;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Creates the logical device.
///
/// Steps, in order: validate the physical device, pick queue families and optional features,
/// assemble the feature structure chain, create the logical device, collect telemetry and
/// tooling information, apply the extended dynamic state blacklists and fetch the queues.
/// @param instance_ Vulkan instance handle.
/// @param physical_ Physical device the logical device is created from.
/// @param surface   Surface passed to SetupFamilies.
/// @param dld_      Instance dispatch table.
Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
               const vk::InstanceDispatch& dld_)
    : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
      format_properties{GetFormatProperties(physical, dld)} {
    CheckSuitability();
    SetupFamilies(surface);
    SetupFeatures();

    const auto queue_cis = GetDeviceQueueCreateInfos();
    const std::vector extensions = LoadExtensions();

    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = nullptr,
    };
    // chain_head is what gets handed to device creation; chain_tail is the pNext slot that the
    // next appended structure will be linked into.
    const void* chain_head = &features2;
    void** chain_tail = &features2.pNext;

    features2.features = {
        .robustBufferAccess = false,
        .fullDrawIndexUint32 = false,
        .imageCubeArray = true,
        .independentBlend = true,
        .geometryShader = true,
        .tessellationShader = true,
        .sampleRateShading = false,
        .dualSrcBlend = false,
        .logicOp = false,
        .multiDrawIndirect = false,
        .drawIndirectFirstInstance = false,
        .depthClamp = true,
        .depthBiasClamp = true,
        .fillModeNonSolid = false,
        .depthBounds = false,
        .wideLines = false,
        .largePoints = true,
        .alphaToOne = false,
        .multiViewport = true,
        .samplerAnisotropy = true,
        .textureCompressionETC2 = false,
        .textureCompressionASTC_LDR = is_optimal_astc_supported,
        .textureCompressionBC = false,
        .occlusionQueryPrecise = true,
        .pipelineStatisticsQuery = false,
        .vertexPipelineStoresAndAtomics = true,
        .fragmentStoresAndAtomics = true,
        .shaderTessellationAndGeometryPointSize = false,
        .shaderImageGatherExtended = true,
        .shaderStorageImageExtendedFormats = false,
        .shaderStorageImageMultisample = true,
        .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
        .shaderStorageImageWriteWithoutFormat = true,
        .shaderUniformBufferArrayDynamicIndexing = false,
        .shaderSampledImageArrayDynamicIndexing = false,
        .shaderStorageBufferArrayDynamicIndexing = false,
        .shaderStorageImageArrayDynamicIndexing = false,
        .shaderClipDistance = false,
        .shaderCullDistance = false,
        .shaderFloat64 = false,
        .shaderInt64 = false,
        .shaderInt16 = false,
        .shaderResourceResidency = false,
        .shaderResourceMinLod = false,
        .sparseBinding = false,
        .sparseResidencyBuffer = false,
        .sparseResidencyImage2D = false,
        .sparseResidencyImage3D = false,
        .sparseResidency2Samples = false,
        .sparseResidency4Samples = false,
        .sparseResidency8Samples = false,
        .sparseResidency16Samples = false,
        .sparseResidencyAliased = false,
        .variableMultisampleRate = false,
        .inheritedQueries = false,
    };

    // Structures that are always part of the chain.
    VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
        .pNext = nullptr,
        .timelineSemaphore = true,
    };
    SetNext(chain_tail, timeline_semaphore);

    VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
        .pNext = nullptr,
        .storageBuffer16BitAccess = false,
        .uniformAndStorageBuffer16BitAccess = true,
        .storagePushConstant16 = false,
        .storageInputOutput16 = false,
    };
    SetNext(chain_tail, bit16_storage);

    VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR,
        .pNext = nullptr,
        .storageBuffer8BitAccess = false,
        .uniformAndStorageBuffer8BitAccess = true,
        .storagePushConstant8 = false,
    };
    SetNext(chain_tail, bit8_storage);

    VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
        .hostQueryReset = true,
    };
    SetNext(chain_tail, host_query_reset);

    // Optional structures: each one is declared at function scope so that it outlives device
    // creation, and is only filled in and linked when the matching capability flag is set.
    VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
    if (!is_float16_supported) {
        LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
    } else {
        float16_int8 = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
            .pNext = nullptr,
            .shaderFloat16 = true,
            .shaderInt8 = false,
        };
        SetNext(chain_tail, float16_int8);
    }

    if (!nv_viewport_swizzle) {
        LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
    }

    VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
    if (!khr_uniform_buffer_standard_layout) {
        LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
    } else {
        std430_layout = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR,
            .pNext = nullptr,
            .uniformBufferStandardLayout = true,
        };
        SetNext(chain_tail, std430_layout);
    }

    VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
    if (!ext_index_type_uint8) {
        LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
    } else {
        index_type_uint8 = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
            .pNext = nullptr,
            .indexTypeUint8 = true,
        };
        SetNext(chain_tail, index_type_uint8);
    }

    VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
    if (!ext_transform_feedback) {
        LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
    } else {
        transform_feedback = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
            .pNext = nullptr,
            .transformFeedback = true,
            .geometryStreams = true,
        };
        SetNext(chain_tail, transform_feedback);
    }

    VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
    if (!ext_custom_border_color) {
        LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
    } else {
        custom_border = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
            .pNext = nullptr,
            .customBorderColors = VK_TRUE,
            .customBorderColorWithoutFormat = VK_TRUE,
        };
        SetNext(chain_tail, custom_border);
    }

    VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
    if (!ext_extended_dynamic_state) {
        LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
    } else {
        dynamic_state = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
            .pNext = nullptr,
            .extendedDynamicState = VK_TRUE,
        };
        SetNext(chain_tail, dynamic_state);
    }

    VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
    if (!ext_robustness2) {
        LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
    } else {
        robustness2 = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
            .pNext = nullptr,
            .robustBufferAccess2 = false,
            .robustImageAccess2 = true,
            .nullDescriptor = true,
        };
        SetNext(chain_tail, robustness2);
    }

    if (!ext_depth_range_unrestricted) {
        LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
    }

    // The diagnostics configuration is placed in front of features2 rather than appended, so
    // it becomes the new head of the chain.
    VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
    if (nv_device_diagnostics_config) {
        nsight_aftermath_tracker.Initialize();

        diagnostics_nv = {
            .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
            .pNext = &features2,
            .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
                     VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
                     VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV,
        };
        chain_head = &diagnostics_nv;
    }
    logical = vk::Device::Create(physical, queue_cis, extensions, chain_head, dld);

    CollectTelemetryParameters();
    CollectToolingInfo();

    // Driver blacklists for extended dynamic state; these run after the device exists and only
    // clear the flag that the rest of the renderer consults.
    if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
        LOG_WARNING(
            Render_Vulkan,
            "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
        ext_extended_dynamic_state = false;
    }
    if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
        // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
        // seems to cause stability issues
        LOG_WARNING(
            Render_Vulkan,
            "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state");
        ext_extended_dynamic_state = false;
    }

    graphics_queue = logical.GetQueue(graphics_family);
    present_queue = logical.GetQueue(present_family);

    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
Device::~Device() = default;
|
||||
|
||||
/// Picks a format the device can use for the requested usage.
/// @param wanted_format Preferred format.
/// @param wanted_usage  Feature flags that the format has to support.
/// @param format_type   Which feature set of the format is consulted.
/// @return wanted_format when it is supported; otherwise the first supported entry of its
///         alternatives list. When no alternative exists or none is supported, the failure is
///         reported through UNREACHABLE_MSG and wanted_format is returned unchanged.
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                                    FormatType format_type) const {
    if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
        return wanted_format;
    }
    // The wanted format is not supported by hardware, search for alternatives
    const VkFormat* candidate = GetFormatAlternatives(wanted_format);
    if (candidate == nullptr) {
        UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
                        "hardware does not support it",
                        wanted_format, wanted_usage, format_type);
        return wanted_format;
    }

    // Walk the list until its zero-valued terminator.
    for (; *candidate; ++candidate) {
        const VkFormat alternative = *candidate;
        if (IsFormatSupported(alternative, wanted_usage, format_type)) {
            LOG_WARNING(Render_Vulkan,
                        "Emulating format={} with alternative format={} with usage={} and type={}",
                        wanted_format, alternative, wanted_usage, format_type);
            return alternative;
        }
    }

    // No alternatives found, panic
    UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
                    "doesn't support any of the alternatives",
                    wanted_format, wanted_usage, format_type);
    return wanted_format;
}
|
||||
|
||||
/// Logs a device loss and then blocks the calling thread for 15 seconds.
void Device::ReportLoss() const {
    LOG_CRITICAL(Render_Vulkan, "Device loss occured!");

    // Wait for the log to flush and for Nsight Aftermath to dump the results
    constexpr std::chrono::seconds grace_period{15};
    std::this_thread::sleep_for(grace_period);
}
|
||||
|
||||
/// Hands a SPIR-V module to the Nsight Aftermath tracker.
/// @param spirv SPIR-V words of the module.
void Device::SaveShader(const std::vector<u32>& spirv) const {
    return nsight_aftermath_tracker.SaveShader(spirv);
}
|
||||
|
||||
bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
|
||||
// Disable for now to avoid converting ASTC twice.
|
||||
static constexpr std::array astc_formats = {
|
||||
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
||||
};
|
||||
if (!features.textureCompressionASTC_LDR) {
|
||||
return false;
|
||||
}
|
||||
const auto format_feature_usage{
|
||||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
|
||||
for (const auto format : astc_formats) {
|
||||
const auto physical_format_properties{physical.GetFormatProperties(format)};
|
||||
if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Device::TestDepthStencilBlits() const {
|
||||
static constexpr VkFormatFeatureFlags required_features =
|
||||
VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
|
||||
const auto test_features = [](VkFormatProperties props) {
|
||||
return (props.optimalTilingFeatures & required_features) == required_features;
|
||||
};
|
||||
return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
|
||||
test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
|
||||
}
|
||||
|
||||
/// Tests a format against the cached format properties.
/// @param wanted_format Format to test.
/// @param wanted_usage  Feature flags that all have to be present.
/// @param format_type   Which feature set of the format is consulted.
/// @return true when every bit of wanted_usage is reported for the format. A format missing
///         from the cache is reported through UNIMPLEMENTED_MSG and treated as supported.
bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                               FormatType format_type) const {
    const auto entry = format_properties.find(wanted_format);
    const bool is_cached = entry != format_properties.end();
    if (!is_cached) {
        UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format);
        return true;
    }
    const auto available = GetFormatFeatures(entry->second, format_type);
    const auto missing = wanted_usage & ~available;
    return missing == 0;
}
|
||||
|
||||
void Device::CheckSuitability() const {
|
||||
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
|
||||
for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
|
||||
for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
|
||||
if (available_extensions[i]) {
|
||||
continue;
|
||||
}
|
||||
const std::string_view name{property.extensionName};
|
||||
available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
|
||||
if (available_extensions[i]) {
|
||||
continue;
|
||||
}
|
||||
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
|
||||
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
|
||||
}
|
||||
struct LimitTuple {
|
||||
u32 minimum;
|
||||
u32 value;
|
||||
const char* name;
|
||||
};
|
||||
const VkPhysicalDeviceLimits& limits{properties.limits};
|
||||
const std::array limits_report{
|
||||
LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
|
||||
LimitTuple{16, limits.maxViewports, "maxViewports"},
|
||||
LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
|
||||
LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
|
||||
};
|
||||
for (const auto& tuple : limits_report) {
|
||||
if (tuple.value < tuple.minimum) {
|
||||
LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name,
|
||||
tuple.minimum, tuple.value);
|
||||
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
|
||||
}
|
||||
}
|
||||
const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
|
||||
const std::array feature_report{
|
||||
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
|
||||
std::make_pair(features.imageCubeArray, "imageCubeArray"),
|
||||
std::make_pair(features.independentBlend, "independentBlend"),
|
||||
std::make_pair(features.depthClamp, "depthClamp"),
|
||||
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
|
||||
std::make_pair(features.largePoints, "largePoints"),
|
||||
std::make_pair(features.multiViewport, "multiViewport"),
|
||||
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
||||
std::make_pair(features.geometryShader, "geometryShader"),
|
||||
std::make_pair(features.tessellationShader, "tessellationShader"),
|
||||
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
|
||||
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
||||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||
std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
|
||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||
"shaderStorageImageWriteWithoutFormat"),
|
||||
};
|
||||
for (const auto& [is_supported, name] : feature_report) {
|
||||
if (is_supported) {
|
||||
continue;
|
||||
}
|
||||
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
|
||||
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> Device::LoadExtensions() {
|
||||
std::vector<const char*> extensions;
|
||||
extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
|
||||
extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
|
||||
|
||||
bool has_khr_shader_float16_int8{};
|
||||
bool has_ext_subgroup_size_control{};
|
||||
bool has_ext_transform_feedback{};
|
||||
bool has_ext_custom_border_color{};
|
||||
bool has_ext_extended_dynamic_state{};
|
||||
bool has_ext_robustness2{};
|
||||
for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
|
||||
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
|
||||
bool push) {
|
||||
if (extension.extensionName != std::string_view(name)) {
|
||||
return;
|
||||
}
|
||||
if (push) {
|
||||
extensions.push_back(name);
|
||||
}
|
||||
if (status) {
|
||||
status->get() = true;
|
||||
}
|
||||
};
|
||||
test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
|
||||
test(khr_uniform_buffer_standard_layout,
|
||||
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
|
||||
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
||||
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
|
||||
test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
|
||||
true);
|
||||
test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
|
||||
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
|
||||
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
|
||||
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
|
||||
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
|
||||
test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
|
||||
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
|
||||
if (Settings::values.renderer_debug) {
|
||||
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
VkPhysicalDeviceFeatures2KHR features;
|
||||
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
|
||||
|
||||
VkPhysicalDeviceProperties2KHR physical_properties;
|
||||
physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
|
||||
|
||||
if (has_khr_shader_float16_int8) {
|
||||
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
|
||||
float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
|
||||
float16_int8_features.pNext = nullptr;
|
||||
features.pNext = &float16_int8_features;
|
||||
|
||||
physical.GetFeatures2KHR(features);
|
||||
is_float16_supported = float16_int8_features.shaderFloat16;
|
||||
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (has_ext_subgroup_size_control) {
|
||||
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
|
||||
subgroup_features.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
|
||||
subgroup_features.pNext = nullptr;
|
||||
features.pNext = &subgroup_features;
|
||||
physical.GetFeatures2KHR(features);
|
||||
|
||||
VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties;
|
||||
subgroup_properties.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
|
||||
subgroup_properties.pNext = nullptr;
|
||||
physical_properties.pNext = &subgroup_properties;
|
||||
physical.GetProperties2KHR(physical_properties);
|
||||
|
||||
is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
|
||||
|
||||
if (subgroup_features.subgroupSizeControl &&
|
||||
subgroup_properties.minSubgroupSize <= GuestWarpSize &&
|
||||
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
|
||||
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
|
||||
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
|
||||
}
|
||||
} else {
|
||||
is_warp_potentially_bigger = true;
|
||||
}
|
||||
|
||||
if (has_ext_transform_feedback) {
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
|
||||
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
|
||||
tfb_features.pNext = nullptr;
|
||||
features.pNext = &tfb_features;
|
||||
physical.GetFeatures2KHR(features);
|
||||
|
||||
VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
|
||||
tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
|
||||
tfb_properties.pNext = nullptr;
|
||||
physical_properties.pNext = &tfb_properties;
|
||||
physical.GetProperties2KHR(physical_properties);
|
||||
|
||||
if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
|
||||
tfb_properties.maxTransformFeedbackStreams >= 4 &&
|
||||
tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries &&
|
||||
tfb_properties.transformFeedbackDraw) {
|
||||
extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
|
||||
ext_transform_feedback = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_ext_custom_border_color) {
|
||||
VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
|
||||
border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
|
||||
border_features.pNext = nullptr;
|
||||
features.pNext = &border_features;
|
||||
physical.GetFeatures2KHR(features);
|
||||
|
||||
if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) {
|
||||
extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
ext_custom_border_color = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_ext_extended_dynamic_state) {
|
||||
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
|
||||
dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
|
||||
dynamic_state.pNext = nullptr;
|
||||
features.pNext = &dynamic_state;
|
||||
physical.GetFeatures2KHR(features);
|
||||
|
||||
if (dynamic_state.extendedDynamicState) {
|
||||
extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
ext_extended_dynamic_state = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_ext_robustness2) {
|
||||
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
|
||||
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
|
||||
robustness2.pNext = nullptr;
|
||||
features.pNext = &robustness2;
|
||||
physical.GetFeatures2KHR(features);
|
||||
if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
|
||||
extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
ext_robustness2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
return extensions;
|
||||
}
|
||||
|
||||
/// Picks the queue family indices used for graphics work and for presenting to "surface".
/// Throws vk::Exception when the device lacks either kind of queue.
void Device::SetupFamilies(VkSurfaceKHR surface) {
    const std::vector family_properties = physical.GetQueueFamilyProperties();
    const u32 family_count = static_cast<u32>(family_properties.size());

    std::optional<u32> graphics_index;
    std::optional<u32> present_index;
    // Scan families until both roles have been assigned.
    for (u32 family = 0; family < family_count && !(graphics_index && present_index); ++family) {
        const VkQueueFamilyProperties& candidate = family_properties[family];
        if (candidate.queueCount == 0) {
            continue;
        }
        if (candidate.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
            graphics_index = family;
        }
        if (physical.GetSurfaceSupportKHR(family, surface)) {
            present_index = family;
        }
    }

    if (!graphics_index) {
        LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue");
        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
    }
    if (!present_index) {
        LOG_ERROR(Render_Vulkan, "Device lacks a present queue");
        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
    }
    graphics_family = *graphics_index;
    present_family = *present_index;
}
|
||||
|
||||
void Device::SetupFeatures() {
|
||||
const auto supported_features{physical.GetFeatures()};
|
||||
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
|
||||
is_blit_depth_stencil_supported = TestDepthStencilBlits();
|
||||
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
|
||||
}
|
||||
|
||||
void Device::CollectTelemetryParameters() {
|
||||
VkPhysicalDeviceDriverPropertiesKHR driver{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
|
||||
.pNext = nullptr,
|
||||
.driverID = {},
|
||||
.driverName = {},
|
||||
.driverInfo = {},
|
||||
.conformanceVersion = {},
|
||||
};
|
||||
|
||||
VkPhysicalDeviceProperties2KHR device_properties{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
|
||||
.pNext = &driver,
|
||||
.properties = {},
|
||||
};
|
||||
physical.GetProperties2KHR(device_properties);
|
||||
|
||||
driver_id = driver.driverID;
|
||||
vendor_name = driver.driverName;
|
||||
|
||||
const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
|
||||
reported_extensions.reserve(std::size(extensions));
|
||||
for (const auto& extension : extensions) {
|
||||
reported_extensions.emplace_back(extension.extensionName);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::CollectToolingInfo() {
|
||||
if (!ext_tooling_info) {
|
||||
return;
|
||||
}
|
||||
const auto vkGetPhysicalDeviceToolPropertiesEXT =
|
||||
reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
|
||||
dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
|
||||
if (!vkGetPhysicalDeviceToolPropertiesEXT) {
|
||||
return;
|
||||
}
|
||||
u32 tool_count = 0;
|
||||
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
|
||||
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
|
||||
const std::string_view name = tool.name;
|
||||
LOG_INFO(Render_Vulkan, "{}", name);
|
||||
has_renderdoc = has_renderdoc || name == "RenderDoc";
|
||||
has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<VkDeviceQueueCreateInfo> Device::GetDeviceQueueCreateInfos() const {
|
||||
static constexpr float QUEUE_PRIORITY = 1.0f;
|
||||
|
||||
std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
|
||||
std::vector<VkDeviceQueueCreateInfo> queue_cis;
|
||||
queue_cis.reserve(unique_queue_families.size());
|
||||
|
||||
for (const u32 queue_family : unique_queue_families) {
|
||||
auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.queueFamilyIndex = queue_family,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = nullptr,
|
||||
});
|
||||
ci.pQueuePriorities = &QUEUE_PRIORITY;
|
||||
}
|
||||
|
||||
return queue_cis;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,306 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Format usage descriptor.
|
||||
enum class FormatType { Linear, Optimal, Buffer };
|
||||
|
||||
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
|
||||
const u32 GuestWarpSize = 32;
|
||||
|
||||
/// Handles data specific to a physical device.
|
||||
class Device final {
|
||||
public:
|
||||
explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
|
||||
const vk::InstanceDispatch& dld);
|
||||
~Device();
|
||||
|
||||
/**
|
||||
* Returns a format supported by the device for the passed requeriments.
|
||||
* @param wanted_format The ideal format to be returned. It may not be the returned format.
|
||||
* @param wanted_usage The usage that must be fulfilled even if the format is not supported.
|
||||
* @param format_type Format type usage.
|
||||
* @returns A format supported by the device.
|
||||
*/
|
||||
VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
|
||||
FormatType format_type) const;
|
||||
|
||||
/// Reports a device loss.
|
||||
void ReportLoss() const;
|
||||
|
||||
/// Reports a shader to Nsight Aftermath.
|
||||
void SaveShader(const std::vector<u32>& spirv) const;
|
||||
|
||||
/// Returns the dispatch loader with direct function pointers of the device.
|
||||
const vk::DeviceDispatch& GetDispatchLoader() const {
|
||||
return dld;
|
||||
}
|
||||
|
||||
/// Returns the logical device.
|
||||
const vk::Device& GetLogical() const {
|
||||
return logical;
|
||||
}
|
||||
|
||||
/// Returns the physical device.
|
||||
vk::PhysicalDevice GetPhysical() const {
|
||||
return physical;
|
||||
}
|
||||
|
||||
/// Returns the main graphics queue.
|
||||
vk::Queue GetGraphicsQueue() const {
|
||||
return graphics_queue;
|
||||
}
|
||||
|
||||
/// Returns the main present queue.
|
||||
vk::Queue GetPresentQueue() const {
|
||||
return present_queue;
|
||||
}
|
||||
|
||||
/// Returns main graphics queue family index.
|
||||
u32 GetGraphicsFamily() const {
|
||||
return graphics_family;
|
||||
}
|
||||
|
||||
/// Returns main present queue family index.
|
||||
u32 GetPresentFamily() const {
|
||||
return present_family;
|
||||
}
|
||||
|
||||
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
|
||||
u32 ApiVersion() const {
|
||||
return properties.apiVersion;
|
||||
}
|
||||
|
||||
/// Returns the current driver version provided in Vulkan-formatted version numbers.
|
||||
u32 GetDriverVersion() const {
|
||||
return properties.driverVersion;
|
||||
}
|
||||
|
||||
/// Returns the device name.
|
||||
std::string_view GetModelName() const {
|
||||
return properties.deviceName;
|
||||
}
|
||||
|
||||
/// Returns the driver ID.
|
||||
VkDriverIdKHR GetDriverID() const {
|
||||
return driver_id;
|
||||
}
|
||||
|
||||
/// Returns uniform buffer alignment requeriment.
|
||||
VkDeviceSize GetUniformBufferAlignment() const {
|
||||
return properties.limits.minUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns storage alignment requeriment.
|
||||
VkDeviceSize GetStorageBufferAlignment() const {
|
||||
return properties.limits.minStorageBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns the maximum range for storage buffers.
|
||||
VkDeviceSize GetMaxStorageBufferRange() const {
|
||||
return properties.limits.maxStorageBufferRange;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for push constants.
|
||||
VkDeviceSize GetMaxPushConstantsSize() const {
|
||||
return properties.limits.maxPushConstantsSize;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for shared memory.
|
||||
u32 GetMaxComputeSharedMemorySize() const {
|
||||
return properties.limits.maxComputeSharedMemorySize;
|
||||
}
|
||||
|
||||
/// Returns true if ASTC is natively supported.
|
||||
bool IsOptimalAstcSupported() const {
|
||||
return is_optimal_astc_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports float16 natively
|
||||
bool IsFloat16Supported() const {
|
||||
return is_float16_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
|
||||
bool IsWarpSizePotentiallyBiggerThanGuest() const {
|
||||
return is_warp_potentially_bigger;
|
||||
}
|
||||
|
||||
/// Returns true if the device can be forced to use the guest warp size.
|
||||
bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
|
||||
return guest_warp_stages & stage;
|
||||
}
|
||||
|
||||
/// Returns true if formatless image load is supported.
|
||||
bool IsFormatlessImageLoadSupported() const {
|
||||
return is_formatless_image_load_supported;
|
||||
}
|
||||
|
||||
/// Returns true when blitting from and to depth stencil images is supported.
|
||||
bool IsBlitDepthStencilSupported() const {
|
||||
return is_blit_depth_stencil_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_NV_viewport_swizzle.
|
||||
bool IsNvViewportSwizzleSupported() const {
|
||||
return nv_viewport_swizzle;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_scalar_block_layout.
|
||||
bool IsKhrUniformBufferStandardLayoutSupported() const {
|
||||
return khr_uniform_buffer_standard_layout;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_index_type_uint8.
|
||||
bool IsExtIndexTypeUint8Supported() const {
|
||||
return ext_index_type_uint8;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_sampler_filter_minmax.
|
||||
bool IsExtSamplerFilterMinmaxSupported() const {
|
||||
return ext_sampler_filter_minmax;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
|
||||
bool IsExtDepthRangeUnrestrictedSupported() const {
|
||||
return ext_depth_range_unrestricted;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
|
||||
bool IsExtShaderViewportIndexLayerSupported() const {
|
||||
return ext_shader_viewport_index_layer;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_transform_feedback.
|
||||
bool IsExtTransformFeedbackSupported() const {
|
||||
return ext_transform_feedback;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_custom_border_color.
|
||||
bool IsExtCustomBorderColorSupported() const {
|
||||
return ext_custom_border_color;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
|
||||
bool IsExtExtendedDynamicStateSupported() const {
|
||||
return ext_extended_dynamic_state;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_stencil_export.
|
||||
bool IsExtShaderStencilExportSupported() const {
|
||||
return ext_shader_stencil_export;
|
||||
}
|
||||
|
||||
/// Returns true when a known debugging tool is attached.
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_renderdoc || has_nsight_graphics;
|
||||
}
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return vendor_name;
|
||||
}
|
||||
|
||||
/// Returns the list of available extensions.
|
||||
const std::vector<std::string>& GetAvailableExtensions() const {
|
||||
return reported_extensions;
|
||||
}
|
||||
|
||||
/// Returns true if the setting for async shader compilation is enabled.
|
||||
bool UseAsynchronousShaders() const {
|
||||
return use_asynchronous_shaders;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Checks if the physical device is suitable.
|
||||
void CheckSuitability() const;
|
||||
|
||||
/// Loads extensions into a vector and stores available ones in this object.
|
||||
std::vector<const char*> LoadExtensions();
|
||||
|
||||
/// Sets up queue families.
|
||||
void SetupFamilies(VkSurfaceKHR surface);
|
||||
|
||||
/// Sets up device features.
|
||||
void SetupFeatures();
|
||||
|
||||
/// Collects telemetry information from the device.
|
||||
void CollectTelemetryParameters();
|
||||
|
||||
/// Collects information about attached tools.
|
||||
void CollectToolingInfo();
|
||||
|
||||
/// Returns a list of queue initialization descriptors.
|
||||
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
|
||||
|
||||
/// Returns true if ASTC textures are natively supported.
|
||||
bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
|
||||
|
||||
/// Returns true if the device natively supports blitting depth stencil images.
|
||||
bool TestDepthStencilBlits() const;
|
||||
|
||||
/// Returns true if a format is supported.
|
||||
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
|
||||
FormatType format_type) const;
|
||||
|
||||
VkInstance instance; ///< Vulkan instance.
|
||||
vk::DeviceDispatch dld; ///< Device function pointers.
|
||||
vk::PhysicalDevice physical; ///< Physical device.
|
||||
VkPhysicalDeviceProperties properties; ///< Device properties.
|
||||
vk::Device logical; ///< Logical device.
|
||||
vk::Queue graphics_queue; ///< Main graphics queue.
|
||||
vk::Queue present_queue; ///< Main present queue.
|
||||
u32 instance_version{}; ///< Vulkan onstance version.
|
||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||
u32 present_family{}; ///< Main present queue family index.
|
||||
VkDriverIdKHR driver_id{}; ///< Driver ID.
|
||||
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
|
||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
|
||||
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
|
||||
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
|
||||
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
|
||||
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
||||
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
|
||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
|
||||
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
|
||||
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
|
||||
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
|
||||
bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
|
||||
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
|
||||
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
|
||||
bool has_renderdoc{}; ///< Has RenderDoc attached
|
||||
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
|
||||
|
||||
// Asynchronous Graphics Pipeline setting
|
||||
bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
|
||||
|
||||
// Telemetry parameters
|
||||
std::string vendor_name; ///< Device's driver name.
|
||||
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
|
||||
|
||||
/// Format properties dictionary.
|
||||
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
|
||||
|
||||
/// Nsight Aftermath GPU crash tracker
|
||||
NsightAftermathTracker nsight_aftermath_tracker;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,230 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns the size of the memory chunk to allocate for a request of required_size bytes:
/// the smallest of 16, 32, 64 or 128 MiB that fits it, otherwise the size rounded up to a
/// multiple of 256 MiB.
u64 GetAllocationChunkSize(u64 required_size) {
    static constexpr u64 fixed_chunks[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
    for (const u64 chunk : fixed_chunks) {
        if (required_size <= chunk) {
            return chunk;
        }
    }
    return Common::AlignUp(required_size, 256ULL << 20);
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
class VKMemoryAllocation final {
|
||||
public:
|
||||
explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
|
||||
VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_)
|
||||
: device{device_}, memory{std::move(memory_)}, properties{properties_},
|
||||
allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {}
|
||||
|
||||
VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) {
|
||||
auto found = TryFindFreeSection(free_iterator, allocation_size,
|
||||
static_cast<u64>(commit_size), static_cast<u64>(alignment));
|
||||
if (!found) {
|
||||
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
|
||||
static_cast<u64>(alignment));
|
||||
if (!found) {
|
||||
// Signal out of memory, it'll try to do more allocations.
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
|
||||
*found + commit_size);
|
||||
commits.push_back(commit.get());
|
||||
|
||||
// Last commit's address is highly probable to be free.
|
||||
free_iterator = *found + commit_size;
|
||||
|
||||
return commit;
|
||||
}
|
||||
|
||||
void Free(const VKMemoryCommitImpl* commit) {
|
||||
ASSERT(commit);
|
||||
|
||||
const auto it = std::find(std::begin(commits), std::end(commits), commit);
|
||||
if (it == commits.end()) {
|
||||
UNREACHABLE_MSG("Freeing unallocated commit!");
|
||||
return;
|
||||
}
|
||||
commits.erase(it);
|
||||
}
|
||||
|
||||
/// Returns whether this allocation is compatible with the arguments.
|
||||
bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const {
|
||||
return (wanted_properties & properties) && (type_mask & shifted_type) != 0;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr u32 ShiftType(u32 type) {
|
||||
return 1U << type;
|
||||
}
|
||||
|
||||
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
|
||||
/// requirements.
|
||||
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
|
||||
u64 iterator = Common::AlignUp(start, alignment);
|
||||
while (iterator + size <= end) {
|
||||
const u64 try_left = iterator;
|
||||
const u64 try_right = try_left + size;
|
||||
|
||||
bool overlap = false;
|
||||
for (const auto& commit : commits) {
|
||||
const auto [commit_left, commit_right] = commit->interval;
|
||||
if (try_left < commit_right && commit_left < try_right) {
|
||||
// There's an overlap, continue the search where the overlapping commit ends.
|
||||
iterator = Common::AlignUp(commit_right, alignment);
|
||||
overlap = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!overlap) {
|
||||
// A free address has been found.
|
||||
return try_left;
|
||||
}
|
||||
}
|
||||
|
||||
// No free regions where found, return an empty optional.
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const Device& device; ///< Vulkan device.
|
||||
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
||||
const VkMemoryPropertyFlags properties; ///< Vulkan properties.
|
||||
const u64 allocation_size; ///< Size of this allocation.
|
||||
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
|
||||
|
||||
/// Hints where the next free region is likely going to be.
|
||||
u64 free_iterator{};
|
||||
|
||||
/// Stores all commits done from this allocation.
|
||||
std::vector<const VKMemoryCommitImpl*> commits;
|
||||
};
|
||||
|
||||
/// Keeps a reference to the device and caches the memory properties of its physical device.
VKMemoryManager::VKMemoryManager(const Device& device_)
    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}

/// Defaulted out of line.
VKMemoryManager::~VKMemoryManager() = default;
|
||||
|
||||
/// Commits memory that satisfies "requirements", allocating a new chunk when the existing
/// allocations cannot serve the request.
/// @returns The commit, or an empty commit when a new chunk could not be allocated.
VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements,
                                       bool host_visible) {
    // Host visible requests look for visible and coherent memory; everything else looks for
    // device local memory.
    VkMemoryPropertyFlags wanted_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    if (host_visible) {
        wanted_properties =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    }

    // Fast path: reuse space in an allocation that already exists.
    if (auto existing = TryAllocCommit(requirements, wanted_properties)) {
        return existing;
    }

    // No room was found, allocate another chunk.
    const u64 chunk_size = GetAllocationChunkSize(requirements.size);
    if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
        // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
        // Allocation has failed, panic.
        UNREACHABLE_MSG("Ran out of VRAM!");
        return {};
    }

    // With a fresh allocation available this second attempt is expected to succeed; a failure
    // here indicates a bug.
    auto fresh = TryAllocCommit(requirements, wanted_properties);
    ASSERT(fresh);
    return fresh;
}
|
||||
|
||||
/// Commits memory for "buffer" and binds the buffer to it.
VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) {
    const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
    auto buffer_commit = Commit(requirements, host_visible);
    buffer.BindMemory(buffer_commit->GetMemory(), buffer_commit->GetOffset());
    return buffer_commit;
}
|
||||
|
||||
/// Commits memory for "image" and binds the image to it.
VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) {
    const auto requirements = device.GetLogical().GetImageMemoryRequirements(*image);
    auto image_commit = Commit(requirements, host_visible);
    image.BindMemory(image_commit->GetMemory(), image_commit->GetOffset());
    return image_commit;
}
|
||||
|
||||
/// Allocates a new chunk of device memory and registers it as an allocation.
/// @param wanted_properties Property flags the memory type must provide (all of them).
/// @param type_mask Bitmask of the memory type indices that are acceptable.
/// @param size Size in bytes of the chunk to allocate.
/// @returns True when the allocation succeeded, false when the device allocation failed.
bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask,
                                  u64 size) {
    const u32 type = [&] {
        for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
            const auto flags = properties.memoryTypes[type_index].propertyFlags;
            // Require every wanted bit: a partial match (e.g. host visible but not coherent)
            // would hand out memory without the properties the caller relies on.
            if ((type_mask & (1U << type_index)) != 0 &&
                (flags & wanted_properties) == wanted_properties) {
                // The type matches in type and in the wanted properties.
                return type_index;
            }
        }
        UNREACHABLE_MSG("Couldn't find a compatible memory type!");
        return 0U;
    }();

    // Try to allocate found type.
    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = nullptr,
        .allocationSize = size,
        .memoryTypeIndex = type,
    });
    if (!memory) {
        LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
        return false;
    }

    allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory),
                                                               wanted_properties, size, type));
    return true;
}
|
||||
|
||||
/// Walks the existing allocations in order and returns the first commit that one of the
/// compatible allocations is able to provide. Returns an empty commit when none succeeds.
VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements,
                                               VkMemoryPropertyFlags wanted_properties) {
    for (auto& candidate : allocations) {
        if (candidate->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
            if (auto commit = candidate->Commit(requirements.size, requirements.alignment)) {
                return commit;
            }
        }
    }
    return {};
}
|
||||
|
||||
/// Stores the owning allocation, a reference to its device memory and the [begin, end) interval
/// that this commit occupies.
VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
                                       const vk::DeviceMemory& memory_, u64 begin_, u64 end_)
    : device{device_},
      memory{memory_},
      interval{begin_, end_},
      allocation{allocation_} {}
|
||||
|
||||
/// Hands the commit back to the allocation it was created from.
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
    allocation->Free(this);
}
|
||||
|
||||
/// Maps `size` bytes starting `offset_` bytes after the beginning of the commit.
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
    // The commit lives inside a larger allocation, so its own start is added to the offset.
    const u64 allocation_offset = interval.first + offset_;
    return MemoryMap(this, std::span<u8>(memory.Map(allocation_offset, size), size));
}
|
||||
|
||||
/// Unmaps the device memory backing this commit.
void VKMemoryCommitImpl::Unmap() const {
    memory.Unmap();
}
|
||||
|
||||
/// Maps the whole commit, from the start to the end of its interval.
MemoryMap VKMemoryCommitImpl::Map() const {
    const u64 commit_size = interval.second - interval.first;
    return Map(commit_size);
}
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,132 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class MemoryMap;
|
||||
class VKMemoryAllocation;
|
||||
class VKMemoryCommitImpl;
|
||||
|
||||
using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
|
||||
|
||||
class VKMemoryManager final {
|
||||
public:
|
||||
explicit VKMemoryManager(const Device& device_);
|
||||
VKMemoryManager(const VKMemoryManager&) = delete;
|
||||
~VKMemoryManager();
|
||||
|
||||
/**
|
||||
* Commits a memory with the specified requeriments.
|
||||
* @param requirements Requirements returned from a Vulkan call.
|
||||
* @param host_visible Signals the allocator that it *must* use host visible and coherent
|
||||
* memory. When passing false, it will try to allocate device local memory.
|
||||
* @returns A memory commit.
|
||||
*/
|
||||
VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible);
|
||||
|
||||
/// Commits memory required by the buffer and binds it.
|
||||
VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
|
||||
|
||||
/// Commits memory required by the image and binds it.
|
||||
VKMemoryCommit Commit(const vk::Image& image, bool host_visible);
|
||||
|
||||
private:
|
||||
/// Allocates a chunk of memory.
|
||||
bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
|
||||
|
||||
/// Tries to allocate a memory commit.
|
||||
VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
|
||||
VkMemoryPropertyFlags wanted_properties);
|
||||
|
||||
const Device& device; ///< Device handler.
|
||||
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
|
||||
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
|
||||
};
|
||||
|
||||
class VKMemoryCommitImpl final {
|
||||
friend VKMemoryAllocation;
|
||||
friend MemoryMap;
|
||||
|
||||
public:
|
||||
explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
|
||||
const vk::DeviceMemory& memory_, u64 begin_, u64 end_);
|
||||
~VKMemoryCommitImpl();
|
||||
|
||||
/// Maps a memory region and returns a pointer to it.
|
||||
/// It's illegal to have more than one memory map at the same time.
|
||||
MemoryMap Map(u64 size, u64 offset = 0) const;
|
||||
|
||||
/// Maps the whole commit and returns a pointer to it.
|
||||
/// It's illegal to have more than one memory map at the same time.
|
||||
MemoryMap Map() const;
|
||||
|
||||
/// Returns the Vulkan memory handler.
|
||||
VkDeviceMemory GetMemory() const {
|
||||
return *memory;
|
||||
}
|
||||
|
||||
/// Returns the start position of the commit relative to the allocation.
|
||||
VkDeviceSize GetOffset() const {
|
||||
return static_cast<VkDeviceSize>(interval.first);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Unmaps memory.
|
||||
void Unmap() const;
|
||||
|
||||
const Device& device; ///< Vulkan device.
|
||||
const vk::DeviceMemory& memory; ///< Vulkan device memory handler.
|
||||
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
|
||||
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
|
||||
};
|
||||
|
||||
/// Holds ownership of a memory map.
|
||||
class MemoryMap final {
|
||||
public:
|
||||
explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
|
||||
: commit{commit_}, span{span_} {}
|
||||
|
||||
~MemoryMap() {
|
||||
if (commit) {
|
||||
commit->Unmap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Prematurely releases the memory map.
|
||||
void Release() {
|
||||
commit->Unmap();
|
||||
commit = nullptr;
|
||||
}
|
||||
|
||||
/// Returns a span to the memory map.
|
||||
[[nodiscard]] std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
/// Returns the address of the memory map.
|
||||
[[nodiscard]] u8* Address() const noexcept {
|
||||
return span.data();
|
||||
}
|
||||
|
||||
/// Returns the address of the memory map;
|
||||
[[nodiscard]] operator u8*() const noexcept {
|
||||
return span.data();
|
||||
}
|
||||
|
||||
private:
|
||||
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
|
||||
std::span<u8> span; ///< Address to the mapped memory.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,99 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
|
||||
using ImageEntry = VideoCommon::Shader::ImageEntry;
|
||||
|
||||
constexpr u32 DESCRIPTOR_SET = 0;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
|
||||
: ConstBuffer{entry_}, index{index_} {}
|
||||
|
||||
constexpr u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 index{};
|
||||
};
|
||||
|
||||
struct GlobalBufferEntry {
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
bool is_written{};
|
||||
};
|
||||
|
||||
struct ShaderEntries {
|
||||
u32 NumBindings() const {
|
||||
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
|
||||
uniform_texels.size() + samplers.size() + storage_texels.size() +
|
||||
images.size());
|
||||
}
|
||||
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<GlobalBufferEntry> global_buffers;
|
||||
std::vector<UniformTexelEntry> uniform_texels;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<StorageTexelEntry> storage_texels;
|
||||
std::vector<ImageEntry> images;
|
||||
std::set<u32> attributes;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
bool uses_warps{};
|
||||
};
|
||||
|
||||
struct Specialization final {
|
||||
u32 base_binding{};
|
||||
|
||||
// Compute specific
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
u32 shared_memory_size{};
|
||||
|
||||
// Graphics specific
|
||||
std::optional<float> point_size;
|
||||
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
|
||||
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
||||
bool ndc_minus_one_to_one{};
|
||||
bool early_fragment_tests{};
|
||||
float alpha_test_ref{};
|
||||
Maxwell::ComparisonOp alpha_test_func{};
|
||||
};
|
||||
// Old gcc versions don't consider this trivially copyable.
|
||||
// static_assert(std::is_trivially_copyable_v<Specialization>);
|
||||
|
||||
struct SPIRVShader {
|
||||
std::vector<u32> code;
|
||||
ShaderEntries entries;
|
||||
};
|
||||
|
||||
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
|
||||
|
||||
std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
const Specialization& specialization);
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,168 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/literals.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Common::Literals;
|
||||
|
||||
constexpr VkBufferUsageFlags BUFFER_USAGE =
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
|
||||
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
||||
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
||||
|
||||
constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256_MiB;
|
||||
|
||||
/// Find a memory type with the passed requirements
|
||||
std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
|
||||
VkMemoryPropertyFlags wanted,
|
||||
u32 filter = std::numeric_limits<u32>::max()) {
|
||||
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
|
||||
const auto flags = properties.memoryTypes[i].propertyFlags;
|
||||
if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Get the preferred host visible memory type.
|
||||
u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
|
||||
u32 filter = std::numeric_limits<u32>::max()) {
|
||||
// Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
|
||||
// Otherwise search for a host visible allocation.
|
||||
static constexpr auto HOST_MEMORY =
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
|
||||
std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
|
||||
if (!preferred_type) {
|
||||
preferred_type = FindMemoryType(properties, HOST_MEMORY);
|
||||
ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
|
||||
}
|
||||
return preferred_type.value_or(0);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Creates the buffer with its memory and reserves the initial watches of both watch lists.
VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_)
    : device{device_}, scheduler{scheduler_} {
    CreateBuffers();

    // Both lists start with the same number of watches.
    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
|
||||
|
||||
VKStreamBuffer::~VKStreamBuffer() = default;
|
||||
|
||||
/// Reserves `size` bytes of the stream buffer, aligning the current offset when an alignment is
/// given, and maps them. Returns the mapped pointer along with the offset inside the buffer.
std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
    ASSERT(size <= stream_buffer_size);
    mapped_size = size;

    if (alignment > 0) {
        offset = Common::AlignUp(offset, alignment);
    }

    WaitPendingOperations(offset);

    const bool would_overflow = offset + size > stream_buffer_size;
    if (would_overflow) {
        // Remember how many watches this cycle used, then start over from the beginning.
        invalidation_mark = current_watch_cursor;
        current_watch_cursor = 0;
        offset = 0;

        // The watches just recorded become the previous ones; waiting restarts from scratch.
        std::swap(previous_watches, current_watches);
        wait_cursor = 0;
        wait_bound = 0;

        // Ensure that we don't wait for uncommitted fences.
        scheduler.Flush();
    }

    const auto mapped_pointer = memory.Map(offset, size);
    return std::make_pair(mapped_pointer, offset);
}
|
||||
|
||||
/// Unmaps the memory, advances the offset by `size` and records a watch with the new offset and
/// the scheduler's current tick.
void VKStreamBuffer::Unmap(u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");

    memory.Unmap();

    offset += size;

    const bool needs_more_watches = current_watch_cursor + 1 >= current_watches.size();
    if (needs_more_watches) {
        // Ensure that there are enough watches.
        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
    }

    Watch& watch = current_watches[current_watch_cursor];
    ++current_watch_cursor;
    watch.upper_bound = offset;
    watch.tick = scheduler.CurrentTick();
}
|
||||
|
||||
void VKStreamBuffer::CreateBuffers() {
|
||||
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
|
||||
const u32 preferred_type = GetMemoryType(memory_properties);
|
||||
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
|
||||
|
||||
// Substract from the preferred heap size some bytes to avoid getting out of memory.
|
||||
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
|
||||
// As per DXVK's example, using `heap_size / 2`
|
||||
const VkDeviceSize allocable_size = heap_size / 2;
|
||||
buffer = device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
|
||||
.usage = BUFFER_USAGE,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
|
||||
const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
|
||||
const u32 required_flags = requirements.memoryTypeBits;
|
||||
stream_buffer_size = static_cast<u64>(requirements.size);
|
||||
|
||||
memory = device.GetLogical().AllocateMemory({
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.allocationSize = requirements.size,
|
||||
.memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
|
||||
});
|
||||
buffer.BindMemory(*memory, 0);
|
||||
}
|
||||
|
||||
/// Grows the list by `grow_size` default initialized watches.
void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
    const std::size_t grown_size = watches.size() + grow_size;
    watches.resize(grown_size);
}
|
||||
|
||||
/// Waits on watches of the previous cycle until the waited bound reaches the requested one.
/// @param requested_upper_bound Buffer offset that has to be covered by completed watches.
void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
    if (!invalidation_mark) {
        // The buffer has never wrapped around, there is no previous cycle to wait for.
        return;
    }
    // wait_bound starts at zero after every wrap and only grows inside this loop, so waiting is
    // needed while the request reaches past it. The opposite comparison can never be true for an
    // unsigned bound of zero, which would leave the loop unreachable.
    while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
        auto& watch = previous_watches[wait_cursor];
        wait_bound = watch.upper_bound;
        scheduler.Wait(watch.tick);
        ++wait_cursor;
    }
}
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,76 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class VKFenceWatch;
|
||||
class VKScheduler;
|
||||
|
||||
class VKStreamBuffer final {
|
||||
public:
|
||||
explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler);
|
||||
~VKStreamBuffer();
|
||||
|
||||
/**
|
||||
* Reserves a region of memory from the stream buffer.
|
||||
* @param size Size to reserve.
|
||||
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
|
||||
*/
|
||||
std::pair<u8*, u64> Map(u64 size, u64 alignment);
|
||||
|
||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||
void Unmap(u64 size);
|
||||
|
||||
VkBuffer Handle() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
u64 Address() const noexcept {
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Watch {
|
||||
u64 tick{};
|
||||
u64 upper_bound{};
|
||||
};
|
||||
|
||||
/// Creates Vulkan buffer handles committing the required the required memory.
|
||||
void CreateBuffers();
|
||||
|
||||
/// Increases the amount of watches available.
|
||||
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
|
||||
|
||||
void WaitPendingOperations(u64 requested_upper_bound);
|
||||
|
||||
const Device& device; ///< Vulkan device manager.
|
||||
VKScheduler& scheduler; ///< Command scheduler.
|
||||
|
||||
vk::Buffer buffer; ///< Mapped buffer.
|
||||
vk::DeviceMemory memory; ///< Memory allocation.
|
||||
u64 stream_buffer_size{}; ///< Stream buffer size.
|
||||
|
||||
u64 offset{}; ///< Buffer iterator.
|
||||
u64 mapped_size{}; ///< Size reserved for the current copy.
|
||||
|
||||
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
|
||||
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
|
||||
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
|
||||
|
||||
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
|
||||
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
|
||||
u64 wait_bound{}; ///< Highest offset being watched for completion.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,928 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
namespace Vulkan::vk {
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename Func>
|
||||
void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld,
|
||||
Func&& func) {
|
||||
// Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap
|
||||
// functions.
|
||||
std::stable_sort(devices.begin(), devices.end(),
|
||||
[&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) {
|
||||
return func(vk::PhysicalDevice(lhs, dld).GetProperties(),
|
||||
vk::PhysicalDevice(rhs, dld).GetProperties());
|
||||
});
|
||||
}
|
||||
|
||||
void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices,
|
||||
const InstanceDispatch& dld,
|
||||
std::initializer_list<u32> vendor_ids) {
|
||||
for (auto it = vendor_ids.end(); it != vendor_ids.begin();) {
|
||||
--it;
|
||||
SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) {
|
||||
return lhs.vendorID == id && rhs.vendorID != id;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) {
|
||||
// Sort by name, this will set a base and make GPUs with higher numbers appear first
|
||||
// (e.g. GTX 1650 will intentionally be listed before a GTX 1080).
|
||||
SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
|
||||
return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName};
|
||||
});
|
||||
// Prefer discrete over non-discrete
|
||||
SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
|
||||
return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
|
||||
rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
|
||||
});
|
||||
// Prefer Nvidia over AMD, AMD over Intel, Intel over the rest.
|
||||
SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name,
|
||||
VkInstance instance = nullptr) noexcept {
|
||||
result = reinterpret_cast<T>(dld.vkGetInstanceProcAddr(instance, proc_name));
|
||||
return result != nullptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Proc(T& result, const DeviceDispatch& dld, const char* proc_name, VkDevice device) noexcept {
|
||||
result = reinterpret_cast<T>(dld.vkGetDeviceProcAddr(device, proc_name));
|
||||
}
|
||||
|
||||
/// Fills the device dispatch table with the function pointers queried from the device.
/// Functions that can't be found are left as null pointers.
void Load(VkDevice device, DeviceDispatch& dld) noexcept {
#define LOAD_DEVICE_PROC(name) Proc(dld.name, dld, #name, device)
    LOAD_DEVICE_PROC(vkAcquireNextImageKHR);
    LOAD_DEVICE_PROC(vkAllocateCommandBuffers);
    LOAD_DEVICE_PROC(vkAllocateDescriptorSets);
    LOAD_DEVICE_PROC(vkAllocateMemory);
    LOAD_DEVICE_PROC(vkBeginCommandBuffer);
    LOAD_DEVICE_PROC(vkBindBufferMemory);
    LOAD_DEVICE_PROC(vkBindImageMemory);
    LOAD_DEVICE_PROC(vkCmdBeginQuery);
    LOAD_DEVICE_PROC(vkCmdBeginRenderPass);
    LOAD_DEVICE_PROC(vkCmdBeginTransformFeedbackEXT);
    LOAD_DEVICE_PROC(vkCmdBeginDebugUtilsLabelEXT);
    LOAD_DEVICE_PROC(vkCmdBindDescriptorSets);
    LOAD_DEVICE_PROC(vkCmdBindIndexBuffer);
    LOAD_DEVICE_PROC(vkCmdBindPipeline);
    LOAD_DEVICE_PROC(vkCmdBindTransformFeedbackBuffersEXT);
    LOAD_DEVICE_PROC(vkCmdBindVertexBuffers);
    LOAD_DEVICE_PROC(vkCmdBlitImage);
    LOAD_DEVICE_PROC(vkCmdClearAttachments);
    LOAD_DEVICE_PROC(vkCmdCopyBuffer);
    LOAD_DEVICE_PROC(vkCmdCopyBufferToImage);
    LOAD_DEVICE_PROC(vkCmdCopyImage);
    LOAD_DEVICE_PROC(vkCmdCopyImageToBuffer);
    LOAD_DEVICE_PROC(vkCmdDispatch);
    LOAD_DEVICE_PROC(vkCmdDraw);
    LOAD_DEVICE_PROC(vkCmdDrawIndexed);
    LOAD_DEVICE_PROC(vkCmdEndQuery);
    LOAD_DEVICE_PROC(vkCmdEndRenderPass);
    LOAD_DEVICE_PROC(vkCmdEndTransformFeedbackEXT);
    LOAD_DEVICE_PROC(vkCmdEndDebugUtilsLabelEXT);
    LOAD_DEVICE_PROC(vkCmdFillBuffer);
    LOAD_DEVICE_PROC(vkCmdPipelineBarrier);
    LOAD_DEVICE_PROC(vkCmdPushConstants);
    LOAD_DEVICE_PROC(vkCmdSetBlendConstants);
    LOAD_DEVICE_PROC(vkCmdSetDepthBias);
    LOAD_DEVICE_PROC(vkCmdSetDepthBounds);
    LOAD_DEVICE_PROC(vkCmdSetEvent);
    LOAD_DEVICE_PROC(vkCmdSetScissor);
    LOAD_DEVICE_PROC(vkCmdSetStencilCompareMask);
    LOAD_DEVICE_PROC(vkCmdSetStencilReference);
    LOAD_DEVICE_PROC(vkCmdSetStencilWriteMask);
    LOAD_DEVICE_PROC(vkCmdSetViewport);
    LOAD_DEVICE_PROC(vkCmdWaitEvents);
    LOAD_DEVICE_PROC(vkCmdBindVertexBuffers2EXT);
    LOAD_DEVICE_PROC(vkCmdSetCullModeEXT);
    LOAD_DEVICE_PROC(vkCmdSetDepthBoundsTestEnableEXT);
    LOAD_DEVICE_PROC(vkCmdSetDepthCompareOpEXT);
    LOAD_DEVICE_PROC(vkCmdSetDepthTestEnableEXT);
    LOAD_DEVICE_PROC(vkCmdSetDepthWriteEnableEXT);
    LOAD_DEVICE_PROC(vkCmdSetFrontFaceEXT);
    LOAD_DEVICE_PROC(vkCmdSetPrimitiveTopologyEXT);
    LOAD_DEVICE_PROC(vkCmdSetStencilOpEXT);
    LOAD_DEVICE_PROC(vkCmdSetStencilTestEnableEXT);
    LOAD_DEVICE_PROC(vkCmdResolveImage);
    LOAD_DEVICE_PROC(vkCreateBuffer);
    LOAD_DEVICE_PROC(vkCreateBufferView);
    LOAD_DEVICE_PROC(vkCreateCommandPool);
    LOAD_DEVICE_PROC(vkCreateComputePipelines);
    LOAD_DEVICE_PROC(vkCreateDescriptorPool);
    LOAD_DEVICE_PROC(vkCreateDescriptorSetLayout);
    LOAD_DEVICE_PROC(vkCreateDescriptorUpdateTemplateKHR);
    LOAD_DEVICE_PROC(vkCreateEvent);
    LOAD_DEVICE_PROC(vkCreateFence);
    LOAD_DEVICE_PROC(vkCreateFramebuffer);
    LOAD_DEVICE_PROC(vkCreateGraphicsPipelines);
    LOAD_DEVICE_PROC(vkCreateImage);
    LOAD_DEVICE_PROC(vkCreateImageView);
    LOAD_DEVICE_PROC(vkCreatePipelineLayout);
    LOAD_DEVICE_PROC(vkCreateQueryPool);
    LOAD_DEVICE_PROC(vkCreateRenderPass);
    LOAD_DEVICE_PROC(vkCreateSampler);
    LOAD_DEVICE_PROC(vkCreateSemaphore);
    LOAD_DEVICE_PROC(vkCreateShaderModule);
    LOAD_DEVICE_PROC(vkCreateSwapchainKHR);
    LOAD_DEVICE_PROC(vkDestroyBuffer);
    LOAD_DEVICE_PROC(vkDestroyBufferView);
    LOAD_DEVICE_PROC(vkDestroyCommandPool);
    LOAD_DEVICE_PROC(vkDestroyDescriptorPool);
    LOAD_DEVICE_PROC(vkDestroyDescriptorSetLayout);
    LOAD_DEVICE_PROC(vkDestroyDescriptorUpdateTemplateKHR);
    LOAD_DEVICE_PROC(vkDestroyEvent);
    LOAD_DEVICE_PROC(vkDestroyFence);
    LOAD_DEVICE_PROC(vkDestroyFramebuffer);
    LOAD_DEVICE_PROC(vkDestroyImage);
    LOAD_DEVICE_PROC(vkDestroyImageView);
    LOAD_DEVICE_PROC(vkDestroyPipeline);
    LOAD_DEVICE_PROC(vkDestroyPipelineLayout);
    LOAD_DEVICE_PROC(vkDestroyQueryPool);
    LOAD_DEVICE_PROC(vkDestroyRenderPass);
    LOAD_DEVICE_PROC(vkDestroySampler);
    LOAD_DEVICE_PROC(vkDestroySemaphore);
    LOAD_DEVICE_PROC(vkDestroyShaderModule);
    LOAD_DEVICE_PROC(vkDestroySwapchainKHR);
    LOAD_DEVICE_PROC(vkDeviceWaitIdle);
    LOAD_DEVICE_PROC(vkEndCommandBuffer);
    LOAD_DEVICE_PROC(vkFreeCommandBuffers);
    LOAD_DEVICE_PROC(vkFreeDescriptorSets);
    LOAD_DEVICE_PROC(vkFreeMemory);
    LOAD_DEVICE_PROC(vkGetBufferMemoryRequirements);
    LOAD_DEVICE_PROC(vkGetDeviceQueue);
    LOAD_DEVICE_PROC(vkGetEventStatus);
    LOAD_DEVICE_PROC(vkGetFenceStatus);
    LOAD_DEVICE_PROC(vkGetImageMemoryRequirements);
    LOAD_DEVICE_PROC(vkGetQueryPoolResults);
    LOAD_DEVICE_PROC(vkGetSemaphoreCounterValueKHR);
    LOAD_DEVICE_PROC(vkMapMemory);
    LOAD_DEVICE_PROC(vkQueueSubmit);
    LOAD_DEVICE_PROC(vkResetFences);
    LOAD_DEVICE_PROC(vkResetQueryPoolEXT);
    LOAD_DEVICE_PROC(vkSetDebugUtilsObjectNameEXT);
    LOAD_DEVICE_PROC(vkSetDebugUtilsObjectTagEXT);
    LOAD_DEVICE_PROC(vkUnmapMemory);
    LOAD_DEVICE_PROC(vkUpdateDescriptorSetWithTemplateKHR);
    LOAD_DEVICE_PROC(vkUpdateDescriptorSets);
    LOAD_DEVICE_PROC(vkWaitForFences);
    LOAD_DEVICE_PROC(vkWaitSemaphoresKHR);
#undef LOAD_DEVICE_PROC
}
|
||||
|
||||
template <typename T>
|
||||
void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type,
|
||||
const char* name) {
|
||||
const VkDebugUtilsObjectNameInfoEXT name_info{
|
||||
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.objectType = VK_OBJECT_TYPE_IMAGE,
|
||||
.objectHandle = reinterpret_cast<u64>(handle),
|
||||
.pObjectName = name,
|
||||
};
|
||||
Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Loads the functions that are available before an instance exists.
/// Loading stops at the first function that can't be found, and false is returned.
bool Load(InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name)
    if (!X(vkCreateInstance)) {
        return false;
    }
    if (!X(vkEnumerateInstanceExtensionProperties)) {
        return false;
    }
    return X(vkEnumerateInstanceLayerProperties);
#undef X
}
|
||||
|
||||
/// Loads the instance level functions into the dispatch table.
/// @param instance Instance the functions are queried from.
/// @param dld Dispatch table to fill. Its vkGetInstanceProcAddr is used for the queries.
/// @returns True when every required function was found. Loading of the required functions stops
///          at the first one that is missing.
bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name, instance)
    // These functions may fail to load depending on the enabled extensions.
    // Don't return a failure on these.
    X(vkCreateDebugUtilsMessengerEXT);
    X(vkDestroyDebugUtilsMessengerEXT);
    X(vkDestroySurfaceKHR);
    X(vkGetPhysicalDeviceFeatures2KHR);
    X(vkGetPhysicalDeviceProperties2KHR);
    X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
    X(vkGetPhysicalDeviceSurfaceFormatsKHR);
    X(vkGetPhysicalDeviceSurfacePresentModesKHR);
    X(vkGetPhysicalDeviceSurfaceSupportKHR);
    X(vkGetSwapchainImagesKHR);
    X(vkQueuePresentKHR);

    // Required functions. vkDestroyDevice used to be listed twice; one lookup is enough.
    return X(vkCreateDevice) && X(vkDestroyDevice) &&
           X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) &&
           X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) &&
           X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) &&
           X(vkGetPhysicalDeviceQueueFamilyProperties);
#undef X
}
|
||||
|
||||
const char* Exception::what() const noexcept {
|
||||
return ToString(result);
|
||||
}
|
||||
|
||||
/// Returns the name of the enumerator that matches the result, or "Unknown" when the value is
/// none of the listed ones.
const char* ToString(VkResult result) noexcept {
// Each entry returns the enumerator's own spelling as a string.
#define X(name)                                                                                    \
    case name:                                                                                     \
        return #name;
    switch (result) {
        X(VK_SUCCESS)
        X(VK_NOT_READY)
        X(VK_TIMEOUT)
        X(VK_EVENT_SET)
        X(VK_EVENT_RESET)
        X(VK_INCOMPLETE)
        X(VK_ERROR_OUT_OF_HOST_MEMORY)
        X(VK_ERROR_OUT_OF_DEVICE_MEMORY)
        X(VK_ERROR_INITIALIZATION_FAILED)
        X(VK_ERROR_DEVICE_LOST)
        X(VK_ERROR_MEMORY_MAP_FAILED)
        X(VK_ERROR_LAYER_NOT_PRESENT)
        X(VK_ERROR_EXTENSION_NOT_PRESENT)
        X(VK_ERROR_FEATURE_NOT_PRESENT)
        X(VK_ERROR_INCOMPATIBLE_DRIVER)
        X(VK_ERROR_TOO_MANY_OBJECTS)
        X(VK_ERROR_FORMAT_NOT_SUPPORTED)
        X(VK_ERROR_FRAGMENTED_POOL)
        X(VK_ERROR_OUT_OF_POOL_MEMORY)
        X(VK_ERROR_INVALID_EXTERNAL_HANDLE)
        X(VK_ERROR_SURFACE_LOST_KHR)
        X(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR)
        X(VK_SUBOPTIMAL_KHR)
        X(VK_ERROR_OUT_OF_DATE_KHR)
        X(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR)
        X(VK_ERROR_VALIDATION_FAILED_EXT)
        X(VK_ERROR_INVALID_SHADER_NV)
        X(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT)
        X(VK_ERROR_FRAGMENTATION_EXT)
        X(VK_ERROR_NOT_PERMITTED_EXT)
        X(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT)
        X(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT)
        X(VK_ERROR_UNKNOWN)
        X(VK_ERROR_INCOMPATIBLE_VERSION_KHR)
        X(VK_THREAD_IDLE_KHR)
        X(VK_THREAD_DONE_KHR)
        X(VK_OPERATION_DEFERRED_KHR)
        X(VK_OPERATION_NOT_DEFERRED_KHR)
        X(VK_PIPELINE_COMPILE_REQUIRED_EXT)
        X(VK_RESULT_MAX_ENUM)
    }
#undef X
    return "Unknown";
}
|
||||
|
||||
void Destroy(VkInstance instance, const InstanceDispatch& dld) noexcept {
|
||||
dld.vkDestroyInstance(instance, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, const InstanceDispatch& dld) noexcept {
|
||||
dld.vkDestroyDevice(device, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkBuffer handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyBuffer(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkBufferView handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyBufferView(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkCommandPool handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyCommandPool(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkDescriptorPool handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyDescriptorPool(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyDescriptorSetLayout(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkDescriptorUpdateTemplateKHR handle,
|
||||
const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkFreeMemory(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyEvent(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyFence(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkFramebuffer handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyFramebuffer(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkImage handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyImage(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkImageView handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyImageView(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyPipeline(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyPipelineLayout(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkQueryPool handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyQueryPool(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkRenderPass handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyRenderPass(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkSampler handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroySampler(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkSwapchainKHR handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroySwapchainKHR(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkSemaphore handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroySemaphore(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkDevice device, VkShaderModule handle, const DeviceDispatch& dld) noexcept {
|
||||
dld.vkDestroyShaderModule(device, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle,
|
||||
const InstanceDispatch& dld) noexcept {
|
||||
dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr);
|
||||
}
|
||||
|
||||
void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept {
|
||||
dld.vkDestroySurfaceKHR(instance, handle, nullptr);
|
||||
}
|
||||
|
||||
/// Frees the descriptor sets in `sets` from the pool `handle` and returns whatever
/// vkFreeDescriptorSets reports.
VkResult Free(VkDevice device, VkDescriptorPool handle, Span<VkDescriptorSet> sets,
              const DeviceDispatch& dld) noexcept {
    const auto num_sets = sets.size();
    const auto first_set = sets.data();
    return dld.vkFreeDescriptorSets(device, handle, num_sets, first_set);
}
|
||||
|
||||
/// Frees the command buffers in `buffers` from the pool `handle`.
/// No result of the call is inspected here; VK_SUCCESS is returned unconditionally so this
/// overload has the same shape as the descriptor-set one.
VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffers,
              const DeviceDispatch& dld) noexcept {
    const auto num_buffers = buffers.size();
    const auto first_buffer = buffers.data();
    dld.vkFreeCommandBuffers(device, handle, num_buffers, first_buffer);
    return VK_SUCCESS;
}
|
||||
|
||||
/// Creates a Vulkan instance targeting API `version` with the requested layers and extensions.
/// Returns a default-constructed Instance when vkCreateInstance does not succeed, or when
/// vkDestroyInstance cannot be loaded for the freshly created instance.
Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
                          InstanceDispatch& dispatch) noexcept {
    const VkApplicationInfo app_info{
        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pNext = nullptr,
        .pApplicationName = "yuzu Emulator",
        .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
        .pEngineName = "yuzu Emulator",
        .engineVersion = VK_MAKE_VERSION(0, 1, 0),
        .apiVersion = version,
    };
    const VkInstanceCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .pApplicationInfo = &app_info,
        .enabledLayerCount = layers.size(),
        .ppEnabledLayerNames = layers.data(),
        .enabledExtensionCount = extensions.size(),
        .ppEnabledExtensionNames = extensions.data(),
    };

    VkInstance new_instance;
    const auto create_result = dispatch.vkCreateInstance(&create_info, nullptr, &new_instance);
    if (create_result != VK_SUCCESS) {
        // Instance creation itself failed.
        return {};
    }

    const auto destroy_loaded =
        Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", new_instance);
    if (!destroy_loaded) {
        // The instance exists but its destroy entry point could not be resolved, so it cannot
        // be wrapped safely. Note that the raw instance is not released on this path.
        return {};
    }

    return Instance(new_instance, dispatch);
}
|
||||
|
||||
std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
|
||||
u32 num;
|
||||
if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::vector<VkPhysicalDevice> physical_devices(num);
|
||||
if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) {
|
||||
return std::nullopt;
|
||||
}
|
||||
SortPhysicalDevices(physical_devices, *dld);
|
||||
return std::make_optional(std::move(physical_devices));
|
||||
}
|
||||
|
||||
/// Registers `callback` as a debug-utils messenger on this instance.
/// All four severity levels are enabled, with the general and validation message types.
/// Returns a default-constructed DebugCallback when the messenger cannot be created.
DebugCallback Instance::TryCreateDebugCallback(
    PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
    constexpr auto all_severities = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
                                    VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                                    VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
                                    VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
    constexpr auto message_types = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                                   VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT;

    const VkDebugUtilsMessengerCreateInfoEXT create_info{
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
        .pNext = nullptr,
        .flags = 0,
        .messageSeverity = all_severities,
        .messageType = message_types,
        .pfnUserCallback = callback,
        .pUserData = nullptr,
    };

    VkDebugUtilsMessengerEXT new_messenger;
    const auto result =
        dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &new_messenger);
    if (result == VK_SUCCESS) {
        return DebugCallback(new_messenger, handle, *dld);
    }
    return {};
}
|
||||
|
||||
/// Binds `memory` at `offset` to this buffer; the driver's result is handed to Check.
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
    const auto bind_result = dld->vkBindBufferMemory(owner, handle, memory, offset);
    Check(bind_result);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_BUFFER.
void Buffer::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_BUFFER;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_BUFFER_VIEW.
void BufferView::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_BUFFER_VIEW;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Binds `memory` at `offset` to this image; the driver's result is handed to Check.
void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
    const auto bind_result = dld->vkBindImageMemory(owner, handle, memory, offset);
    Check(bind_result);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_IMAGE.
void Image::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_IMAGE;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_IMAGE_VIEW.
void ImageView::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_IMAGE_VIEW;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_DEVICE_MEMORY.
void DeviceMemory::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_DEVICE_MEMORY;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_FENCE.
void Fence::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_FENCE;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_FRAMEBUFFER.
void Framebuffer::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_FRAMEBUFFER;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Allocates `ai.descriptorSetCount` descriptor sets as described by `ai`.
/// Returns an empty DescriptorSets when the pool reports VK_ERROR_OUT_OF_POOL_MEMORY and throws
/// Exception for every other non-success result.
DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
    const std::size_t set_count = ai.descriptorSetCount;
    auto allocated = std::make_unique<VkDescriptorSet[]>(set_count);

    const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, allocated.get());
    if (result == VK_SUCCESS) {
        return DescriptorSets(std::move(allocated), set_count, owner, handle, *dld);
    }
    if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
        // Pool exhaustion is reported to the caller as an empty result rather than an exception.
        return {};
    }
    throw Exception(result);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_DESCRIPTOR_POOL.
void DescriptorPool::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_DESCRIPTOR_POOL;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Allocates `num_buffers` command buffers of the given `level` from this pool.
/// Returns an empty CommandBuffers on VK_ERROR_OUT_OF_POOL_MEMORY and throws Exception for
/// every other non-success result.
CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
    const VkCommandBufferAllocateInfo allocate_info{
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .pNext = nullptr,
        .commandPool = handle,
        .level = level,
        .commandBufferCount = static_cast<u32>(num_buffers),
    };

    auto allocated = std::make_unique<VkCommandBuffer[]>(num_buffers);
    const VkResult result =
        dld->vkAllocateCommandBuffers(owner, &allocate_info, allocated.get());
    if (result == VK_SUCCESS) {
        return CommandBuffers(std::move(allocated), num_buffers, owner, handle, *dld);
    }
    if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
        // Pool exhaustion is reported to the caller as an empty result rather than an exception.
        return {};
    }
    throw Exception(result);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_COMMAND_POOL.
void CommandPool::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_COMMAND_POOL;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
std::vector<VkImage> SwapchainKHR::GetImages() const {
|
||||
u32 num;
|
||||
Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
|
||||
std::vector<VkImage> images(num);
|
||||
Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data()));
|
||||
return images;
|
||||
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_EVENT.
void Event::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_EVENT;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_SHADER_MODULE.
void ShaderModule::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_SHADER_MODULE;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Passes `name` to SetObjectName for this handle, tagged as VK_OBJECT_TYPE_SEMAPHORE.
void Semaphore::SetObjectNameEXT(const char* name) const {
    constexpr auto object_type = VK_OBJECT_TYPE_SEMAPHORE;
    SetObjectName(dld, owner, handle, object_type, name);
}
|
||||
|
||||
/// Creates a logical device on `physical_device` with the given queues and extensions.
/// `next` is stored as the create info's pNext; no layers and no pEnabledFeatures are set.
/// On success the device-level entry points are loaded into `dispatch` before wrapping;
/// on failure a default-constructed Device is returned.
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
                      Span<const char*> enabled_extensions, const void* next,
                      DeviceDispatch& dispatch) noexcept {
    const VkDeviceCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext = next,
        .flags = 0,
        .queueCreateInfoCount = queues_ci.size(),
        .pQueueCreateInfos = queues_ci.data(),
        .enabledLayerCount = 0,
        .ppEnabledLayerNames = nullptr,
        .enabledExtensionCount = enabled_extensions.size(),
        .ppEnabledExtensionNames = enabled_extensions.data(),
        .pEnabledFeatures = nullptr,
    };

    VkDevice new_device;
    const auto result = dispatch.vkCreateDevice(physical_device, &create_info, nullptr, &new_device);
    if (result != VK_SUCCESS) {
        return {};
    }
    Load(new_device, dispatch);
    return Device(new_device, dispatch);
}
|
||||
|
||||
/// Returns queue index 0 of the queue family `family_index`, wrapped in a Queue.
Queue Device::GetQueue(u32 family_index) const noexcept {
    constexpr u32 queue_index = 0;
    VkQueue raw_queue;
    dld->vkGetDeviceQueue(handle, family_index, queue_index, &raw_queue);
    return Queue(raw_queue, *dld);
}
|
||||
|
||||
/// Creates a buffer from `ci`; the creation result is handed to Check before wrapping.
Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
    VkBuffer buffer;
    const auto result = dld->vkCreateBuffer(handle, &ci, nullptr, &buffer);
    Check(result);
    return Buffer(buffer, handle, *dld);
}
|
||||
|
||||
/// Creates a buffer view from `ci`; the creation result is handed to Check before wrapping.
BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
    VkBufferView buffer_view;
    const auto result = dld->vkCreateBufferView(handle, &ci, nullptr, &buffer_view);
    Check(result);
    return BufferView(buffer_view, handle, *dld);
}
|
||||
|
||||
/// Creates an image from `ci`; the creation result is handed to Check before wrapping.
Image Device::CreateImage(const VkImageCreateInfo& ci) const {
    VkImage image;
    const auto result = dld->vkCreateImage(handle, &ci, nullptr, &image);
    Check(result);
    return Image(image, handle, *dld);
}
|
||||
|
||||
/// Creates an image view from `ci`; the creation result is handed to Check before wrapping.
ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
    VkImageView image_view;
    const auto result = dld->vkCreateImageView(handle, &ci, nullptr, &image_view);
    Check(result);
    return ImageView(image_view, handle, *dld);
}
|
||||
|
||||
/// Creates a semaphore with a default create info (no pNext chain, flags = 0) by delegating to
/// the overload that takes an explicit VkSemaphoreCreateInfo.
Semaphore Device::CreateSemaphore() const {
    static constexpr VkSemaphoreCreateInfo default_ci{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
    };
    return CreateSemaphore(default_ci);
}
|
||||
|
||||
/// Creates a semaphore from `ci`; the creation result is handed to Check before wrapping.
Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const {
    VkSemaphore semaphore;
    const auto result = dld->vkCreateSemaphore(handle, &ci, nullptr, &semaphore);
    Check(result);
    return Semaphore(semaphore, handle, *dld);
}
|
||||
|
||||
/// Creates a fence from `ci`; the creation result is handed to Check before wrapping.
Fence Device::CreateFence(const VkFenceCreateInfo& ci) const {
    VkFence fence;
    const auto result = dld->vkCreateFence(handle, &ci, nullptr, &fence);
    Check(result);
    return Fence(fence, handle, *dld);
}
|
||||
|
||||
/// Creates a descriptor pool from `ci`; the creation result is handed to Check before wrapping.
DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const {
    VkDescriptorPool pool;
    const auto result = dld->vkCreateDescriptorPool(handle, &ci, nullptr, &pool);
    Check(result);
    return DescriptorPool(pool, handle, *dld);
}
|
||||
|
||||
/// Creates a render pass from `ci`; the creation result is handed to Check before wrapping.
RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const {
    VkRenderPass render_pass;
    const auto result = dld->vkCreateRenderPass(handle, &ci, nullptr, &render_pass);
    Check(result);
    return RenderPass(render_pass, handle, *dld);
}
|
||||
|
||||
/// Creates a descriptor set layout from `ci`; the creation result is handed to Check before
/// wrapping.
DescriptorSetLayout Device::CreateDescriptorSetLayout(
    const VkDescriptorSetLayoutCreateInfo& ci) const {
    VkDescriptorSetLayout layout;
    const auto result = dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &layout);
    Check(result);
    return DescriptorSetLayout(layout, handle, *dld);
}
|
||||
|
||||
/// Creates a pipeline layout from `ci`; the creation result is handed to Check before wrapping.
PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
    VkPipelineLayout layout;
    const auto result = dld->vkCreatePipelineLayout(handle, &ci, nullptr, &layout);
    Check(result);
    return PipelineLayout(layout, handle, *dld);
}
|
||||
|
||||
/// Creates a single graphics pipeline from `ci`. nullptr is passed in the second argument slot
/// (the pipeline-cache parameter of vkCreateGraphicsPipelines) and the result goes to Check.
Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const {
    VkPipeline pipeline;
    const auto result =
        dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &pipeline);
    Check(result);
    return Pipeline(pipeline, handle, *dld);
}
|
||||
|
||||
/// Creates a single compute pipeline from `ci`. nullptr is passed in the second argument slot
/// (the pipeline-cache parameter of vkCreateComputePipelines) and the result goes to Check.
Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const {
    VkPipeline pipeline;
    const auto result =
        dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &pipeline);
    Check(result);
    return Pipeline(pipeline, handle, *dld);
}
|
||||
|
||||
/// Creates a sampler from `ci`; the creation result is handed to Check before wrapping.
Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const {
    VkSampler sampler;
    const auto result = dld->vkCreateSampler(handle, &ci, nullptr, &sampler);
    Check(result);
    return Sampler(sampler, handle, *dld);
}
|
||||
|
||||
/// Creates a framebuffer from `ci`; the creation result is handed to Check before wrapping.
Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const {
    VkFramebuffer framebuffer;
    const auto result = dld->vkCreateFramebuffer(handle, &ci, nullptr, &framebuffer);
    Check(result);
    return Framebuffer(framebuffer, handle, *dld);
}
|
||||
|
||||
/// Creates a command pool from `ci`; the creation result is handed to Check before wrapping.
CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const {
    VkCommandPool pool;
    const auto result = dld->vkCreateCommandPool(handle, &ci, nullptr, &pool);
    Check(result);
    return CommandPool(pool, handle, *dld);
}
|
||||
|
||||
/// Creates a descriptor update template from `ci`; the creation result is handed to Check
/// before wrapping.
DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR(
    const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const {
    VkDescriptorUpdateTemplateKHR update_template;
    const auto result =
        dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &update_template);
    Check(result);
    return DescriptorUpdateTemplateKHR(update_template, handle, *dld);
}
|
||||
|
||||
/// Creates a query pool from `ci`; the creation result is handed to Check before wrapping.
QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const {
    VkQueryPool pool;
    const auto result = dld->vkCreateQueryPool(handle, &ci, nullptr, &pool);
    Check(result);
    return QueryPool(pool, handle, *dld);
}
|
||||
|
||||
/// Creates a shader module from `ci`; the creation result is handed to Check before wrapping.
ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const {
    VkShaderModule shader_module;
    const auto result = dld->vkCreateShaderModule(handle, &ci, nullptr, &shader_module);
    Check(result);
    return ShaderModule(shader_module, handle, *dld);
}
|
||||
|
||||
/// Creates an event using a fixed create info (no pNext chain, flags = 0); the creation result
/// is handed to Check before wrapping.
Event Device::CreateEvent() const {
    static constexpr VkEventCreateInfo default_ci{
        .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
    };

    VkEvent event;
    const auto result = dld->vkCreateEvent(handle, &default_ci, nullptr, &event);
    Check(result);
    return Event(event, handle, *dld);
}
|
||||
|
||||
/// Creates a swapchain from `ci`; the creation result is handed to Check before wrapping.
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
    VkSwapchainKHR swapchain;
    const auto result = dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &swapchain);
    Check(result);
    return SwapchainKHR(swapchain, handle, *dld);
}
|
||||
|
||||
/// Non-throwing allocation: returns the allocated memory, or a default-constructed DeviceMemory
/// when vkAllocateMemory reports anything other than VK_SUCCESS.
DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept {
    VkDeviceMemory allocation;
    const auto result = dld->vkAllocateMemory(handle, &ai, nullptr, &allocation);
    if (result == VK_SUCCESS) {
        return DeviceMemory(allocation, handle, *dld);
    }
    return {};
}
|
||||
|
||||
/// Allocates device memory as described by `ai`; unlike TryAllocateMemory, the result is handed
/// to Check instead of being converted into an empty return value.
DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
    VkDeviceMemory allocation;
    const auto result = dld->vkAllocateMemory(handle, &ai, nullptr, &allocation);
    Check(result);
    return DeviceMemory(allocation, handle, *dld);
}
|
||||
|
||||
/// Returns the memory requirements the driver reports for `buffer`.
VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
    VkMemoryRequirements buffer_requirements;
    dld->vkGetBufferMemoryRequirements(handle, buffer, &buffer_requirements);
    return buffer_requirements;
}
|
||||
|
||||
/// Returns the memory requirements the driver reports for `image`.
VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
    VkMemoryRequirements image_requirements;
    dld->vkGetImageMemoryRequirements(handle, image, &image_requirements);
    return image_requirements;
}
|
||||
|
||||
/// Submits the given descriptor writes and copies in a single vkUpdateDescriptorSets call.
void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
                                  Span<VkCopyDescriptorSet> copies) const noexcept {
    const auto num_writes = writes.size();
    const auto num_copies = copies.size();
    dld->vkUpdateDescriptorSets(handle, num_writes, writes.data(), num_copies, copies.data());
}
|
||||
|
||||
/// Returns the core properties structure for this physical device.
VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept {
    VkPhysicalDeviceProperties device_properties;
    dld->vkGetPhysicalDeviceProperties(physical_device, &device_properties);
    return device_properties;
}
|
||||
|
||||
/// Fills the caller-provided `properties` structure (including whatever the caller chained onto
/// it) through vkGetPhysicalDeviceProperties2KHR.
void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept {
    VkPhysicalDeviceProperties2KHR* const out_properties = &properties;
    dld->vkGetPhysicalDeviceProperties2KHR(physical_device, out_properties);
}
|
||||
|
||||
/// Returns the core feature set of this physical device.
/// The query goes through vkGetPhysicalDeviceFeatures2KHR with an empty pNext chain and only
/// the embedded `features` member is returned.
VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept {
    VkPhysicalDeviceFeatures2KHR query;
    query.pNext = nullptr;
    query.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
    dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &query);
    return query.features;
}
|
||||
|
||||
/// Fills the caller-provided `features` structure (including whatever the caller chained onto
/// it) through vkGetPhysicalDeviceFeatures2KHR.
void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept {
    VkPhysicalDeviceFeatures2KHR* const out_features = &features;
    dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, out_features);
}
|
||||
|
||||
/// Returns the properties this physical device reports for `format`.
VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept {
    VkFormatProperties format_properties;
    dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &format_properties);
    return format_properties;
}
|
||||
|
||||
std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const {
|
||||
u32 num;
|
||||
dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr);
|
||||
std::vector<VkExtensionProperties> properties(num);
|
||||
dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data());
|
||||
return properties;
|
||||
}
|
||||
|
||||
std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties() const {
|
||||
u32 num;
|
||||
dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr);
|
||||
std::vector<VkQueueFamilyProperties> properties(num);
|
||||
dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data());
|
||||
return properties;
|
||||
}
|
||||
|
||||
/// Reports whether the queue family `queue_family_index` can present to `surface`.
/// The query result is handed to Check; the returned flag is true only for VK_TRUE.
bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const {
    VkBool32 can_present;
    const auto result = dld->vkGetPhysicalDeviceSurfaceSupportKHR(
        physical_device, queue_family_index, surface, &can_present);
    Check(result);
    return can_present == VK_TRUE;
}
|
||||
|
||||
/// Returns the capabilities of `surface` on this physical device; the query result is handed
/// to Check.
VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
    VkSurfaceCapabilitiesKHR surface_caps;
    const auto result =
        dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &surface_caps);
    Check(result);
    return surface_caps;
}
|
||||
|
||||
/// Returns the formats supported for `surface` using the two-call pattern; both results are
/// handed to Check.
std::vector<VkSurfaceFormatKHR> PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const {
    u32 format_count;
    const auto count_result =
        dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &format_count, nullptr);
    Check(count_result);

    std::vector<VkSurfaceFormatKHR> surface_formats(format_count);
    const auto fill_result = dld->vkGetPhysicalDeviceSurfaceFormatsKHR(
        physical_device, surface, &format_count, surface_formats.data());
    Check(fill_result);
    return surface_formats;
}
|
||||
|
||||
/// Returns the present modes supported for `surface` using the two-call pattern; both results
/// are handed to Check.
std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR(
    VkSurfaceKHR surface) const {
    u32 mode_count;
    const auto count_result = dld->vkGetPhysicalDeviceSurfacePresentModesKHR(
        physical_device, surface, &mode_count, nullptr);
    Check(count_result);

    std::vector<VkPresentModeKHR> present_modes(mode_count);
    const auto fill_result = dld->vkGetPhysicalDeviceSurfacePresentModesKHR(
        physical_device, surface, &mode_count, present_modes.data());
    Check(fill_result);
    return present_modes;
}
|
||||
|
||||
/// Returns the memory properties structure reported for this physical device.
VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept {
    VkPhysicalDeviceMemoryProperties memory_properties;
    dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties);
    return memory_properties;
}
|
||||
|
||||
/// Determines the instance-level Vulkan version available.
/// - vkEnumerateInstanceVersion cannot be resolved: VK_API_VERSION_1_0 is returned.
/// - The entry point exists but fails: the error is logged and VK_API_VERSION_1_1 is returned.
/// - Otherwise the reported version is returned as-is.
u32 AvailableVersion(const InstanceDispatch& dld) noexcept {
    PFN_vkEnumerateInstanceVersion enumerate_version;
    const auto found = Proc(enumerate_version, dld, "vkEnumerateInstanceVersion");
    if (!found) {
        // If the procedure is not found, Vulkan 1.0 is assumed
        return VK_API_VERSION_1_0;
    }

    u32 reported_version;
    const VkResult status = enumerate_version(&reported_version);
    if (status == VK_SUCCESS) {
        return reported_version;
    }
    LOG_ERROR(Render_Vulkan, "vkEnumerateInstanceVersion returned {}, assuming Vulkan 1.1",
              ToString(status));
    return VK_API_VERSION_1_1;
}
|
||||
|
||||
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
|
||||
const InstanceDispatch& dld) {
|
||||
u32 num;
|
||||
if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::vector<VkExtensionProperties> properties(num);
|
||||
if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) !=
|
||||
VK_SUCCESS) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
|
||||
const InstanceDispatch& dld) {
|
||||
u32 num;
|
||||
if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::vector<VkLayerProperties> properties(num);
|
||||
if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
} // namespace Vulkan::vk
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,752 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/ast.h"
|
||||
#include "video_core/shader/expr.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
// Defaulted constructor: members take whatever initial values the class definition gives them.
ASTZipper::ASTZipper() = default;
|
||||
|
||||
void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
|
||||
ASSERT(new_first->manager == nullptr);
|
||||
first = new_first;
|
||||
last = new_first;
|
||||
|
||||
ASTNode current = first;
|
||||
while (current) {
|
||||
current->manager = this;
|
||||
current->parent = parent;
|
||||
last = current;
|
||||
current = current->next;
|
||||
}
|
||||
}
|
||||
|
||||
void ASTZipper::PushBack(const ASTNode new_node) {
|
||||
ASSERT(new_node->manager == nullptr);
|
||||
new_node->previous = last;
|
||||
if (last) {
|
||||
last->next = new_node;
|
||||
}
|
||||
new_node->next.reset();
|
||||
last = new_node;
|
||||
if (!first) {
|
||||
first = new_node;
|
||||
}
|
||||
new_node->manager = this;
|
||||
}
|
||||
|
||||
void ASTZipper::PushFront(const ASTNode new_node) {
|
||||
ASSERT(new_node->manager == nullptr);
|
||||
new_node->previous.reset();
|
||||
new_node->next = first;
|
||||
if (first) {
|
||||
first->previous = new_node;
|
||||
}
|
||||
if (last == first) {
|
||||
last = new_node;
|
||||
}
|
||||
first = new_node;
|
||||
new_node->manager = this;
|
||||
}
|
||||
|
||||
void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
|
||||
ASSERT(new_node->manager == nullptr);
|
||||
if (!at_node) {
|
||||
PushFront(new_node);
|
||||
return;
|
||||
}
|
||||
const ASTNode next = at_node->next;
|
||||
if (next) {
|
||||
next->previous = new_node;
|
||||
}
|
||||
new_node->previous = at_node;
|
||||
if (at_node == last) {
|
||||
last = new_node;
|
||||
}
|
||||
new_node->next = next;
|
||||
at_node->next = new_node;
|
||||
new_node->manager = this;
|
||||
}
|
||||
|
||||
void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
|
||||
ASSERT(new_node->manager == nullptr);
|
||||
if (!at_node) {
|
||||
PushBack(new_node);
|
||||
return;
|
||||
}
|
||||
const ASTNode previous = at_node->previous;
|
||||
if (previous) {
|
||||
previous->next = new_node;
|
||||
}
|
||||
new_node->next = at_node;
|
||||
if (at_node == first) {
|
||||
first = new_node;
|
||||
}
|
||||
new_node->previous = previous;
|
||||
at_node->previous = new_node;
|
||||
new_node->manager = this;
|
||||
}
|
||||
|
||||
/// Cuts the list immediately before `node`, detaching `node` and everything after it.
/// The detached chain stays linked internally, but each of its nodes has its manager cleared
/// and its parent reset, so the chain can be handed to another zipper.
///
/// Fix: when `node` was the head, the function used to empty the list and return without
/// releasing the nodes, leaving them with `manager == this` and a stale parent. That violated
/// the `manager == nullptr` precondition asserted by Init/PushBack/Insert*. Both paths now run
/// the same release loop.
void ASTZipper::DetachTail(ASTNode node) {
    ASSERT(node->manager == this);
    if (node == first) {
        // Detaching from the head empties the list entirely.
        first.reset();
        last.reset();
    } else {
        // The node before `node` becomes the new tail.
        last = node->previous;
        last->next.reset();
        node->previous.reset();
    }

    // Release ownership of every detached node.
    for (ASTNode current = std::move(node); current; current = current->next) {
        current->manager = nullptr;
        current->parent.reset();
    }
}
|
||||
|
||||
void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
|
||||
ASSERT(start->manager == this && end->manager == this);
|
||||
if (start == end) {
|
||||
DetachSingle(start);
|
||||
return;
|
||||
}
|
||||
const ASTNode prev = start->previous;
|
||||
const ASTNode post = end->next;
|
||||
if (!prev) {
|
||||
first = post;
|
||||
} else {
|
||||
prev->next = post;
|
||||
}
|
||||
if (!post) {
|
||||
last = prev;
|
||||
} else {
|
||||
post->previous = prev;
|
||||
}
|
||||
start->previous.reset();
|
||||
end->next.reset();
|
||||
ASTNode current = start;
|
||||
bool found = false;
|
||||
while (current) {
|
||||
current->manager = nullptr;
|
||||
current->parent.reset();
|
||||
found |= current == end;
|
||||
current = current->next;
|
||||
}
|
||||
ASSERT(found);
|
||||
}
|
||||
|
||||
void ASTZipper::DetachSingle(const ASTNode node) {
|
||||
ASSERT(node->manager == this);
|
||||
const ASTNode prev = node->previous;
|
||||
const ASTNode post = node->next;
|
||||
node->previous.reset();
|
||||
node->next.reset();
|
||||
if (!prev) {
|
||||
first = post;
|
||||
} else {
|
||||
prev->next = post;
|
||||
}
|
||||
if (!post) {
|
||||
last = prev;
|
||||
} else {
|
||||
post->previous = prev;
|
||||
}
|
||||
|
||||
node->manager = nullptr;
|
||||
node->parent.reset();
|
||||
}
|
||||
|
||||
void ASTZipper::Remove(const ASTNode node) {
|
||||
ASSERT(node->manager == this);
|
||||
const ASTNode next = node->next;
|
||||
const ASTNode previous = node->previous;
|
||||
if (previous) {
|
||||
previous->next = next;
|
||||
}
|
||||
if (next) {
|
||||
next->previous = previous;
|
||||
}
|
||||
node->parent.reset();
|
||||
node->manager = nullptr;
|
||||
if (node == last) {
|
||||
last = previous;
|
||||
}
|
||||
if (node == first) {
|
||||
first = next;
|
||||
}
|
||||
}
|
||||
|
||||
class ExprPrinter final {
|
||||
public:
|
||||
void operator()(const ExprAnd& expr) {
|
||||
inner += "( ";
|
||||
std::visit(*this, *expr.operand1);
|
||||
inner += " && ";
|
||||
std::visit(*this, *expr.operand2);
|
||||
inner += ')';
|
||||
}
|
||||
|
||||
void operator()(const ExprOr& expr) {
|
||||
inner += "( ";
|
||||
std::visit(*this, *expr.operand1);
|
||||
inner += " || ";
|
||||
std::visit(*this, *expr.operand2);
|
||||
inner += ')';
|
||||
}
|
||||
|
||||
void operator()(const ExprNot& expr) {
|
||||
inner += "!";
|
||||
std::visit(*this, *expr.operand1);
|
||||
}
|
||||
|
||||
void operator()(const ExprPredicate& expr) {
|
||||
inner += fmt::format("P{}", expr.predicate);
|
||||
}
|
||||
|
||||
void operator()(const ExprCondCode& expr) {
|
||||
inner += fmt::format("CC{}", expr.cc);
|
||||
}
|
||||
|
||||
void operator()(const ExprVar& expr) {
|
||||
inner += fmt::format("V{}", expr.var_index);
|
||||
}
|
||||
|
||||
void operator()(const ExprBoolean& expr) {
|
||||
inner += expr.value ? "true" : "false";
|
||||
}
|
||||
|
||||
void operator()(const ExprGprEqual& expr) {
|
||||
inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
|
||||
}
|
||||
|
||||
const std::string& GetResult() const {
|
||||
return inner;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string inner;
|
||||
};
|
||||
|
||||
class ASTPrinter {
|
||||
public:
|
||||
void operator()(const ASTProgram& ast) {
|
||||
scope++;
|
||||
inner += "program {\n";
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
inner += "}\n";
|
||||
scope--;
|
||||
}
|
||||
|
||||
void operator()(const ASTIfThen& ast) {
|
||||
ExprPrinter expr_parser{};
|
||||
std::visit(expr_parser, *ast.condition);
|
||||
inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
|
||||
scope++;
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
scope--;
|
||||
inner += fmt::format("{}}}\n", Indent());
|
||||
}
|
||||
|
||||
void operator()(const ASTIfElse& ast) {
|
||||
inner += Indent();
|
||||
inner += "else {\n";
|
||||
|
||||
scope++;
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
scope--;
|
||||
|
||||
inner += Indent();
|
||||
inner += "}\n";
|
||||
}
|
||||
|
||||
void operator()(const ASTBlockEncoded& ast) {
|
||||
inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
|
||||
}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
|
||||
inner += Indent();
|
||||
inner += "Block;\n";
|
||||
}
|
||||
|
||||
void operator()(const ASTVarSet& ast) {
|
||||
ExprPrinter expr_parser{};
|
||||
std::visit(expr_parser, *ast.condition);
|
||||
inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
|
||||
}
|
||||
|
||||
void operator()(const ASTLabel& ast) {
|
||||
inner += fmt::format("Label_{}:\n", ast.index);
|
||||
}
|
||||
|
||||
void operator()(const ASTGoto& ast) {
|
||||
ExprPrinter expr_parser{};
|
||||
std::visit(expr_parser, *ast.condition);
|
||||
inner +=
|
||||
fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
|
||||
}
|
||||
|
||||
void operator()(const ASTDoWhile& ast) {
|
||||
ExprPrinter expr_parser{};
|
||||
std::visit(expr_parser, *ast.condition);
|
||||
inner += fmt::format("{}do {{\n", Indent());
|
||||
scope++;
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
scope--;
|
||||
inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
|
||||
}
|
||||
|
||||
void operator()(const ASTReturn& ast) {
|
||||
ExprPrinter expr_parser{};
|
||||
std::visit(expr_parser, *ast.condition);
|
||||
inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
|
||||
ast.kills ? "discard" : "exit");
|
||||
}
|
||||
|
||||
void operator()(const ASTBreak& ast) {
|
||||
ExprPrinter expr_parser{};
|
||||
std::visit(expr_parser, *ast.condition);
|
||||
inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
|
||||
}
|
||||
|
||||
void Visit(const ASTNode& node) {
|
||||
std::visit(*this, *node->GetInnerData());
|
||||
}
|
||||
|
||||
const std::string& GetResult() const {
|
||||
return inner;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string_view Indent() {
|
||||
if (space_segment_scope == scope) {
|
||||
return space_segment;
|
||||
}
|
||||
|
||||
// Ensure that we don't exceed our view.
|
||||
ASSERT(scope * 2 < spaces.size());
|
||||
|
||||
space_segment = spaces.substr(0, scope * 2);
|
||||
space_segment_scope = scope;
|
||||
return space_segment;
|
||||
}
|
||||
|
||||
std::string inner{};
|
||||
std::string_view space_segment;
|
||||
|
||||
u32 scope{};
|
||||
u32 space_segment_scope{};
|
||||
|
||||
static constexpr std::string_view spaces{" "};
|
||||
};
|
||||
|
||||
std::string ASTManager::Print() const {
|
||||
ASTPrinter printer{};
|
||||
printer.Visit(main_node);
|
||||
return printer.GetResult();
|
||||
}
|
||||
|
||||
/// Stores the two decompilation switches; the tree itself is only created by Init().
ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
    : full_decompile(do_full_decompile), disable_else_derivation(disable_else_derivation_) {}
|
||||
|
||||
/// Tears the tree down through Clear() before the members are destroyed.
ASTManager::~ASTManager() {
    this->Clear();
}
|
||||
|
||||
/// Creates the parentless program root, caches a pointer to its payload and prepares the
/// shared constant-false expression.
void ASTManager::Init() {
    false_condition = MakeExpr<ExprBoolean>(false);

    // The root has no parent, hence the empty ASTNode.
    main_node = ASTBase::Make<ASTProgram>(ASTNode{});
    program = std::get_if<ASTProgram>(main_node->GetInnerData());
}
|
||||
|
||||
/// Reserves a label index for `address`. Declaring an address that is already known is a no-op.
void ASTManager::DeclareLabel(u32 address) {
    const bool is_new_label = labels_map.emplace(address, labels_count).second;
    if (!is_new_label) {
        return;
    }
    // Grow the label table so the freshly assigned index has a (still empty) slot.
    ++labels_count;
    labels.resize(labels_count);
}
|
||||
|
||||
/// Appends a label node for `address` to the program and records it in the label table.
/// NOTE(review): the lookup uses operator[], so an address that was never declared would
/// silently map to index 0 - presumably callers always call DeclareLabel first; confirm.
void ASTManager::InsertLabel(u32 address) {
    const u32 label_index = labels_map[address];
    const ASTNode label_node = ASTBase::Make<ASTLabel>(main_node, label_index);
    labels[label_index] = label_node;
    program->nodes.PushBack(label_node);
}
|
||||
|
||||
/// Appends a conditional goto targeting the label of `address` and remembers it in `gotos`
/// so Decompile() can process it later.
/// NOTE(review): as with InsertLabel, an undeclared address resolves to index 0 through
/// operator[] - confirm callers declare labels first.
void ASTManager::InsertGoto(Expr condition, u32 address) {
    const u32 target_index = labels_map[address];
    const ASTNode jump = ASTBase::Make<ASTGoto>(main_node, std::move(condition), target_index);
    gotos.push_back(jump);
    program->nodes.PushBack(jump);
}
|
||||
|
||||
/// Appends an encoded block covering [start_address, end_address] to the program.
void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
    program->nodes.PushBack(
        ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address));
}
|
||||
|
||||
/// Appends a conditional return to the program; `kills` is stored on the node unchanged.
void ASTManager::InsertReturn(Expr condition, bool kills) {
    program->nodes.PushBack(ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills));
}
|
||||
|
||||
// The decompile algorithm is based on
|
||||
// "Taming control flow: A structured approach to eliminating goto statements"
|
||||
// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
|
||||
// on the same structured level as the label which they jump to. This is done,
|
||||
// through outward/inward movements and lifting. Once they are at the same
|
||||
// level, you can enclose them in an "if" structure or a "do-while" structure.
|
||||
void ASTManager::Decompile() {
|
||||
auto it = gotos.begin();
|
||||
while (it != gotos.end()) {
|
||||
const ASTNode goto_node = *it;
|
||||
const auto label_index = goto_node->GetGotoLabel();
|
||||
if (!label_index) {
|
||||
return;
|
||||
}
|
||||
const ASTNode label = labels[*label_index];
|
||||
if (!full_decompile) {
|
||||
// We only decompile backward jumps
|
||||
if (!IsBackwardsJump(goto_node, label)) {
|
||||
it++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (IndirectlyRelated(goto_node, label)) {
|
||||
while (!DirectlyRelated(goto_node, label)) {
|
||||
MoveOutward(goto_node);
|
||||
}
|
||||
}
|
||||
if (DirectlyRelated(goto_node, label)) {
|
||||
u32 goto_level = goto_node->GetLevel();
|
||||
const u32 label_level = label->GetLevel();
|
||||
while (label_level < goto_level) {
|
||||
MoveOutward(goto_node);
|
||||
goto_level--;
|
||||
}
|
||||
// TODO(Blinkhawk): Implement Lifting and Inward Movements
|
||||
}
|
||||
if (label->GetParent() == goto_node->GetParent()) {
|
||||
bool is_loop = false;
|
||||
ASTNode current = goto_node->GetPrevious();
|
||||
while (current) {
|
||||
if (current == label) {
|
||||
is_loop = true;
|
||||
break;
|
||||
}
|
||||
current = current->GetPrevious();
|
||||
}
|
||||
|
||||
if (is_loop) {
|
||||
EncloseDoWhile(goto_node, label);
|
||||
} else {
|
||||
EncloseIfThen(goto_node, label);
|
||||
}
|
||||
it = gotos.erase(it);
|
||||
continue;
|
||||
}
|
||||
it++;
|
||||
}
|
||||
if (full_decompile) {
|
||||
for (const ASTNode& label : labels) {
|
||||
auto& manager = label->GetManager();
|
||||
manager.Remove(label);
|
||||
}
|
||||
labels.clear();
|
||||
} else {
|
||||
auto label_it = labels.begin();
|
||||
while (label_it != labels.end()) {
|
||||
bool can_remove = true;
|
||||
ASTNode label = *label_it;
|
||||
for (const ASTNode& goto_node : gotos) {
|
||||
const auto label_index = goto_node->GetGotoLabel();
|
||||
if (!label_index) {
|
||||
return;
|
||||
}
|
||||
ASTNode& glabel = labels[*label_index];
|
||||
if (glabel == label) {
|
||||
can_remove = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (can_remove) {
|
||||
label->MarkLabelUnused();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tells whether the label is located before the goto.
/// Both nodes are first lifted to ancestors that share a parent; the jump is backwards when
/// the label-side ancestor is found among the previous siblings of the goto-side ancestor.
bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
    u32 goto_depth = goto_node->GetLevel();
    u32 label_depth = label_node->GetLevel();

    // Bring the deeper of the two up to the depth of the other one.
    for (; goto_depth > label_depth; --goto_depth) {
        goto_node = goto_node->GetParent();
    }
    for (; label_depth > goto_depth; --label_depth) {
        label_node = label_node->GetParent();
    }

    // Climb in lock-step until both ancestors are siblings.
    while (goto_node->GetParent() != label_node->GetParent()) {
        goto_node = goto_node->GetParent();
        label_node = label_node->GetParent();
    }

    for (ASTNode cursor = goto_node->GetPrevious(); cursor; cursor = cursor->GetPrevious()) {
        if (cursor == label_node) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
|
||||
return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
|
||||
}
|
||||
|
||||
bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
|
||||
if (first->GetParent() == second->GetParent()) {
|
||||
return false;
|
||||
}
|
||||
const u32 first_level = first->GetLevel();
|
||||
const u32 second_level = second->GetLevel();
|
||||
u32 min_level;
|
||||
u32 max_level;
|
||||
ASTNode max;
|
||||
ASTNode min;
|
||||
if (first_level > second_level) {
|
||||
min_level = second_level;
|
||||
min = second;
|
||||
max_level = first_level;
|
||||
max = first;
|
||||
} else {
|
||||
min_level = first_level;
|
||||
min = first;
|
||||
max_level = second_level;
|
||||
max = second;
|
||||
}
|
||||
|
||||
while (max_level > min_level) {
|
||||
max_level--;
|
||||
max = max->GetParent();
|
||||
}
|
||||
|
||||
return min->GetParent() == max->GetParent();
|
||||
}
|
||||
|
||||
/// Debug helper: logs the printed tree under the caption `state`, then runs SanityCheck().
void ASTManager::ShowCurrentState(std::string_view state) const {
    const std::string listing = Print();
    LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, listing);
    SanityCheck();
}
|
||||
|
||||
void ASTManager::SanityCheck() const {
|
||||
for (const auto& label : labels) {
|
||||
if (!label->GetParent()) {
|
||||
LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces a backwards goto with a do-while: the nodes between the label and the goto become
/// the loop body, and the goto's condition becomes the loop condition.
void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
    ASTZipper& siblings = goto_node->GetManager();
    const ASTNode body_begin = label->GetNext();

    // The goto directly follows its label: there is no body to wrap, so just drop the goto.
    if (body_begin == goto_node) {
        siblings.Remove(goto_node);
        return;
    }

    const ASTNode owner = label->GetParent();
    const Expr loop_condition = goto_node->GetGotoCondition();

    // Cut [body_begin, goto_node] out of the sibling list and hand it to the new loop node.
    siblings.DetachSegment(body_begin, goto_node);
    const ASTNode loop_node = ASTBase::Make<ASTDoWhile>(owner, loop_condition);
    ASTZipper* const body = loop_node->GetSubNodes();
    body->Init(body_begin, loop_node);
    siblings.InsertAfter(loop_node, label);

    // The goto travelled along as the last body node; it is now expressed by the loop itself.
    body->Remove(goto_node);
}
|
||||
|
||||
/// Replaces a forward goto with a conditional block: the nodes between the goto and its label
/// only run when the goto would not have been taken, so they are wrapped in
/// `if (!condition) { ... }`. When else derivation is enabled and the node before the goto is
/// an if-then with an equal condition, the wrapper becomes an `else` block instead.
void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode if_end = label->GetPrevious();
    if (if_end == goto_node) {
        // The label directly follows the goto: nothing is skipped, so just drop the goto.
        zipper.Remove(goto_node);
        return;
    }
    const ASTNode prev = goto_node->GetPrevious();
    const Expr condition = goto_node->GetGotoCondition();
    bool do_else = false;
    // `prev` is null when the goto is the first node of its list; in that case there is no
    // preceding if-then to pair with, and dereferencing it would be undefined behaviour.
    if (!disable_else_derivation && prev && prev->IsIfThen()) {
        const Expr if_condition = prev->GetIfCondition();
        do_else = ExprAreEqual(if_condition, condition);
    }
    const ASTNode parent = label->GetParent();
    zipper.DetachSegment(goto_node, if_end);
    ASTNode if_node;
    if (do_else) {
        if_node = ASTBase::Make<ASTIfElse>(parent);
    } else {
        Expr neg_condition = MakeExprNot(condition);
        if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
    }
    ASTZipper* sub_zipper = if_node->GetSubNodes();
    sub_zipper->Init(goto_node, if_node);
    // NOTE(review): with a null `prev` this relies on ASTZipper::InsertAfter accepting a null
    // anchor (its implementation is not visible here) - confirm.
    zipper.InsertAfter(if_node, prev);
    // The goto travelled along as the first body node; the wrapper now expresses it.
    sub_zipper->Remove(goto_node);
}
|
||||
|
||||
/// Moves a goto one structural level outwards, out of the loop / if / else that contains it.
/// The original condition is captured in a fresh flow variable (initialised to false in front
/// of the enclosing structure and assigned where the goto used to be); the goto, now testing
/// that variable, is re-inserted after the enclosing structure.
void ASTManager::MoveOutward(ASTNode goto_node) {
    ASTZipper& inner_list = goto_node->GetManager();
    const ASTNode parent = goto_node->GetParent();
    ASTZipper& outer_list = parent->GetManager();
    const ASTNode grandpa = parent->GetParent();
    const bool is_loop = parent->IsLoop();
    const bool is_else = parent->IsIfElse();
    const bool is_if = parent->IsIfThen();

    const ASTNode prev = goto_node->GetPrevious();
    const ASTNode post = goto_node->GetNext();

    const Expr condition = goto_node->GetGotoCondition();
    inner_list.DetachSingle(goto_node);

    if (!is_loop && !is_if && !is_else) {
        UNREACHABLE();
    } else {
        const u32 var_index = NewVariable();
        const Expr var_condition = MakeExpr<ExprVar>(var_index);
        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);

        // An else block is anchored on its previous sibling rather than on the block itself,
        // so the initialisation goes in front of that sibling.
        const ASTNode init_anchor = is_else ? parent->GetPrevious() : parent;
        outer_list.InsertBefore(var_node_init, init_anchor);
        inner_list.InsertAfter(var_node, prev);
        goto_node->SetGotoCondition(var_condition);

        if (is_loop) {
            // Leave the loop as soon as the variable is set.
            const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
            inner_list.InsertAfter(break_node, var_node);
        } else if (post) {
            // Everything after the goto must be skipped when the variable is set.
            inner_list.DetachTail(post);
            const ASTNode guard_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
            ASTZipper* const guarded = guard_node->GetSubNodes();
            guarded->Init(post, guard_node);
            inner_list.InsertAfter(guard_node, var_node);
        }
    }

    // When leaving an if that is followed by an else, land behind the else.
    const ASTNode next = parent->GetNext();
    const bool skip_else = is_if && next && next->IsIfElse();
    outer_list.InsertAfter(goto_node, skip_else ? next : parent);
    goto_node->SetParent(grandpa);
}
|
||||
|
||||
class ASTClearer {
|
||||
public:
|
||||
ASTClearer() = default;
|
||||
|
||||
void operator()(const ASTProgram& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(const ASTIfThen& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(const ASTIfElse& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
|
||||
|
||||
void operator()(ASTBlockDecoded& ast) {
|
||||
ast.nodes.clear();
|
||||
}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTVarSet& ast) {}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTLabel& ast) {}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTGoto& ast) {}
|
||||
|
||||
void operator()(const ASTDoWhile& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTReturn& ast) {}
|
||||
|
||||
void operator()([[maybe_unused]] const ASTBreak& ast) {}
|
||||
|
||||
void Visit(const ASTNode& node) {
|
||||
std::visit(*this, *node->GetInnerData());
|
||||
node->Clear();
|
||||
}
|
||||
};
|
||||
|
||||
void ASTManager::Clear() {
|
||||
if (!main_node) {
|
||||
return;
|
||||
}
|
||||
ASTClearer clearer{};
|
||||
clearer.Visit(main_node);
|
||||
main_node.reset();
|
||||
program = nullptr;
|
||||
labels_map.clear();
|
||||
labels.clear();
|
||||
gotos.clear();
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,398 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/shader/expr.h"
|
||||
#include "video_core/shader/node.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
class ASTBase;
|
||||
class ASTBlockDecoded;
|
||||
class ASTBlockEncoded;
|
||||
class ASTBreak;
|
||||
class ASTDoWhile;
|
||||
class ASTGoto;
|
||||
class ASTIfElse;
|
||||
class ASTIfThen;
|
||||
class ASTLabel;
|
||||
class ASTProgram;
|
||||
class ASTReturn;
|
||||
class ASTVarSet;
|
||||
|
||||
using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
|
||||
ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
|
||||
|
||||
using ASTNode = std::shared_ptr<ASTBase>;
|
||||
|
||||
/// Kinds of node containers.
enum class ASTZipperType : u32 {
    Program, ///< Top-level program
    IfThen,  ///< Body of an if
    IfElse,  ///< Body of an else
    Loop,    ///< Body of a loop
};
|
||||
|
||||
class ASTZipper final {
|
||||
public:
|
||||
explicit ASTZipper();
|
||||
|
||||
void Init(ASTNode first, ASTNode parent);
|
||||
|
||||
ASTNode GetFirst() const {
|
||||
return first;
|
||||
}
|
||||
|
||||
ASTNode GetLast() const {
|
||||
return last;
|
||||
}
|
||||
|
||||
void PushBack(ASTNode new_node);
|
||||
void PushFront(ASTNode new_node);
|
||||
void InsertAfter(ASTNode new_node, ASTNode at_node);
|
||||
void InsertBefore(ASTNode new_node, ASTNode at_node);
|
||||
void DetachTail(ASTNode node);
|
||||
void DetachSingle(ASTNode node);
|
||||
void DetachSegment(ASTNode start, ASTNode end);
|
||||
void Remove(ASTNode node);
|
||||
|
||||
ASTNode first;
|
||||
ASTNode last;
|
||||
};
|
||||
|
||||
class ASTProgram {
|
||||
public:
|
||||
ASTZipper nodes{};
|
||||
};
|
||||
|
||||
class ASTIfThen {
|
||||
public:
|
||||
explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
|
||||
Expr condition;
|
||||
ASTZipper nodes{};
|
||||
};
|
||||
|
||||
class ASTIfElse {
|
||||
public:
|
||||
ASTZipper nodes{};
|
||||
};
|
||||
|
||||
class ASTBlockEncoded {
|
||||
public:
|
||||
explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {}
|
||||
u32 start;
|
||||
u32 end;
|
||||
};
|
||||
|
||||
class ASTBlockDecoded {
|
||||
public:
|
||||
explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
|
||||
NodeBlock nodes;
|
||||
};
|
||||
|
||||
class ASTVarSet {
|
||||
public:
|
||||
explicit ASTVarSet(u32 index_, Expr condition_)
|
||||
: index{index_}, condition{std::move(condition_)} {}
|
||||
|
||||
u32 index;
|
||||
Expr condition;
|
||||
};
|
||||
|
||||
class ASTLabel {
|
||||
public:
|
||||
explicit ASTLabel(u32 index_) : index{index_} {}
|
||||
u32 index;
|
||||
bool unused{};
|
||||
};
|
||||
|
||||
class ASTGoto {
|
||||
public:
|
||||
explicit ASTGoto(Expr condition_, u32 label_)
|
||||
: condition{std::move(condition_)}, label{label_} {}
|
||||
|
||||
Expr condition;
|
||||
u32 label;
|
||||
};
|
||||
|
||||
class ASTDoWhile {
|
||||
public:
|
||||
explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
|
||||
Expr condition;
|
||||
ASTZipper nodes{};
|
||||
};
|
||||
|
||||
class ASTReturn {
|
||||
public:
|
||||
explicit ASTReturn(Expr condition_, bool kills_)
|
||||
: condition{std::move(condition_)}, kills{kills_} {}
|
||||
|
||||
Expr condition;
|
||||
bool kills;
|
||||
};
|
||||
|
||||
class ASTBreak {
|
||||
public:
|
||||
explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
|
||||
Expr condition;
|
||||
};
|
||||
|
||||
class ASTBase {
|
||||
public:
|
||||
explicit ASTBase(ASTNode parent_, ASTData data_)
|
||||
: data{std::move(data_)}, parent{std::move(parent_)} {}
|
||||
|
||||
template <class U, class... Args>
|
||||
static ASTNode Make(ASTNode parent, Args&&... args) {
|
||||
return std::make_shared<ASTBase>(std::move(parent),
|
||||
ASTData(U(std::forward<Args>(args)...)));
|
||||
}
|
||||
|
||||
void SetParent(ASTNode new_parent) {
|
||||
parent = std::move(new_parent);
|
||||
}
|
||||
|
||||
ASTNode& GetParent() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
const ASTNode& GetParent() const {
|
||||
return parent;
|
||||
}
|
||||
|
||||
u32 GetLevel() const {
|
||||
u32 level = 0;
|
||||
auto next_parent = parent;
|
||||
while (next_parent) {
|
||||
next_parent = next_parent->GetParent();
|
||||
level++;
|
||||
}
|
||||
return level;
|
||||
}
|
||||
|
||||
ASTData* GetInnerData() {
|
||||
return &data;
|
||||
}
|
||||
|
||||
const ASTData* GetInnerData() const {
|
||||
return &data;
|
||||
}
|
||||
|
||||
ASTNode GetNext() const {
|
||||
return next;
|
||||
}
|
||||
|
||||
ASTNode GetPrevious() const {
|
||||
return previous;
|
||||
}
|
||||
|
||||
ASTZipper& GetManager() {
|
||||
return *manager;
|
||||
}
|
||||
|
||||
const ASTZipper& GetManager() const {
|
||||
return *manager;
|
||||
}
|
||||
|
||||
std::optional<u32> GetGotoLabel() const {
|
||||
if (const auto* inner = std::get_if<ASTGoto>(&data)) {
|
||||
return {inner->label};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Expr GetGotoCondition() const {
|
||||
if (const auto* inner = std::get_if<ASTGoto>(&data)) {
|
||||
return inner->condition;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void MarkLabelUnused() {
|
||||
if (auto* inner = std::get_if<ASTLabel>(&data)) {
|
||||
inner->unused = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsLabelUnused() const {
|
||||
if (const auto* inner = std::get_if<ASTLabel>(&data)) {
|
||||
return inner->unused;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<u32> GetLabelIndex() const {
|
||||
if (const auto* inner = std::get_if<ASTLabel>(&data)) {
|
||||
return {inner->index};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Expr GetIfCondition() const {
|
||||
if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
|
||||
return inner->condition;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void SetGotoCondition(Expr new_condition) {
|
||||
if (auto* inner = std::get_if<ASTGoto>(&data)) {
|
||||
inner->condition = std::move(new_condition);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsIfThen() const {
|
||||
return std::holds_alternative<ASTIfThen>(data);
|
||||
}
|
||||
|
||||
bool IsIfElse() const {
|
||||
return std::holds_alternative<ASTIfElse>(data);
|
||||
}
|
||||
|
||||
bool IsBlockEncoded() const {
|
||||
return std::holds_alternative<ASTBlockEncoded>(data);
|
||||
}
|
||||
|
||||
void TransformBlockEncoded(NodeBlock&& nodes) {
|
||||
data = ASTBlockDecoded(std::move(nodes));
|
||||
}
|
||||
|
||||
bool IsLoop() const {
|
||||
return std::holds_alternative<ASTDoWhile>(data);
|
||||
}
|
||||
|
||||
ASTZipper* GetSubNodes() {
|
||||
if (std::holds_alternative<ASTProgram>(data)) {
|
||||
return &std::get_if<ASTProgram>(&data)->nodes;
|
||||
}
|
||||
if (std::holds_alternative<ASTIfThen>(data)) {
|
||||
return &std::get_if<ASTIfThen>(&data)->nodes;
|
||||
}
|
||||
if (std::holds_alternative<ASTIfElse>(data)) {
|
||||
return &std::get_if<ASTIfElse>(&data)->nodes;
|
||||
}
|
||||
if (std::holds_alternative<ASTDoWhile>(data)) {
|
||||
return &std::get_if<ASTDoWhile>(&data)->nodes;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
next.reset();
|
||||
previous.reset();
|
||||
parent.reset();
|
||||
manager = nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ASTZipper;
|
||||
|
||||
ASTData data;
|
||||
ASTNode parent;
|
||||
ASTNode next;
|
||||
ASTNode previous;
|
||||
ASTZipper* manager{};
|
||||
};
|
||||
|
||||
class ASTManager final {
|
||||
public:
|
||||
explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
|
||||
~ASTManager();
|
||||
|
||||
ASTManager(const ASTManager& o) = delete;
|
||||
ASTManager& operator=(const ASTManager& other) = delete;
|
||||
|
||||
ASTManager(ASTManager&& other) noexcept = default;
|
||||
ASTManager& operator=(ASTManager&& other) noexcept = default;
|
||||
|
||||
void Init();
|
||||
|
||||
void DeclareLabel(u32 address);
|
||||
|
||||
void InsertLabel(u32 address);
|
||||
|
||||
void InsertGoto(Expr condition, u32 address);
|
||||
|
||||
void InsertBlock(u32 start_address, u32 end_address);
|
||||
|
||||
void InsertReturn(Expr condition, bool kills);
|
||||
|
||||
std::string Print() const;
|
||||
|
||||
void Decompile();
|
||||
|
||||
void ShowCurrentState(std::string_view state) const;
|
||||
|
||||
void SanityCheck() const;
|
||||
|
||||
void Clear();
|
||||
|
||||
bool IsFullyDecompiled() const {
|
||||
if (full_decompile) {
|
||||
return gotos.empty();
|
||||
}
|
||||
|
||||
for (ASTNode goto_node : gotos) {
|
||||
auto label_index = goto_node->GetGotoLabel();
|
||||
if (!label_index) {
|
||||
return false;
|
||||
}
|
||||
ASTNode glabel = labels[*label_index];
|
||||
if (IsBackwardsJump(goto_node, glabel)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
ASTNode GetProgram() const {
|
||||
return main_node;
|
||||
}
|
||||
|
||||
u32 GetVariables() const {
|
||||
return variables;
|
||||
}
|
||||
|
||||
const std::vector<ASTNode>& GetLabels() const {
|
||||
return labels;
|
||||
}
|
||||
|
||||
private:
|
||||
bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
|
||||
|
||||
bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
|
||||
|
||||
bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
|
||||
|
||||
void EncloseDoWhile(ASTNode goto_node, ASTNode label);
|
||||
|
||||
void EncloseIfThen(ASTNode goto_node, ASTNode label);
|
||||
|
||||
void MoveOutward(ASTNode goto_node);
|
||||
|
||||
u32 NewVariable() {
|
||||
return variables++;
|
||||
}
|
||||
|
||||
bool full_decompile{};
|
||||
bool disable_else_derivation{};
|
||||
std::unordered_map<u32, u32> labels_map{};
|
||||
u32 labels_count{};
|
||||
std::vector<ASTNode> labels{};
|
||||
std::list<ASTNode> gotos{};
|
||||
u32 variables{};
|
||||
ASTProgram* program{};
|
||||
ASTNode main_node{};
|
||||
Expr false_condition{};
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,234 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// Keeps a reference to the window used to create the shared contexts of the workers.
AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_)
    : emu_window(emu_window_) {}
|
||||
|
||||
/// Signals the workers to exit and lets go of them (see KillWorkers).
AsyncShaders::~AsyncShaders() {
    this->KillWorkers();
}
|
||||
|
||||
void AsyncShaders::AllocateWorkers() {
|
||||
// Use at least one thread
|
||||
u32 num_workers = 1;
|
||||
|
||||
// Deduce how many more threads we can use
|
||||
const u32 thread_count = std::thread::hardware_concurrency();
|
||||
if (thread_count >= 8) {
|
||||
// Increase async workers by 1 for every 2 threads >= 8
|
||||
num_workers += 1 + (thread_count - 8) / 2;
|
||||
}
|
||||
|
||||
// If we already have workers queued, ignore
|
||||
if (num_workers == worker_threads.size()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If workers already exist, clear them
|
||||
if (!worker_threads.empty()) {
|
||||
FreeWorkers();
|
||||
}
|
||||
|
||||
// Create workers
|
||||
for (std::size_t i = 0; i < num_workers; i++) {
|
||||
context_list.push_back(emu_window.CreateSharedContext());
|
||||
worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
|
||||
context_list[i].get());
|
||||
}
|
||||
}
|
||||
|
||||
/// Stops all workers and waits for them to finish, then releases their shared contexts.
/// The pool can be allocated again afterwards.
void AsyncShaders::FreeWorkers() {
    // Mark all threads to quit
    is_thread_exiting.store(true);
    cv.notify_all();
    for (auto& thread : worker_threads) {
        thread.join();
    }
    // Clear our shared contexts
    context_list.clear();

    // Clear our worker threads
    worker_threads.clear();

    // Every worker has been joined, so nobody observes the flag any more. Re-arm it: workers
    // created afterwards (AllocateWorkers calls this right before spawning a new pool) would
    // otherwise see it still set and return immediately.
    is_thread_exiting.store(false);
}
|
||||
|
||||
/// Signals all workers to exit and detaches them instead of waiting for them.
/// NOTE(review): the shared contexts are released right after detaching, while a worker may
/// still be running with a pointer to one of them - confirm this is safe at shutdown.
void AsyncShaders::KillWorkers() {
    is_thread_exiting.store(true);
    cv.notify_all();

    for (std::size_t index = 0; index < worker_threads.size(); ++index) {
        worker_threads[index].detach();
    }

    // Drop the shared contexts first, then the (now detached) thread handles
    context_list.clear();
    worker_threads.clear();
}
|
||||
|
||||
bool AsyncShaders::HasWorkQueued() const {
|
||||
return !pending_queue.empty();
|
||||
}
|
||||
|
||||
bool AsyncShaders::HasCompletedWork() const {
|
||||
std::shared_lock lock{completed_mutex};
|
||||
return !finished_work.empty();
|
||||
}
|
||||
|
||||
/// Heuristic deciding whether the shader of the current draw may be built asynchronously.
bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
    const auto& regs = gpu.Maxwell3D().regs;

    // If something is using depth, we can assume that games are not rendering anything which
    // will be used one time.
    if (regs.zeta_enable) {
        return true;
    }

    // A small index or vertex count hints at a full screen quad. Usually these shaders are
    // only used once for building textures, so we assume they can't be built async.
    const bool looks_like_fullscreen_quad =
        regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6;
    return !looks_like_fullscreen_quad;
}
|
||||
|
||||
std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
|
||||
std::vector<Result> results;
|
||||
{
|
||||
std::unique_lock lock{completed_mutex};
|
||||
results = std::move(finished_work);
|
||||
finished_work.clear();
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/// Queues an OpenGL (GLSL or assembly, depending on the device) shader build and wakes one
/// worker. The Vulkan-only fields of the job are left empty.
void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                     Tegra::Engines::ShaderType shader_type, u64 uid,
                                     std::vector<u64> code, std::vector<u64> code_b,
                                     u32 main_offset, CompilerSettings compiler_settings,
                                     const Registry& registry, VAddr cpu_addr) {
    std::unique_lock guard(queue_mutex);
    WorkerParams job{
        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
        .device = &device,
        .shader_type = shader_type,
        .uid = uid,
        .code = std::move(code),
        .code_b = std::move(code_b),
        .main_offset = main_offset,
        .compiler_settings = compiler_settings,
        .registry = registry,
        .cpu_address = cpu_addr,
        .pp_cache = nullptr,
        .vk_device = nullptr,
        .scheduler = nullptr,
        .descriptor_pool = nullptr,
        .update_descriptor_queue = nullptr,
        .bindings{},
        .program{},
        .key{},
        .num_color_buffers = 0,
    };
    pending_queue.push(std::move(job));
    cv.notify_one();
}
|
||||
|
||||
/// Queues a Vulkan graphics pipeline build and wakes one worker. The OpenGL-only fields of
/// the job are left empty. The job keeps pointers to the objects passed by reference.
void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                     const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
                                     Vulkan::VKDescriptorPool& descriptor_pool,
                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                                     Vulkan::SPIRVProgram program,
                                     Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
    std::unique_lock guard(queue_mutex);
    WorkerParams job{
        .backend = Backend::Vulkan,
        .device = nullptr,
        .shader_type{},
        .uid = 0,
        .code{},
        .code_b{},
        .main_offset = 0,
        .compiler_settings{},
        .registry{},
        .cpu_address = 0,
        .pp_cache = pp_cache,
        .vk_device = &device,
        .scheduler = &scheduler,
        .descriptor_pool = &descriptor_pool,
        .update_descriptor_queue = &update_descriptor_queue,
        .bindings = std::move(bindings),
        .program = std::move(program),
        .key = key,
        .num_color_buffers = num_color_buffers,
    };
    pending_queue.push(std::move(job));
    cv.notify_one();
}
|
||||
|
||||
/// Worker loop: waits for queued work, takes one item and builds it.
/// OpenGL/GLASM results are appended to finished_work; Vulkan pipelines are handed straight to
/// the pipeline cache stored in the work item.
/// @param context Graphics context acquired while an OpenGL/GLASM shader is being built.
void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
        std::unique_lock queue_lock{queue_mutex};
        cv.wait(queue_lock, [this] { return HasWorkQueued() || is_thread_exiting; });
        if (is_thread_exiting) {
            return;
        }

        // Nothing left for this thread (e.g. another worker took the item): wait again
        if (!HasWorkQueued() || pending_queue.empty()) {
            continue;
        }

        // Take ownership of the next item, then release the queue before the slow part
        WorkerParams work = std::move(pending_queue.front());
        pending_queue.pop();
        queue_lock.unlock();

        switch (work.backend) {
        case Backend::OpenGL:
        case Backend::GLASM: {
            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
            const auto scope = context->Acquire();
            auto program =
                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);

            // The code vectors are moved out only after the build above has finished
            Result result{
                .uid = work.uid,
                .cpu_address = work.cpu_address,
                .backend = work.backend,
                .program{},
                .code = std::move(work.code),
                .code_b = std::move(work.code_b),
                .shader_type = work.shader_type,
            };
            if (work.backend == Backend::OpenGL) {
                result.program.opengl = std::move(program->source_program);
            } else {
                result.program.glasm = std::move(program->assembly_program);
            }

            {
                std::unique_lock complete_lock(completed_mutex);
                finished_work.push_back(std::move(result));
            }
            break;
        }
        case Backend::Vulkan: {
            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                *work.vk_device, *work.scheduler, *work.descriptor_pool,
                *work.update_descriptor_queue, work.key, work.bindings, work.program,
                work.num_color_buffers);

            work.pp_cache->EmplacePipeline(std::move(pipeline));
            break;
        }
        }
    }
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,138 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <shared_mutex>
|
||||
#include <thread>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
class GraphicsContext;
|
||||
} // namespace Core::Frontend
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
class VKPipelineCache;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
class AsyncShaders {
|
||||
public:
|
||||
enum class Backend {
|
||||
OpenGL,
|
||||
GLASM,
|
||||
Vulkan,
|
||||
};
|
||||
|
||||
struct ResultPrograms {
|
||||
OpenGL::OGLProgram opengl;
|
||||
OpenGL::OGLAssemblyProgram glasm;
|
||||
};
|
||||
|
||||
struct Result {
|
||||
u64 uid;
|
||||
VAddr cpu_address;
|
||||
Backend backend;
|
||||
ResultPrograms program;
|
||||
std::vector<u64> code;
|
||||
std::vector<u64> code_b;
|
||||
Tegra::Engines::ShaderType shader_type;
|
||||
};
|
||||
|
||||
explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
|
||||
~AsyncShaders();
|
||||
|
||||
/// Start up shader worker threads
|
||||
void AllocateWorkers();
|
||||
|
||||
/// Clear the shader queue and kill all worker threads
|
||||
void FreeWorkers();
|
||||
|
||||
// Force end all threads
|
||||
void KillWorkers();
|
||||
|
||||
/// Check to see if any shaders have actually been compiled
|
||||
[[nodiscard]] bool HasCompletedWork() const;
|
||||
|
||||
/// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
|
||||
/// every shader async as some shaders are only built and executed once. We try to "guess" which
|
||||
/// shader would be used only once
|
||||
[[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
|
||||
|
||||
/// Pulls completed compiled shaders
|
||||
[[nodiscard]] std::vector<Result> GetCompletedWork();
|
||||
|
||||
void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
|
||||
u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
|
||||
CompilerSettings compiler_settings, const Registry& registry,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
|
||||
Vulkan::VKScheduler& scheduler,
|
||||
Vulkan::VKDescriptorPool& descriptor_pool,
|
||||
Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
std::vector<VkDescriptorSetLayoutBinding> bindings,
|
||||
Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
|
||||
u32 num_color_buffers);
|
||||
|
||||
private:
|
||||
void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
|
||||
|
||||
/// Check our worker queue to see if we have any work queued already
|
||||
[[nodiscard]] bool HasWorkQueued() const;
|
||||
|
||||
struct WorkerParams {
|
||||
Backend backend;
|
||||
// For OGL
|
||||
const OpenGL::Device* device;
|
||||
Tegra::Engines::ShaderType shader_type;
|
||||
u64 uid;
|
||||
std::vector<u64> code;
|
||||
std::vector<u64> code_b;
|
||||
u32 main_offset;
|
||||
CompilerSettings compiler_settings;
|
||||
std::optional<Registry> registry;
|
||||
VAddr cpu_address;
|
||||
|
||||
// For Vulkan
|
||||
Vulkan::VKPipelineCache* pp_cache;
|
||||
const Vulkan::Device* vk_device;
|
||||
Vulkan::VKScheduler* scheduler;
|
||||
Vulkan::VKDescriptorPool* descriptor_pool;
|
||||
Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
|
||||
std::vector<VkDescriptorSetLayoutBinding> bindings;
|
||||
Vulkan::SPIRVProgram program;
|
||||
Vulkan::GraphicsPipelineCacheKey key;
|
||||
u32 num_color_buffers;
|
||||
};
|
||||
|
||||
std::condition_variable cv;
|
||||
mutable std::mutex queue_mutex;
|
||||
mutable std::shared_mutex completed_mutex;
|
||||
std::atomic<bool> is_thread_exiting{};
|
||||
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
|
||||
std::vector<std::thread> worker_threads;
|
||||
std::queue<WorkerParams> pending_queue;
|
||||
std::vector<Result> finished_work;
|
||||
Core::Frontend::EmuWindow& emu_window;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,26 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/shader/compiler_settings.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
std::string CompileDepthAsString(const CompileDepth cd) {
|
||||
switch (cd) {
|
||||
case CompileDepth::BruteForce:
|
||||
return "Brute Force Compile";
|
||||
case CompileDepth::FlowStack:
|
||||
return "Simple Flow Stack Mode";
|
||||
case CompileDepth::NoFlowStack:
|
||||
return "Remove Flow Stack";
|
||||
case CompileDepth::DecompileBackwards:
|
||||
return "Decompile Backward Jumps";
|
||||
case CompileDepth::FullDecompile:
|
||||
return "Full Decompilation";
|
||||
default:
|
||||
return "Unknown Compiler Process";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,26 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// How far control flow analysis is taken; CompileDepthAsString gives the display name of each.
enum class CompileDepth : u32 {
    BruteForce = 0,         ///< "Brute Force Compile"
    FlowStack = 1,          ///< "Simple Flow Stack Mode"
    NoFlowStack = 2,        ///< "Remove Flow Stack"
    DecompileBackwards = 3, ///< "Decompile Backward Jumps"
    FullDecompile = 4,      ///< "Full Decompilation"
};
|
||||
|
||||
std::string CompileDepthAsString(CompileDepth cd);
|
||||
|
||||
struct CompilerSettings {
|
||||
CompileDepth depth{CompileDepth::NoFlowStack};
|
||||
bool disable_else_derivation{true};
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,751 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/ast.h"
|
||||
#include "video_core/shader/control_flow.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
namespace {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Placeholder target stored for SYNC/BRK branches until the SSY/PBK stacks give the real one.
constexpr s32 unassigned_branch{-2};
|
||||
|
||||
struct Query {
|
||||
u32 address{};
|
||||
std::stack<u32> ssy_stack{};
|
||||
std::stack<u32> pbk_stack{};
|
||||
};
|
||||
|
||||
struct BlockStack {
|
||||
BlockStack() = default;
|
||||
explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
|
||||
std::stack<u32> ssy_stack{};
|
||||
std::stack<u32> pbk_stack{};
|
||||
};
|
||||
|
||||
template <typename T, typename... Args>
|
||||
BlockBranchInfo MakeBranchInfo(Args&&... args) {
|
||||
static_assert(std::is_convertible_v<T, BranchData>);
|
||||
return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
|
||||
}
|
||||
|
||||
/// Returns the `ignore` flag of a single branch; any other branch kind is never ignored.
bool BlockBranchIsIgnored(BlockBranchInfo first) {
    const auto* const single = std::get_if<SingleBranch>(first.get());
    return single != nullptr && single->ignore;
}
|
||||
|
||||
struct BlockInfo {
|
||||
u32 start{};
|
||||
u32 end{};
|
||||
bool visited{};
|
||||
BlockBranchInfo branch{};
|
||||
|
||||
bool IsInside(const u32 address) const {
|
||||
return start <= address && address <= end;
|
||||
}
|
||||
};
|
||||
|
||||
struct CFGRebuildState {
|
||||
explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
|
||||
: program_code{program_code_}, registry{registry_}, start{start_} {}
|
||||
|
||||
const ProgramCode& program_code;
|
||||
Registry& registry;
|
||||
u32 start{};
|
||||
std::vector<BlockInfo> block_info;
|
||||
std::list<u32> inspect_queries;
|
||||
std::list<Query> queries;
|
||||
std::unordered_map<u32, u32> registered;
|
||||
std::set<u32> labels;
|
||||
std::map<u32, u32> ssy_labels;
|
||||
std::map<u32, u32> pbk_labels;
|
||||
std::unordered_map<u32, BlockStack> stacks;
|
||||
ASTManager* manager{};
|
||||
};
|
||||
|
||||
/// Relation between an address and the already known blocks, as reported by TryGetBlock.
enum class BlockCollision : u32 {
    None,   ///< No block contains the address
    Found,  ///< A block starts exactly at the address
    Inside, ///< The address falls inside a block, past its start
};
|
||||
|
||||
/// Looks for the first known block that starts at or contains `address`.
/// @return The collision kind and the block's index in state.block_info, or
///         {BlockCollision::None, 0xFFFFFFFF} when no block matches.
std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
    u32 position = 0;
    for (const BlockInfo& candidate : state.block_info) {
        if (candidate.start == address) {
            return {BlockCollision::Found, position};
        }
        if (candidate.IsInside(address)) {
            return {BlockCollision::Inside, position};
        }
        ++position;
    }
    return {BlockCollision::None, 0xFFFFFFFF};
}
|
||||
|
||||
struct ParseInfo {
|
||||
BlockBranchInfo branch_info{};
|
||||
u32 end_address{};
|
||||
};
|
||||
|
||||
/// Appends a block spanning [start, end] and registers its start address.
/// An index already registered for `start` is left untouched.
/// @return Reference to the new block; later growth of state.block_info may invalidate it.
BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
    // The new block is appended, so its index equals the size before insertion
    const u32 new_index = static_cast<u32>(state.block_info.size());
    BlockInfo& created = state.block_info.emplace_back();
    created.start = start;
    created.end = end;
    state.registered.emplace(start, new_index);
    return created;
}
|
||||
|
||||
/// Converts a predicate index and a negation flag into a Pred value; negation adds 8.
Pred GetPredicate(u32 index, bool negated) {
    u64 encoded = static_cast<u64>(index);
    if (negated) {
        encoded += 8ULL;
    }
    return static_cast<Pred>(encoded);
}
|
||||
|
||||
/// Status reported by ParseCode.
enum class ParseResult : u32 {
    ControlCaught, ///< A flow instruction ended the block
    BlockEnd,      ///< The scan hit the end of the code or an already registered block
    AbnormalFlow,  ///< A branch could not be resolved (constant buffer BRA, untracked BRX)
};
|
||||
|
||||
struct BranchIndirectInfo {
|
||||
u32 buffer{};
|
||||
u32 offset{};
|
||||
u32 entries{};
|
||||
s32 relative_position{};
|
||||
};
|
||||
|
||||
struct BufferInfo {
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
|
||||
const Instruction instr = state.program_code[pos];
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
if (opcode->get().GetId() != OpCode::Id::BRX) {
|
||||
return std::nullopt;
|
||||
}
|
||||
if (instr.brx.constant_buffer != 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
--pos;
|
||||
return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
|
||||
}
|
||||
|
||||
template <typename Result, typename TestCallable, typename PackCallable>
|
||||
// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
|
||||
// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
|
||||
std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
|
||||
PackCallable pack) {
|
||||
for (; pos >= state.start; --pos) {
|
||||
if (IsSchedInstruction(pos, state.start)) {
|
||||
continue;
|
||||
}
|
||||
const Instruction instr = state.program_code[pos];
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
if (!opcode) {
|
||||
continue;
|
||||
}
|
||||
if (test(instr, opcode->get())) {
|
||||
--pos;
|
||||
return std::make_optional(pack(instr, opcode->get()));
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
|
||||
u64 brx_tracked_register) {
|
||||
return TrackInstruction<std::pair<BufferInfo, u64>>(
|
||||
state, pos,
|
||||
[brx_tracked_register](auto instr, const auto& opcode) {
|
||||
return opcode.GetId() == OpCode::Id::LD_C &&
|
||||
instr.gpr0.Value() == brx_tracked_register &&
|
||||
instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
|
||||
},
|
||||
[](auto instr, const auto& opcode) {
|
||||
const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
|
||||
static_cast<u32>(instr.cbuf36.GetOffset())};
|
||||
return std::make_pair(info, instr.gpr8.Value());
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
|
||||
u64 ldc_tracked_register) {
|
||||
return TrackInstruction<u64>(
|
||||
state, pos,
|
||||
[ldc_tracked_register](auto instr, const auto& opcode) {
|
||||
return opcode.GetId() == OpCode::Id::SHL_IMM &&
|
||||
instr.gpr0.Value() == ldc_tracked_register;
|
||||
},
|
||||
[](auto instr, const auto&) { return instr.gpr8.Value(); });
|
||||
}
|
||||
|
||||
std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
|
||||
u64 shl_tracked_register) {
|
||||
return TrackInstruction<u32>(
|
||||
state, pos,
|
||||
[shl_tracked_register](auto instr, const auto& opcode) {
|
||||
return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
|
||||
instr.gpr0.Value() == shl_tracked_register;
|
||||
},
|
||||
[](auto instr, const auto&) {
|
||||
return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
|
||||
const auto brx_info = GetBRXInfo(state, pos);
|
||||
if (!brx_info) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const auto [relative_position, brx_tracked_register] = *brx_info;
|
||||
|
||||
const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
|
||||
if (!ldc_info) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const auto [buffer_info, ldc_tracked_register] = *ldc_info;
|
||||
|
||||
const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
|
||||
if (!shl_tracked_register) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
|
||||
if (!entries) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
|
||||
}
|
||||
|
||||
/// Scans instructions from `address` until something terminates the basic block.
/// @return ControlCaught with the branch of a terminating flow instruction, BlockEnd when the
///         scan reaches the end of the code or an already registered block, or AbnormalFlow
///         when a branch cannot be resolved statically.
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
    u32 offset = static_cast<u32>(address);
    const u32 end_address = static_cast<u32>(state.program_code.size());
    ParseInfo parse_info{};
    SingleBranch single_branch{};

    // Records a branch target; a target seen for the first time is queued for inspection too.
    const auto insert_label = [&state](u32 label_address) {
        if (state.labels.emplace(label_address).second) {
            state.inspect_queries.push_back(label_address);
        }
    };

    // Stores the instruction's predicate and condition code in single_branch.condition.
    // Returns false when the instruction can never execute and has to be skipped.
    const auto load_condition = [&single_branch](const Instruction& flow_instr) {
        const auto pred_index = static_cast<u32>(flow_instr.pred.pred_index);
        single_branch.condition.predicate =
            GetPredicate(pred_index, flow_instr.negate_pred != 0);
        if (single_branch.condition.predicate == Pred::NeverExecute) {
            return false;
        }
        const ConditionCode cc = flow_instr.flow_condition_code;
        single_branch.condition.cc = cc;
        return cc != ConditionCode::F;
    };

    // Completes single_branch (its address must already be set), ends the block at the
    // current offset and builds the ControlCaught result.
    const auto control_caught = [&](bool kill, bool is_sync, bool is_brk) {
        single_branch.kill = kill;
        single_branch.is_sync = is_sync;
        single_branch.is_brk = is_brk;
        single_branch.ignore = false;
        parse_info.end_address = offset;
        parse_info.branch_info = MakeBranchInfo<SingleBranch>(
            single_branch.condition, single_branch.address, single_branch.kill,
            single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
        return std::pair<ParseResult, ParseInfo>{ParseResult::ControlCaught, parse_info};
    };

    // Every `continue` and every fall out of the switch advances to the next instruction
    for (;; ++offset) {
        if (offset >= end_address) {
            // ASSERT_OR_EXECUTE can't be used, as it ignores the break
            ASSERT_MSG(false, "Shader passed the current limit!");

            single_branch.address = exit_branch;
            single_branch.ignore = false;
            break;
        }
        if (state.registered.contains(offset)) {
            // Ran into an existing block: end here with an ignored branch into it
            single_branch.address = offset;
            single_branch.ignore = true;
            break;
        }
        if (IsSchedInstruction(offset, state.start)) {
            continue;
        }
        const Instruction instr = {state.program_code[offset]};
        const auto opcode = OpCode::Decode(instr);
        if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
            continue;
        }

        switch (opcode->get().GetId()) {
        case OpCode::Id::EXIT:
            if (!load_condition(instr)) {
                continue;
            }
            single_branch.address = exit_branch;
            return control_caught(false, false, false);
        case OpCode::Id::BRA: {
            if (instr.bra.constant_buffer != 0) {
                return {ParseResult::AbnormalFlow, parse_info};
            }
            if (!load_condition(instr)) {
                continue;
            }
            const u32 branch_offset = offset + instr.bra.GetBranchTarget();
            if (branch_offset == 0) {
                single_branch.address = exit_branch;
            } else {
                single_branch.address = branch_offset;
            }
            insert_label(branch_offset);
            return control_caught(false, false, false);
        }
        case OpCode::Id::SYNC:
            if (!load_condition(instr)) {
                continue;
            }
            // The real target comes from the SSY stack and is filled in later
            single_branch.address = unassigned_branch;
            return control_caught(false, true, false);
        case OpCode::Id::BRK:
            if (!load_condition(instr)) {
                continue;
            }
            // The real target comes from the PBK stack and is filled in later
            single_branch.address = unassigned_branch;
            return control_caught(false, false, true);
        case OpCode::Id::KIL:
            if (!load_condition(instr)) {
                continue;
            }
            single_branch.address = exit_branch;
            return control_caught(true, false, false);
        case OpCode::Id::SSY: {
            const u32 target = offset + instr.bra.GetBranchTarget();
            insert_label(target);
            state.ssy_labels.emplace(offset, target);
            break;
        }
        case OpCode::Id::PBK: {
            const u32 target = offset + instr.bra.GetBranchTarget();
            insert_label(target);
            state.pbk_labels.emplace(offset, target);
            break;
        }
        case OpCode::Id::BRX: {
            const auto tmp = TrackBranchIndirectInfo(state, offset);
            if (!tmp) {
                LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
                return {ParseResult::AbnormalFlow, parse_info};
            }

            const auto result = *tmp;
            const s32 pc_target = offset + result.relative_position;
            std::vector<CaseBranch> branches;
            for (u32 i = 0; i < result.entries; i++) {
                // One 4-byte table entry per case, read through the registry
                auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
                if (!key) {
                    return {ParseResult::AbnormalFlow, parse_info};
                }
                u32 value = *key;
                u32 target = static_cast<u32>((value >> 3) + pc_target);
                insert_label(target);
                branches.emplace_back(value, target);
            }
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<MultiBranch>(
                static_cast<u32>(instr.gpr8.Value()), std::move(branches));

            return {ParseResult::ControlCaught, parse_info};
        }
        default:
            break;
        }
    }

    // Only reached through the two `break`s at the top of the loop
    single_branch.kill = false;
    single_branch.is_sync = false;
    single_branch.is_brk = false;
    parse_info.end_address = offset - 1;
    parse_info.branch_info = MakeBranchInfo<SingleBranch>(
        single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
        single_branch.is_brk, single_branch.ignore);
    return {ParseResult::BlockEnd, parse_info};
}
|
||||
|
||||
/// Takes the next address to inspect and makes sure a block starts there.
/// An existing block containing the address is split; otherwise the code is parsed into a new
/// block and, for a conditional single branch, the fallthrough address is queued next.
/// @return false when nothing is queued or parsing hit abnormal flow, true otherwise.
bool TryInspectAddress(CFGRebuildState& state) {
    if (state.inspect_queries.empty()) {
        return false;
    }

    const u32 address = state.inspect_queries.front();
    state.inspect_queries.pop_front();

    const std::pair<BlockCollision, u32> lookup = TryGetBlock(state, address);
    if (lookup.first == BlockCollision::Found) {
        return true;
    }
    if (lookup.first == BlockCollision::Inside) {
        // This case is the tricky one:
        // We need to split the block into 2 separate blocks
        const u32 block_index = lookup.second;
        const u32 original_end = state.block_info[block_index].end;
        BlockInfo& tail = CreateBlockInfo(state, address, original_end);
        // Taken only after the insertion above, which may have moved the blocks
        BlockInfo& head = state.block_info[block_index];
        head.end = address - 1;
        tail.branch = std::move(head.branch);

        // The head now simply continues into the tail
        BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
        auto* const forward = std::get_if<SingleBranch>(forward_branch.get());
        forward->address = address;
        forward->ignore = true;
        head.branch = std::move(forward_branch);
        return true;
    }

    const std::pair<ParseResult, ParseInfo> parsed = ParseCode(state, address);
    if (parsed.first == ParseResult::AbnormalFlow) {
        // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
        return false;
    }
    const ParseInfo& parse_info = parsed.second;

    BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
    block_info.branch = parse_info.branch_info;

    const auto* const single = std::get_if<SingleBranch>(block_info.branch.get());
    if (single != nullptr && !single->condition.IsUnconditional()) {
        // A conditional branch may fall through to the next instruction
        state.inspect_queries.push_front(parse_info.end_address + 1);
    }
    return true;
}
|
||||
|
||||
/// Processes the next stack query.
/// A visited block only has its recorded stacks compared with the query's. A new block is
/// marked visited, the SSY/PBK labels inside it are pushed onto the query's stacks, and follow-up
/// queries are scheduled for every successor.
/// @return false when no query is pending, when the stacks disagree with a previous visit, or
///         when a SYNC/BRK has no target on its stack; true otherwise.
bool TryQuery(CFGRebuildState& state) {
    // Pushes the target of every label located inside the block onto `cc`
    const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
                                  BlockInfo& block) {
        auto gather_start = labels.lower_bound(block.start);
        const auto gather_end = labels.upper_bound(block.end);
        while (gather_start != gather_end) {
            cc.push(gather_start->second);
            ++gather_start;
        }
    };
    if (state.queries.empty()) {
        return false;
    }

    Query& q = state.queries.front();
    // NOTE(review): operator[] maps an unregistered address to block index 0; queries aimed at
    // exit branches appear to depend on that - confirm before changing the lookup.
    const u32 block_index = state.registered[q.address];
    BlockInfo& block = state.block_info[block_index];
    // If the block is visited, check if the stacks match, else gather the ssy/pbk
    // labels into the current stack and look if the branch at the end of the block
    // consumes a label. Schedule new queries accordingly
    if (block.visited) {
        BlockStack& stack = state.stacks[q.address];
        const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
                              (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
        state.queries.pop_front();
        return all_okay;
    }
    block.visited = true;
    state.stacks.insert_or_assign(q.address, BlockStack{q});

    Query q2(q);
    state.queries.pop_front();
    gather_labels(q2.ssy_stack, state.ssy_labels, block);
    gather_labels(q2.pbk_stack, state.pbk_labels, block);
    if (std::holds_alternative<SingleBranch>(*block.branch)) {
        auto* branch = std::get_if<SingleBranch>(block.branch.get());
        // A SYNC/BRK without a matching SSY/PBK would call top()/pop() on an empty stack,
        // which is undefined behaviour. Fail the query instead so the caller can fall back.
        if (branch->is_sync && q2.ssy_stack.empty()) {
            return false;
        }
        if (branch->is_brk && q2.pbk_stack.empty()) {
            return false;
        }
        if (!branch->condition.IsUnconditional()) {
            // Conditional branch: the fallthrough path is queried as well
            q2.address = block.end + 1;
            state.queries.push_back(q2);
        }

        auto& conditional_query = state.queries.emplace_back(q2);
        if (branch->is_sync) {
            if (branch->address == unassigned_branch) {
                branch->address = conditional_query.ssy_stack.top();
            }
            conditional_query.ssy_stack.pop();
        }
        if (branch->is_brk) {
            if (branch->address == unassigned_branch) {
                branch->address = conditional_query.pbk_stack.top();
            }
            conditional_query.pbk_stack.pop();
        }
        conditional_query.address = branch->address;
        return true;
    }

    // Multi branch: one query per case target, all sharing the same stacks
    const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
    for (const auto& branch_case : multi_branch->branches) {
        auto& conditional_query = state.queries.emplace_back(q2);
        conditional_query.address = branch_case.address;
    }

    return true;
}
|
||||
|
||||
/// Emits the AST statements for a block's terminating branch.
/// A single branch with a negative address becomes a return (flagged when it is a kill), any
/// other single branch a goto; a multi branch becomes one goto per case, guarded by a
/// register comparison.
void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
    // Builds the boolean expression under which the branch is taken
    const auto get_expr = [](const Condition& cond) -> Expr {
        Expr cc_expr;
        if (cond.cc != ConditionCode::T) {
            cc_expr = MakeExpr<ExprCondCode>(cond.cc);
        }
        if (cond.predicate == Pred::UnusedIndex) {
            // No predicate: the condition code alone decides, or the branch is always taken
            if (cc_expr) {
                return cc_expr;
            }
            return MakeExpr<ExprBoolean>(true);
        }

        // Predicate values above 7 denote the negated form of predicate (value - 8)
        const u32 raw_pred = static_cast<u32>(cond.predicate);
        const bool negate = raw_pred > 7;
        const u32 pred = negate ? raw_pred - 8 : raw_pred;
        Expr pred_expr = MakeExpr<ExprPredicate>(pred);
        if (negate) {
            pred_expr = MakeExpr<ExprNot>(std::move(pred_expr));
        }
        if (cc_expr) {
            return MakeExpr<ExprAnd>(std::move(pred_expr), std::move(cc_expr));
        }
        return pred_expr;
    };

    if (const auto* const branch = std::get_if<SingleBranch>(branch_info.get())) {
        if (branch->address >= 0) {
            mm.InsertGoto(get_expr(branch->condition), branch->address);
        } else if (branch->kill) {
            mm.InsertReturn(get_expr(branch->condition), true);
        } else {
            mm.InsertReturn(get_expr(branch->condition), false);
        }
        return;
    }

    const auto* const multi_branch = std::get_if<MultiBranch>(branch_info.get());
    for (const auto& branch_case : multi_branch->branches) {
        mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
                      branch_case.address);
    }
}
|
||||
|
||||
/// Feeds labels, blocks and branches to the AST manager, then runs its decompilation.
/// Blocks are emitted in the order they are stored in state.block_info.
void DecompileShader(CFGRebuildState& state) {
    ASTManager& manager = *state.manager;
    manager.Init();
    for (const u32 label : state.labels) {
        manager.DeclareLabel(label);
    }
    for (const BlockInfo& block : state.block_info) {
        if (state.labels.count(block.start) != 0) {
            manager.InsertLabel(block.start);
        }
        if (BlockBranchIsIgnored(block.branch)) {
            // An ignored branch emits nothing; the block is inserted with its end extended by one
            manager.InsertBlock(block.start, block.end + 1);
        } else {
            manager.InsertBlock(block.start, block.end);
            InsertBranch(manager, block.branch);
        }
    }
    manager.Decompile();
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
|
||||
const CompilerSettings& settings,
|
||||
Registry& registry) {
|
||||
auto result_out = std::make_unique<ShaderCharacteristics>();
|
||||
if (settings.depth == CompileDepth::BruteForce) {
|
||||
result_out->settings.depth = CompileDepth::BruteForce;
|
||||
return result_out;
|
||||
}
|
||||
|
||||
CFGRebuildState state{program_code, start_address, registry};
|
||||
// Inspect Code and generate blocks
|
||||
state.labels.clear();
|
||||
state.labels.emplace(start_address);
|
||||
state.inspect_queries.push_back(state.start);
|
||||
while (!state.inspect_queries.empty()) {
|
||||
if (!TryInspectAddress(state)) {
|
||||
result_out->settings.depth = CompileDepth::BruteForce;
|
||||
return result_out;
|
||||
}
|
||||
}
|
||||
|
||||
bool use_flow_stack = true;
|
||||
|
||||
bool decompiled = false;
|
||||
|
||||
if (settings.depth != CompileDepth::FlowStack) {
|
||||
// Decompile Stacks
|
||||
state.queries.push_back(Query{state.start, {}, {}});
|
||||
decompiled = true;
|
||||
while (!state.queries.empty()) {
|
||||
if (!TryQuery(state)) {
|
||||
decompiled = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use_flow_stack = !decompiled;
|
||||
|
||||
// Sort and organize results
|
||||
std::sort(state.block_info.begin(), state.block_info.end(),
|
||||
[](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
|
||||
if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
|
||||
ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
|
||||
settings.disable_else_derivation};
|
||||
state.manager = &manager;
|
||||
DecompileShader(state);
|
||||
decompiled = state.manager->IsFullyDecompiled();
|
||||
if (!decompiled) {
|
||||
if (settings.depth == CompileDepth::FullDecompile) {
|
||||
LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
|
||||
} else {
|
||||
LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
|
||||
}
|
||||
state.manager->ShowCurrentState("Of Shader");
|
||||
state.manager->Clear();
|
||||
} else {
|
||||
auto characteristics = std::make_unique<ShaderCharacteristics>();
|
||||
characteristics->start = start_address;
|
||||
characteristics->settings.depth = settings.depth;
|
||||
characteristics->manager = std::move(manager);
|
||||
characteristics->end = state.block_info.back().end + 1;
|
||||
return characteristics;
|
||||
}
|
||||
}
|
||||
|
||||
result_out->start = start_address;
|
||||
result_out->settings.depth =
|
||||
use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
|
||||
result_out->blocks.clear();
|
||||
for (auto& block : state.block_info) {
|
||||
ShaderBlock new_block{};
|
||||
new_block.start = block.start;
|
||||
new_block.end = block.end;
|
||||
new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
|
||||
if (!new_block.ignore_branch) {
|
||||
new_block.branch = block.branch;
|
||||
}
|
||||
result_out->end = std::max(result_out->end, block.end);
|
||||
result_out->blocks.push_back(new_block);
|
||||
}
|
||||
if (!use_flow_stack) {
|
||||
result_out->labels = std::move(state.labels);
|
||||
return result_out;
|
||||
}
|
||||
|
||||
auto back = result_out->blocks.begin();
|
||||
auto next = std::next(back);
|
||||
while (next != result_out->blocks.end()) {
|
||||
if (!state.labels.contains(next->start) && next->start == back->end + 1) {
|
||||
back->end = next->end;
|
||||
next = result_out->blocks.erase(next);
|
||||
continue;
|
||||
}
|
||||
back = next;
|
||||
++next;
|
||||
}
|
||||
|
||||
return result_out;
|
||||
}
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,117 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <list>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <variant>
|
||||
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/ast.h"
|
||||
#include "video_core/shader/compiler_settings.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::ConditionCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
constexpr s32 exit_branch = -1;
|
||||
|
||||
struct Condition {
|
||||
Pred predicate{Pred::UnusedIndex};
|
||||
ConditionCode cc{ConditionCode::T};
|
||||
|
||||
bool IsUnconditional() const {
|
||||
return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
|
||||
}
|
||||
|
||||
bool operator==(const Condition& other) const {
|
||||
return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
|
||||
}
|
||||
|
||||
bool operator!=(const Condition& other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
};
|
||||
|
||||
class SingleBranch {
|
||||
public:
|
||||
SingleBranch() = default;
|
||||
explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
|
||||
bool is_brk_, bool ignore_)
|
||||
: condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
|
||||
ignore{ignore_} {}
|
||||
|
||||
bool operator==(const SingleBranch& b) const {
|
||||
return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
|
||||
std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
|
||||
}
|
||||
|
||||
bool operator!=(const SingleBranch& b) const {
|
||||
return !operator==(b);
|
||||
}
|
||||
|
||||
Condition condition{};
|
||||
s32 address{exit_branch};
|
||||
bool kill{};
|
||||
bool is_sync{};
|
||||
bool is_brk{};
|
||||
bool ignore{};
|
||||
};
|
||||
|
||||
struct CaseBranch {
|
||||
explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
|
||||
u32 cmp_value;
|
||||
u32 address;
|
||||
};
|
||||
|
||||
class MultiBranch {
|
||||
public:
|
||||
explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
|
||||
: gpr{gpr_}, branches{std::move(branches_)} {}
|
||||
|
||||
u32 gpr{};
|
||||
std::vector<CaseBranch> branches{};
|
||||
};
|
||||
|
||||
using BranchData = std::variant<SingleBranch, MultiBranch>;
|
||||
using BlockBranchInfo = std::shared_ptr<BranchData>;
|
||||
|
||||
bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
|
||||
|
||||
struct ShaderBlock {
|
||||
u32 start{};
|
||||
u32 end{};
|
||||
bool ignore_branch{};
|
||||
BlockBranchInfo branch{};
|
||||
|
||||
bool operator==(const ShaderBlock& sb) const {
|
||||
return std::tie(start, end, ignore_branch) ==
|
||||
std::tie(sb.start, sb.end, sb.ignore_branch) &&
|
||||
BlockBranchInfoAreEqual(branch, sb.branch);
|
||||
}
|
||||
|
||||
bool operator!=(const ShaderBlock& sb) const {
|
||||
return !operator==(sb);
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderCharacteristics {
|
||||
std::list<ShaderBlock> blocks{};
|
||||
std::set<u32> labels{};
|
||||
u32 start{};
|
||||
u32 end{};
|
||||
ASTManager manager{true, true};
|
||||
CompilerSettings settings{};
|
||||
};
|
||||
|
||||
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
|
||||
const CompilerSettings& settings,
|
||||
Registry& registry);
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,368 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <set>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_header.h"
|
||||
#include "video_core/shader/control_flow.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Asks the driver profile to deduce its texture handler size from the bound sampler offsets.
///
/// Nothing happens when the size is already known or when fewer than two bound (non-bindless)
/// samplers are available.
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
                              const std::list<SamplerEntry>& used_samplers) {
    if (gpu_driver.IsTextureHandlerSizeKnown()) {
        return;
    }
    if (used_samplers.size() <= 1) {
        return;
    }
    // Collect the offsets of every sampler that is not bindless.
    std::vector<u32> bound_offsets;
    for (const auto& sampler : used_samplers) {
        if (!sampler.is_bindless) {
            bound_offsets.emplace_back(sampler.offset);
        }
    }
    if (bound_offsets.size() > 1) {
        gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
    }
}
|
||||
|
||||
std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
|
||||
VideoCore::GuestDriverProfile& gpu_driver,
|
||||
const std::list<SamplerEntry>& used_samplers) {
|
||||
const u32 base_offset = sampler_to_deduce.offset;
|
||||
u32 max_offset{std::numeric_limits<u32>::max()};
|
||||
for (const auto& sampler : used_samplers) {
|
||||
if (sampler.is_bindless) {
|
||||
continue;
|
||||
}
|
||||
if (sampler.offset > base_offset) {
|
||||
max_offset = std::min(sampler.offset, max_offset);
|
||||
}
|
||||
}
|
||||
if (max_offset == std::numeric_limits<u32>::max()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
class ASTDecoder {
|
||||
public:
|
||||
explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
|
||||
|
||||
void operator()(ASTProgram& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(ASTIfThen& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(ASTIfElse& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(ASTBlockEncoded& ast) {}
|
||||
|
||||
void operator()(ASTBlockDecoded& ast) {}
|
||||
|
||||
void operator()(ASTVarSet& ast) {}
|
||||
|
||||
void operator()(ASTLabel& ast) {}
|
||||
|
||||
void operator()(ASTGoto& ast) {}
|
||||
|
||||
void operator()(ASTDoWhile& ast) {
|
||||
ASTNode current = ast.nodes.GetFirst();
|
||||
while (current) {
|
||||
Visit(current);
|
||||
current = current->GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(ASTReturn& ast) {}
|
||||
|
||||
void operator()(ASTBreak& ast) {}
|
||||
|
||||
void Visit(ASTNode& node) {
|
||||
std::visit(*this, *node->GetInnerData());
|
||||
if (node->IsBlockEncoded()) {
|
||||
auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
|
||||
NodeBlock bb = ir.DecodeRange(block->start, block->end);
|
||||
node->TransformBlockEncoded(std::move(bb));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ShaderIR& ir;
|
||||
};
|
||||
|
||||
void ShaderIR::Decode() {
|
||||
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
||||
|
||||
decompiled = false;
|
||||
auto info = ScanFlow(program_code, main_offset, settings, registry);
|
||||
auto& shader_info = *info;
|
||||
coverage_begin = shader_info.start;
|
||||
coverage_end = shader_info.end;
|
||||
switch (shader_info.settings.depth) {
|
||||
case CompileDepth::FlowStack: {
|
||||
for (const auto& block : shader_info.blocks) {
|
||||
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CompileDepth::NoFlowStack: {
|
||||
disable_flow_stack = true;
|
||||
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
|
||||
if (label == static_cast<u32>(exit_branch)) {
|
||||
return;
|
||||
}
|
||||
basic_blocks.insert({label, nodes});
|
||||
};
|
||||
const auto& blocks = shader_info.blocks;
|
||||
NodeBlock current_block;
|
||||
u32 current_label = static_cast<u32>(exit_branch);
|
||||
for (const auto& block : blocks) {
|
||||
if (shader_info.labels.contains(block.start)) {
|
||||
insert_block(current_block, current_label);
|
||||
current_block.clear();
|
||||
current_label = block.start;
|
||||
}
|
||||
if (!block.ignore_branch) {
|
||||
DecodeRangeInner(current_block, block.start, block.end);
|
||||
InsertControlFlow(current_block, block);
|
||||
} else {
|
||||
DecodeRangeInner(current_block, block.start, block.end + 1);
|
||||
}
|
||||
}
|
||||
insert_block(current_block, current_label);
|
||||
break;
|
||||
}
|
||||
case CompileDepth::DecompileBackwards:
|
||||
case CompileDepth::FullDecompile: {
|
||||
program_manager = std::move(shader_info.manager);
|
||||
disable_flow_stack = true;
|
||||
decompiled = true;
|
||||
ASTDecoder decoder{*this};
|
||||
ASTNode program = GetASTProgram();
|
||||
decoder.Visit(program);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
|
||||
[[fallthrough]];
|
||||
case CompileDepth::BruteForce: {
|
||||
const auto shader_end = static_cast<u32>(program_code.size());
|
||||
coverage_begin = main_offset;
|
||||
coverage_end = shader_end;
|
||||
for (u32 label = main_offset; label < shader_end; ++label) {
|
||||
basic_blocks.insert({label, DecodeRange(label, label + 1)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (settings.depth != shader_info.settings.depth) {
|
||||
LOG_WARNING(
|
||||
HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
|
||||
CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes the given address range into a freshly created node block and returns it.
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
    NodeBlock decoded;
    DecodeRangeInner(decoded, begin, end);
    return decoded;
}
|
||||
|
||||
/// Decodes instructions from `begin` and appends the resulting nodes to `bb`.
///
/// Decoding stops once the program counter reaches `end`; when `begin` is greater than `end`
/// the bound is MAX_PROGRAM_LENGTH instead.
void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
    const auto limit = begin > end ? MAX_PROGRAM_LENGTH : end;
    u32 pc = begin;
    while (pc < limit) {
        // DecodeInstr returns the address to continue from.
        pc = DecodeInstr(bb, pc);
    }
}
|
||||
|
||||
/// Appends the control flow nodes (discard, exit or branch) that terminate a shader block.
///
/// Every generated node is pushed both to `bb` and to `global_code`.
///
/// @param bb    Node block that receives the control flow nodes.
/// @param block Shader block whose branch information is translated.
void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
    // Wraps a node in conditionals: first the condition code, then the predicate (outermost).
    const auto guard_with = [&](const Condition& cond, Node inner) -> Node {
        Node guarded = inner;
        if (cond.cc != ConditionCode::T) {
            guarded = Conditional(GetConditionCode(cond.cc), {guarded});
        }
        if (cond.predicate != Pred::UnusedIndex) {
            // Raw predicate values above 7 select the negated form of predicate (value - 8).
            const u32 raw_index = static_cast<u32>(cond.predicate);
            const bool is_neg = raw_index > 7;
            const u32 pred = is_neg ? raw_index - 8 : raw_index;
            guarded = Conditional(GetPredicate(pred, is_neg), {guarded});
        }
        return guarded;
    };
    const auto emit = [&](const Node& node) {
        bb.push_back(node);
        global_code.push_back(node);
    };

    if (std::holds_alternative<SingleBranch>(*block.branch)) {
        auto single = std::get_if<SingleBranch>(block.branch.get());
        Node terminator;
        if (single->address >= 0) {
            terminator = Operation(OperationCode::Branch, Immediate(single->address));
        } else if (single->kill) {
            terminator = Operation(OperationCode::Discard);
        } else {
            terminator = Operation(OperationCode::Exit);
        }
        emit(guard_with(single->condition, terminator));
        return;
    }

    // Multi-way branch: one conditional jump per case, taken when the register equals the value.
    auto multi = std::get_if<MultiBranch>(block.branch.get());
    Node selector = GetRegister(multi->gpr);
    for (auto& entry : multi->branches) {
        Node jump = Operation(OperationCode::Branch, Immediate(entry.address));
        Node case_value = Immediate(entry.cmp_value);
        Node is_match = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false,
                                                      selector, case_value);
        emit(Conditional(is_match, {jump}));
    }
}
|
||||
|
||||
/// Decodes the instruction at `pc` and appends the generated nodes to `bb` and `global_code`.
///
/// Sched instructions are skipped and undecodable instructions produce only a comment node.
/// Otherwise a comment naming the instruction is emitted, followed by the nodes of the decoder
/// registered for the opcode type (DecodeOther when none is registered). Those nodes are wrapped
/// in a conditional when the instruction is predicated.
///
/// @return One past the program counter reported by the decoder (or `pc + 1` when skipped).
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const u32 nv_address = ConvertAddressToNvidiaSpace(pc);

    // Decoding failure
    if (!opcode) {
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
                                         nv_address, instr.value)));
        return pc + 1;
    }

    bb.push_back(Comment(
        fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

    // Opcode type -> member function that decodes instructions of that type.
    using DecoderFn = u32 (ShaderIR::*)(NodeBlock&, u32);
    static const std::map<OpCode::Type, DecoderFn> decoders = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
        {OpCode::Type::Image, &ShaderIR::DecodeImage},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };

    // Decode into a scratch block so the result can be wrapped in a conditional as a whole.
    std::vector<Node> decoded_nodes;
    const auto entry = decoders.find(opcode->get().GetType());
    if (entry == decoders.end()) {
        pc = DecodeOther(decoded_nodes, pc);
    } else {
        pc = (this->*entry->second)(decoded_nodes, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
    // executed.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);
    const bool is_predicated =
        can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex);

    if (!is_predicated) {
        for (auto& node : decoded_nodes) {
            global_code.push_back(node);
            bb.push_back(node);
        }
        return pc + 1;
    }

    const Node conditional =
        Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(decoded_nodes));
    global_code.push_back(conditional);
    bb.push_back(conditional);
    return pc + 1;
}
|
||||
|
||||
void ShaderIR::PostDecode() {
|
||||
// Deduce texture handler size if needed
|
||||
auto gpu_driver = registry.AccessGuestDriverProfile();
|
||||
DeduceTextureHandlerSize(gpu_driver, used_samplers);
|
||||
// Deduce Indexed Samplers
|
||||
if (!uses_indexed_samplers) {
|
||||
return;
|
||||
}
|
||||
for (auto& sampler : used_samplers) {
|
||||
if (!sampler.is_indexed) {
|
||||
continue;
|
||||
}
|
||||
if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
|
||||
sampler.size = *size;
|
||||
} else {
|
||||
LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
|
||||
sampler.size = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,166 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::SubOp;
|
||||
|
||||
/// Decodes a float arithmetic instruction (MOV, FMUL, FADD, MUFU, FMNMX, FCMP, RRO) at `pc`.
///
/// Operand A is always register gpr8; operand B is an immediate, register gpr20 or a constant
/// buffer entry depending on the instruction encoding. Generated nodes are appended to `bb`.
///
/// @return `pc`, unchanged.
u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);

    Node op_b = [&] {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV_C:
    case OpCode::Id::MOV_R: {
        // MOV has neither 'abs' nor 'neg' bits.
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    case OpCode::Id::FMUL_C:
    case OpCode::Id::FMUL_R:
    case OpCode::Id::FMUL_IMM: {
        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
        if (instr.fmul.tab5cb8_2 != 0) {
            LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
                      instr.fmul.tab5cb8_2.Value());
        }
        if (instr.fmul.tab5c68_0 != 1) {
            // The message now names the field that is actually checked and printed.
            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
                      instr.fmul.tab5c68_0.Value());
        }

        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);

        static constexpr std::array FmulPostFactor = {
            1.000f, // None
            0.500f, // Divide 2
            0.250f, // Divide 4
            0.125f, // Divide 8
            8.000f, // Mul 8
            4.000f, // Mul 4
            2.000f, // Mul 2
        };

        if (instr.fmul.postfactor != 0) {
            // Guard the lookup: an encoding beyond the table would read out of bounds.
            const auto postfactor = instr.fmul.postfactor.Value();
            if (postfactor < FmulPostFactor.size()) {
                op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
                                 Immediate(FmulPostFactor[postfactor]));
            } else {
                UNIMPLEMENTED_MSG("Unhandled FMUL postfactor={}", postfactor);
            }
        }

        // TODO(Rodrigo): Should precise be used when there's a postfactor?
        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);

        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FADD_C:
    case OpCode::Id::FADD_R:
    case OpCode::Id::FADD_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::MUFU: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);

        // Select the unary float operation from the sub-opcode.
        Node value = [&]() {
            switch (instr.sub_op) {
            case SubOp::Cos:
                return Operation(OperationCode::FCos, PRECISE, op_a);
            case SubOp::Sin:
                return Operation(OperationCode::FSin, PRECISE, op_a);
            case SubOp::Ex2:
                return Operation(OperationCode::FExp2, PRECISE, op_a);
            case SubOp::Lg2:
                return Operation(OperationCode::FLog2, PRECISE, op_a);
            case SubOp::Rcp:
                // The reciprocal is expressed as 1.0 / a.
                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
            case SubOp::Rsq:
                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
            case SubOp::Sqrt:
                return Operation(OperationCode::FSqrt, PRECISE, op_a);
            default:
                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
                return Immediate(0);
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FMNMX_C:
    case OpCode::Id::FMNMX_R:
    case OpCode::Id::FMNMX_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        // The predicate picks between the minimum (true) and the maximum (false).
        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);

        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FCMP_RR:
    case OpCode::Id::FCMP_RC:
    case OpCode::Id::FCMP_IMMR: {
        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
        // Register gpr39 is compared against 0.0f; the result selects between op_a and op_b.
        Node op_c = GetRegister(instr.gpr39);
        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
        SetRegister(
            bb, instr.gpr0,
            Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
        break;
    }
    case OpCode::Id::RRO_C:
    case OpCode::Id::RRO_R:
    case OpCode::Id::RRO_IMM: {
        LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");

        // Currently RRO is only implemented as a register move.
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,101 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::HalfType;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Decodes a half-float add or multiply (HADD2_R/C, HMUL2_R/C) at `pc` into `bb`.
///
/// Operand A is register gpr8; operand B is register gpr20 for the `_R` forms and a constant
/// buffer entry (treated as F32) for the `_C` forms. The result is saturated, merged with the
/// destination register and written to gpr0.
///
/// @return `pc`, unchanged.
u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    // Tests a single bit of the raw instruction word.
    const auto bit_set = [&instr](u32 position) { return ((instr.value >> position) & 1) != 0; };

    bool negate_a = false;
    bool negate_b = false;
    bool absolute_a = false;
    bool absolute_b = false;

    // The abs/neg modifier bits sit at different positions for each encoding.
    switch (id) {
    case OpCode::Id::HADD2_R:
        if (instr.alu_half.ftz == 0) {
            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
        }
        negate_a = bit_set(43);
        negate_b = bit_set(31);
        absolute_a = bit_set(44);
        absolute_b = bit_set(30);
        break;
    case OpCode::Id::HADD2_C:
        if (instr.alu_half.ftz == 0) {
            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
        }
        negate_a = bit_set(43);
        negate_b = bit_set(56);
        absolute_a = bit_set(44);
        absolute_b = bit_set(54);
        break;
    case OpCode::Id::HMUL2_R:
        negate_a = bit_set(43);
        absolute_a = bit_set(44);
        absolute_b = bit_set(30);
        break;
    case OpCode::Id::HMUL2_C:
        negate_b = bit_set(31);
        absolute_a = bit_set(44);
        absolute_b = bit_set(54);
        break;
    default:
        UNREACHABLE();
        break;
    }

    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
    op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);

    // Fetch operand B together with the half type it has to be unpacked with.
    const std::pair<HalfType, Node> source_b = [this, instr, id]() -> std::pair<HalfType, Node> {
        switch (id) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
            return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
            return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
        default:
            UNREACHABLE();
            return {HalfType::F32, Immediate(0)};
        }
    }();
    Node op_b = UnpackHalfFloat(source_b.second, source_b.first);
    op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);

    Node value;
    switch (id) {
    case OpCode::Id::HADD2_C:
    case OpCode::Id::HADD2_R:
        value = Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
        break;
    case OpCode::Id::HMUL2_C:
    case OpCode::Id::HMUL2_R:
        value = Operation(OperationCode::HMul, PRECISE, op_a, op_b);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
        value = Immediate(0);
        break;
    }
    value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,54 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Decodes a half-float add or multiply with an immediate operand (HADD2_IMM, HMUL2_IMM).
///
/// Operand A is register gpr8, operand B is the unpacked half immediate. The result is
/// saturated, merged with the destination register and written to gpr0.
///
/// @return `pc`, unchanged.
u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    // HADD2_IMM signals FTZ through a dedicated bit; every other opcode uses the precision field.
    const bool missing_ftz =
        id == OpCode::Id::HADD2_IMM
            ? instr.alu_half_imm.ftz == 0
            : instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ;
    if (missing_ftz) {
        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
    }

    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);

    const Node op_b = UnpackHalfImmediate(instr, true);

    Node value;
    switch (id) {
    case OpCode::Id::HADD2_IMM:
        value = Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
        break;
    case OpCode::Id::HMUL2_IMM:
        value = Operation(OperationCode::HMul, PRECISE, op_a, op_b);
        break;
    default:
        UNREACHABLE();
        value = Immediate(0);
        break;
    }

    value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
    SetRegister(bb, instr.gpr0, value);
    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,53 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
// Decodes the 32-bit-immediate arithmetic instructions MOV32_IMM, FMUL32_IMM and FADD32I and
// writes the result to gpr0. Any other opcode is reported as unimplemented.
// Returns pc unchanged.
u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV32_IMM:
        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
        return pc;

    case OpCode::Id::FMUL32_IMM: {
        Node product =
            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
        product = GetSaturatedFloat(product, instr.fmul32.saturate);

        SetInternalFlagsFromFloat(bb, product, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, product);
        return pc;
    }

    case OpCode::Id::FADD32I: {
        // Each operand carries its own abs/negate modifiers.
        const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
                                               instr.fadd32i.negate_a);
        const Node rhs = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
                                               instr.fadd32i.negate_b);

        const Node sum = Operation(OperationCode::FAdd, PRECISE, lhs, rhs);
        SetInternalFlagsFromFloat(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
        return pc;
    }

    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                          opcode->get().GetName());
        return pc;
    }
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,375 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::IAdd3Height;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
// Decodes one integer ALU instruction (IADD, IADD3, ISCADD, POPC, FLO, SEL, ICMP, LOP, LOP3,
// IMNMX, LEA) found at program_code[pc] and appends the resulting IR to bb.
// Operand A is gpr8; operand B is the signed 20-bit immediate, gpr20 or a const buffer entry,
// selected by the is_b_imm / is_b_gpr bits. Returns pc unchanged.
u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    Node op_a = GetRegister(instr.gpr8);
    Node op_b;
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }

    switch (id) {
    case OpCode::Id::IADD_C:
    case OpCode::Id::IADD_R:
    case OpCode::Id::IADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
        UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        Node sum = Operation(OperationCode::UAdd, op_a, op_b);

        if (instr.iadd.x) {
            // Extended add: fold the current carry flag in as an extra 0/1 addend.
            Node carry_flag = GetInternalFlag(InternalFlag::Carry);
            Node carry_in =
                Operation(OperationCode::Select, std::move(carry_flag), Immediate(1), Immediate(0));
            sum = Operation(OperationCode::UAdd, std::move(sum), std::move(carry_in));
        }

        if (instr.generates_cc) {
            const Node zero_imm = Immediate(0);

            Node is_zero = Operation(OperationCode::LogicalIEqual, sum, zero_imm);
            Node is_negative = Operation(OperationCode::LogicalILessThan, sum, zero_imm);
            Node carry_out = Operation(OperationCode::LogicalAddCarry, op_a, op_b);

            // Overflow is flagged when both operands are positive and the result is negative.
            Node a_positive = Operation(OperationCode::LogicalIGreaterThan, op_a, zero_imm);
            Node b_positive = Operation(OperationCode::LogicalIGreaterThan, op_b, zero_imm);
            Node both_positive =
                Operation(OperationCode::LogicalAnd, std::move(a_positive), std::move(b_positive));
            Node overflow = Operation(OperationCode::LogicalAnd, both_positive, is_negative);

            SetInternalFlag(bb, InternalFlag::Zero, std::move(is_zero));
            SetInternalFlag(bb, InternalFlag::Sign, std::move(is_negative));
            SetInternalFlag(bb, InternalFlag::Carry, std::move(carry_out));
            SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
        }
        SetRegister(bb, instr.gpr0, std::move(sum));
        break;
    }
    case OpCode::Id::IADD3_C:
    case OpCode::Id::IADD3_R:
    case OpCode::Id::IADD3_IMM: {
        Node op_c = GetRegister(instr.gpr39);

        // Picks the whole word or one of its 16-bit halves.
        const auto select_half = [&](IAdd3Height height, Node operand) -> Node {
            switch (height) {
            case IAdd3Height::None:
                return operand;
            case IAdd3Height::LowerHalfWord:
                return BitfieldExtract(operand, 0, 16);
            case IAdd3Height::UpperHalfWord:
                return BitfieldExtract(operand, 16, 16);
            default:
                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
                return Immediate(0);
            }
        };

        // Half-word selection and the intermediate shift only apply to the register form.
        const bool is_register_form = id == OpCode::Id::IADD3_R;
        if (is_register_form) {
            op_a = select_half(instr.iadd3.height_a, op_a);
            op_b = select_half(instr.iadd3.height_b, op_b);
            op_c = select_half(instr.iadd3.height_c, op_c);
        }

        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);

        Node partial = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
        if (is_register_form) {
            switch (instr.iadd3.mode) {
            case Tegra::Shader::IAdd3Mode::RightShift:
                // TODO(tech4me): According to
                // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
                // The addition between op_a and op_b should be done in uint33, more
                // investigation required
                partial = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, partial,
                                    Immediate(16));
                break;
            case Tegra::Shader::IAdd3Mode::LeftShift:
                partial = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, partial,
                                    Immediate(16));
                break;
            default:
                break;
            }
        }
        const Node total = Operation(OperationCode::IAdd, NO_PRECISE, partial, op_c);

        SetInternalFlagsFromInteger(bb, total, instr.generates_cc);
        SetRegister(bb, instr.gpr0, total);
        break;
    }
    case OpCode::Id::ISCADD_C:
    case OpCode::Id::ISCADD_R:
    case OpCode::Id::ISCADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in ISCADD is not implemented");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        // result = (a << shift_amount) + b
        const Node scale = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
        const Node scaled_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, scale);
        const Node result = Operation(OperationCode::IAdd, NO_PRECISE, scaled_a, op_b);

        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::POPC_C:
    case OpCode::Id::POPC_R:
    case OpCode::Id::POPC_IMM: {
        if (instr.popc.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }
        const Node bit_count = Operation(OperationCode::IBitCount, PRECISE, op_b);
        SetRegister(bb, instr.gpr0, bit_count);
        break;
    }
    case OpCode::Id::FLO_R:
    case OpCode::Id::FLO_C:
    case OpCode::Id::FLO_IMM: {
        if (instr.flo.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
        }
        const OperationCode msb_op =
            instr.flo.is_signed ? OperationCode::IBitMSB : OperationCode::UBitMSB;
        Node msb = Operation(msb_op, NO_PRECISE, std::move(op_b));
        if (instr.flo.sh) {
            msb = Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(msb), Immediate(31));
        }
        SetRegister(bb, instr.gpr0, std::move(msb));
        break;
    }
    case OpCode::Id::SEL_C:
    case OpCode::Id::SEL_R:
    case OpCode::Id::SEL_IMM: {
        const Node predicate = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
        const Node selected = Operation(OperationCode::Select, PRECISE, predicate, op_a, op_b);
        SetRegister(bb, instr.gpr0, selected);
        break;
    }
    case OpCode::Id::ICMP_CR:
    case OpCode::Id::ICMP_R:
    case OpCode::Id::ICMP_RC:
    case OpCode::Id::ICMP_IMM: {
        const Node zero = Immediate(0);

        // `test` is compared against zero; `op_rhs` is the value chosen when the test fails.
        Node op_rhs;
        Node test;
        switch (id) {
        case OpCode::Id::ICMP_CR:
            op_rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            test = GetRegister(instr.gpr39);
            break;
        case OpCode::Id::ICMP_R:
            op_rhs = GetRegister(instr.gpr20);
            test = GetRegister(instr.gpr39);
            break;
        case OpCode::Id::ICMP_RC:
            op_rhs = GetRegister(instr.gpr39);
            test = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            break;
        case OpCode::Id::ICMP_IMM:
            op_rhs = Immediate(instr.alu.GetSignedImm20_20());
            test = GetRegister(instr.gpr39);
            break;
        default:
            UNREACHABLE();
            op_rhs = zero;
            test = zero;
            break;
        }

        const Node op_lhs = GetRegister(instr.gpr8);
        const Node comparison =
            GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
        break;
    }
    case OpCode::Id::LOP_C:
    case OpCode::Id::LOP_R:
    case OpCode::Id::LOP_IMM: {
        if (instr.alu.lop.invert_a) {
            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
        }
        if (instr.alu.lop.invert_b) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }

        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
                            instr.generates_cc);
        break;
    }
    case OpCode::Id::LOP3_C:
    case OpCode::Id::LOP3_R:
    case OpCode::Id::LOP3_IMM: {
        const Node op_c = GetRegister(instr.gpr39);
        // The register form reads its truth table through a different accessor.
        const Node lut = id == OpCode::Id::LOP3_R ? Immediate(instr.alu.lop3.GetImmLut28())
                                                  : Immediate(instr.alu.lop3.GetImmLut48());

        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
        break;
    }
    case OpCode::Id::IMNMX_C:
    case OpCode::Id::IMNMX_R:
    case OpCode::Id::IMNMX_IMM: {
        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);

        const bool is_signed = instr.imnmx.is_signed;

        // Both candidates are built; the predicate picks min when true and max otherwise.
        const Node pick_min = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
        const Node minimum = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
        const Node maximum = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
        const Node result =
            Operation(OperationCode::Select, NO_PRECISE, pick_min, minimum, maximum);

        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::LEA_R2:
    case OpCode::Id::LEA_R1:
    case OpCode::Id::LEA_IMM:
    case OpCode::Id::LEA_RZ:
    case OpCode::Id::LEA_HI: {
        // result = addend + (shift_src << shift_amount); each form sources the three operands
        // from different places.
        Node shift_src;
        Node addend;
        Node shift_amount;
        switch (id) {
        case OpCode::Id::LEA_R2:
            shift_src = GetRegister(instr.gpr20);
            addend = GetRegister(instr.gpr39);
            shift_amount = Immediate(static_cast<u32>(instr.lea.r2.entry_a));
            break;
        case OpCode::Id::LEA_R1: {
            const bool negate = instr.lea.r1.neg != 0;
            shift_src = GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, negate, true);
            addend = GetRegister(instr.gpr20);
            shift_amount = Immediate(static_cast<u32>(instr.lea.r1.entry_a));
            break;
        }
        case OpCode::Id::LEA_IMM: {
            const bool negate = instr.lea.imm.neg != 0;
            shift_src = GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, negate, true);
            addend = Immediate(static_cast<u32>(instr.lea.imm.entry_a));
            shift_amount = Immediate(static_cast<u32>(instr.lea.imm.entry_b));
            break;
        }
        case OpCode::Id::LEA_RZ: {
            const bool negate = instr.lea.rz.neg != 0;
            shift_src = GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset);
            addend = GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, negate, true);
            shift_amount = Immediate(static_cast<u32>(instr.lea.rz.entry_a));
            break;
        }
        case OpCode::Id::LEA_HI:
        default:
            UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());

            shift_src = Immediate(static_cast<u32>(instr.lea.imm.entry_a));
            addend = GetRegister(instr.gpr8);
            shift_amount = Immediate(static_cast<u32>(instr.lea.imm.entry_b));
            break;
        }

        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                             "Unhandled LEA Predicate");

        Node address = Operation(OperationCode::ILogicalShiftLeft, std::move(shift_src),
                                 std::move(shift_amount));
        address = Operation(OperationCode::IAdd, std::move(addend), std::move(address));
        SetRegister(bb, instr.gpr0, std::move(address));

        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
        break;
    }

    return pc;
}
|
||||
|
||||
// Emits a three-input logic operation described by an 8-bit truth table.
// imm_lut must hold an ImmediateNode. Bit i of its value enables the minterm in which
// op_a / op_b / op_c appear complemented when bits 2 / 1 / 0 of i are clear, respectively.
// The enabled minterms are OR-ed together, written to dest and, if sets_cc is set, used to
// update the integer condition flags.
void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    const ImmediateNode lut_node = std::get<ImmediateNode>(*imm_lut);
    const auto lut = lut_node.GetValue();

    // Yields the operand itself, or its bitwise complement when `inverted` is set.
    const auto literal = [&](const Node& operand, bool inverted) -> Node {
        return inverted ? Operation(OperationCode::IBitwiseNot, operand) : operand;
    };

    Node result = Immediate(0);
    for (u32 minterm = 0; minterm < 8; ++minterm) {
        if ((lut & (1U << minterm)) == 0) {
            continue;
        }
        const Node a = literal(op_a, (minterm & 4) == 0);
        const Node b = literal(op_b, (minterm & 2) == 0);
        const Node c = literal(op_c, (minterm & 1) == 0);

        Node product = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
        product = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, product, c);
        result = Operation(OperationCode::IBitwiseOr, result, product);
    }

    SetInternalFlagsFromInteger(bb, result, sets_cc);
    SetRegister(bb, dest, result);
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,99 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::LogicOperation;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::PredicateResultMode;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
// Decodes the 32-bit-immediate integer instructions IADD32I and LOP32I. Operand A is gpr8 and
// operand B is the imm20_32 field reinterpreted as a signed 32-bit immediate. Any other opcode
// is reported as unimplemented. Returns pc unchanged.
u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node lhs = GetRegister(instr.gpr8);
    Node rhs = Immediate(static_cast<s32>(instr.alu.imm20_32));
    const bool sets_cc = instr.op_32.generates_cc != 0;

    switch (opcode->get().GetId()) {
    case OpCode::Id::IADD32I: {
        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");

        lhs = GetOperandAbsNegInteger(std::move(lhs), false, instr.iadd32i.negate_a != 0, true);

        Node sum = Operation(OperationCode::IAdd, PRECISE, std::move(lhs), std::move(rhs));

        SetInternalFlagsFromInteger(bb, sum, sets_cc);
        SetRegister(bb, instr.gpr0, std::move(sum));
        return pc;
    }
    case OpCode::Id::LOP32I: {
        // Optional per-operand complement before the logic operation itself.
        if (instr.alu.lop32i.invert_a) {
            lhs = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(lhs));
        }
        if (instr.alu.lop32i.invert_b) {
            rhs = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(rhs));
        }

        // This form never writes a predicate, hence None / UnusedIndex.
        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(lhs),
                            std::move(rhs), PredicateResultMode::None, Pred::UnusedIndex, sets_cc);
        return pc;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
                          opcode->get().GetName());
        return pc;
    }
}
|
||||
|
||||
// Emits a two-input logic operation (And / Or / Xor / PassB), stores it in dest, updates the
// integer condition flags when sets_cc is set, and optionally writes `predicate` according to
// predicate_mode. Unsupported operations and predicate modes are reported as unimplemented.
void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate,
                                   bool sets_cc) {
    Node value;
    switch (logic_op) {
    case LogicOperation::And:
        value = Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
        break;
    case LogicOperation::Or:
        value = Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
        break;
    case LogicOperation::Xor:
        value = Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
        break;
    case LogicOperation::PassB:
        value = op_b;
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
        value = Immediate(0);
        break;
    }

    SetInternalFlagsFromInteger(bb, value, sets_cc);
    SetRegister(bb, dest, value);

    // Write the predicate value depending on the predicate mode.
    if (predicate_mode == PredicateResultMode::None) {
        // Nothing to write.
        return;
    }
    if (predicate_mode == PredicateResultMode::NotZero) {
        // The predicate becomes true when the result is not zero.
        Node is_nonzero =
            Operation(OperationCode::LogicalINotEqual, std::move(value), Immediate(0));
        SetPredicate(bb, static_cast<u64>(predicate), std::move(is_nonzero));
        return;
    }
    UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,77 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
// Decodes BFE_R / BFE_C / BFE_IMM (bitfield extract). Operand A (gpr8) is the source value,
// optionally bit-reversed first; operand B packs the extraction offset in bits 0-7 and the bit
// count in bits 8-15. The extracted field is written to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);
    Node op_b;
    switch (opcode->get().GetId()) {
    case OpCode::Id::BFE_R:
        op_b = GetRegister(instr.gpr20);
        break;
    case OpCode::Id::BFE_C:
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::BFE_IMM:
        op_b = Immediate(instr.alu.GetSignedImm20_20());
        break;
    default:
        UNREACHABLE();
        op_b = Immediate(0);
        break;
    }

    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");

    const bool is_signed = instr.bfe.is_signed;

    // using reverse parallel method in
    // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
    // note for later if possible to implement faster method.
    if (instr.bfe.brev) {
        // One reversal step: ((x >> shift) & mask) | ((x & mask) << shift).
        // A zero mask means "no masking", which the final 16-bit swap uses.
        const auto swap_groups = [&](u32 shift, u32 mask) {
            Node high = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a,
                                        Immediate(shift));
            Node low = op_a;
            if (mask != 0) {
                high = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(high),
                                       Immediate(mask));
                low = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(low),
                                      Immediate(mask));
            }
            low = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(low),
                                  Immediate(shift));
            return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(high),
                                   std::move(low));
        };

        struct ReverseStep {
            u32 shift;
            u32 mask;
        };
        constexpr ReverseStep steps[] = {
            {1, 0x55555555U}, {2, 0x33333333U}, {4, 0x0F0F0F0FU}, {8, 0x00FF00FFU}, {16, 0},
        };
        for (const ReverseStep& step : steps) {
            op_a = swap_groups(step.shift, step.mask);
        }
    }

    const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
                                        Immediate(0), Immediate(8));
    const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
                                      Immediate(8), Immediate(8));
    auto extracted =
        SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
    SetRegister(bb, instr.gpr0, std::move(extracted));

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,45 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
// Decodes BFI_RC / BFI_IMM_R (bitfield insert). gpr8 supplies the bits to insert; the packed
// shift operand holds the insertion offset in bits 0-7 and the bit count in bits 8-15. The
// result is written to gpr0 and may update the integer condition flags. Returns pc unchanged.
u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // The two encodings take the packed shift and the base value from different operands.
    Node packed_shift;
    Node base;
    switch (opcode->get().GetId()) {
    case OpCode::Id::BFI_RC:
        packed_shift = GetRegister(instr.gpr39);
        base = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::BFI_IMM_R:
        packed_shift = Immediate(instr.alu.GetSignedImm20_20());
        base = GetRegister(instr.gpr39);
        break;
    default:
        UNREACHABLE();
        packed_shift = Immediate(0);
        base = Immediate(0);
        break;
    }

    const Node insert = GetRegister(instr.gpr8);
    const Node offset = BitfieldExtract(packed_shift, 0, 8);
    const Node bits = BitfieldExtract(packed_shift, 8, 8);

    const Node inserted =
        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);

    SetInternalFlagsFromInteger(bb, inserted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, inserted);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,321 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
namespace {
|
||||
|
||||
// Maps a selector value to a half-cast operation: 0 selects FCastHalf0, anything else
// selects FCastHalf1.
constexpr OperationCode GetFloatSelector(u64 selector) {
    if (selector == 0) {
        return OperationCode::FCastHalf0;
    }
    return OperationCode::FCastHalf1;
}
|
||||
|
||||
// Returns the width in bits of a register size: Byte = 8, Short = 16, Word = 32, Long = 64.
// Any value not listed yields 0.
constexpr u32 SizeInBits(Register::Size size) {
    u32 bits = 0;
    switch (size) {
    case Register::Size::Byte:
        bits = 8;
        break;
    case Register::Size::Short:
        bits = 16;
        break;
    case Register::Size::Word:
        bits = 32;
        break;
    case Register::Size::Long:
        bits = 64;
        break;
    }
    return bits;
}
|
||||
|
||||
// Computes the inclusive [min, max] range an integer must be clamped to when it is converted
// with saturation from src_size/src_signed to dst_size/dst_signed.
//
// Returns std::nullopt when no clamping is needed (Word to Word with matching signedness).
// Word to Word with differing signedness clamps to [0, INT32_MAX].
//
// Destinations of 32 bits or wider are handled without shifting: `1 << 31` cannot be negated
// or decremented in a 32-bit int, and shifting by 32 or more is undefined behaviour. Such
// destinations span the whole 32-bit range, matching the existing unsigned 32-bit special case.
constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
                                                                   Register::Size dst_size,
                                                                   bool src_signed,
                                                                   bool dst_signed) {
    const u32 dst_bits = SizeInBits(dst_size);
    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
        if (src_signed == dst_signed) {
            return std::nullopt;
        }
        return std::make_pair(0, std::numeric_limits<s32>::max());
    }
    // NOTE(review): SizeInBits yields 0 for a size it does not list, which would make the
    // signed shift below invalid; this presumes callers only pass the four listed sizes.
    if (dst_signed) {
        if (dst_bits >= 32) {
            // The full signed 32-bit range; computing it with shifts would overflow.
            return std::make_pair(std::numeric_limits<s32>::min(),
                                  std::numeric_limits<s32>::max());
        }
        // Signed destination, clamp to [-128, 127] for instance
        return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
    }
    // Unsigned destination
    if (dst_bits >= 32) {
        // Avoid shifting by 32 or more, that is undefined behavior
        return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
    }
    return std::make_pair(0, (1 << dst_bits) - 1);
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::I2I_R:
|
||||
case OpCode::Id::I2I_C:
|
||||
case OpCode::Id::I2I_IMM: {
|
||||
const bool src_signed = instr.conversion.is_input_signed;
|
||||
const bool dst_signed = instr.conversion.is_output_signed;
|
||||
const Register::Size src_size = instr.conversion.src_size;
|
||||
const Register::Size dst_size = instr.conversion.dst_size;
|
||||
const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
|
||||
|
||||
Node value = [this, instr, opcode] {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::I2I_R:
|
||||
return GetRegister(instr.gpr20);
|
||||
case OpCode::Id::I2I_C:
|
||||
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
|
||||
case OpCode::Id::I2I_IMM:
|
||||
return Immediate(instr.alu.GetSignedImm20_20());
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
|
||||
// Ensure the source selector is valid
|
||||
switch (instr.conversion.src_size) {
|
||||
case Register::Size::Byte:
|
||||
break;
|
||||
case Register::Size::Short:
|
||||
ASSERT(selector == 0 || selector == 2);
|
||||
break;
|
||||
default:
|
||||
ASSERT(selector == 0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (src_size != Register::Size::Word || selector != 0) {
|
||||
value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
|
||||
Immediate(selector * 8), Immediate(SizeInBits(src_size)));
|
||||
}
|
||||
|
||||
value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
|
||||
instr.conversion.negate_a, src_signed);
|
||||
|
||||
if (instr.alu.saturate_d) {
|
||||
if (src_signed && !dst_signed) {
|
||||
Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
|
||||
Immediate(1 << (SizeInBits(src_size) - 1)));
|
||||
value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
|
||||
std::move(value));
|
||||
|
||||
// Simplify generated expressions, this can be removed without semantic impact
|
||||
SetTemporary(bb, 0, std::move(value));
|
||||
value = GetTemporary(0);
|
||||
|
||||
if (dst_size != Register::Size::Word) {
|
||||
const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
|
||||
Node is_large =
|
||||
Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
|
||||
value = Operation(OperationCode::Select, std::move(is_large), limit,
|
||||
std::move(value));
|
||||
}
|
||||
} else if (const std::optional bounds =
|
||||
IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
|
||||
value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
|
||||
Immediate(bounds->first));
|
||||
value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
|
||||
Immediate(bounds->second));
|
||||
}
|
||||
} else if (dst_size != Register::Size::Word) {
|
||||
// No saturation, we only have to mask the result
|
||||
Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
|
||||
value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
|
||||
}
|
||||
|
||||
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::I2F_R:
|
||||
case OpCode::Id::I2F_C:
|
||||
case OpCode::Id::I2F_IMM: {
|
||||
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
|
||||
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
|
||||
"Condition codes generation in I2F is not implemented");
|
||||
|
||||
Node value = [&] {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::I2F_R:
|
||||
return GetRegister(instr.gpr20);
|
||||
case OpCode::Id::I2F_C:
|
||||
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
|
||||
case OpCode::Id::I2F_IMM:
|
||||
return Immediate(instr.alu.GetSignedImm20_20());
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
|
||||
const bool input_signed = instr.conversion.is_input_signed;
|
||||
|
||||
if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
|
||||
ASSERT(instr.conversion.src_size == Register::Size::Byte ||
|
||||
instr.conversion.src_size == Register::Size::Short);
|
||||
if (instr.conversion.src_size == Register::Size::Short) {
|
||||
ASSERT(offset == 0 || offset == 2);
|
||||
}
|
||||
value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
|
||||
std::move(value), Immediate(offset * 8));
|
||||
}
|
||||
|
||||
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
|
||||
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
|
||||
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
|
||||
value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
|
||||
|
||||
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
|
||||
|
||||
if (instr.conversion.dst_size == Register::Size::Short) {
|
||||
value = Operation(OperationCode::HCastFloat, PRECISE, value);
|
||||
}
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::F2F_R:
|
||||
case OpCode::Id::F2F_C:
|
||||
case OpCode::Id::F2F_IMM: {
|
||||
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
|
||||
UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
|
||||
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
|
||||
"Condition codes generation in F2F is not implemented");
|
||||
|
||||
Node value = [&]() {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::F2F_R:
|
||||
return GetRegister(instr.gpr20);
|
||||
case OpCode::Id::F2F_C:
|
||||
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
|
||||
case OpCode::Id::F2F_IMM:
|
||||
return GetImmediate19(instr);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
|
||||
if (instr.conversion.src_size == Register::Size::Short) {
|
||||
value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
|
||||
std::move(value));
|
||||
} else {
|
||||
ASSERT(instr.conversion.float_src.selector == 0);
|
||||
}
|
||||
|
||||
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
|
||||
|
||||
value = [&] {
|
||||
if (instr.conversion.src_size != instr.conversion.dst_size) {
|
||||
// Rounding operations only matter when the source and destination conversion size
|
||||
// is the same.
|
||||
return value;
|
||||
}
|
||||
switch (instr.conversion.f2f.GetRoundingMode()) {
|
||||
case Tegra::Shader::F2fRoundingOp::None:
|
||||
return value;
|
||||
case Tegra::Shader::F2fRoundingOp::Round:
|
||||
return Operation(OperationCode::FRoundEven, value);
|
||||
case Tegra::Shader::F2fRoundingOp::Floor:
|
||||
return Operation(OperationCode::FFloor, value);
|
||||
case Tegra::Shader::F2fRoundingOp::Ceil:
|
||||
return Operation(OperationCode::FCeil, value);
|
||||
case Tegra::Shader::F2fRoundingOp::Trunc:
|
||||
return Operation(OperationCode::FTrunc, value);
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
|
||||
instr.conversion.f2f.rounding.Value());
|
||||
return value;
|
||||
}
|
||||
}();
|
||||
value = GetSaturatedFloat(value, instr.alu.saturate_d);
|
||||
|
||||
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
|
||||
|
||||
if (instr.conversion.dst_size == Register::Size::Short) {
|
||||
value = Operation(OperationCode::HCastFloat, PRECISE, value);
|
||||
}
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::F2I_R:
|
||||
case OpCode::Id::F2I_C:
|
||||
case OpCode::Id::F2I_IMM: {
|
||||
UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
|
||||
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
|
||||
"Condition codes generation in F2I is not implemented");
|
||||
Node value = [&]() {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::F2I_R:
|
||||
return GetRegister(instr.gpr20);
|
||||
case OpCode::Id::F2I_C:
|
||||
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
|
||||
case OpCode::Id::F2I_IMM:
|
||||
return GetImmediate19(instr);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
|
||||
if (instr.conversion.src_size == Register::Size::Short) {
|
||||
value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
|
||||
std::move(value));
|
||||
} else {
|
||||
ASSERT(instr.conversion.float_src.selector == 0);
|
||||
}
|
||||
|
||||
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
|
||||
|
||||
value = [&]() {
|
||||
switch (instr.conversion.f2i.rounding) {
|
||||
case Tegra::Shader::F2iRoundingOp::RoundEven:
|
||||
return Operation(OperationCode::FRoundEven, PRECISE, value);
|
||||
case Tegra::Shader::F2iRoundingOp::Floor:
|
||||
return Operation(OperationCode::FFloor, PRECISE, value);
|
||||
case Tegra::Shader::F2iRoundingOp::Ceil:
|
||||
return Operation(OperationCode::FCeil, PRECISE, value);
|
||||
case Tegra::Shader::F2iRoundingOp::Trunc:
|
||||
return Operation(OperationCode::FTrunc, PRECISE, value);
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
|
||||
instr.conversion.f2i.rounding.Value());
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
const bool is_signed = instr.conversion.is_output_signed;
|
||||
value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
|
||||
value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,62 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Decodes a single FFMA-family instruction.
///
/// Reads the instruction word at program_code[pc], picks the second and third operands
/// according to the opcode variant (CR, RR, RC or IMM), feeds them together with gpr8 to
/// OperationCode::FFma, optionally saturates the result, and emits the internal flag update
/// and the write to gpr0 into bb.
///
/// @param bb Node block that receives the generated operations.
/// @param pc Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Modifier bits that are not modelled: only report them.
    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
    if (instr.ffma.tab5980_0 != 1) {
        LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
    }
    if (instr.ffma.tab5980_1 != 0) {
        LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
    }

    const Node first_operand = GetRegister(instr.gpr8);

    // The opcode variant decides where the remaining two operands come from.
    Node second_operand;
    Node third_operand;
    switch (opcode->get().GetId()) {
    case OpCode::Id::FFMA_CR:
        second_operand = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        third_operand = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RR:
        second_operand = GetRegister(instr.gpr20);
        third_operand = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RC:
        second_operand = GetRegister(instr.gpr39);
        third_operand = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::FFMA_IMM:
        second_operand = GetImmediate19(instr);
        third_operand = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
        second_operand = Immediate(0);
        third_operand = Immediate(0);
        break;
    }

    // FFMA only carries negate modifiers for these two operands; abs is never applied.
    second_operand = GetOperandAbsNegFloat(second_operand, false, instr.ffma.negate_b);
    third_operand = GetOperandAbsNegFloat(third_operand, false, instr.ffma.negate_c);

    const Node fused =
        Operation(OperationCode::FFma, PRECISE, first_operand, second_operand, third_operand);
    const Node result = GetSaturatedFloat(fused, instr.alu.saturate_d);

    SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,58 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Decodes a single FSET instruction.
///
/// Compares gpr8 against a second operand (19-bit immediate, gpr20 or a const buffer entry),
/// combines the comparison with predicate pred39, and writes a "true"/"false" constant selected
/// by that result into gpr0.
///
/// @param bb Node block that receives the generated operations.
/// @param pc Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};

    const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
                                           instr.fset.neg_a != 0);

    // Pick the source of the second operand from the instruction's encoding bits.
    Node rhs;
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }
    rhs = GetOperandAbsNegFloat(rhs, instr.fset.abs_b != 0, instr.fset.neg_b != 0);

    // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
    // condition is true, and to 0 otherwise.
    const Node extra_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
    const Node compare_pred = GetPredicateComparisonFloat(instr.fset.cond, lhs, rhs);

    const Node condition = Operation(combiner, compare_pred, extra_pred);

    // With bf set the result is a float constant, otherwise an integer one.
    Node when_true;
    Node when_false;
    if (instr.fset.bf) {
        when_true = Immediate(1.0f);
        when_false = Immediate(0.0f);
    } else {
        when_true = Immediate(-1);
        when_false = Immediate(0);
    }
    const Node result =
        Operation(OperationCode::Select, PRECISE, condition, when_true, when_false);

    // Flags are derived with the helper matching the type of the constant written.
    if (instr.fset.bf) {
        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,57 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
/// Decodes a single FSETP instruction.
///
/// Compares gpr8 against a second operand (19-bit immediate, gpr20 or a const buffer entry) and
/// writes "comparison OP pred39" into predicate pred3. When pred0 is not the unused index it
/// additionally receives "!comparison OP pred39".
///
/// @param bb Node block that receives the generated operations.
/// @param pc Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};

    Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                     instr.fsetp.neg_a != 0);

    // Pick the source of the second operand from the instruction's encoding bits.
    Node rhs;
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }
    rhs = GetOperandAbsNegFloat(std::move(rhs), instr.fsetp.abs_b, instr.fsetp.neg_b);

    // We can't use the constant predicate as destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node comparison =
        GetPredicateComparisonFloat(instr.fsetp.cond, std::move(lhs), std::move(rhs));
    const Node extra_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    SetPredicate(bb, instr.fsetp.pred3, Operation(combiner, comparison, extra_pred));

    // The secondary predicate is optional; nothing more to emit when it is unused.
    if (instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex)) {
        return pc;
    }

    // Set the secondary predicate to the result of !Predicate OP SecondPredicate
    const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
    SetPredicate(bb, instr.fsetp.pred0, Operation(combiner, inverted, extra_pred));

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,115 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::PredCondition;
|
||||
|
||||
/// Decodes a single HSET2 instruction (constant buffer, immediate or register form).
///
/// Compares the two half-float lanes of gpr8 against the second operand, combines each lane's
/// comparison with predicate pred39, and writes a packed 2x16-bit result into gpr0: each lane
/// holds 0x3c00 (bf set) or 0xffff (bf clear) when its condition holds and 0 otherwise.
///
/// @param bb Node block that receives the generated operations.
/// @param pc Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const OpCode::Id id = opcode->get().GetId();

    const bool is_cbuf = id == OpCode::Id::HSET2_C;
    const bool is_imm = id == OpCode::Id::HSET2_IMM;
    const bool is_reg = id == OpCode::Id::HSET2_R;

    // The modifier bits live at different positions depending on the encoding.
    PredCondition cond{};
    bool bf = false;
    bool ftz = false;
    bool neg_a = false;
    bool abs_a = false;
    bool neg_b = false;
    bool abs_b = false;
    if (is_cbuf || is_imm) {
        cond = instr.hsetp2.cbuf_and_imm.cond;
        bf = instr.Bit(53);
        ftz = instr.Bit(54);
        neg_a = instr.Bit(43);
        abs_a = instr.Bit(44);
        neg_b = instr.Bit(56);
        // NOTE(review): bit 54 is also read as ftz just above - confirm this is intended.
        abs_b = instr.Bit(54);
    } else if (is_reg) {
        cond = instr.hsetp2.reg.cond;
        bf = instr.Bit(49);
        ftz = instr.Bit(50);
        neg_a = instr.Bit(43);
        abs_a = instr.Bit(44);
        neg_b = instr.Bit(31);
        abs_b = instr.Bit(30);
    } else {
        UNREACHABLE();
    }

    // Fetch the raw second operand.
    Node op_b;
    if (is_cbuf) {
        // Inform as unimplemented as this is not tested.
        UNIMPLEMENTED_MSG("HSET2_C is not implemented");
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    } else if (is_reg) {
        op_b = GetRegister(instr.gpr20);
    } else if (is_imm) {
        op_b = UnpackHalfImmediate(instr, true);
    } else {
        UNREACHABLE();
        op_b = Node{};
    }

    if (!ftz) {
        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
    }

    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
    op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);

    // Only the register form applies abs/neg to the second operand; the register and constant
    // buffer forms both unpack it, while the immediate form is left as fetched.
    if (is_reg) {
        op_b = GetOperandAbsNegHalf(std::move(op_b), abs_b, neg_b);
    }
    if (is_reg || is_cbuf) {
        op_b = UnpackHalfFloat(std::move(op_b), instr.hset2.type_b);
    }

    Node extra_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);

    Node lane_comparisons = GetPredicateComparisonHalf(cond, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);

    // HSET2 operates on each half float in the pack.
    const u32 lane_true_bits = bf ? 0x3c00 : 0xffff;
    std::array<Node, 2> lanes;
    for (u32 lane = 0; lane < 2; ++lane) {
        Node when_true = Immediate(lane_true_bits << (lane * 16));
        Node when_false = Immediate(0);

        Node lane_pred =
            Operation(OperationCode::LogicalPick2, lane_comparisons, Immediate(lane));
        Node condition = Operation(combiner, lane_pred, extra_pred);
        lanes[lane] = Operation(OperationCode::Select, condition, std::move(when_true),
                                std::move(when_false));
    }

    // Merge both 16-bit lanes into the destination register.
    Node packed = Operation(OperationCode::UBitwiseOr, lanes[0], lanes[1]);
    SetRegister(bb, instr.gpr0, std::move(packed));

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,80 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
/// Decodes a single HSETP2 instruction (constant buffer, immediate or register form).
///
/// Compares the two half-float lanes of gpr8 against the second operand and writes predicates
/// pred3 and pred0, each combined with predicate pred39. With h_and set, pred3 receives the
/// LogicalAnd2 of the lane comparisons and pred0 its negation; otherwise pred3 receives lane 0
/// and pred0 lane 1.
///
/// @param bb Node block that receives the generated operations.
/// @param pc Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // NOTE(review): the message says "without FTZ" but it is logged when the ftz field is
    // non-zero - confirm which of the two is intended.
    if (instr.hsetp2.ftz != 0) {
        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
    }

    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    // Condition, h_and and the second operand depend on the encoding.
    Tegra::Shader::PredCondition cond{};
    bool h_and{};
    Node op_b{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::HSETP2_C: {
        cond = instr.hsetp2.cbuf_and_imm.cond;
        h_and = instr.hsetp2.cbuf_and_imm.h_and;
        Node cbuf_value = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        cbuf_value = GetOperandAbsNegHalf(std::move(cbuf_value), instr.hsetp2.cbuf.abs_b,
                                          instr.hsetp2.cbuf.negate_b);
        // F32 is hardcoded in hardware
        op_b = UnpackHalfFloat(std::move(cbuf_value), Tegra::Shader::HalfType::F32);
        break;
    }
    case OpCode::Id::HSETP2_IMM:
        cond = instr.hsetp2.cbuf_and_imm.cond;
        h_and = instr.hsetp2.cbuf_and_imm.h_and;
        op_b = UnpackHalfImmediate(instr, true);
        break;
    case OpCode::Id::HSETP2_R: {
        cond = instr.hsetp2.reg.cond;
        h_and = instr.hsetp2.reg.h_and;
        Node reg_value = UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b);
        op_b = GetOperandAbsNegHalf(std::move(reg_value), instr.hsetp2.reg.abs_b,
                                    instr.hsetp2.reg.negate_b);
        break;
    }
    default:
        UNREACHABLE();
        op_b = Immediate(0);
    }

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    const Node extra_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);

    // Stores "source OP extra_pred" into the predicate with index target.
    const auto write_predicate = [&](u64 target, Node source) {
        SetPredicate(bb, target, Operation(combiner, std::move(source), extra_pred));
    };

    const Node lane_comparisons = GetPredicateComparisonHalf(cond, op_a, op_b);
    const u64 primary = instr.hsetp2.pred3;
    const u64 secondary = instr.hsetp2.pred0;
    if (!h_and) {
        // Each destination predicate receives the comparison of one lane.
        write_predicate(primary,
                        Operation(OperationCode::LogicalPick2, lane_comparisons, Immediate(0U)));
        write_predicate(secondary,
                        Operation(OperationCode::LogicalPick2, lane_comparisons, Immediate(1U)));
        return pc;
    }

    // h_and: both lanes are joined, and the secondary predicate receives the negation.
    Node both_lanes = Operation(OperationCode::LogicalAnd2, lane_comparisons);
    write_predicate(primary, both_lanes);
    write_predicate(secondary, Operation(OperationCode::LogicalNegate, std::move(both_lanes)));

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,73 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::HalfPrecision;
|
||||
using Tegra::Shader::HalfType;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Decodes a single HFMA2 instruction (CR, RC, RR or IMM_R form).
///
/// Unpacks the three half-float operands according to their per-operand type fields, applies
/// the negate modifiers of the second and third operand, feeds them to OperationCode::HFma,
/// optionally saturates, merges the result with the current contents of gpr0 and writes it back.
///
/// @param bb Node block that receives the generated operations.
/// @param pc Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const OpCode::Id id = opcode->get().GetId();

    // The precision field sits in a different place for the register-register form.
    if (id == OpCode::Id::HFMA2_RR) {
        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
    } else {
        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
    }

    constexpr auto identity = HalfType::H0_H1;

    // Per-encoding operand selection. The initial values are the ones used for opcodes that are
    // not handled below.
    bool neg_b = false;
    bool neg_c = false;
    bool saturate = false;
    HalfType type_b = identity;
    HalfType type_c = identity;
    Node op_b;
    Node op_c;
    switch (id) {
    case OpCode::Id::HFMA2_CR:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = HalfType::F32;
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_RC:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = instr.hfma2.type_reg39;
        op_b = GetRegister(instr.gpr39);
        type_c = HalfType::F32;
        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::HFMA2_RR:
        neg_b = instr.hfma2.rr.negate_b;
        neg_c = instr.hfma2.rr.negate_c;
        saturate = instr.hfma2.rr.saturate;
        type_b = instr.hfma2.type_b;
        op_b = GetRegister(instr.gpr20);
        type_c = instr.hfma2.rr.type_c;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_IMM_R:
        // The immediate form has no negate bit for the second operand.
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = identity;
        op_b = UnpackHalfImmediate(instr, true);
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    default:
        op_b = Immediate(0);
        op_c = Immediate(0);
        break;
    }

    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);

    Node result = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
    result = GetSaturatedHalfFloat(result, saturate);
    // The merge mode decides how the new value is combined with what gpr0 already holds.
    result = HalfMerge(GetRegister(instr.gpr0), result, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,536 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::PredCondition;
|
||||
using Tegra::Shader::StoreType;
|
||||
using Tegra::Texture::ComponentType;
|
||||
using Tegra::Texture::TextureFormat;
|
||||
using Tegra::Texture::TICEntry;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns the component type stored in descriptor for the given component index.
///
/// The descriptor keeps one type per channel (r/g/b/a); which channel a component index maps to
/// depends on the texture format. Formats or component indices that are not covered are reported
/// through UNIMPLEMENTED_MSG and yield ComponentType::FLOAT.
///
/// @param descriptor Sampler descriptor providing the format and the per-channel types.
/// @param component  Zero-based component index.
ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                               std::size_t component) {
    const TextureFormat format{descriptor.format};
    switch (format) {
    // Channel order: R, G, B, A
    case TextureFormat::R16G16B16A16:
    case TextureFormat::R32G32B32A32:
    case TextureFormat::R32G32B32:
    case TextureFormat::R32G32:
    case TextureFormat::R16G16:
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        switch (component) {
        case 0:
            return descriptor.r_type;
        case 1:
            return descriptor.g_type;
        case 2:
            return descriptor.b_type;
        case 3:
            return descriptor.a_type;
        default:
            break;
        }
        break;
    // Channel order: A, R, G, B
    case TextureFormat::A8R8G8B8:
        switch (component) {
        case 0:
            return descriptor.a_type;
        case 1:
            return descriptor.r_type;
        case 2:
            return descriptor.g_type;
        case 3:
            return descriptor.b_type;
        default:
            break;
        }
        break;
    // Channel order: A, B, G, R
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        switch (component) {
        case 0:
            return descriptor.a_type;
        case 1:
            return descriptor.b_type;
        case 2:
            return descriptor.g_type;
        case 3:
            return descriptor.r_type;
        default:
            break;
        }
        break;
    // Channel order: R, B, G
    case TextureFormat::R32_B24G8:
        switch (component) {
        case 0:
            return descriptor.r_type;
        case 1:
            return descriptor.b_type;
        case 2:
            return descriptor.g_type;
        default:
            break;
        }
        break;
    // Channel order: B, G, R
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
    case TextureFormat::B10G11R11:
        switch (component) {
        case 0:
            return descriptor.b_type;
        case 1:
            return descriptor.g_type;
        case 2:
            return descriptor.r_type;
        default:
            break;
        }
        break;
    // Channel order: G, R
    case TextureFormat::R24G8:
    case TextureFormat::R8G24:
    case TextureFormat::R8G8:
    case TextureFormat::G4R4:
        switch (component) {
        case 0:
            return descriptor.g_type;
        case 1:
            return descriptor.r_type;
        default:
            break;
        }
        break;
    default:
        break;
    }
    // Reached for unknown formats and for component indices a known format does not provide.
    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
    return ComponentType::FLOAT;
}
|
||||
|
||||
/// Tells whether a component is enabled in a 4-bit component mask.
///
/// @param component_mask Mask value in [0, 15]; std::array::at throws std::out_of_range for
///                       larger values.
/// @param component      Component index in [0, 3]; std::bitset::test throws std::out_of_range
///                       for larger values.
/// @return true when the bit for component is set in the table entry for component_mask.
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr u8 red = 0b0001;
    constexpr u8 green = 0b0010;
    constexpr u8 blue = 0b0100;
    constexpr u8 alpha = 0b1000;
    // One entry per mask value, listing the channels that mask enables.
    constexpr std::array<u8, 16> channels_by_mask = {
        0,
        (red),
        (green),
        (red | green),
        (blue),
        (red | blue),
        (green | blue),
        (red | green | blue),
        (alpha),
        (red | alpha),
        (green | alpha),
        (red | green | alpha),
        (blue | alpha),
        (red | blue | alpha),
        (green | blue | alpha),
        (red | green | blue | alpha),
    };
    const std::bitset<4> enabled_channels{channels_by_mask.at(component_mask)};
    return enabled_channels.test(component);
}
|
||||
|
||||
/// Returns the width in bits of one component of a texture format.
///
/// Component indices a format does not provide yield 0, except for the formats that return a
/// fixed or two-valued width regardless of the index (see the individual cases). Formats that
/// are not listed are reported through UNIMPLEMENTED_MSG and yield 0.
///
/// @param format    Texture format to query.
/// @param component Zero-based component index.
u32 GetComponentSize(TextureFormat format, std::size_t component) {
    switch (format) {
    // Uniform formats: the same width for every index.
    case TextureFormat::R32G32B32A32:
        return 32;
    case TextureFormat::R16G16B16A16:
        return 16;
    case TextureFormat::A8R8G8B8:
        return 8;
    case TextureFormat::A4B4G4R4:
        return 4;

    // Uniform width, limited number of components.
    case TextureFormat::R32G32B32:
        return component < 3 ? 32 : 0;
    case TextureFormat::R32G32:
        return component < 2 ? 32 : 0;
    case TextureFormat::R16G16:
        return component < 2 ? 16 : 0;
    case TextureFormat::R8G8:
        return component < 2 ? 8 : 0;
    case TextureFormat::G4R4:
        return component < 2 ? 4 : 0;
    case TextureFormat::R32:
        return component == 0 ? 32 : 0;
    case TextureFormat::R16:
        return component == 0 ? 16 : 0;
    case TextureFormat::R8:
        return component == 0 ? 8 : 0;
    case TextureFormat::R1:
        return component == 0 ? 1 : 0;

    // Packed four-component formats: every index outside the wide group gets the small width.
    case TextureFormat::A2B10G10R10:
        return (component >= 1 && component <= 3) ? 10 : 2;
    case TextureFormat::A5B5G5R1:
        return component <= 2 ? 5 : 1;
    case TextureFormat::A1B5G5R5:
        return (component >= 1 && component <= 3) ? 5 : 1;

    // Packed formats with a distinct width per component.
    case TextureFormat::R32_B24G8:
        switch (component) {
        case 0:
            return 32;
        case 1:
            return 24;
        case 2:
            return 8;
        default:
            return 0;
        }
    case TextureFormat::B5G6R5:
        switch (component) {
        case 0:
        case 2:
            return 5;
        case 1:
            return 6;
        default:
            return 0;
        }
    case TextureFormat::B6G5R5:
        switch (component) {
        case 0:
            return 6;
        case 1:
        case 2:
            return 5;
        default:
            return 0;
        }
    case TextureFormat::B10G11R11:
        switch (component) {
        case 0:
            return 10;
        case 1:
        case 2:
            return 11;
        default:
            return 0;
        }
    case TextureFormat::R24G8:
        switch (component) {
        case 0:
            return 8;
        case 1:
            return 24;
        default:
            return 0;
        }
    case TextureFormat::R8G24:
        switch (component) {
        case 0:
            return 24;
        case 1:
            return 8;
        default:
            return 0;
        }

    default:
        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
        return 0;
    }
}
|
||||
|
||||
/// Returns the component mask of a texture format: bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A.
///
/// Formats that are not listed are reported through UNIMPLEMENTED_MSG and yield the full RGBA
/// mask.
///
/// @param format Texture format to query.
std::size_t GetImageComponentMask(TextureFormat format) {
    constexpr std::size_t mask_r = 0b0001;
    constexpr std::size_t mask_rg = 0b0011;
    constexpr std::size_t mask_rgb = 0b0111;
    constexpr std::size_t mask_rgba = 0b1111;
    switch (format) {
    // Four components
    case TextureFormat::R32G32B32A32:
    case TextureFormat::R16G16B16A16:
    case TextureFormat::A8R8G8B8:
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        return mask_rgba;
    // Three components
    case TextureFormat::R32G32B32:
    case TextureFormat::R32_B24G8:
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
    case TextureFormat::B10G11R11:
        return mask_rgb;
    // Two components
    case TextureFormat::R32G32:
    case TextureFormat::R16G16:
    case TextureFormat::R24G8:
    case TextureFormat::R8G24:
    case TextureFormat::R8G8:
    case TextureFormat::G4R4:
        return mask_rg;
    // One component
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        return mask_r;
    default:
        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
        return mask_rgba;
    }
}
|
||||
|
||||
/// Returns how many coordinate values an image of the given type takes.
///
/// Values outside the listed enumerators hit UNREACHABLE and yield 1.
///
/// @param image_type Image type to query.
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    using Tegra::Shader::ImageType;

    // No default label on purpose: every listed enumerator returns from inside the switch.
    switch (image_type) {
    case ImageType::Texture1D:
    case ImageType::TextureBuffer:
        return 1;
    case ImageType::Texture1DArray:
    case ImageType::Texture2D:
        return 2;
    case ImageType::Texture2DArray:
    case ImageType::Texture3D:
        return 3;
    }
    UNREACHABLE();
    return 1;
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Converts a component value according to the component type of the image format.
///
/// - SNORM: multiplies by 2^component_size / 2 - 1, casts to integer and extracts the low
///   component_size bits.
/// - SINT / UNORM: multiplies by 2^component_size - 1 and casts to integer (signed cast for SINT).
/// - UINT: passes the value through.
/// - FLOAT: passes the value through, applying HCastFloat first when component_size is 16.
/// - Anything else is reported through UNIMPLEMENTED_MSG and passed through.
///
/// @param component_type Type of the component.
/// @param component_size Width of the component in bits.
/// @param original_value Node holding the value to convert.
/// @return The converted node and a flag: true for SNORM, FLOAT and unhandled types, false for
///         UINT and UNORM, true for SINT. (Presumably "value is signed" - confirm with callers.)
std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
                                                  Node original_value) {
    // 2^component_size as a float. The shift is done on a 64-bit value: component_size can be
    // 32 (e.g. the R32 formats), and shifting a 32-bit int by 32 is undefined behaviour.
    const auto range = [component_size] {
        return static_cast<float>(u64{1} << component_size);
    };

    switch (component_type) {
    case ComponentType::SNORM: {
        // range [-1.0, 1.0]
        auto cnv_value =
            Operation(OperationCode::FMul, original_value, Immediate(range() / 2.f - 1.f));
        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
    }
    case ComponentType::SINT:
    case ComponentType::UNORM: {
        const bool is_signed = component_type == ComponentType::SINT;
        // range [0.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(range() - 1.f));
        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
                is_signed};
    }
    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
        return {std::move(original_value), false};
    case ComponentType::FLOAT:
        if (component_size == 16) {
            return {Operation(OperationCode::HCastFloat, original_value), true};
        } else {
            return {std::move(original_value), true};
        }
    default:
        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
        return {std::move(original_value), true};
    }
}
|
||||
|
||||
/**
 * Decodes the image instruction located at `pc` (SULD, SUST or SUATOM) and appends the
 * resulting IR to `bb`.
 *
 * @param bb Block that receives the generated nodes.
 * @param pc Index of the instruction inside the program code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Builds the coordinate list for an image access: one register per coordinate, taken from
    // consecutive registers starting at gpr8.
    const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
        const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
        std::vector<Node> coords;
        coords.reserve(num_coords);
        for (std::size_t coord = 0; coord < num_coords; ++coord) {
            coords.push_back(GetRegister(instr.gpr8.Value() + coord));
        }
        return coords;
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::SULD: {
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);

        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkRead();

        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
            // Load every enabled component into a temporary first, then copy the temporaries
            // into consecutive destination registers.
            u32 num_loaded = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (instr.suldst.IsComponentEnabled(element)) {
                    MetaImage meta{image, {}, element};
                    Node loaded = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                    SetTemporary(bb, num_loaded, std::move(loaded));
                    ++num_loaded;
                }
            }
            for (u32 dst = 0; dst < num_loaded; ++dst) {
                SetRegister(bb, instr.gpr0.Value() + dst, GetTemporary(dst));
            }
        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);

            // Look up the descriptor of the accessed image, either from the bound slot or by
            // tracking the const buffer the bindless handle was read from.
            auto descriptor = [this, instr] {
                std::optional<Tegra::Engines::SamplerDescriptor> found;
                if (!instr.suldst.is_immediate) {
                    const Node image_register = GetRegister(instr.gpr39);
                    const auto tracked = TrackCbuf(image_register, global_code,
                                                   static_cast<s64>(global_code.size()));
                    const auto buffer = std::get<1>(tracked);
                    const auto offset = std::get<2>(tracked);
                    found = registry.ObtainBindlessSampler(buffer, offset);
                } else {
                    found =
                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
                }
                if (!found) {
                    UNREACHABLE_MSG("Failed to obtain image descriptor");
                }
                return *found;
            }();

            const auto comp_mask = GetImageComponentMask(descriptor.format);

            switch (instr.suldst.GetStoreDataLayout()) {
            case StoreType::Bits32:
            case StoreType::Bits64: {
                u32 num_words = 0;   // 32-bit words completed so far
                u32 bit_offset = 0;  // bits already occupied in the word being packed
                Node packed = Immediate(0);
                for (u32 element = 0; element < 4; ++element) {
                    if (!IsComponentEnabled(comp_mask, element)) {
                        continue;
                    }
                    const auto component_type = GetComponentType(descriptor, element);
                    const auto component_size = GetComponentSize(descriptor.format, element);
                    MetaImage meta{image, {}, element};

                    Node loaded = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                    auto component =
                        GetComponentValue(component_type, component_size, std::move(loaded));
                    Node& converted = component.first;
                    const bool is_signed = component.second;

                    // Move the component to its position inside the word being packed.
                    if (bit_offset > 0) {
                        converted = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
                                                    std::move(converted), Immediate(bit_offset));
                    }
                    bit_offset += component_size;

                    // Merge the component into the word.
                    packed = Operation(OperationCode::UBitwiseOr, packed, std::move(converted));

                    // Once a full 32-bit word has been packed, stash it in a temporary and
                    // start over with an empty word.
                    if (bit_offset >= 32) {
                        SetTemporary(bb, num_words, std::move(packed));
                        ++num_words;
                        packed = Immediate(0);
                        bit_offset = 0;
                    }
                }
                for (u32 dst = 0; dst < num_words; ++dst) {
                    SetRegister(bb, instr.gpr0.Value() + dst, GetTemporary(dst));
                }
                break;
            }
            default:
                UNREACHABLE();
                break;
            }
        }
        break;
    }
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA

        // The stored value is always read as four consecutive registers starting at gpr0.
        constexpr std::size_t hardcoded_size{4};
        std::vector<Node> values;
        values.reserve(hardcoded_size);
        for (std::size_t component = 0; component < hardcoded_size; ++component) {
            values.push_back(GetRegister(instr.gpr0.Value() + component));
        }

        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkWrite();

        MetaImage meta{image, std::move(values)};
        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
        break;
    }
    case OpCode::Id::SUATOM: {
        UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);

        // Map the encoded atomic operation to its IR operation. Only 32-bit integer types are
        // handled; anything else is reported and treated as an atomic add.
        const OperationCode operation_code = [instr] {
            using OpType = Tegra::Shader::ImageAtomicOperationType;
            using Op = Tegra::Shader::ImageAtomicOperation;

            const auto op_type = instr.suatom_d.operation_type.Value();
            if (op_type == OpType::S32 || op_type == OpType::U32) {
                switch (instr.suatom_d.operation) {
                case Op::Add:
                    return OperationCode::AtomicImageAdd;
                case Op::And:
                    return OperationCode::AtomicImageAnd;
                case Op::Or:
                    return OperationCode::AtomicImageOr;
                case Op::Xor:
                    return OperationCode::AtomicImageXor;
                case Op::Exch:
                    return OperationCode::AtomicImageExchange;
                default:
                    break;
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
                              static_cast<u64>(instr.suatom_d.operation.Value()),
                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
            return OperationCode::AtomicImageAdd;
        }();

        Node operand = GetRegister(instr.gpr0);

        const auto type = instr.suatom_d.image_type;
        auto& image = GetImage(instr.image, type);
        image.MarkAtomic();

        // gpr0 provides the operand and also receives the result of the atomic operation.
        MetaImage meta{image, {std::move(operand)}};
        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
||||
|
||||
/// Returns the entry tracking the bound image at the given index, registering a new entry the
/// first time the image is seen.
ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
    const auto offset = static_cast<u32>(image.index.Value());

    // Reuse the entry if this image has been seen before.
    for (ImageEntry& entry : used_images) {
        if (entry.offset != offset) {
            continue;
        }
        ASSERT(!entry.is_bindless && entry.type == type);
        return entry;
    }

    // Not seen yet: append a new entry whose index is its position in the list.
    const auto next_index = static_cast<u32>(used_images.size());
    return used_images.emplace_back(next_index, offset, type);
}
|
||||
|
||||
/// Returns the entry tracking the bindless image whose handle is held in `reg`, registering a
/// new entry the first time the (buffer, offset) pair is seen.
ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
    // Track the register back to the const buffer location it was loaded from.
    const Node image_register = GetRegister(reg);
    const auto tracked =
        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
    const auto buffer = std::get<1>(tracked);
    const auto offset = std::get<2>(tracked);

    // Reuse the entry if this const buffer location has been seen before.
    for (ImageEntry& entry : used_images) {
        if (entry.buffer == buffer && entry.offset == offset) {
            ASSERT(entry.is_bindless && entry.type == type);
            return entry;
        }
    }

    // Not seen yet: append a new entry whose index is its position in the list.
    const auto next_index = static_cast<u32>(used_images.size());
    return used_images.emplace_back(next_index, offset, buffer, type);
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,49 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/**
 * Decodes the integer set instruction at `pc` and appends the resulting IR to `bb`.
 *
 * @param bb Block that receives the generated nodes.
 * @param pc Index of the instruction inside the program code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};

    const Node op_a = GetRegister(instr.gpr8);

    // The second operand is an immediate, a register or a const buffer entry.
    Node op_b;
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }

    // The comparison result is combined with an extra (optionally negated) predicate.
    const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
    const Node first_pred =
        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
    const Node predicate = Operation(combiner, first_pred, second_pred);

    // When the condition holds the destination receives 1.0f (bf set) or -1 (bf clear);
    // otherwise it receives zero of the matching kind.
    Node true_value;
    Node false_value;
    if (instr.iset.bf) {
        true_value = Immediate(1.0f);
        false_value = Immediate(0.0f);
    } else {
        true_value = Immediate(-1);
        false_value = Immediate(0);
    }

    const Node value =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,53 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
/**
 * Decodes the integer set-predicate instruction at `pc` and appends the resulting IR to `bb`.
 *
 * @param bb Block that receives the generated nodes.
 * @param pc Index of the instruction inside the program code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};

    const Node op_a = GetRegister(instr.gpr8);

    // The second operand is an immediate, a register or a const buffer entry.
    Node op_b;
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }

    // The constant predicate cannot be the destination.
    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);

    // Primary predicate = Comparison OP SecondPredicate
    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
    const Node primary = Operation(combiner, comparison, second_pred);
    SetPredicate(bb, instr.isetp.pred3, primary);

    // Secondary predicate (when enabled) = !Comparison OP SecondPredicate
    const bool has_secondary = instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (has_secondary) {
        const Node negated = Operation(OperationCode::LogicalNegate, comparison);
        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated, second_pred));
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,493 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::AtomicOp;
|
||||
using Tegra::Shader::AtomicType;
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::GlobalAtomicType;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::StoreType;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Maps a shader atomic operation to the matching integer atomic IR operation. Operations that
/// are not handled are reported and mapped to an atomic add.
OperationCode GetAtomOperation(AtomicOp op) {
    if (op == AtomicOp::Add) {
        return OperationCode::AtomicIAdd;
    }
    if (op == AtomicOp::Min) {
        return OperationCode::AtomicIMin;
    }
    if (op == AtomicOp::Max) {
        return OperationCode::AtomicIMax;
    }
    if (op == AtomicOp::And) {
        return OperationCode::AtomicIAnd;
    }
    if (op == AtomicOp::Or) {
        return OperationCode::AtomicIOr;
    }
    if (op == AtomicOp::Xor) {
        return OperationCode::AtomicIXor;
    }
    if (op == AtomicOp::Exch) {
        return OperationCode::AtomicIExchange;
    }

    UNIMPLEMENTED_MSG("op={}", op);
    return OperationCode::AtomicIAdd;
}
|
||||
|
||||
/// Tells whether the uniform type is narrower than a 32-bit word (unsigned byte or short) and
/// therefore has to be accessed as part of a word.
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
    case Tegra::Shader::UniformType::UnsignedShort:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
/// Returns the mask applied to an address to obtain the byte offset of a sub-word value inside
/// its word: 0b11 for bytes, 0b10 for shorts. Must only be called for those two types.
u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
    using Type = Tegra::Shader::UniformType;

    if (uniform_type == Type::UnsignedByte) {
        return 0b11;
    }
    if (uniform_type == Type::UnsignedShort) {
        return 0b10;
    }

    UNREACHABLE();
    return 0;
}
|
||||
|
||||
/// Returns the size in bits of a value of the given uniform type. Types that are not handled are
/// reported and treated as 32 bits wide.
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
    using Type = Tegra::Shader::UniformType;

    if (uniform_type == Type::UnsignedByte) {
        return 8;
    }
    if (uniform_type == Type::UnsignedShort) {
        return 16;
    }
    if (uniform_type == Type::Single) {
        return 32;
    }
    if (uniform_type == Type::Double) {
        return 64;
    }
    if (uniform_type == Type::Quad || uniform_type == Type::UnsignedQuad) {
        return 128;
    }

    UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
    return 32;
}
|
||||
|
||||
/// Extracts `size` bits from `value`, starting at the bit position selected by the masked low
/// bits of `address` (byte offset times eight).
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
    // Byte offset inside the word...
    Node byte_offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
    // ...converted to a bit offset (multiply by 8).
    Node bit_offset =
        Operation(OperationCode::ULogicalShiftLeft, move(byte_offset), Immediate(3));
    Node bit_count = Immediate(size);
    return Operation(OperationCode::UBitfieldExtract, move(value), move(bit_offset),
                     move(bit_count));
}
|
||||
|
||||
/// Inserts the low `size` bits of `value` into `dest` at the bit position selected by the masked
/// low bits of `address` (byte offset times eight).
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
    // Byte offset inside the word...
    Node byte_offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
    // ...converted to a bit offset (multiply by 8).
    Node bit_offset =
        Operation(OperationCode::ULogicalShiftLeft, move(byte_offset), Immediate(3));
    Node bit_count = Immediate(size);
    return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(bit_offset),
                     move(bit_count));
}
|
||||
|
||||
/// Sign-extends a 16-bit value held in the low half of `value` to 32 bits.
Node Sign16Extend(Node value) {
    // Isolate bit 15 and test whether it is set.
    Node sign_bit = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
    Node is_negative =
        Operation(OperationCode::LogicalUEqual, move(sign_bit), Immediate(1U << 15));
    // Negative values get the upper 16 bits filled with ones.
    Node upper_bits =
        Operation(OperationCode::Select, is_negative, Immediate(0xFFFF0000), Immediate(0));
    return Operation(OperationCode::UBitwiseOr, move(value), move(upper_bits));
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/**
 * Decodes the memory instruction located at `pc` and appends the resulting IR to `bb`.
 *
 * Handles attribute loads/stores (LD_A, ST_A), const buffer loads (LD_C), local and shared
 * memory accesses (LD_L, LD_S, ST_L, ST_S), global memory accesses (LD, LDG, ST, STG), atomics
 * (RED, ATOM, ATOMS) and AL2P.
 *
 * @param bb Block that receives the generated nodes.
 * @param pc Index of the instruction inside the program code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::LD_A: {
        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
                                 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
                             "Non-32 bits PHYS reads are not implemented");

        const Node buffer{GetRegister(instr.gpr39)};

        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto LoadNextElement = [&](u32 reg_offset) {
            const Node attribute{instr.attribute.fmt20.IsPhysical()
                                     ? GetPhysicalInputAttribute(instr.gpr8, buffer)
                                     : GetInputAttribute(static_cast<Attribute::Index>(next_index),
                                                         next_element, buffer)};

            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);

            // Load the next attribute element into the following register. If the element
            // to load goes beyond the vec4 size, load the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The encoded size is the number of 32-bit words minus one.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            LoadNextElement(reg_offset);
        }
        break;
    }
    case OpCode::Id::LD_C: {
        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);

        Node index = GetRegister(instr.gpr8);

        const Node op_a =
            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);

        switch (instr.ld_c.type.Value()) {
        case Tegra::Shader::UniformType::Single:
            SetRegister(bb, instr.gpr0, op_a);
            break;

        case Tegra::Shader::UniformType::Double: {
            // The second word lives four bytes after the first one.
            const Node op_b =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);

            // Both words go through temporaries before the destination registers are written.
            SetTemporary(bb, 0, op_a);
            SetTemporary(bb, 1, op_b);
            SetRegister(bb, instr.gpr0, GetTemporary(0));
            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
        }
        break;
    }
    case OpCode::Id::LD_L:
        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
        [[fallthrough]];
    case OpCode::Id::LD_S: {
        // Address of the word at `offset` bytes from the encoded base (gpr8 + immediate).
        const auto GetAddress = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
        };
        // LD_S reads shared memory, LD_L reads local memory.
        const auto GetMemory = [&](s32 offset) {
            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
                                                             : GetLocalMemory(GetAddress(offset));
        };

        switch (instr.ldst_sl.type.Value()) {
        case StoreType::Signed16:
            // Pick the addressed half of the word and sign-extend it to 32 bits.
            SetRegister(bb, instr.gpr0,
                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
            break;
        case StoreType::Bits32:
        case StoreType::Bits64:
        case StoreType::Bits128: {
            // Number of 32-bit words to load.
            const u32 count = [&] {
                switch (instr.ldst_sl.type.Value()) {
                case StoreType::Bits32:
                    return 1;
                case StoreType::Bits64:
                    return 2;
                case StoreType::Bits128:
                    return 4;
                default:
                    UNREACHABLE();
                    return 0;
                }
            }();
            // All words are read into temporaries before any destination register is written.
            for (u32 i = 0; i < count; ++i) {
                SetTemporary(bb, i, GetMemory(i * 4));
            }
            for (u32 i = 0; i < count; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
                              instr.ldst_sl.type.Value());
        }
        break;
    }
    case OpCode::Id::LD:
    case OpCode::Id::LDG: {
        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
            switch (opcode->get().GetId()) {
            case OpCode::Id::LD:
                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
                return instr.generic.type;
            case OpCode::Id::LDG:
                return instr.ldg.type;
            default:
                UNREACHABLE();
                return {};
            }
        }();

        const auto [real_address_base, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, false);

        // `size` is in bits; `count` is the number of 32-bit registers it occupies.
        const u32 size = GetMemorySize(type);
        const u32 count = Common::AlignUp(size, 32) / 32;
        if (!real_address_base || !base_address) {
            // Tracking failed, load zeroes.
            for (u32 i = 0; i < count; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
            }
            break;
        }

        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

            // To handle unaligned loads get the bytes used to dereference global memory and extract
            // those bytes from the loaded u32.
            if (IsUnaligned(type)) {
                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
            }

            SetTemporary(bb, i, gmem);
        }

        for (u32 i = 0; i < count; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
    case OpCode::Id::ST_A: {
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute stores are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute stores are not supported");

        u64 element = instr.attribute.fmt20.element;
        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        // The encoded size is the number of 32-bit words minus one.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            Node dest;
            if (instr.attribute.fmt20.patch) {
                // Patch outputs are addressed by a flat element offset.
                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
                dest = MakeNode<PatchNode>(offset);
            } else {
                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
                                          GetRegister(instr.gpr39));
            }
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

            // Store the following register into the next attribute element. If the element goes
            // beyond the vec4 size, continue with the first element of the next attribute.
            element = (element + 1) % 4;
            index = index + (element == 0 ? 1 : 0);
        }
        break;
    }
    case OpCode::Id::ST_L:
        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
        [[fallthrough]];
    case OpCode::Id::ST_S: {
        // Address of the word at `offset` bytes from the encoded base (gpr8 + immediate).
        const auto GetAddress = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
        };

        // ST_L targets local memory, ST_S targets shared memory.
        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;

        switch (instr.ldst_sl.type.Value()) {
        // Wider stores fall through to the narrower cases, writing the highest words first.
        case StoreType::Bits128:
            (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
            (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
            [[fallthrough]];
        case StoreType::Bits64:
            (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
            [[fallthrough]];
        case StoreType::Bits32:
            (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
            break;
        case StoreType::Unsigned16:
        case StoreType::Signed16: {
            // Read the current word, replace the addressed half and write the word back.
            Node address = GetAddress(0);
            Node memory = (this->*get_memory)(address);
            (this->*set_memory)(
                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
                              instr.ldst_sl.type.Value());
        }
        break;
    }
    case OpCode::Id::ST:
    case OpCode::Id::STG: {
        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
            switch (opcode->get().GetId()) {
            case OpCode::Id::ST:
                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
                return instr.generic.type;
            case OpCode::Id::STG:
                return instr.stg.type;
            default:
                UNREACHABLE();
                return {};
            }
        }();

        // For unaligned stores we have to read memory too, since the stored value is merged
        // into the existing word.
        const bool is_read = IsUnaligned(type);
        const auto [real_address_base, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, is_read, true);
        if (!real_address_base || !base_address) {
            // Tracking failed, skip the store.
            break;
        }

        // `size` is in bits; `count` is the number of 32-bit registers it occupies.
        const u32 size = GetMemorySize(type);
        const u32 count = Common::AlignUp(size, 32) / 32;
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
            Node value = GetRegister(instr.gpr0.Value() + i);

            if (IsUnaligned(type)) {
                const u32 mask = GetUnalignedMask(type);
                value = InsertUnaligned(gmem, move(value), real_address, mask, size);
            }

            bb.push_back(Operation(OperationCode::Assign, gmem, value));
        }
        break;
    }
    case OpCode::Id::RED: {
        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
                             instr.red.type.Value());
        const auto [real_address, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, true);
        if (!real_address || !base_address) {
            // Tracking failed, skip atomic.
            break;
        }
        // The result of the atomic is not written to any register.
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
        Node value = GetRegister(instr.gpr0);
        bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
        break;
    }
    case OpCode::Id::ATOM: {
        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
                                 instr.atom.operation == AtomicOp::Dec ||
                                 instr.atom.operation == AtomicOp::SafeAdd,
                             "operation={}", instr.atom.operation.Value());
        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
                                 instr.atom.type == GlobalAtomicType::U64 ||
                                 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
                                 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
                             "type={}", instr.atom.type.Value());

        const auto [real_address, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, true);
        if (!real_address || !base_address) {
            // Tracking failed, skip atomic.
            break;
        }

        const bool is_signed =
            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
        // The operand comes from gpr20 and the result of the atomic is written to gpr0.
        SetRegister(bb, instr.gpr0,
                    SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
                                    GetRegister(instr.gpr20)));
        break;
    }
    case OpCode::Id::ATOMS: {
        UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
                                 instr.atoms.operation == AtomicOp::Dec,
                             "operation={}", instr.atoms.operation.Value());
        UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
                                 instr.atoms.type == AtomicType::U64,
                             "type={}", instr.atoms.type.Value());
        const bool is_signed =
            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
        // Shared memory address = gpr8 + immediate offset.
        const s32 offset = instr.atoms.GetImmediateOffset();
        Node address = GetRegister(instr.gpr8);
        address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
        SetRegister(bb, instr.gpr0,
                    SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
                                    GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
        break;
    }
    case OpCode::Id::AL2P: {
        // Ignore al2p.direction since we don't care about it.

        // Calculate emulation fake physical address.
        const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
        const Node reg{GetRegister(instr.gpr8)};
        const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};

        // Set the fake address to target register.
        SetRegister(bb, instr.gpr0, fake_address);

        // Signal the shader IR to declare all possible attributes and varyings
        uses_physical_attributes = true;
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
||||
|
||||
/**
 * Tracks the const buffer location that provides the base address of a global memory access and
 * records how that global memory region is used.
 *
 * @param bb       Block that receives a comment describing the tracked base address.
 * @param instr    Global memory instruction being decoded.
 * @param is_read  Whether the instruction reads the memory.
 * @param is_write Whether the instruction writes the memory.
 * @return The accessed address (address register plus immediate offset), the tracked base
 *         address node and the region descriptor. Both nodes are null when tracking fails.
 */
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
                                                                     Instruction instr,
                                                                     bool is_read, bool is_write) {
    const Node addr_register = GetRegister(instr.gmem.gpr);
    const auto immediate_offset = static_cast<u32>(instr.gmem.offset);

    const auto tracked =
        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
    const auto& base_address = std::get<0>(tracked);
    const auto index = std::get<1>(tracked);
    const auto offset = std::get<2>(tracked);
    ASSERT_OR_EXECUTE_MSG(
        base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
        "Global memory tracking failed");

    bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));

    // Register the region (if it is new) and accumulate how it is accessed.
    const GlobalMemoryBase descriptor{index, offset};
    auto& usage = used_global_memory.try_emplace(descriptor).first->second;
    usage.is_written |= is_write;
    usage.is_read |= is_read;

    const Node real_address =
        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);

    return std::make_tuple(real_address, base_address, descriptor);
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,322 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::ConditionCode;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::IpaInterpMode;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::PixelImap;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::SystemVariable;
|
||||
|
||||
using Index = Tegra::Shader::Attribute::Index;
|
||||
|
||||
/// Decodes the instruction at `pc` for the opcodes handled here (flow control, system register
/// moves, interpolation, geometry output and barriers) and appends the resulting nodes to `bb`.
/// @param bb Block receiving the generated nodes.
/// @param pc Index of the instruction inside program_code.
/// @return `pc`, or MAX_PROGRAM_LENGTH - 1 after an unconditional, unpredicated EXIT.
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Pushes a flow node as-is when the condition code is T, otherwise wraps it in a conditional.
    const auto push_flow = [this, &bb](ConditionCode code, const Node& flow) {
        if (code == ConditionCode::T) {
            bb.push_back(flow);
        } else {
            bb.push_back(Conditional(GetConditionCode(code), {flow}));
        }
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::NOP: {
        UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
        UNIMPLEMENTED_IF(instr.nop.trigger != 0);
        // Once both checks above pass there is nothing left to emit.
        break;
    }
    case OpCode::Id::EXIT: {
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);

        switch (instr.flow.cond) {
        case Tegra::Shader::FlowCondition::Always: {
            bb.push_back(Operation(OperationCode::Exit));
            // An unpredicated exit ends processing here. A predicated one may not be taken, so
            // decoding has to carry on with the following instruction.
            const bool is_unconditional =
                instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex);
            if (is_unconditional) {
                pc = MAX_PROGRAM_LENGTH - 1;
            }
            break;
        }
        case Tegra::Shader::FlowCondition::Fcsm_Tr:
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
            break;
        default:
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
        }
        break;
    }
    case OpCode::Id::KIL: {
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);

        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::S2R: {
        // Build the node that stands in for the requested system value.
        const Node value = [this, instr] {
            switch (instr.sys20) {
            case SystemVariable::LaneId:
                return Operation(OperationCode::ThreadId);
            case SystemVariable::InvocationId:
                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                uses_y_negate = true;
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
                return Immediate(0x00ff'0000U);
            case SystemVariable::WscaleFactorXY:
                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
                return Immediate(0U);
            case SystemVariable::WscaleFactorZ:
                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                return Immediate(0U);
            case SystemVariable::Tid: {
                // Pack X into bits [0, 9), Y into bits [16, 25) and Z into bits [26, 31).
                Node packed = Immediate(0);
                packed = BitfieldInsert(packed, Operation(OperationCode::LocalInvocationIdX), 0, 9);
                packed =
                    BitfieldInsert(packed, Operation(OperationCode::LocalInvocationIdY), 16, 9);
                packed =
                    BitfieldInsert(packed, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
                return packed;
            }
            case SystemVariable::TidX:
                return Operation(OperationCode::LocalInvocationIdX);
            case SystemVariable::TidY:
                return Operation(OperationCode::LocalInvocationIdY);
            case SystemVariable::TidZ:
                return Operation(OperationCode::LocalInvocationIdZ);
            case SystemVariable::CtaIdX:
                return Operation(OperationCode::WorkGroupIdX);
            case SystemVariable::CtaIdY:
                return Operation(OperationCode::WorkGroupIdY);
            case SystemVariable::CtaIdZ:
                return Operation(OperationCode::WorkGroupIdZ);
            // Every thread mask read flags the shader as using warp operations.
            case SystemVariable::EqMask:
                uses_warps = true;
                return Operation(OperationCode::ThreadEqMask);
            case SystemVariable::LtMask:
                uses_warps = true;
                return Operation(OperationCode::ThreadLtMask);
            case SystemVariable::LeMask:
                uses_warps = true;
                return Operation(OperationCode::ThreadLeMask);
            case SystemVariable::GtMask:
                uses_warps = true;
                return Operation(OperationCode::ThreadGtMask);
            case SystemVariable::GeMask:
                uses_warps = true;
                return Operation(OperationCode::ThreadGeMask);
            default:
                UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
                return Immediate(0u);
            }
        }();
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::BRA: {
        Node branch;
        if (instr.bra.constant_buffer != 0) {
            // Indirect form: the constant buffer value is arithmetically shifted right by 3 and
            // added to the address of the next instruction.
            const u32 next_pc = pc + 1;
            const Node cbuf_value = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
            const Node scaled = SignedOperation(OperationCode::IArithmeticShiftRight, true,
                                                PRECISE, cbuf_value, Immediate(3));
            const Node destination =
                Operation(OperationCode::IAdd, PRECISE, scaled, Immediate(next_pc));
            branch = Operation(OperationCode::BranchIndirect, destination);
        } else {
            // Direct form: the target is relative to the current instruction.
            const u32 destination = pc + instr.bra.GetBranchTarget();
            branch = Operation(OperationCode::Branch, Immediate(destination));
        }

        push_flow(instr.flow_condition_code, branch);
        break;
    }
    case OpCode::Id::BRX: {
        // Both forms shift a source value right by 3 and add a base; only the inputs differ.
        s32 base;
        Node source;
        if (instr.brx.constant_buffer != 0) {
            base = pc + 1;
            const Node index = GetRegister(instr.gpr8);
            source =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
        } else {
            base = pc + instr.brx.GetBranchExtend();
            source = GetRegister(instr.gpr8);
        }
        const Node scaled = SignedOperation(OperationCode::IArithmeticShiftRight, true, PRECISE,
                                            source, Immediate(3));
        const Node destination = Operation(OperationCode::IAdd, PRECISE, scaled, Immediate(base));
        const Node branch = Operation(OperationCode::BranchIndirect, destination);

        push_flow(instr.flow_condition_code, branch);
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        if (!disable_flow_stack) {
            // The SSY opcode tells the GPU where to re-converge divergent execution paths with
            // SYNC.
            const u32 reconverge_pc = pc + instr.bra.GetBranchTarget();
            bb.push_back(Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy,
                                   Immediate(reconverge_pc)));
        }
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        if (!disable_flow_stack) {
            // PBK pushes to a stack the address where BRK will jump to.
            const u32 break_pc = pc + instr.bra.GetBranchTarget();
            bb.push_back(
                Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(break_pc)));
        }
        break;
    }
    case OpCode::Id::SYNC: {
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);

        if (!decompiled) {
            // The SYNC opcode jumps to the address previously set by the SSY opcode
            bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
        }
        break;
    }
    case OpCode::Id::BRK: {
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);

        if (!decompiled) {
            // The BRK opcode jumps to the address previously set by the PBK opcode
            bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
        }
        break;
    }
    case OpCode::Id::IPA: {
        const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
        const auto attribute = instr.attribute.fmt28;
        const Index index = attribute.index;

        Node value;
        if (is_physical) {
            value = GetPhysicalInputAttribute(instr.gpr8);
        } else {
            value = GetInputAttribute(index, attribute.element);
        }

        // Code taken from Ryujinx.
        // Generic attributes flagged as perspective in the header are multiplied by position.w.
        const bool is_generic = index >= Index::Attribute_0 && index <= Index::Attribute_31;
        if (is_generic) {
            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
                Node position_w = GetInputAttribute(Index::Position, 3);
                value = Operation(OperationCode::FMul, move(value), move(position_w));
            }
        }

        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
        }

        value = GetSaturatedFloat(move(value), instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        // Currently just forwards gpr8 into gpr0.
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::BAR: {
        // Only this exact encoding is accepted; anything else is reported as unimplemented.
        UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
        bb.push_back(Operation(OperationCode::Barrier));
        break;
    }
    case OpCode::Id::MEMBAR: {
        UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);

        // Unknown types are reported and fall back to the global barrier.
        OperationCode barrier;
        switch (instr.membar.type) {
        case Tegra::Shader::MembarType::CTA:
            barrier = OperationCode::MemoryBarrierGroup;
            break;
        case Tegra::Shader::MembarType::GL:
            barrier = OperationCode::MemoryBarrierGlobal;
            break;
        default:
            UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
            barrier = OperationCode::MemoryBarrierGlobal;
            break;
        }
        bb.push_back(Operation(barrier));
        break;
    }
    case OpCode::Id::DEPBAR: {
        LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,68 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
/// Decodes PSETP and CSETP, which combine predicates (or a condition code and a predicate) and
/// store the result in up to two destination predicates.
/// @param bb Block receiving the generated predicate assignments.
/// @param pc Index of the instruction inside program_code.
/// @return `pc` unchanged.
u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto unused_pred = static_cast<u64>(Pred::UnusedIndex);

    switch (opcode->get().GetId()) {
    case OpCode::Id::PSETP: {
        const Node lhs = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
        const Node rhs = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);

        // We can't use the constant predicate as destination.
        ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const Node extra = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);

        // The same combiner is applied twice: first to the two sources, then with pred39.
        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
        const Node first = Operation(combiner, lhs, rhs);

        // Primary destination: (lhs OP rhs) OP extra
        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, first, extra));

        // Secondary destination, when enabled: !(lhs OP rhs) OP extra
        if (instr.psetp.pred0 != unused_pred) {
            const Node negated = Operation(OperationCode::LogicalNegate, first);
            SetPredicate(bb, instr.psetp.pred0, Operation(combiner, negated, extra));
        }
        break;
    }
    case OpCode::Id::CSETP: {
        const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
        const Node condition_code = GetConditionCode(instr.csetp.cc);

        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);

        // Each destination is written only when it is not the unused predicate index.
        if (instr.csetp.pred3 != unused_pred) {
            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
        }
        if (instr.csetp.pred0 != unused_pred) {
            const Node inverted_cc = Operation(OperationCode::LogicalNegate, condition_code);
            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, inverted_cc, pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,46 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
/// Decodes PSET: combines three predicates and writes the outcome to gpr0 either as a float
/// (1.0f / 0.0f) or as an integer mask (0xffffffff / 0), depending on the bf flag.
/// @param bb Block receiving the generated nodes.
/// @param pc Index of the instruction inside program_code.
/// @return `pc` unchanged.
u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in PSET is not implemented");

    // (pred12 COND pred29) OP pred39
    const Node lhs = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
    const Node rhs = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
    const Node inner = Operation(GetPredicateCombiner(instr.pset.cond), lhs, rhs);

    const Node outer_operand = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);

    const OperationCode outer_combiner = GetPredicateCombiner(instr.pset.op);
    const Node predicate = Operation(outer_combiner, inner, outer_operand);

    // Pick the pair of constants the predicate selects between.
    Node true_value;
    Node false_value;
    if (instr.pset.bf) {
        true_value = Immediate(1.0f);
        false_value = Immediate(0.0f);
    } else {
        true_value = Immediate(0xffffffff);
        false_value = Immediate(0);
    }
    const Node value =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);

    // The flag helper has to match the representation chosen above.
    if (instr.pset.bf) {
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,86 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
namespace {
|
||||
constexpr u64 NUM_CONDITION_CODES = 4;
|
||||
constexpr u64 NUM_PREDICATES = 7;
|
||||
} // namespace
|
||||
|
||||
/// Decodes R2P_IMM (register bits -> predicates / condition codes) and P2R_IMM (predicates /
/// condition codes -> register byte).
/// @param bb Block receiving the generated nodes.
/// @param pc Index of the instruction inside program_code.
/// @return `pc` unchanged.
u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Mask selecting which entries take part in the transfer. Only immediate forms are handled.
    Node apply_mask = [this, opcode, instr] {
        switch (opcode->get().GetId()) {
        case OpCode::Id::R2P_IMM:
        case OpCode::Id::P2R_IMM:
            return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // Bit offset of the selected byte inside the 32-bit register.
    const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;

    // The instruction works either on the internal condition code flags or on the predicates.
    const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
    const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
    const auto get_entry = [this, cc](u64 entry) {
        return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::R2P_IMM: {
        Node mask = GetRegister(instr.gpr8);

        for (u64 entry = 0; entry < num_entries; ++entry) {
            const u32 shift = static_cast<u32>(entry);

            // The assignment only happens when the matching bit of apply_mask is set.
            Node apply = BitfieldExtract(apply_mask, shift, 1);
            Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));

            // New entry value: the corresponding bit of the selected register byte.
            Node compare = BitfieldExtract(mask, offset + shift, 1);
            Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));

            Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
            bb.push_back(Conditional(condition, {move(code)}));
        }
        break;
    }
    case OpCode::Id::P2R_IMM: {
        // Gather one bit per entry, mask it, then insert it as a byte into gpr8's value.
        Node value = Immediate(0);
        for (u64 entry = 0; entry < num_entries; ++entry) {
            Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
                                 Immediate(0));
            value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
        }
        value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
        value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);

        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    default:
        // This function decodes P2R and R2P; the previous message named a non-existent "R2R".
        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
        break;
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,153 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::ShfType;
|
||||
using Tegra::Shader::ShfXmode;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Builds a node comparing `shift` for integer equality with 32.
Node IsFull(Node shift) {
    Node full_width = Immediate(32);
    return Operation(OperationCode::LogicalIEqual, move(shift), move(full_width));
}
|
||||
|
||||
/// Applies the shift operation `opcode` to `value`, but selects 0 instead of the shifted value
/// when the shift amount equals 32.
Node Shift(OperationCode opcode, Node value, Node shift) {
    Node result = Operation(opcode, move(value), shift);
    Node is_full = IsFull(move(shift));
    Node zero = Immediate(0);
    return Operation(OperationCode::Select, move(is_full), move(zero), move(result));
}
|
||||
|
||||
/// Builds nodes that clamp `shift` to the range [0, size] using IMax followed by IMin.
Node ClampShift(Node shift, s32 size = 32) {
    Node non_negative = Operation(OperationCode::IMax, move(shift), Immediate(0));
    return Operation(OperationCode::IMin, move(non_negative), Immediate(size));
}
|
||||
|
||||
/// Builds a node that wraps `shift` by AND-ing it with `size - 1`.
Node WrapShift(Node shift, s32 size = 32) {
    Node wrap_mask = Immediate(size - 1);
    return Operation(OperationCode::UBitwiseAnd, move(shift), move(wrap_mask));
}
|
||||
|
||||
/// Builds the node tree for a funnel shift right over the `high`:`low` pair.
/// @param low       Lower word.
/// @param high      Upper word.
/// @param shift     Shift amount.
/// @param low_shift Complementary amount applied to `high` (DecodeShift passes 32 - shift).
/// @param type      Bits32 takes the short path; S64 makes the wide path use a signed shift.
Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
    // Result for shift amounts below 32: both halves go through Shift(), which yields 0 for an
    // amount of exactly 32.
    Node low_part = Shift(OperationCode::ILogicalShiftRight, low, shift);
    Node high_part = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
    Node below_32 = Operation(OperationCode::IBitwiseOr, move(high_part), move(low_part));

    if (type == ShfType::Bits32) {
        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
        return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(below_32));
    }

    // Result for shift amounts of 32 or more: shift the high word by (shift - 32).
    const bool is_signed = type == ShfType::S64;
    const auto wide_opcode =
        SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
    Node excess = Operation(OperationCode::IAdd, shift, Immediate(-32));
    Node at_least_32 = Shift(wide_opcode, high, move(excess));

    Node under_32 = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
    Node no_shift = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));

    // A zero shift selects `high`; otherwise pick between the two precomputed results.
    Node shifted =
        Operation(OperationCode::Select, move(under_32), move(below_32), move(at_least_32));
    return Operation(OperationCode::Select, move(no_shift), move(high), move(shifted));
}
|
||||
|
||||
/// Builds the node tree for a funnel shift left over the `high`:`low` pair.
/// @param low       Lower word.
/// @param high      Upper word.
/// @param shift     Shift amount.
/// @param low_shift Complementary amount applied to `low` (DecodeShift passes 32 - shift).
/// @param type      Bits32 takes the short path; any other type takes the wide path.
Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
    // These values are used when the shift value is less than 32.
    // low_shift is 32 when shift is zero. The Bits32 path below has no zero-shift select, so the
    // low word goes through Shift(), which yields 0 for a 32-bit shift, exactly as ShiftRight
    // guards its operands. Without this a zero shift would OR an unguarded `low >> 32` into the
    // result.
    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, low_shift);
    Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
    Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));

    if (type == ShfType::Bits32) {
        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
        return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
    }

    // And these when it's 32 or larger: the low word shifted left by (shift - 32).
    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
    Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));

    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));

    // A zero shift selects `high`; otherwise pick between the two precomputed results.
    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
    return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Decodes the shift family (SHR, SHL and the SHF funnel shifts) and writes the result to gpr0.
/// @param bb Block receiving the generated nodes.
/// @param pc Index of the instruction inside program_code.
/// @return `pc` unchanged.
u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opid = opcode->get().GetId();

    Node op_a = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node op_b;
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }

    switch (opid) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM: {
        // Bring the shift amount into range, either by wrapping or by clamping.
        if (instr.shr.wrap) {
            op_b = WrapShift(move(op_b));
        } else {
            op_b = ClampShift(move(op_b));
        }

        Node result = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
                                      move(op_a), move(op_b));
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, move(result));
        break;
    }
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM: {
        Node result = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, move(result));
        break;
    }
    case OpCode::Id::SHF_RIGHT_R:
    case OpCode::Id::SHF_RIGHT_IMM:
    case OpCode::Id::SHF_LEFT_R:
    case OpCode::Id::SHF_LEFT_IMM: {
        UNIMPLEMENTED_IF(instr.generates_cc);
        UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
                             instr.shf.xmode.Value());

        // SHF carries its own immediate field, replacing the generic one read above.
        if (instr.is_b_imm) {
            op_b = Immediate(static_cast<u32>(instr.shf.immediate));
        }
        const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
        Node shift;
        if (instr.shf.wrap) {
            shift = WrapShift(move(op_b), size);
        } else {
            shift = ClampShift(move(op_b), size);
        }

        // low_shift = 32 - shift
        Node negated_shift = Operation(OperationCode::INegate, shift);
        Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));

        // gpr8 supplies the low word and gpr39 the high word of the funnel.
        const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
        Node high = GetRegister(instr.gpr39);
        Node result;
        if (is_right) {
            result = ShiftRight(move(op_a), move(high), move(shift), move(low_shift),
                                instr.shf.type);
        } else {
            result = ShiftLeft(move(op_a), move(high), move(shift), move(low_shift),
                               instr.shf.type);
        }

        SetRegister(bb, instr.gpr0, move(result));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,935 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureMiscMode;
|
||||
using Tegra::Shader::TextureProcessMode;
|
||||
using Tegra::Shader::TextureType;
|
||||
|
||||
/// Returns the number of coordinates for a texture type: 1 for 1D, 2 for 2D, 3 for 3D and cube.
/// Any other type is reported as unimplemented and yields 0.
static std::size_t GetCoordCount(TextureType texture_type) {
    if (texture_type == TextureType::Texture1D) {
        return 1;
    }
    if (texture_type == TextureType::Texture2D) {
        return 2;
    }
    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
        return 3;
    }
    UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
    return 0;
}
|
||||
|
||||
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
bool is_bindless = false;
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::TEX: {
|
||||
const TextureType texture_type{instr.tex.texture_type};
|
||||
const bool is_array = instr.tex.array != 0;
|
||||
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr,
|
||||
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEX_B: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
const TextureType texture_type{instr.tex_b.texture_type};
|
||||
const bool is_array = instr.tex_b.array != 0;
|
||||
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||
const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex_b.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTexCode(instr, texture_type, process_mode, depth_compare,
|
||||
is_array, is_aoffi, {instr.gpr20}));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEXS: {
|
||||
const TextureType texture_type{instr.texs.GetTextureType()};
|
||||
const bool is_array{instr.texs.IsArrayTexture()};
|
||||
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.texs.GetTextureProcessMode();
|
||||
|
||||
const Node4 components =
|
||||
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
|
||||
|
||||
if (instr.texs.fp32_flag) {
|
||||
WriteTexsInstructionFloat(bb, instr, components);
|
||||
} else {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, components);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4_B: {
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
}
|
||||
case OpCode::Id::TLD4: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
const auto texture_type = instr.tld4.texture_type.Value();
|
||||
const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
|
||||
: instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||
const bool is_array = instr.tld4.array != 0;
|
||||
const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
|
||||
: instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||
const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
|
||||
: instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
|
||||
is_ptp, is_bindless));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4S: {
|
||||
constexpr std::size_t num_coords = 2;
|
||||
const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||
const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
|
||||
const Node op_a = GetRegister(instr.gpr8);
|
||||
const Node op_b = GetRegister(instr.gpr20);
|
||||
|
||||
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
|
||||
std::vector<Node> coords;
|
||||
std::vector<Node> aoffi;
|
||||
Node depth_compare;
|
||||
if (is_depth_compare) {
|
||||
// Note: TLD4S coordinate encoding works just like TEXS's
|
||||
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_y);
|
||||
if (is_aoffi) {
|
||||
aoffi = GetAoffiCoordinates(op_b, num_coords, true);
|
||||
depth_compare = GetRegister(instr.gpr20.Value() + 1);
|
||||
} else {
|
||||
depth_compare = op_b;
|
||||
}
|
||||
} else {
|
||||
// There's no depth compare
|
||||
coords.push_back(op_a);
|
||||
if (is_aoffi) {
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
aoffi = GetAoffiCoordinates(op_b, num_coords, true);
|
||||
} else {
|
||||
coords.push_back(op_b);
|
||||
}
|
||||
}
|
||||
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
|
||||
|
||||
SamplerInfo info;
|
||||
info.is_shadow = is_depth_compare;
|
||||
const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
|
||||
{}, {}, component, element, {}};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, coords);
|
||||
}
|
||||
|
||||
if (instr.tld4s.fp16_flag) {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, values, true);
|
||||
} else {
|
||||
WriteTexsInstructionFloat(bb, instr, values, true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXD_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
case OpCode::Id::TXD: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
const bool is_array = instr.txd.is_array != 0;
|
||||
const auto derivate_reg = instr.gpr20.Value();
|
||||
const auto texture_type = instr.txd.texture_type.Value();
|
||||
const auto coord_count = GetCoordCount(texture_type);
|
||||
u64 base_reg = instr.gpr8.Value();
|
||||
Node index_var;
|
||||
SamplerInfo info;
|
||||
info.type = texture_type;
|
||||
info.is_array = is_array;
|
||||
const std::optional<SamplerEntry> sampler =
|
||||
is_bindless ? GetBindlessSampler(base_reg, info, index_var)
|
||||
: GetSampler(instr.sampler, info);
|
||||
Node4 values;
|
||||
if (!sampler) {
|
||||
std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
|
||||
WriteTexInstructionFloat(bb, instr, values);
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_bindless) {
|
||||
base_reg++;
|
||||
}
|
||||
|
||||
std::vector<Node> coords;
|
||||
std::vector<Node> derivates;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(base_reg + i));
|
||||
const std::size_t derivate = i * 2;
|
||||
derivates.push_back(GetRegister(derivate_reg + derivate));
|
||||
derivates.push_back(GetRegister(derivate_reg + derivate + 1));
|
||||
}
|
||||
|
||||
Node array_node = {};
|
||||
if (is_array) {
|
||||
const Node info_reg = GetRegister(base_reg + coord_count);
|
||||
array_node = BitfieldExtract(info_reg, 0, 16);
|
||||
}
|
||||
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
|
||||
{}, {}, {}, element, index_var};
|
||||
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
|
||||
}
|
||||
|
||||
WriteTexInstructionFloat(bb, instr, values);
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
case OpCode::Id::TXQ: {
|
||||
Node index_var;
|
||||
const std::optional<SamplerEntry> sampler =
|
||||
is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
|
||||
: GetSampler(instr.sampler, {});
|
||||
|
||||
if (!sampler) {
|
||||
u32 indexer = 0;
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
const Node value = Immediate(0);
|
||||
SetTemporary(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
u32 indexer = 0;
|
||||
switch (instr.txq.query_type) {
|
||||
case Tegra::Shader::TextureQueryType::Dimension: {
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta,
|
||||
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
|
||||
SetTemporary(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TMML_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
case OpCode::Id::TMML: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
|
||||
const auto texture_type = instr.tmml.texture_type.Value();
|
||||
const bool is_array = instr.tmml.array != 0;
|
||||
SamplerInfo info;
|
||||
info.type = texture_type;
|
||||
info.is_array = is_array;
|
||||
Node index_var;
|
||||
const std::optional<SamplerEntry> sampler =
|
||||
is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
|
||||
: GetSampler(instr.sampler, info);
|
||||
|
||||
if (!sampler) {
|
||||
u32 indexer = 0;
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
if (!instr.tmml.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
const Node value = Immediate(0);
|
||||
SetTemporary(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
const u64 base_index = is_array ? 1 : 0;
|
||||
const u64 num_components = [texture_type] {
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
return 1;
|
||||
case TextureType::Texture2D:
|
||||
return 2;
|
||||
case TextureType::TextureCube:
|
||||
return 3;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
|
||||
return 2;
|
||||
}
|
||||
}();
|
||||
// TODO: What's the array component used for?
|
||||
|
||||
std::vector<Node> coords;
|
||||
coords.reserve(num_components);
|
||||
for (u64 component = 0; component < num_components; ++component) {
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
|
||||
}
|
||||
|
||||
u32 indexer = 0;
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
if (!instr.tmml.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
|
||||
Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
|
||||
SetTemporary(bb, indexer++, std::move(value));
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
|
||||
|
||||
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLDS: {
|
||||
const TextureType texture_type{instr.tlds.GetTextureType()};
|
||||
const bool is_array{instr.tlds.IsArrayTexture()};
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
|
||||
|
||||
const Node4 components = GetTldsCode(instr, texture_type, is_array);
|
||||
|
||||
if (instr.tlds.fp32_flag) {
|
||||
WriteTexsInstructionFloat(bb, instr, components);
|
||||
} else {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, components);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
/// Completes a partially specified SamplerInfo.
/// @param info    Sampler properties known from the instruction; unset fields are filled in.
/// @param sampler Optional descriptor used as the source for the unset fields.
/// @return `info` with every field set. Fields that were already set are never overwritten.
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
    // Nothing to deduce when the caller already provided every property
    if (info.IsComplete()) {
        return info;
    }

    // Assigns the given fallbacks only to the fields that are still empty
    const auto fill_missing = [&info](const auto& type, bool is_array, bool is_shadow,
                                      bool is_buffer) {
        info.type = info.type.value_or(type);
        info.is_array = info.is_array.value_or(is_array);
        info.is_shadow = info.is_shadow.value_or(is_shadow);
        info.is_buffer = info.is_buffer.value_or(is_buffer);
    };

    if (sampler) {
        // Take the missing properties from the descriptor
        fill_missing(sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0,
                     sampler->is_buffer != 0);
        return info;
    }

    // No descriptor available: fall back to a plain, non-array, non-shadow 2D texture
    LOG_WARNING(HW_GPU, "Unknown sampler info");
    fill_missing(Tegra::Shader::TextureType::Texture2D, false, false, false);
    return info;
}
|
||||
|
||||
/// Returns the sampler entry for a bound sampler, registering it on first use.
/// @param sampler      Sampler field of the instruction; its index is used as the entry offset.
/// @param sampler_info Properties known from the instruction; completed through GetSamplerInfo.
/// @return A copy of the existing entry with the same offset, or of the newly appended one.
std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                                                 SamplerInfo sampler_info) {
    const u32 offset = static_cast<u32>(sampler.index.Value());
    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));

    // Reuse the mapping when this offset was registered before
    for (auto it = used_samplers.begin(); it != used_samplers.end(); ++it) {
        if (it->offset != offset) {
            continue;
        }
        // The cached entry has to describe the same kind of sampler
        ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
               it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
        return *it;
    }

    // First use: append a new entry whose index is its position in the list
    const auto next_index = static_cast<u32>(used_samplers.size());
    return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
                                      *info.is_shadow, *info.is_buffer, false);
}
|
||||
|
||||
/// Resolves the sampler whose handle lives in a register.
/// The handle is tracked back through the global code; depending on what the tracker reports
/// (bindless, separate or array sampler) an existing entry is reused or a new one is appended.
/// @param reg       Register holding the sampler handle.
/// @param info      Properties known from the instruction; completed through GetSamplerInfo.
/// @param index_var Written only for array samplers: the custom variable used as index.
/// @return The sampler entry, or std::nullopt when tracking fails or yields an unknown kind.
std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
                                                         SamplerInfo info, Node& index_var) {
    const Node handle_register = GetRegister(reg);
    const auto [base_node, tracked_sampler_info] =
        TrackBindlessSampler(handle_register, global_code, static_cast<s64>(global_code.size()));
    if (!base_node) {
        UNREACHABLE();
        return std::nullopt;
    }

    // Case 1: handle read from a (buffer, offset) location
    if (const auto bindless = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
        const u32 buffer = bindless->index;
        const u32 offset = bindless->offset;
        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));

        // Reuse the mapping when this location was registered before
        for (auto it = used_samplers.begin(); it != used_samplers.end(); ++it) {
            if (it->buffer != buffer || it->offset != offset) {
                continue;
            }
            // NOTE(review): unlike the other cases, is_buffer is not compared here
            ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow);
            return *it;
        }

        // First use: append a new entry
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, false);
    }

    // Case 2: handle built from two separate (buffer, offset) locations
    if (const auto separate = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
        const std::pair indices = separate->indices;
        const std::pair offsets = separate->offsets;
        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));

        // Reuse an entry that was created from the very same pair of locations
        for (auto it = used_samplers.begin(); it != used_samplers.end(); ++it) {
            const bool same_offsets = offsets == std::pair{it->offset, it->secondary_offset};
            const bool same_buffers = indices == std::pair{it->buffer, it->secondary_buffer};
            if (!same_offsets || !same_buffers) {
                continue;
            }
            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
            return *it;
        }

        // First use: append a new entry
        const u32 next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer);
    }

    // Case 3: bound sampler array indexed by a runtime value
    if (const auto indexed = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
        // The tracked base offset is divided by 4 before being used as sampler offset
        const u32 base_offset = indexed->base_offset / 4;
        index_var = GetCustomVariable(indexed->bindless_var);
        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));

        // Reuse the mapping when this base offset was registered before
        for (auto it = used_samplers.begin(); it != used_samplers.end(); ++it) {
            if (it->offset != base_offset) {
                continue;
            }
            ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
                   it->is_indexed);
            return *it;
        }

        // Only flagged when a new indexed entry is actually created
        uses_indexed_samplers = true;
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, true);
    }

    // The tracker reported a kind that is not handled above
    return std::nullopt;
}
|
||||
|
||||
/// Stores the enabled components of a texture result into consecutive registers from gpr0.
/// @param bb         Block receiving the generated nodes.
/// @param instr      Instruction providing the component mask (tex encoding) and gpr0.
/// @param components The four sampled components.
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
    // Stage every enabled component in a temporary first, packing them without gaps
    u32 written = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (instr.tex.IsComponentEnabled(component)) {
            SetTemporary(bb, written, components[component]);
            ++written;
        }
    }

    // Then copy the staged values into the destination registers
    for (u32 slot = 0; slot < written; ++slot) {
        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporary(slot));
    }
}
|
||||
|
||||
/// Stores a TEXS-style float result, which is split across two destination register pairs.
/// The first two selected components go to gpr0 and gpr0+1, the remaining ones to gpr28 and
/// gpr28+1.
/// @param bb          Block receiving the generated nodes.
/// @param instr       Instruction providing the component mask (texs encoding), gpr0 and gpr28.
/// @param components  The four sampled components.
/// @param ignore_mask When true every component is written regardless of the mask.
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
                                         bool ignore_mask) {
    // Stage the selected components in temporaries, packed without gaps
    u32 written = 0;
    for (u32 component = 0; component < 4; ++component) {
        const bool enabled = instr.texs.IsComponentEnabled(component);
        if (enabled || ignore_mask) {
            SetTemporary(bb, written, components[component]);
            ++written;
        }
    }

    for (u32 slot = 0; slot < written; ++slot) {
        // Position inside the destination pair
        const u32 lane = slot % 2;
        if (slot >= 2) {
            // A third or fourth component needs the second destination pair
            ASSERT(instr.texs.HasTwoDestinations());
            SetRegister(bb, instr.gpr28.Value() + lane, GetTemporary(slot));
            continue;
        }
        SetRegister(bb, instr.gpr0.Value() + lane, GetTemporary(slot));
    }
}
|
||||
|
||||
/// Stores a TEXS.F16-style result: the selected components are packed in pairs with HPack2,
/// the first pair into gpr0 and, when more than two components are selected, the second pair
/// into gpr28.
/// @param bb          Block receiving the generated nodes.
/// @param instr       Instruction providing the component mask (texs encoding), gpr0 and gpr28.
/// @param components  The four sampled components.
/// @param ignore_mask When true every component is written regardless of the mask.
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
                                             const Node4& components, bool ignore_mask) {
    // Gather the selected components at the front of `packed_inputs`
    Node4 packed_inputs;
    u32 selected = 0;
    for (u32 component = 0; component < 4; ++component) {
        const bool enabled = instr.texs.IsComponentEnabled(component);
        if (enabled || ignore_mask) {
            packed_inputs[selected] = components[component];
            ++selected;
        }
    }
    if (selected == 0) {
        // Nothing to write
        return;
    }

    // Unused trailing slots are filled with zero so that every pair is complete
    for (std::size_t slot = selected; slot < packed_inputs.size(); ++slot) {
        packed_inputs[slot] = Immediate(0);
    }

    const Node low_pair = Operation(OperationCode::HPack2, packed_inputs[0], packed_inputs[1]);
    if (selected > 2) {
        // Two destinations: stage both pairs in temporaries before touching the registers
        SetTemporary(bb, 0, low_pair);
        SetTemporary(bb, 1,
                     Operation(OperationCode::HPack2, packed_inputs[2], packed_inputs[3]));

        SetRegister(bb, instr.gpr0, GetTemporary(0));
        SetRegister(bb, instr.gpr28, GetTemporary(1));
        return;
    }

    // One or two components fit in a single packed register
    SetRegister(bb, instr.gpr0, low_pair);
}
|
||||
|
||||
/// Builds the four per-element texture sampling operations shared by the TEX-like decoders.
/// @param instr         Instruction being decoded (sampler field and gpr20 are read).
/// @param texture_type  Texture dimensionality.
/// @param process_mode  LOD/bias mode; selects between Texture and TextureLod.
/// @param coords        Coordinate nodes.
/// @param array         Array index node, or null for non-array textures.
/// @param depth_compare Depth reference node, or null for non-shadow sampling.
/// @param bias_offset   Offset from gpr20 of the register holding the bias or lod value.
/// @param aoffi         Texel offsets (may be empty).
/// @param bindless_reg  Register holding the sampler handle when the access is bindless.
/// @return One operation per element; four zero immediates when no sampler is found.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset,
                               std::vector<Node> aoffi,
                               std::optional<Tegra::Shader::Register> bindless_reg) {
    // Array and shadow usage are inferred from the presence of the respective nodes
    const bool is_array = array != nullptr;
    const bool is_shadow = depth_compare != nullptr;
    const bool is_bindless = bindless_reg.has_value();

    ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
               "Illegal texture type");

    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = is_shadow;
    info.is_buffer = false;

    Node index_var;
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
                    : GetSampler(instr.sampler, info);

    Node4 values;
    if (!sampler) {
        // Without a sampler every element reads as zero
        for (u32 element = 0; element < values.size(); ++element) {
            values[element] = Immediate(0);
        }
        return values;
    }

    // LZ, LL and LLA sample with an explicit level of detail
    OperationCode opcode = OperationCode::Texture;
    if (process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL ||
        process_mode == TextureProcessMode::LLA) {
        opcode = OperationCode::TextureLod;
    }

    // If present, lod or bias are always stored in the register indexed by the gpr20 field with
    // an offset depending on the usage of the other registers.
    Node bias;
    Node lod;
    if (process_mode == TextureProcessMode::LZ) {
        lod = Immediate(0.0f);
    } else if (process_mode == TextureProcessMode::LB) {
        bias = GetRegister(instr.gpr20.Value() + bias_offset);
    } else if (process_mode == TextureProcessMode::LL) {
        lod = GetRegister(instr.gpr20.Value() + bias_offset);
    } else if (process_mode != TextureProcessMode::None) {
        UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
    }

    for (u32 element = 0; element < values.size(); ++element) {
        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
                         lod,      {},    element,       index_var};
        values[element] = Operation(opcode, meta, coords);
    }
    return values;
}
|
||||
|
||||
/// Gathers the operands of a TEX / TEX_B instruction and forwards them to GetTextureCode.
/// @param instr         Instruction being decoded (gpr8 and gpr20 are read).
/// @param texture_type  Texture dimensionality.
/// @param process_mode  LOD/bias mode.
/// @param depth_compare Whether a depth reference operand is present.
/// @param is_array      Whether the array index is present in gpr8.
/// @param is_aoffi      Whether a texel offset operand is present.
/// @param bindless_reg  Register holding the sampler handle when the access is bindless.
/// @return The four per-element sampling operations.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
                           bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool lod_bias_enabled = !(process_mode == TextureProcessMode::None ||
                                    process_mode == TextureProcessMode::LZ);
    const bool is_bindless = bindless_reg.has_value();

    // gpr20 first holds the bindless handle (if any), then the lod/bias value (if any); the
    // remaining optional operands follow in consecutive registers.
    u64 next_operand = instr.gpr20.Value();
    if (is_bindless) {
        ++next_operand;
    }
    if (lod_bias_enabled) {
        ++next_operand;
    }
    // The lod/bias register sits right after the bindless handle
    const u32 bias_lod_offset = is_bindless ? 1 : 0;

    const std::size_t coord_count = std::get<0>(ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5));

    // The array index, when used, occupies gpr8 and pushes the coordinates one register up
    const u64 array_register = instr.gpr8.Value();
    const u64 first_coord_register = is_array ? array_register + 1 : array_register;

    std::vector<Node> coords;
    for (std::size_t index = 0; index < coord_count; ++index) {
        coords.push_back(GetRegister(first_coord_register + index));
    }
    // 1D.DC in OpenGL the 2nd component is ignored.
    const bool is_1d_shadow =
        depth_compare && !is_array && texture_type == TextureType::Texture1D;
    if (is_1d_shadow) {
        coords.push_back(Immediate(0.0f));
    }

    Node array;
    if (is_array) {
        array = GetRegister(array_register);
    } else {
        array = nullptr;
    }

    std::vector<Node> aoffi;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(next_operand), coord_count, false);
        ++next_operand;
    }

    // The depth reference comes after the lod/bias and offset operands
    Node dc;
    if (depth_compare) {
        dc = GetRegister(next_operand);
        ++next_operand;
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
                          aoffi, bindless_reg);
}
|
||||
|
||||
/// Gathers the operands of a TEXS instruction and forwards them to GetTextureCode.
/// @param instr         Instruction being decoded (gpr8 and gpr20 are read).
/// @param texture_type  Texture dimensionality.
/// @param process_mode  LOD/bias mode.
/// @param depth_compare Whether a depth reference operand is present.
/// @param is_array      Whether the array index is present in gpr8.
/// @return The four per-element sampling operations.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled = !(process_mode == TextureProcessMode::None ||
                                    process_mode == TextureProcessMode::LZ);

    const std::size_t coord_count = std::get<0>(ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4));

    // The array index, when used, occupies gpr8 and pushes the coordinates one register up
    const u64 array_register = instr.gpr8.Value();
    const u64 first_coord_register = is_array ? array_register + 1 : array_register;

    // The last coordinate normally lives in gpr20. It stays next to the first coordinate only
    // for non-array textures with at most two coordinates that also carry lod/bias or depth.
    const bool has_extra_operand = lod_bias_enabled || depth_compare;
    u64 last_coord_register = static_cast<u64>(instr.gpr20.Value());
    if (!is_array && has_extra_operand && coord_count <= 2) {
        last_coord_register = first_coord_register + 1;
    }
    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t index = 0; index < coord_count; ++index) {
        // A single-coordinate texture never uses the "last coordinate" register
        const bool is_last = coord_count > 1 && index + 1 == coord_count;
        if (is_last) {
            coords.push_back(GetRegister(last_coord_register));
        } else {
            coords.push_back(GetRegister(first_coord_register + index));
        }
    }

    Node array;
    if (is_array) {
        array = GetRegister(array_register);
    } else {
        array = nullptr;
    }

    Node dc;
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        u64 depth_register = instr.gpr20.Value();
        if (lod_bias_enabled) {
            ++depth_register;
        }
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
                          {});
}
|
||||
|
||||
/// Builds the four per-element TextureGather operations of a TLD4 / TLD4_B instruction.
/// @param instr         Instruction being decoded (gpr8, gpr20 and the component are read).
/// @param texture_type  Texture dimensionality.
/// @param depth_compare Whether a depth reference operand is present.
/// @param is_array      Whether the array index is present in gpr8.
/// @param is_aoffi      Whether a single texel offset operand is present.
/// @param is_ptp        Whether per-texel offsets (two registers) are present.
/// @param is_bindless   Whether the sampler handle is read from gpr20.
/// @return One operation per element; four zero immediates when no sampler is found.
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");

    const std::size_t coord_count = GetCoordCount(texture_type);

    // The array index, when used, occupies gpr8 and pushes the coordinates one register up
    const u64 array_register = instr.gpr8.Value();
    const u64 first_coord_register = is_array ? array_register + 1 : array_register;

    std::vector<Node> coords;
    for (std::size_t index = 0; index < coord_count; ++index) {
        coords.push_back(GetRegister(first_coord_register + index));
    }

    // Optional operands are consumed from gpr20 onwards, in the order handled below
    u64 parameter_register = instr.gpr20.Value();

    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = depth_compare;

    // The bindless handle, when present, is the first operand in the gpr20 sequence
    Node index_var;
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
                    : GetSampler(instr.sampler, info);

    Node4 values;
    if (!sampler) {
        // Without a sampler every element reads as zero
        std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
        return values;
    }

    std::vector<Node> aoffi;
    std::vector<Node> ptp;
    if (is_aoffi) {
        const Node offset_register = GetRegister(parameter_register++);
        aoffi = GetAoffiCoordinates(offset_register, coord_count, true);
    } else if (is_ptp) {
        // PTP offsets take two consecutive registers, read in ascending order
        const Node ptp_first = GetRegister(parameter_register++);
        const Node ptp_second = GetRegister(parameter_register++);
        ptp = GetPtpCoordinates({ptp_first, ptp_second});
    }

    Node dc;
    if (depth_compare) {
        dc = GetRegister(parameter_register++);
    }

    // The gathered component is encoded differently for the bindless variant
    Node component;
    if (is_bindless) {
        component = Immediate(static_cast<u32>(instr.tld4_b.component));
    } else {
        component = Immediate(static_cast<u32>(instr.tld4.component));
    }

    for (u32 element = 0; element < values.size(); ++element) {
        std::vector<Node> element_coords = coords;
        // NOTE(review): the gpr8 register is passed as array node even when !is_array
        MetaTexture meta{
            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
            index_var};
        values[element] =
            Operation(OperationCode::TextureGather, meta, std::move(element_coords));
    }

    return values;
}
|
||||
|
||||
/// Builds the four per-element TexelFetch operations of a TLD instruction.
/// The array index (if any) and the coordinates are read from consecutive registers starting
/// at gpr8; the lod is read from gpr20 when the process mode is LL and is zero otherwise.
/// @param instr Instruction being decoded.
/// @return One operation per element; four zero immediates when no sampler is available.
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    const auto texture_type{instr.tld.texture_type};
    const bool is_array{instr.tld.is_array != 0};
    const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
    const std::size_t coord_count{GetCoordCount(texture_type)};

    // The array index comes first, followed by the coordinates
    u64 gpr8_cursor{instr.gpr8.Value()};
    const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};

    std::vector<Node> coords;
    coords.reserve(coord_count);
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(gpr8_cursor++));
    }

    u64 gpr20_cursor{instr.gpr20.Value()};
    // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
    const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
    // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
    // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};

    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});

    Node4 values;
    if (!sampler) {
        // Never dereference an empty optional: fall back to zeros like the other texture
        // decoders do when no sampler can be resolved.
        for (u32 element = 0; element < values.size(); ++element) {
            values[element] = Immediate(0);
        }
        return values;
    }

    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }

    return values;
}
|
||||
|
||||
/// Builds the four per-element TexelFetch operations of a TLDS instruction.
/// @param instr        Instruction being decoded (gpr8, gpr20 and the tlds modes are read).
/// @param texture_type Texture dimensionality.
/// @param is_array     Whether the array index is present in gpr8.
/// @return One operation per element; four zero immediates when no sampler is available.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = false;
    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);

    Node4 values;
    if (!sampler) {
        // Never dereference an empty optional: fall back to zeros like the other texture
        // decoders do when no sampler can be resolved.
        for (u32 element = 0; element < values.size(); ++element) {
            values[element] = Immediate(0);
        }
        return values;
    }

    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
    const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // if is array gpr20 is used
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    // For non-array textures the last coordinate is taken from gpr20 when there are more than
    // two coordinates, or exactly two without lod; otherwise it follows the first coordinate.
    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        // With AOFFI the coordinates are always read consecutively
        coords.push_back(
            GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    std::vector<Node> aoffi;
    if (aoffi_enabled) {
        aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
    }

    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}
|
||||
|
||||
/// Computes how many coordinates a texture operation uses and checks them against limits.
/// @param texture_type     Texture dimensionality.
/// @param depth_compare    Adds one coordinate for the depth reference.
/// @param is_array         Adds one coordinate for the array index.
/// @param lod_bias_enabled Adds one input register (not a coordinate) for lod/bias.
/// @param max_coords       Maximum number of coordinates supported.
/// @param max_inputs       Maximum number of input registers supported.
/// @return {coordinates of the texture type alone, total coordinates}. When a limit is
///         exceeded the operation is reported as unimplemented and the total is clamped to
///         max_coords.
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    std::size_t total_coord_count = coord_count;
    if (is_array) {
        ++total_coord_count;
    }
    if (depth_compare) {
        ++total_coord_count;
    }

    std::size_t total_reg_count = total_coord_count;
    if (lod_bias_enabled) {
        ++total_reg_count;
    }

    const bool too_many_coords = total_coord_count > max_coords;
    const bool too_many_inputs = total_reg_count > max_inputs;
    if (too_many_coords || too_many_inputs) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        total_coord_count = std::min(total_coord_count, max_coords);
    }

    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
    // This extra slot is added after the clamp above.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        ++total_coord_count;
    }

    return {coord_count, total_coord_count};
}
|
||||
|
||||
/// Decodes the packed texel offsets (AOFFI) stored in a register.
/// Each offset is a signed bit field: 4 bits wide at bit 0/4/8 normally, 6 bits wide at bit
/// 0/8/16 for TLD4. Values at or above the wrap point (8 or 32) are negative.
/// @param aoffi_reg   Node holding the packed offsets.
/// @param coord_count Number of offsets to decode.
/// @param is_tld4     Selects the TLD4 field layout.
/// @return One node per coordinate: immediates when the register value can be tracked to a
///         constant, otherwise operations that decode the value at runtime.
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
                                                bool is_tld4) {
    // Field layout of the regular encoding, replaced below for TLD4
    std::array<u32, 3> field_offsets{0U, 4U, 8U};
    u32 field_bits = 4;
    s32 wrap_value = 8;
    s32 diff_value = 16;
    if (is_tld4) {
        field_offsets = {0U, 8U, 16U};
        field_bits = 6;
        wrap_value = 32;
        diff_value = 64;
    }
    const u32 field_mask = (1U << field_bits) - 1;

    std::vector<Node> offsets;
    offsets.reserve(coord_count);

    const auto tracked_value{
        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
    if (tracked_value) {
        // Constant offsets: decode them right here
        for (std::size_t coord = 0; coord < coord_count; ++coord) {
            s32 decoded = (*tracked_value >> field_offsets[coord]) & field_mask;
            if (decoded >= wrap_value) {
                decoded -= diff_value;
            }
            offsets.push_back(Immediate(decoded));
        }
        return offsets;
    }

    // Variable access, not supported on AMD.
    LOG_WARNING(HW_GPU,
                "AOFFI constant folding failed, some hardware might have graphical issues");
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        // Emit: field >= wrap ? field - diff : field
        const Node field = BitfieldExtract(aoffi_reg, field_offsets[coord], field_bits);
        const Node is_negative =
            Operation(OperationCode::LogicalIGreaterEqual, field, Immediate(wrap_value));
        const Node wrapped = Operation(OperationCode::IAdd, field, Immediate(-diff_value));
        offsets.push_back(Operation(OperationCode::Select, is_negative, wrapped, field));
    }
    return offsets;
}
|
||||
|
||||
std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
|
||||
static constexpr u32 num_entries = 8;
|
||||
|
||||
std::vector<Node> ptp;
|
||||
ptp.reserve(num_entries);
|
||||
|
||||
const auto global_size = static_cast<s64>(global_code.size());
|
||||
const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
|
||||
const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
|
||||
if (!low || !high) {
|
||||
for (u32 entry = 0; entry < num_entries; ++entry) {
|
||||
const u32 reg = entry / 4;
|
||||
const u32 offset = entry % 4;
|
||||
const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
|
||||
const Node condition =
|
||||
Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
|
||||
const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
|
||||
ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
|
||||
}
|
||||
return ptp;
|
||||
}
|
||||
|
||||
const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
|
||||
for (u32 entry = 0; entry < num_entries; ++entry) {
|
||||
s32 value = (immediate >> (entry * 8)) & 0b111111;
|
||||
if (value >= 32) {
|
||||
value -= 64;
|
||||
}
|
||||
ptp.push_back(Immediate(value));
|
||||
}
|
||||
|
||||
return ptp;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,169 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::VideoType;
|
||||
using Tegra::Shader::VmadShr;
|
||||
using Tegra::Shader::VmnmxOperation;
|
||||
using Tegra::Shader::VmnmxType;
|
||||
|
||||
/// Decodes the video instruction at `pc` (VMNMX, VMAD or VSETP) into `bb`.
/// Returns `pc` unchanged.
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opcode_id = opcode->get().GetId();

    // VMNMX reads its operands differently and is decoded by its own routine.
    if (opcode_id == OpCode::Id::VMNMX) {
        DecodeVMNMX(bb, instr);
        return pc;
    }

    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);

    // Operand B is either a register or a 16-bit immediate (sign-extended when signed).
    Node op_b;
    if (instr.video.use_register_b) {
        op_b = GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                               instr.video.signed_b, instr.video.type_b,
                               instr.video.byte_height_b);
    } else if (instr.video.signed_b) {
        const auto signed_imm = static_cast<s16>(instr.alu.GetImm20_16());
        op_b = Immediate(static_cast<u32>(signed_imm));
    } else {
        op_b = Immediate(instr.alu.GetImm20_16());
    }

    switch (opcode_id) {
    case OpCode::Id::VMAD: {
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        // value = a * b + c
        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);

        // Optional right shift of the result by 7 or 15 bits.
        const bool shift_by_7 = instr.vmad.shr == VmadShr::Shr7;
        const bool shift_by_15 = instr.vmad.shr == VmadShr::Shr15;
        if (shift_by_7 || shift_by_15) {
            const Node shift = Immediate(shift_by_7 ? 7 : 15);
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool is_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node comparison =
            GetPredicateComparisonInteger(instr.vsetp.cond, is_signed, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Primary predicate: Comparison OP SecondPredicate.
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, comparison, second_pred));

        // Secondary predicate (if enabled): !Comparison OP SecondPredicate.
        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            const Node negated = Operation(OperationCode::LogicalNegate, comparison);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negated, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
        break;
    }

    return pc;
}
|
||||
|
||||
/// Extracts the part of `op` selected by a video instruction operand descriptor.
/// Without chunk mode a single byte at `byte_height` is taken; in chunk mode `type` selects
/// the low or high 16 bits. Unsupported or invalid types yield an immediate zero.
/// Note: `is_signed` is not consulted by this implementation.
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
                               u64 byte_height) {
    if (is_chunk) {
        switch (type) {
        case VideoType::Size16_Low:
            return BitfieldExtract(op, 0, 16);
        case VideoType::Size16_High:
            return BitfieldExtract(op, 16, 16);
        case VideoType::Size32:
            // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is
            // used (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
            UNIMPLEMENTED();
            return Immediate(0);
        case VideoType::Invalid:
            UNREACHABLE_MSG("Invalid instruction encoding");
            return Immediate(0);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }

    // Byte mode: pick the 8-bit lane at the requested byte position.
    const u32 bit_offset = static_cast<u32>(byte_height * 8);
    return BitfieldExtract(op, bit_offset, 8);
}
|
||||
|
||||
/// Decodes VMNMX: takes the min or max of two 32-bit register operands and then combines the
/// result with a third register according to `instr.vmnmx.operation`. The final value is
/// written to gpr0. Several encodings are reported as unimplemented.
void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
    UNIMPLEMENTED_IF(instr.vmnmx.sat);
    UNIMPLEMENTED_IF(instr.generates_cc);

    Node source_a = GetRegister(instr.gpr8);
    Node source_b = GetRegister(instr.gpr20);
    Node source_c = GetRegister(instr.gpr39);

    const bool first_is_signed = instr.vmnmx.is_src_a_signed; // Stubbed
    const bool second_is_signed = instr.vmnmx.is_dest_signed;

    // First stage: min/max between operands A and B.
    const OperationCode first_op = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
    Node result =
        SignedOperation(first_op, first_is_signed, std::move(source_a), std::move(source_b));

    // Second stage: merge, accumulate or min/max against operand C.
    switch (instr.vmnmx.operation) {
    case VmnmxOperation::Nop:
        break;
    case VmnmxOperation::Mrg_16H:
        result = BitfieldInsert(std::move(source_c), std::move(result), 16, 16);
        break;
    case VmnmxOperation::Mrg_16L:
        result = BitfieldInsert(std::move(source_c), std::move(result), 0, 16);
        break;
    case VmnmxOperation::Mrg_8B0:
        result = BitfieldInsert(std::move(source_c), std::move(result), 0, 8);
        break;
    case VmnmxOperation::Mrg_8B2:
        result = BitfieldInsert(std::move(source_c), std::move(result), 16, 8);
        break;
    case VmnmxOperation::Acc:
        result = Operation(OperationCode::IAdd, std::move(result), std::move(source_c));
        break;
    case VmnmxOperation::Min:
        result = SignedOperation(OperationCode::IMin, second_is_signed, std::move(result),
                                 std::move(source_c));
        break;
    case VmnmxOperation::Max:
        result = SignedOperation(OperationCode::IMax, second_is_signed, std::move(result),
                                 std::move(source_c));
        break;
    default:
        UNREACHABLE();
        break;
    }

    SetRegister(bb, instr.gpr0, std::move(result));
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,117 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::ShuffleOperation;
|
||||
using Tegra::Shader::VoteOperation;
|
||||
|
||||
namespace {
|
||||
|
||||
OperationCode GetOperationCode(VoteOperation vote_op) {
|
||||
switch (vote_op) {
|
||||
case VoteOperation::All:
|
||||
return OperationCode::VoteAll;
|
||||
case VoteOperation::Any:
|
||||
return OperationCode::VoteAny;
|
||||
case VoteOperation::Eq:
|
||||
return OperationCode::VoteEqual;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
|
||||
return OperationCode::VoteAll;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Decodes the warp instruction at `pc` (VOTE, SHFL or FSWZADD) into `bb` and marks the
/// shader as using warp operations. Returns `pc` unchanged.
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Signal the backend that this shader uses warp instructions.
    uses_warps = true;

    switch (opcode->get().GetId()) {
    case OpCode::Id::VOTE: {
        const Node predicate = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
        const Node ballot = Operation(OperationCode::BallotThread, predicate);
        const Node vote_result = Operation(GetOperationCode(instr.vote.operation), predicate);
        SetRegister(bb, instr.gpr0, ballot);
        SetPredicate(bb, instr.vote.dest_pred, vote_result);
        break;
    }
    case OpCode::Id::SHFL: {
        Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
                                           : GetRegister(instr.gpr39);
        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
                                             : GetRegister(instr.gpr20);

        // The mask register carries the clamp value in its low 5 bits and the segment mask
        // in the bitfield starting at bit 8.
        Node thread_id = Operation(OperationCode::ThreadId);
        Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
        Node seg_mask = BitfieldExtract(mask, 8, 16);

        Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
        Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
        Node clamp_in_segment = Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask);
        Node max_thread_id =
            Operation(OperationCode::IBitwiseOr, min_thread_id, clamp_in_segment);

        // Source thread selection depends on the shuffle mode.
        Node src_thread_id;
        switch (instr.shfl.operation) {
        case ShuffleOperation::Idx:
            src_thread_id =
                Operation(OperationCode::IBitwiseOr,
                          Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
                          min_thread_id);
            break;
        case ShuffleOperation::Down:
            src_thread_id = Operation(OperationCode::IAdd, thread_id, index);
            break;
        case ShuffleOperation::Up:
            src_thread_id = Operation(OperationCode::IAdd, thread_id,
                                      Operation(OperationCode::INegate, index));
            break;
        case ShuffleOperation::Bfly:
            src_thread_id = Operation(OperationCode::IBitwiseXor, thread_id, index);
            break;
        default:
            UNREACHABLE();
            src_thread_id = Immediate(0U);
            break;
        }

        // "Up" is bounded from below, every other mode from above.
        const bool is_up = instr.shfl.operation == ShuffleOperation::Up;
        Node in_bounds =
            is_up ? Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id)
                  : Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);

        SetPredicate(bb, instr.shfl.pred48, in_bounds);
        SetRegister(
            bb, instr.gpr0,
            Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
        break;
    }
    case OpCode::Id::FSWZADD: {
        UNIMPLEMENTED_IF(instr.fswzadd.ndv);

        Node lhs = GetRegister(instr.gpr8);
        Node rhs = GetRegister(instr.gpr20);
        Node swizzle = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
        SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, lhs, rhs, swizzle));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
        break;
    }

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,156 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::PredCondition;
|
||||
|
||||
/// Decodes an XMAD instruction (CR, RR, RC or IMM form) at `pc` into `bb`.
/// The instruction multiplies 16-bit halves of operands A and B, adds a (mode dependent)
/// operand C and optionally merges the low half of the sum with the shifted operand B.
/// Returns `pc` unchanged.
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    using XmadMode = Tegra::Shader::XmadMode;

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Gather the per-encoding flags and operands.
    bool is_merge = false;
    bool is_psl = false;
    bool is_high_b = false;
    XmadMode mode = XmadMode::None;
    Node op_b_source;
    Node op_c;
    switch (opcode->get().GetId()) {
    case OpCode::Id::XMAD_CR:
        is_merge = instr.xmad.merge_56 != 0;
        is_psl = instr.xmad.product_shift_left_second != 0;
        is_high_b = instr.xmad.high_b != 0;
        mode = instr.xmad.mode_cbf;
        op_b_source = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::XMAD_RR:
        is_merge = instr.xmad.merge_37 != 0;
        is_psl = instr.xmad.product_shift_left != 0;
        is_high_b = instr.xmad.high_b_rr != 0;
        mode = instr.xmad.mode;
        op_b_source = GetRegister(instr.gpr20);
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::XMAD_RC:
        is_high_b = instr.xmad.high_b != 0;
        mode = instr.xmad.mode_cbf;
        op_b_source = GetRegister(instr.gpr39);
        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::XMAD_IMM:
        is_merge = instr.xmad.merge_37 != 0;
        is_psl = instr.xmad.product_shift_left != 0;
        mode = instr.xmad.mode;
        op_b_source = Immediate(static_cast<u32>(instr.xmad.imm20_16));
        op_c = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        op_b_source = Immediate(0);
        op_c = Immediate(0);
        break;
    }

    // Select the 16-bit half of each multiplicand.
    op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
                           instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));

    const Node original_b = op_b_source;
    const Node op_b =
        SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_source),
                        is_high_b ? Immediate(16) : Immediate(0), Immediate(16));

    // Mismatching signs for A and B were already reported above, so A's sign is used for the
    // product.
    Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
    if (is_psl) {
        product =
            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
    }
    SetTemporary(bb, 0, product);
    product = GetTemporary(0);

    // Transform operand C according to the XMAD mode.
    const Node original_c = op_c;
    switch (mode) {
    case XmadMode::None:
        op_c = original_c;
        break;
    case XmadMode::CLo:
        op_c = BitfieldExtract(original_c, 0, 16);
        break;
    case XmadMode::CHi:
        op_c = BitfieldExtract(original_c, 16, 16);
        break;
    case XmadMode::CBcc: {
        Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                         original_b, Immediate(16));
        op_c = SignedOperation(OperationCode::IAdd, is_signed_c, original_c,
                               std::move(shifted_b));
        break;
    }
    case XmadMode::CSfu: {
        // C is kept as-is when either multiplicand is zero.
        const Node a_is_zero =
            GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
        const Node b_is_zero =
            GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
        const Node any_zero = Operation(OperationCode::LogicalOr, a_is_zero, b_is_zero);

        // Otherwise 65536 is subtracted once for each multiplicand with bit 31 set.
        const Node a_is_negative = GetPredicateComparisonInteger(
            PredCondition::NE, is_signed_a,
            SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
                            Immediate(0x80000000)),
            Immediate(0));
        const Node b_is_negative = GetPredicateComparisonInteger(
            PredCondition::NE, is_signed_b,
            SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
                            Immediate(0x80000000)),
            Immediate(0));

        Node adjusted_c = Operation(
            OperationCode::Select, a_is_negative,
            SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
            original_c);
        Node adjusted_again =
            SignedOperation(OperationCode::IAdd, is_signed_c, adjusted_c, Immediate(-65536));
        adjusted_c = Operation(OperationCode::Select, b_is_negative, std::move(adjusted_again),
                               std::move(adjusted_c));

        op_c = Operation(OperationCode::Select, any_zero, original_c, std::move(adjusted_c));
        break;
    }
    default:
        UNREACHABLE();
        op_c = Immediate(0);
        break;
    }

    SetTemporary(bb, 1, op_c);
    op_c = GetTemporary(1);

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
    SetTemporary(bb, 2, sum);
    sum = GetTemporary(2);

    if (is_merge) {
        // Merge: low 16 bits of the sum combined with operand B shifted into the high half.
        const Node low_sum = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a,
                                             std::move(sum), Immediate(0), Immediate(16));
        const Node high_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                            original_b, Immediate(16));
        sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, low_sum, high_b);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, std::move(sum));

    return pc;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,93 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
|
||||
#include "video_core/shader/expr.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
namespace {
|
||||
bool ExprIsBoolean(const Expr& expr) {
|
||||
return std::holds_alternative<ExprBoolean>(*expr);
|
||||
}
|
||||
|
||||
bool ExprBooleanGet(const Expr& expr) {
|
||||
return std::get_if<ExprBoolean>(expr.get())->value;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Two conjunctions are equal when both operand expressions compare equal by value.
bool ExprAnd::operator==(const ExprAnd& b) const {
    if (!(*operand1 == *b.operand1)) {
        return false;
    }
    return *operand2 == *b.operand2;
}
|
||||
|
||||
/// Negation of ExprAnd::operator==.
bool ExprAnd::operator!=(const ExprAnd& b) const {
    const bool equal = operator==(b);
    return !equal;
}
|
||||
|
||||
/// Two disjunctions are equal when both operand expressions compare equal by value.
bool ExprOr::operator==(const ExprOr& b) const {
    if (!(*operand1 == *b.operand1)) {
        return false;
    }
    return *operand2 == *b.operand2;
}
|
||||
|
||||
/// Negation of ExprOr::operator==.
bool ExprOr::operator!=(const ExprOr& b) const {
    const bool equal = operator==(b);
    return !equal;
}
|
||||
|
||||
/// Two negations are equal when the negated expressions compare equal by value.
bool ExprNot::operator==(const ExprNot& b) const {
    const ExprData& mine = *operand1;
    const ExprData& theirs = *b.operand1;
    return mine == theirs;
}
|
||||
|
||||
/// Negation of ExprNot::operator==.
bool ExprNot::operator!=(const ExprNot& b) const {
    const bool equal = operator==(b);
    return !equal;
}
|
||||
|
||||
/// Builds the negation of `first`. A double negation is collapsed: negating an ExprNot
/// returns its inner operand instead of wrapping it again.
Expr MakeExprNot(Expr first) {
    const bool already_negated = std::holds_alternative<ExprNot>(*first);
    if (!already_negated) {
        return MakeExpr<ExprNot>(std::move(first));
    }
    const ExprNot* const negation = std::get_if<ExprNot>(first.get());
    return negation->operand1;
}
|
||||
|
||||
/// Builds `first AND second`, folding boolean constants:
/// a true operand yields the other operand, a false operand yields itself.
Expr MakeExprAnd(Expr first, Expr second) {
    if (ExprIsBoolean(first)) {
        if (ExprBooleanGet(first)) {
            return second;
        }
        return first;
    }
    if (ExprIsBoolean(second)) {
        if (ExprBooleanGet(second)) {
            return first;
        }
        return second;
    }
    return MakeExpr<ExprAnd>(std::move(first), std::move(second));
}
|
||||
|
||||
/// Builds `first OR second`, folding boolean constants:
/// a true operand yields itself, a false operand yields the other operand.
Expr MakeExprOr(Expr first, Expr second) {
    if (ExprIsBoolean(first)) {
        if (ExprBooleanGet(first)) {
            return first;
        }
        return second;
    }
    if (ExprIsBoolean(second)) {
        if (ExprBooleanGet(second)) {
            return second;
        }
        return first;
    }
    return MakeExpr<ExprOr>(std::move(first), std::move(second));
}
|
||||
|
||||
bool ExprAreEqual(const Expr& first, const Expr& second) {
|
||||
return (*first) == (*second);
|
||||
}
|
||||
|
||||
bool ExprAreOpposite(const Expr& first, const Expr& second) {
|
||||
if (std::holds_alternative<ExprNot>(*first)) {
|
||||
return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
|
||||
}
|
||||
if (std::holds_alternative<ExprNot>(*second)) {
|
||||
return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ExprIsTrue(const Expr& first) {
|
||||
if (ExprIsBoolean(first)) {
|
||||
return ExprBooleanGet(first);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,156 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::ConditionCode;
|
||||
using Tegra::Shader::Pred;
|
||||
|
||||
class ExprAnd;
|
||||
class ExprBoolean;
|
||||
class ExprCondCode;
|
||||
class ExprGprEqual;
|
||||
class ExprNot;
|
||||
class ExprOr;
|
||||
class ExprPredicate;
|
||||
class ExprVar;
|
||||
|
||||
using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
|
||||
ExprBoolean, ExprGprEqual>;
|
||||
using Expr = std::shared_ptr<ExprData>;
|
||||
|
||||
class ExprAnd final {
|
||||
public:
|
||||
explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
|
||||
|
||||
bool operator==(const ExprAnd& b) const;
|
||||
bool operator!=(const ExprAnd& b) const;
|
||||
|
||||
Expr operand1;
|
||||
Expr operand2;
|
||||
};
|
||||
|
||||
class ExprOr final {
|
||||
public:
|
||||
explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
|
||||
|
||||
bool operator==(const ExprOr& b) const;
|
||||
bool operator!=(const ExprOr& b) const;
|
||||
|
||||
Expr operand1;
|
||||
Expr operand2;
|
||||
};
|
||||
|
||||
class ExprNot final {
|
||||
public:
|
||||
explicit ExprNot(Expr a) : operand1{std::move(a)} {}
|
||||
|
||||
bool operator==(const ExprNot& b) const;
|
||||
bool operator!=(const ExprNot& b) const;
|
||||
|
||||
Expr operand1;
|
||||
};
|
||||
|
||||
class ExprVar final {
|
||||
public:
|
||||
explicit ExprVar(u32 index) : var_index{index} {}
|
||||
|
||||
bool operator==(const ExprVar& b) const {
|
||||
return var_index == b.var_index;
|
||||
}
|
||||
|
||||
bool operator!=(const ExprVar& b) const {
|
||||
return !operator==(b);
|
||||
}
|
||||
|
||||
u32 var_index;
|
||||
};
|
||||
|
||||
class ExprPredicate final {
|
||||
public:
|
||||
explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
|
||||
|
||||
bool operator==(const ExprPredicate& b) const {
|
||||
return predicate == b.predicate;
|
||||
}
|
||||
|
||||
bool operator!=(const ExprPredicate& b) const {
|
||||
return !operator==(b);
|
||||
}
|
||||
|
||||
u32 predicate;
|
||||
};
|
||||
|
||||
class ExprCondCode final {
|
||||
public:
|
||||
explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
|
||||
|
||||
bool operator==(const ExprCondCode& b) const {
|
||||
return cc == b.cc;
|
||||
}
|
||||
|
||||
bool operator!=(const ExprCondCode& b) const {
|
||||
return !operator==(b);
|
||||
}
|
||||
|
||||
ConditionCode cc;
|
||||
};
|
||||
|
||||
/// Expression leaf that holds a constant boolean value.
class ExprBoolean final {
public:
    explicit ExprBoolean(bool val) : value{val} {}

    /// Two boolean constants are equal when their values match.
    bool operator==(const ExprBoolean& rhs) const {
        return rhs.value == value;
    }

    bool operator!=(const ExprBoolean& rhs) const {
        return !(*this == rhs);
    }

    bool value; ///< Constant value
};
|
||||
|
||||
class ExprGprEqual final {
|
||||
public:
|
||||
explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
|
||||
|
||||
bool operator==(const ExprGprEqual& b) const {
|
||||
return gpr == b.gpr && value == b.value;
|
||||
}
|
||||
|
||||
bool operator!=(const ExprGprEqual& b) const {
|
||||
return !operator==(b);
|
||||
}
|
||||
|
||||
u32 gpr;
|
||||
u32 value;
|
||||
};
|
||||
|
||||
template <typename T, typename... Args>
|
||||
Expr MakeExpr(Args&&... args) {
|
||||
static_assert(std::is_convertible_v<T, ExprData>);
|
||||
return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
|
||||
}
|
||||
|
||||
bool ExprAreEqual(const Expr& first, const Expr& second);
|
||||
|
||||
bool ExprAreOpposite(const Expr& first, const Expr& second);
|
||||
|
||||
Expr MakeExprNot(Expr first);
|
||||
|
||||
Expr MakeExprAnd(Expr first, Expr second);
|
||||
|
||||
Expr MakeExprOr(Expr first, Expr second);
|
||||
|
||||
bool ExprIsTrue(const Expr& first);
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,76 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// Returns the GPU address of a shader stage program: the code base address from the
/// Maxwell3D registers plus the offset configured for `program`.
GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
    const auto stage_index = static_cast<std::size_t>(program);
    const auto& stage_config = maxwell3d.regs.shader_config[stage_index];
    return maxwell3d.regs.code_address.CodeAddress() + stage_config.offset;
}
|
||||
|
||||
/// Returns true when the instruction at `offset` is a scheduler instruction, i.e. its
/// distance from `main_offset` is a multiple of four.
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    // Sched instructions appear once every 4 instructions.
    constexpr std::size_t sched_period = 4;
    const std::size_t relative_offset = offset - main_offset;
    const std::size_t position_in_bundle = relative_offset % sched_period;
    return position_in_bundle == 0;
}
|
||||
|
||||
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
|
||||
// This is the encoded version of BRA that jumps to itself. All Nvidia
|
||||
// shaders end with one.
|
||||
static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
|
||||
static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
|
||||
|
||||
const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
|
||||
std::size_t offset = start_offset;
|
||||
while (offset < program.size()) {
|
||||
const u64 instruction = program[offset];
|
||||
if (!IsSchedInstruction(offset, start_offset)) {
|
||||
if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
|
||||
// End on Maxwell's "nop" instruction
|
||||
break;
|
||||
}
|
||||
if (instruction == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
++offset;
|
||||
}
|
||||
// The last instruction is included in the program size
|
||||
return std::min(offset + 1, program.size());
|
||||
}
|
||||
|
||||
/// Reads a shader program from GPU memory at `gpu_addr` and trims it to its computed size.
/// If `host_ptr` is null the assertion fires and the untrimmed, zero-initialised buffer of
/// MAX_PROGRAM_LENGTH entries is returned without reading memory.
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
                          const u8* host_ptr, bool is_compute) {
    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
    ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });

    const std::size_t size_in_bytes = code.size() * sizeof(u64);
    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), size_in_bytes);

    const std::size_t program_size = CalculateProgramSize(code, is_compute);
    code.resize(program_size);
    return code;
}
|
||||
|
||||
/// Hashes a program stream into a 64-bit identifier. When `is_a` is set the hash of `code_b`
/// is combined in as well. `shader_type` does not take part in the hash.
u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
                        const ProgramCode& code_b) {
    std::size_t seed = boost::hash_value(code);
    if (!is_a) {
        return static_cast<u64>(seed);
    }
    // VertexA programs include two programs
    boost::hash_combine(seed, boost::hash_value(code_b));
    return static_cast<u64>(seed);
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,43 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using ProgramCode = std::vector<u64>;
|
||||
|
||||
constexpr u32 STAGE_MAIN_OFFSET = 10;
|
||||
constexpr u32 KERNEL_MAIN_OFFSET = 0;
|
||||
|
||||
/// Gets the GPU address of the specified shader stage program
|
||||
GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
|
||||
|
||||
/// Returns whether the instruction at the given offset is a scheduler instruction
|
||||
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
|
||||
|
||||
/// Calculates the size of a program stream
|
||||
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
|
||||
|
||||
/// Reads the shader program code at the specified GPU address through the memory manager and
/// trims the result to CalculateProgramSize(code, is_compute)
|
||||
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
|
||||
const u8* host_ptr, bool is_compute);
|
||||
|
||||
/// Hashes one (or two) program streams: code is always hashed, code_b is only mixed in when
/// is_a is true. shader_type does not take part in the hash.
|
||||
u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
|
||||
const ProgramCode& code_b = {});
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,701 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// Operation codes understood by OperationNode.
/// The trailing comment of each entry lists the expected meta/operands and the result type.
enum class OperationCode {
|
||||
Assign, ///< (float& dest, float src) -> void
|
||||
|
||||
Select, ///< (MetaArithmetic, bool pred, float a, float b) -> float
|
||||
|
||||
FAdd, ///< (MetaArithmetic, float a, float b) -> float
|
||||
FMul, ///< (MetaArithmetic, float a, float b) -> float
|
||||
FDiv, ///< (MetaArithmetic, float a, float b) -> float
|
||||
FFma, ///< (MetaArithmetic, float a, float b, float c) -> float
|
||||
FNegate, ///< (MetaArithmetic, float a) -> float
|
||||
FAbsolute, ///< (MetaArithmetic, float a) -> float
|
||||
FClamp, ///< (MetaArithmetic, float value, float min, float max) -> float
|
||||
FCastHalf0, ///< (MetaArithmetic, f16vec2 a) -> float
|
||||
FCastHalf1, ///< (MetaArithmetic, f16vec2 a) -> float
|
||||
FMin, ///< (MetaArithmetic, float a, float b) -> float
|
||||
FMax, ///< (MetaArithmetic, float a, float b) -> float
|
||||
FCos, ///< (MetaArithmetic, float a) -> float
|
||||
FSin, ///< (MetaArithmetic, float a) -> float
|
||||
FExp2, ///< (MetaArithmetic, float a) -> float
|
||||
FLog2, ///< (MetaArithmetic, float a) -> float
|
||||
FInverseSqrt, ///< (MetaArithmetic, float a) -> float
|
||||
FSqrt, ///< (MetaArithmetic, float a) -> float
|
||||
FRoundEven, ///< (MetaArithmetic, float a) -> float
|
||||
FFloor, ///< (MetaArithmetic, float a) -> float
|
||||
FCeil, ///< (MetaArithmetic, float a) -> float
|
||||
FTrunc, ///< (MetaArithmetic, float a) -> float
|
||||
FCastInteger, ///< (MetaArithmetic, int a) -> float
|
||||
FCastUInteger, ///< (MetaArithmetic, uint a) -> float
|
||||
FSwizzleAdd, ///< (float a, float b, uint mask) -> float
|
||||
|
||||
IAdd, ///< (MetaArithmetic, int a, int b) -> int
|
||||
IMul, ///< (MetaArithmetic, int a, int b) -> int
|
||||
IDiv, ///< (MetaArithmetic, int a, int b) -> int
|
||||
INegate, ///< (MetaArithmetic, int a) -> int
|
||||
IAbsolute, ///< (MetaArithmetic, int a) -> int
|
||||
IMin, ///< (MetaArithmetic, int a, int b) -> int
|
||||
IMax, ///< (MetaArithmetic, int a, int b) -> int
|
||||
ICastFloat, ///< (MetaArithmetic, float a) -> int
|
||||
ICastUnsigned, ///< (MetaArithmetic, uint a) -> int
|
||||
ILogicalShiftLeft, ///< (MetaArithmetic, int a, uint b) -> int
|
||||
ILogicalShiftRight, ///< (MetaArithmetic, int a, uint b) -> int
|
||||
IArithmeticShiftRight, ///< (MetaArithmetic, int a, uint b) -> int
|
||||
IBitwiseAnd, ///< (MetaArithmetic, int a, int b) -> int
|
||||
IBitwiseOr, ///< (MetaArithmetic, int a, int b) -> int
|
||||
IBitwiseXor, ///< (MetaArithmetic, int a, int b) -> int
|
||||
IBitwiseNot, ///< (MetaArithmetic, int a) -> int
|
||||
IBitfieldInsert, ///< (MetaArithmetic, int base, int insert, int offset, int bits) -> int
|
||||
IBitfieldExtract, ///< (MetaArithmetic, int value, int offset, int bits) -> int
|
||||
IBitCount, ///< (MetaArithmetic, int) -> int
|
||||
IBitMSB, ///< (MetaArithmetic, int) -> int
|
||||
|
||||
UAdd, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UMul, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UDiv, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UMin, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UMax, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UCastFloat, ///< (MetaArithmetic, float a) -> uint
|
||||
UCastSigned, ///< (MetaArithmetic, int a) -> uint
|
||||
ULogicalShiftLeft, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
ULogicalShiftRight, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UArithmeticShiftRight, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UBitwiseAnd, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UBitwiseOr, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UBitwiseXor, ///< (MetaArithmetic, uint a, uint b) -> uint
|
||||
UBitwiseNot, ///< (MetaArithmetic, uint a) -> uint
|
||||
UBitfieldInsert, ///< (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
|
||||
UBitfieldExtract, ///< (MetaArithmetic, uint value, int offset, int bits) -> uint
|
||||
UBitCount, ///< (MetaArithmetic, uint) -> uint
|
||||
UBitMSB, ///< (MetaArithmetic, uint) -> uint
|
||||
|
||||
HAdd, ///< (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||
HMul, ///< (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||
HFma, ///< (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
|
||||
HAbsolute, ///< (f16vec2 a) -> f16vec2
|
||||
HNegate, ///< (f16vec2 a, bool first, bool second) -> f16vec2
|
||||
HClamp, ///< (f16vec2 src, float min, float max) -> f16vec2
|
||||
HCastFloat, ///< (MetaArithmetic, float a) -> f16vec2
|
||||
HUnpack, ///< (Tegra::Shader::HalfType, T value) -> f16vec2
|
||||
HMergeF32, ///< (f16vec2 src) -> float
|
||||
HMergeH0, ///< (f16vec2 dest, f16vec2 src) -> f16vec2
|
||||
HMergeH1, ///< (f16vec2 dest, f16vec2 src) -> f16vec2
|
||||
HPack2, ///< (float a, float b) -> f16vec2
|
||||
|
||||
LogicalAssign, ///< (bool& dst, bool src) -> void
|
||||
LogicalAnd, ///< (bool a, bool b) -> bool
|
||||
LogicalOr, ///< (bool a, bool b) -> bool
|
||||
LogicalXor, ///< (bool a, bool b) -> bool
|
||||
LogicalNegate, ///< (bool a) -> bool
|
||||
LogicalPick2, ///< (bool2 pair, uint index) -> bool
|
||||
LogicalAnd2, ///< (bool2 a) -> bool
|
||||
|
||||
LogicalFOrdLessThan, ///< (float a, float b) -> bool
|
||||
LogicalFOrdEqual, ///< (float a, float b) -> bool
|
||||
LogicalFOrdLessEqual, ///< (float a, float b) -> bool
|
||||
LogicalFOrdGreaterThan, ///< (float a, float b) -> bool
|
||||
LogicalFOrdNotEqual, ///< (float a, float b) -> bool
|
||||
LogicalFOrdGreaterEqual, ///< (float a, float b) -> bool
|
||||
LogicalFOrdered, ///< (float a, float b) -> bool
|
||||
LogicalFUnordered, ///< (float a, float b) -> bool
|
||||
LogicalFUnordLessThan, ///< (float a, float b) -> bool
|
||||
LogicalFUnordEqual, ///< (float a, float b) -> bool
|
||||
LogicalFUnordLessEqual, ///< (float a, float b) -> bool
|
||||
LogicalFUnordGreaterThan, ///< (float a, float b) -> bool
|
||||
LogicalFUnordNotEqual, ///< (float a, float b) -> bool
|
||||
LogicalFUnordGreaterEqual, ///< (float a, float b) -> bool
|
||||
|
||||
LogicalILessThan, ///< (int a, int b) -> bool
|
||||
LogicalIEqual, ///< (int a, int b) -> bool
|
||||
LogicalILessEqual, ///< (int a, int b) -> bool
|
||||
LogicalIGreaterThan, ///< (int a, int b) -> bool
|
||||
LogicalINotEqual, ///< (int a, int b) -> bool
|
||||
LogicalIGreaterEqual, ///< (int a, int b) -> bool
|
||||
|
||||
LogicalULessThan, ///< (uint a, uint b) -> bool
|
||||
LogicalUEqual, ///< (uint a, uint b) -> bool
|
||||
LogicalULessEqual, ///< (uint a, uint b) -> bool
|
||||
LogicalUGreaterThan, ///< (uint a, uint b) -> bool
|
||||
LogicalUNotEqual, ///< (uint a, uint b) -> bool
|
||||
LogicalUGreaterEqual, ///< (uint a, uint b) -> bool
|
||||
|
||||
LogicalAddCarry, ///< (uint a, uint b) -> bool
|
||||
|
||||
Logical2HLessThan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HEqual, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HLessEqual, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HGreaterThan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HNotEqual, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HGreaterEqual, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HLessThanWithNan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HEqualWithNan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HLessEqualWithNan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HGreaterThanWithNan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HNotEqualWithNan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
Logical2HGreaterEqualWithNan, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
|
||||
|
||||
Texture, ///< (MetaTexture, float[N] coords) -> float4
|
||||
TextureLod, ///< (MetaTexture, float[N] coords) -> float4
|
||||
TextureGather, ///< (MetaTexture, float[N] coords) -> float4
|
||||
TextureQueryDimensions, ///< (MetaTexture, float a) -> float4
|
||||
TextureQueryLod, ///< (MetaTexture, float[N] coords) -> float4
|
||||
TexelFetch, ///< (MetaTexture, int[N], int) -> float4
|
||||
TextureGradient, ///< (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
|
||||
|
||||
ImageLoad, ///< (MetaImage, int[N] coords) -> void
|
||||
ImageStore, ///< (MetaImage, int[N] coords) -> void
|
||||
|
||||
AtomicImageAdd, ///< (MetaImage, int[N] coords) -> void
|
||||
AtomicImageAnd, ///< (MetaImage, int[N] coords) -> void
|
||||
AtomicImageOr, ///< (MetaImage, int[N] coords) -> void
|
||||
AtomicImageXor, ///< (MetaImage, int[N] coords) -> void
|
||||
AtomicImageExchange, ///< (MetaImage, int[N] coords) -> void
|
||||
|
||||
AtomicUExchange, ///< (memory, uint) -> uint
|
||||
AtomicUAdd, ///< (memory, uint) -> uint
|
||||
AtomicUMin, ///< (memory, uint) -> uint
|
||||
AtomicUMax, ///< (memory, uint) -> uint
|
||||
AtomicUAnd, ///< (memory, uint) -> uint
|
||||
AtomicUOr, ///< (memory, uint) -> uint
|
||||
AtomicUXor, ///< (memory, uint) -> uint
|
||||
|
||||
AtomicIExchange, ///< (memory, int) -> int
|
||||
AtomicIAdd, ///< (memory, int) -> int
|
||||
AtomicIMin, ///< (memory, int) -> int
|
||||
AtomicIMax, ///< (memory, int) -> int
|
||||
AtomicIAnd, ///< (memory, int) -> int
|
||||
AtomicIOr, ///< (memory, int) -> int
|
||||
AtomicIXor, ///< (memory, int) -> int
|
||||
|
||||
ReduceUAdd, ///< (memory, uint) -> void
|
||||
ReduceUMin, ///< (memory, uint) -> void
|
||||
ReduceUMax, ///< (memory, uint) -> void
|
||||
ReduceUAnd, ///< (memory, uint) -> void
|
||||
ReduceUOr, ///< (memory, uint) -> void
|
||||
ReduceUXor, ///< (memory, uint) -> void
|
||||
|
||||
ReduceIAdd, ///< (memory, int) -> void
|
||||
ReduceIMin, ///< (memory, int) -> void
|
||||
ReduceIMax, ///< (memory, int) -> void
|
||||
ReduceIAnd, ///< (memory, int) -> void
|
||||
ReduceIOr, ///< (memory, int) -> void
|
||||
ReduceIXor, ///< (memory, int) -> void
|
||||
|
||||
Branch, ///< (uint branch_target) -> void
|
||||
BranchIndirect, ///< (uint branch_target) -> void
|
||||
PushFlowStack, ///< (uint branch_target) -> void
|
||||
PopFlowStack, ///< () -> void
|
||||
Exit, ///< () -> void
|
||||
Discard, ///< () -> void
|
||||
|
||||
EmitVertex, ///< () -> void
|
||||
EndPrimitive, ///< () -> void
|
||||
|
||||
InvocationId, ///< () -> int
|
||||
YNegate, ///< () -> float
|
||||
LocalInvocationIdX, ///< () -> uint
|
||||
LocalInvocationIdY, ///< () -> uint
|
||||
LocalInvocationIdZ, ///< () -> uint
|
||||
WorkGroupIdX, ///< () -> uint
|
||||
WorkGroupIdY, ///< () -> uint
|
||||
WorkGroupIdZ, ///< () -> uint
|
||||
|
||||
BallotThread, ///< (bool) -> uint
|
||||
VoteAll, ///< (bool) -> bool
|
||||
VoteAny, ///< (bool) -> bool
|
||||
VoteEqual, ///< (bool) -> bool
|
||||
|
||||
ThreadId, ///< () -> uint
|
||||
ThreadEqMask, ///< () -> uint
|
||||
ThreadGeMask, ///< () -> uint
|
||||
ThreadGtMask, ///< () -> uint
|
||||
ThreadLeMask, ///< () -> uint
|
||||
ThreadLtMask, ///< () -> uint
|
||||
ShuffleIndexed, ///< (uint value, uint index) -> uint
|
||||
|
||||
Barrier, ///< () -> void
|
||||
MemoryBarrierGroup, ///< () -> void
|
||||
MemoryBarrierGlobal, ///< () -> void
|
||||
|
||||
Amount, ///< Last enumerator: equals the number of operation codes declared above
|
||||
};
|
||||
|
||||
/// Identifiers of the internal flags referenced by InternalFlagNode.
/// Amount is the number of flags, not a flag itself.
enum class InternalFlag {
|
||||
Zero = 0,
|
||||
Sign = 1,
|
||||
Carry = 2,
|
||||
Overflow = 3,
|
||||
Amount = 4,
|
||||
};
|
||||
|
||||
/// Stack class carried as operation metadata (it is one of the alternatives of Meta).
/// NOTE(review): presumably selects between the SSY and PBK flow stacks for
/// PushFlowStack/PopFlowStack - confirm against the decoder.
enum class MetaStackClass {
|
||||
Ssy,
|
||||
Pbk,
|
||||
};
|
||||
|
||||
// Forward declarations of every alternative held by NodeData
class OperationNode;
|
||||
class ConditionalNode;
|
||||
class GprNode;
|
||||
class CustomVarNode;
|
||||
class ImmediateNode;
|
||||
class InternalFlagNode;
|
||||
class PredicateNode;
|
||||
class AbufNode;
|
||||
class CbufNode;
|
||||
class LmemNode;
|
||||
class PatchNode;
|
||||
class SmemNode;
|
||||
class GmemNode;
|
||||
class CommentNode;
|
||||
|
||||
/// Tagged union of all IR node kinds
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
|
||||
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
|
||||
LmemNode, SmemNode, GmemNode, CommentNode>;
|
||||
/// Shared handle to an IR node; a default-constructed Node is null
using Node = std::shared_ptr<NodeData>;
|
||||
/// Fixed group of four nodes
using Node4 = std::array<Node, 4>;
|
||||
/// Ordered list of nodes
using NodeBlock = std::vector<Node>;
|
||||
|
||||
// Forward declarations of every alternative held by TrackSamplerData
struct ArraySamplerNode;
|
||||
struct BindlessSamplerNode;
|
||||
struct SeparateSamplerNode;
|
||||
|
||||
/// Tagged union of the tracked sampler descriptions
using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
|
||||
/// Shared handle to a tracked sampler description
using TrackSampler = std::shared_ptr<TrackSamplerData>;
|
||||
|
||||
struct SamplerEntry {
|
||||
/// Bound samplers constructor
|
||||
explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
|
||||
bool is_shadow_, bool is_buffer_, bool is_indexed_)
|
||||
: index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
|
||||
is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
|
||||
|
||||
/// Separate sampler constructor
|
||||
explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
|
||||
Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
|
||||
bool is_buffer_)
|
||||
: index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
|
||||
buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
|
||||
is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
|
||||
|
||||
/// Bindless samplers constructor
|
||||
explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
|
||||
bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
|
||||
: index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
|
||||
is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
|
||||
}
|
||||
|
||||
u32 index = 0; ///< Emulated index given for the this sampler.
|
||||
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
|
||||
u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
|
||||
u32 buffer = 0; ///< Buffer where the bindless sampler is read.
|
||||
u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
|
||||
u32 size = 1; ///< Size of the sampler.
|
||||
|
||||
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
||||
bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
|
||||
bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
|
||||
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
|
||||
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
|
||||
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
|
||||
bool is_separated = false; ///< Whether the image and sampler is separated or not.
|
||||
};
|
||||
|
||||
/// Represents a tracked array sampler: an index, a base offset and a variable identifier.
/// NOTE(review): the previous comment was a copy of BindlessSamplerNode's; presumably
/// bindless_var is the custom variable holding the dynamic part of the handle - confirm
/// against the sampler tracking code.
|
||||
struct ArraySamplerNode {
|
||||
u32 index;
|
||||
u32 base_offset;
|
||||
u32 bindless_var;
|
||||
};
|
||||
|
||||
/// Represents a tracked separate sampler/image pair that was folded statically.
/// Each member holds one value per element of the pair.
|
||||
struct SeparateSamplerNode {
|
||||
std::pair<u32, u32> indices;
|
||||
|
||||
std::pair<u32, u32> offsets;
|
||||
};
|
||||
|
||||
/// Represents a tracked bindless sampler read from a direct const buffer,
/// identified by an index and an offset
|
||||
struct BindlessSamplerNode {
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
struct ImageEntry {
|
||||
public:
|
||||
/// Bound images constructor
|
||||
explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
|
||||
: index{index_}, offset{offset_}, type{type_} {}
|
||||
|
||||
/// Bindless samplers constructor
|
||||
explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
|
||||
: index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
|
||||
|
||||
void MarkWrite() {
|
||||
is_written = true;
|
||||
}
|
||||
|
||||
void MarkRead() {
|
||||
is_read = true;
|
||||
}
|
||||
|
||||
void MarkAtomic() {
|
||||
MarkWrite();
|
||||
MarkRead();
|
||||
is_atomic = true;
|
||||
}
|
||||
|
||||
u32 index = 0;
|
||||
u32 offset = 0;
|
||||
u32 buffer = 0;
|
||||
|
||||
Tegra::Shader::ImageType type{};
|
||||
bool is_bindless = false;
|
||||
bool is_written = false;
|
||||
bool is_read = false;
|
||||
bool is_atomic = false;
|
||||
};
|
||||
|
||||
struct GlobalMemoryBase {
|
||||
u32 cbuf_index = 0;
|
||||
u32 cbuf_offset = 0;
|
||||
|
||||
[[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
|
||||
return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
|
||||
}
|
||||
};
|
||||
|
||||
/// Parameters describing an arithmetic operation
|
||||
struct MetaArithmetic {
|
||||
bool precise{}; ///< Whether the operation is marked as precise (defaults to false)
|
||||
};
|
||||
|
||||
/// Parameters describing a texture sampler operation: the sampler entry plus the extra
/// operand nodes listed below. Node members are shared pointers and may be null; which of
/// them are populated for a given operation is not visible from this header.
|
||||
struct MetaTexture {
|
||||
SamplerEntry sampler;
|
||||
Node array;
|
||||
Node depth_compare;
|
||||
std::vector<Node> aoffi;
|
||||
std::vector<Node> ptp;
|
||||
std::vector<Node> derivates;
|
||||
Node bias;
|
||||
Node lod;
|
||||
Node component;
|
||||
u32 element{};
|
||||
Node index;
|
||||
};
|
||||
|
||||
/// Parameters describing an image operation.
/// `image` is stored by reference, so the referenced ImageEntry must outlive this object.
struct MetaImage {
|
||||
const ImageEntry& image;
|
||||
std::vector<Node> values;
|
||||
u32 element{};
|
||||
};
|
||||
|
||||
/// Parameters that modify an operation but are not part of any particular operand.
/// A default-constructed Meta holds a MetaArithmetic, the first alternative.
|
||||
using Meta =
|
||||
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
|
||||
|
||||
/// Base for nodes that can carry an optional amend index
class AmendNode {
public:
    /// Returns the stored amend index, or std::nullopt when none is set
    [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
        if (amend_index != amend_null_index) {
            return amend_index;
        }
        return std::nullopt;
    }

    /// Stores an amend index
    void SetAmendIndex(std::size_t index) {
        amend_index = index;
    }

    /// Forgets the stored amend index
    void ClearAmend() {
        amend_index = amend_null_index;
    }

private:
    /// Sentinel meaning "no amend index"
    static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
    std::size_t amend_index{amend_null_index};
};
|
||||
|
||||
/// Holds any kind of operation that can be done in the IR
|
||||
class OperationNode final : public AmendNode {
|
||||
public:
|
||||
explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
|
||||
|
||||
explicit OperationNode(OperationCode code_, Meta meta_)
|
||||
: OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
|
||||
|
||||
explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
|
||||
: OperationNode(code_, Meta{}, std::move(operands_)) {}
|
||||
|
||||
explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
|
||||
: code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
|
||||
|
||||
template <typename... Args>
|
||||
explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
|
||||
: code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
|
||||
|
||||
[[nodiscard]] OperationCode GetCode() const {
|
||||
return code;
|
||||
}
|
||||
|
||||
[[nodiscard]] const Meta& GetMeta() const {
|
||||
return meta;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::size_t GetOperandsCount() const {
|
||||
return operands.size();
|
||||
}
|
||||
|
||||
[[nodiscard]] const Node& operator[](std::size_t operand_index) const {
|
||||
return operands.at(operand_index);
|
||||
}
|
||||
|
||||
private:
|
||||
OperationCode code{};
|
||||
Meta meta{};
|
||||
std::vector<Node> operands;
|
||||
};
|
||||
|
||||
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
|
||||
class ConditionalNode final : public AmendNode {
|
||||
public:
|
||||
explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
|
||||
: condition{std::move(condition_)}, code{std::move(code_)} {}
|
||||
|
||||
[[nodiscard]] const Node& GetCondition() const {
|
||||
return condition;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::vector<Node>& GetCode() const {
|
||||
return code;
|
||||
}
|
||||
|
||||
private:
|
||||
Node condition; ///< Condition to be satisfied
|
||||
std::vector<Node> code; ///< Code to execute
|
||||
};
|
||||
|
||||
/// A general purpose register
|
||||
class GprNode final {
|
||||
public:
|
||||
explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
|
||||
|
||||
[[nodiscard]] constexpr u32 GetIndex() const {
|
||||
return static_cast<u32>(index);
|
||||
}
|
||||
|
||||
private:
|
||||
Tegra::Shader::Register index{};
|
||||
};
|
||||
|
||||
/// A custom variable
|
||||
class CustomVarNode final {
|
||||
public:
|
||||
explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
|
||||
|
||||
[[nodiscard]] constexpr u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 index{};
|
||||
};
|
||||
|
||||
/// A 32-bits value that represents an immediate value
|
||||
class ImmediateNode final {
|
||||
public:
|
||||
explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
|
||||
|
||||
[[nodiscard]] constexpr u32 GetValue() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 value{};
|
||||
};
|
||||
|
||||
/// One of Maxwell's internal flags
|
||||
class InternalFlagNode final {
|
||||
public:
|
||||
explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
|
||||
|
||||
[[nodiscard]] constexpr InternalFlag GetFlag() const {
|
||||
return flag;
|
||||
}
|
||||
|
||||
private:
|
||||
InternalFlag flag{};
|
||||
};
|
||||
|
||||
/// A predicate register, it can be negated without additional nodes
|
||||
class PredicateNode final {
|
||||
public:
|
||||
explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
|
||||
: index{index_}, negated{negated_} {}
|
||||
|
||||
[[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr bool IsNegated() const {
|
||||
return negated;
|
||||
}
|
||||
|
||||
private:
|
||||
Tegra::Shader::Pred index{};
|
||||
bool negated{};
|
||||
};
|
||||
|
||||
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
|
||||
class AbufNode final {
|
||||
public:
|
||||
// Initialize for standard attributes (index is explicit).
|
||||
explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
|
||||
: buffer{std::move(buffer_)}, index{index_}, element{element_} {}
|
||||
|
||||
// Initialize for physical attributes (index is a variable value).
|
||||
explicit AbufNode(Node physical_address_, Node buffer_ = {})
|
||||
: physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
|
||||
|
||||
[[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 GetElement() const {
|
||||
return element;
|
||||
}
|
||||
|
||||
[[nodiscard]] const Node& GetBuffer() const {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsPhysicalBuffer() const {
|
||||
return static_cast<bool>(physical_address);
|
||||
}
|
||||
|
||||
[[nodiscard]] const Node& GetPhysicalAddress() const {
|
||||
return physical_address;
|
||||
}
|
||||
|
||||
private:
|
||||
Node physical_address;
|
||||
Node buffer;
|
||||
Tegra::Shader::Attribute::Index index{};
|
||||
u32 element{};
|
||||
};
|
||||
|
||||
/// Patch memory (used to communicate tessellation stages).
|
||||
class PatchNode final {
|
||||
public:
|
||||
explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
|
||||
|
||||
[[nodiscard]] constexpr u32 GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 offset{};
|
||||
};
|
||||
|
||||
/// Constant buffer node, usually mapped to uniform buffers in GLSL
|
||||
class CbufNode final {
|
||||
public:
|
||||
explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
|
||||
|
||||
[[nodiscard]] u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
[[nodiscard]] const Node& GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 index{};
|
||||
Node offset;
|
||||
};
|
||||
|
||||
/// Local memory node
|
||||
class LmemNode final {
|
||||
public:
|
||||
explicit LmemNode(Node address_) : address{std::move(address_)} {}
|
||||
|
||||
[[nodiscard]] const Node& GetAddress() const {
|
||||
return address;
|
||||
}
|
||||
|
||||
private:
|
||||
Node address;
|
||||
};
|
||||
|
||||
/// Shared memory node
|
||||
class SmemNode final {
|
||||
public:
|
||||
explicit SmemNode(Node address_) : address{std::move(address_)} {}
|
||||
|
||||
[[nodiscard]] const Node& GetAddress() const {
|
||||
return address;
|
||||
}
|
||||
|
||||
private:
|
||||
Node address;
|
||||
};
|
||||
|
||||
/// Global memory node
|
||||
class GmemNode final {
|
||||
public:
|
||||
explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
|
||||
: real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
|
||||
descriptor{descriptor_} {}
|
||||
|
||||
[[nodiscard]] const Node& GetRealAddress() const {
|
||||
return real_address;
|
||||
}
|
||||
|
||||
[[nodiscard]] const Node& GetBaseAddress() const {
|
||||
return base_address;
|
||||
}
|
||||
|
||||
[[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
|
||||
return descriptor;
|
||||
}
|
||||
|
||||
private:
|
||||
Node real_address;
|
||||
Node base_address;
|
||||
GlobalMemoryBase descriptor;
|
||||
};
|
||||
|
||||
/// Free-form commentary attached to the IR; it can be dropped
class CommentNode final {
public:
    explicit CommentNode(std::string text_) : text(std::move(text_)) {}

    /// Text of the comment
    [[nodiscard]] const std::string& GetText() const {
        return text;
    }

private:
    std::string text;
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,115 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// Wraps `code` in a ConditionalNode guarded by `condition`
Node Conditional(Node condition, std::vector<Node> code) {
    ConditionalNode node(std::move(condition), std::move(code));
    return std::make_shared<NodeData>(std::move(node));
}
|
||||
|
||||
/// Creates a CommentNode holding `text`
Node Comment(std::string text) {
    CommentNode node(std::move(text));
    return std::make_shared<NodeData>(std::move(node));
}
|
||||
|
||||
/// Creates an ImmediateNode holding the given 32-bit value
Node Immediate(u32 value) {
    return std::make_shared<NodeData>(ImmediateNode(value));
}
|
||||
|
||||
/// Creates an immediate from a signed value by converting it to u32
Node Immediate(s32 value) {
    const auto as_unsigned = static_cast<u32>(value);
    return Immediate(as_unsigned);
}
|
||||
|
||||
/// Creates an immediate holding the bit pattern of a float
Node Immediate(f32 value) {
    u32 bits{};
    // Copy the bytes instead of casting so the bit pattern is preserved
    std::memcpy(&bits, &value, sizeof(bits));
    return Immediate(bits);
}
|
||||
|
||||
/// Maps a signed operation code to its unsigned counterpart.
/// @param operation_code Signed (or signedness-dependent) operation code
/// @param is_signed When true, operation_code is returned unchanged
/// @return The matching unsigned operation code. Codes without an unsigned counterpart trigger
///         UNREACHABLE_MSG and yield a value-initialized OperationCode.
OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldExtract:
        return OperationCode::UBitfieldExtract;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    // IBitMSB was previously missing even though the enum declares UBitMSB
    case OperationCode::IBitMSB:
        return OperationCode::UBitMSB;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::AtomicIExchange:
        return OperationCode::AtomicUExchange;
    case OperationCode::AtomicIAdd:
        return OperationCode::AtomicUAdd;
    case OperationCode::AtomicIMin:
        return OperationCode::AtomicUMin;
    case OperationCode::AtomicIMax:
        return OperationCode::AtomicUMax;
    case OperationCode::AtomicIAnd:
        return OperationCode::AtomicUAnd;
    case OperationCode::AtomicIOr:
        return OperationCode::AtomicUOr;
    case OperationCode::AtomicIXor:
        return OperationCode::AtomicUXor;
    // The reduce operations were previously missing; they mirror the atomic pairs above
    case OperationCode::ReduceIAdd:
        return OperationCode::ReduceUAdd;
    case OperationCode::ReduceIMin:
        return OperationCode::ReduceUMin;
    case OperationCode::ReduceIMax:
        return OperationCode::ReduceUMax;
    case OperationCode::ReduceIAnd:
        return OperationCode::ReduceUAnd;
    case OperationCode::ReduceIOr:
        return OperationCode::ReduceUOr;
    case OperationCode::ReduceIXor:
        return OperationCode::ReduceUXor;
    case OperationCode::INegate:
        UNREACHABLE_MSG("Can't negate an unsigned integer");
        return {};
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
        return {};
    default:
        UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
        return {};
    }
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,71 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/node.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// Metadata marking an arithmetic operation as precise (MetaArithmetic::precise is true)
|
||||
inline constexpr MetaArithmetic PRECISE = {true};
|
||||
/// Metadata marking an arithmetic operation as not precise, so it can be optimized away
|
||||
inline constexpr MetaArithmetic NO_PRECISE = {false};
|
||||
|
||||
/// Creates a conditional node
|
||||
Node Conditional(Node condition, std::vector<Node> code);
|
||||
|
||||
/// Creates a commentary node
|
||||
Node Comment(std::string text);
|
||||
|
||||
/// Creates an u32 immediate
|
||||
Node Immediate(u32 value);
|
||||
|
||||
/// Creates a s32 immediate
|
||||
Node Immediate(s32 value);
|
||||
|
||||
/// Creates a f32 immediate
|
||||
Node Immediate(f32 value);
|
||||
|
||||
/// Converts a signed operation code to an unsigned operation code
|
||||
OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
|
||||
|
||||
template <typename T, typename... Args>
|
||||
Node MakeNode(Args&&... args) {
|
||||
static_assert(std::is_convertible_v<T, NodeData>);
|
||||
return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
TrackSampler MakeTrackSampler(Args&&... args) {
|
||||
static_assert(std::is_convertible_v<T, TrackSamplerData>);
|
||||
return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
Node Operation(OperationCode code, Args&&... args) {
|
||||
if constexpr (sizeof...(args) == 0) {
|
||||
return MakeNode<OperationNode>(code);
|
||||
} else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
|
||||
Meta>) {
|
||||
return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
|
||||
} else {
|
||||
return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
|
||||
return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,181 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Engines::ConstBufferEngineInterface;
|
||||
using Tegra::Engines::SamplerDescriptor;
|
||||
using Tegra::Engines::ShaderType;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Copies the Maxwell3D registers tracked by GraphicsInfo out of the engine.
/// Compute stages get a default-constructed GraphicsInfo.
GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
    GraphicsInfo info{};
    if (shader_stage == ShaderType::Compute) {
        return info;
    }

    // Reference form of dynamic_cast: a mismatching engine type throws instead of yielding null
    auto& maxwell3d = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
    auto& regs = maxwell3d.regs;

    info.tfb_layouts = regs.tfb_layouts;
    info.tfb_varying_locs = regs.tfb_varying_locs;
    info.primitive_topology = regs.draw.topology;
    info.tessellation_primitive = regs.tess_mode.prim;
    info.tessellation_spacing = regs.tess_mode.spacing;
    info.tfb_enabled = regs.tfb_enabled != 0;
    info.tessellation_clockwise = regs.tess_mode.cw.Value() != 0;
    return info;
}
|
||||
|
||||
/// Copies the launch parameters tracked by ComputeInfo out of the compute engine.
/// Non-compute stages get a default-constructed ComputeInfo.
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
    ComputeInfo info{};
    if (shader_stage != ShaderType::Compute) {
        return info;
    }

    // Reference form of dynamic_cast: a mismatching engine type throws instead of yielding null
    auto& kepler = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
    const auto& desc = kepler.launch_description;

    info.workgroup_size = {desc.block_dim_x, desc.block_dim_y, desc.block_dim_z};
    info.shared_memory_size_in_words = desc.shared_alloc;
    info.local_memory_size_in_words = desc.local_pos_alloc;
    return info;
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Builds a registry from stored information only; no engine is attached, so `engine` keeps its
/// default null value.
Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
    : stage{shader_stage},
      stored_guest_driver_profile{info.guest_driver_profile},
      bound_buffer{info.bound_buffer},
      graphics_info{info.graphics},
      compute_info{info.compute} {}
|
||||
|
||||
/// Builds a registry attached to a live engine, capturing its bound buffer and the graphics or
/// compute state matching the given stage.
Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
    : stage{shader_stage},
      engine{&engine_},
      bound_buffer{engine_.GetBoundBuffer()},
      graphics_info{MakeGraphicsInfo(shader_stage, engine_)},
      compute_info{MakeComputeInfo(shader_stage, engine_)} {}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
Registry::~Registry() = default;
|
||||
|
||||
/// Returns the cached const buffer word at (buffer, offset). On a miss the value is read from the
/// engine and cached; without an engine a miss yields std::nullopt.
std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
    const std::pair<u32, u32> location{buffer, offset};
    if (const auto cached = keys.find(location); cached != keys.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset);
    keys.emplace(location, fetched);
    return fetched;
}
|
||||
|
||||
/// Returns the cached bound sampler at the given offset. On a miss the descriptor is read from
/// the engine and cached; without an engine a miss yields std::nullopt.
std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
    if (const auto cached = bound_samplers.find(offset); cached != bound_samplers.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    const SamplerDescriptor descriptor = engine->AccessBoundSampler(stage, offset);
    bound_samplers.emplace(offset, descriptor);
    return descriptor;
}
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
|
||||
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = buffers;
|
||||
key.offsets = offsets;
|
||||
const auto iter = separate_samplers.find(key);
|
||||
if (iter != separate_samplers.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
if (!engine) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
|
||||
const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
|
||||
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
|
||||
separate_samplers.emplace(key, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
/// Returns the cached bindless sampler at (buffer, offset). On a miss the descriptor is read from
/// the engine and cached; without an engine a miss yields std::nullopt.
std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
    const std::pair<u32, u32> location{buffer, offset};
    if (const auto cached = bindless_samplers.find(location);
        cached != bindless_samplers.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    const SamplerDescriptor descriptor = engine->AccessBindlessSampler(stage, buffer, offset);
    bindless_samplers.emplace(location, descriptor);
    return descriptor;
}
|
||||
|
||||
/// Stores (or overwrites) the const buffer word cached for (buffer, offset)
void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
    const std::pair<u32, u32> location{buffer, offset};
    keys.insert_or_assign(location, value);
}
|
||||
|
||||
/// Stores (or overwrites) the bound sampler descriptor cached for the given offset
void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
    const u32 location = offset;
    bound_samplers.insert_or_assign(location, sampler);
}
|
||||
|
||||
/// Stores (or overwrites) the bindless sampler descriptor cached for (buffer, offset)
void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
    const std::pair<u32, u32> location{buffer, offset};
    bindless_samplers.insert_or_assign(location, sampler);
}
|
||||
|
||||
bool Registry::IsConsistent() const {
|
||||
if (!engine) {
|
||||
return true;
|
||||
}
|
||||
return std::all_of(keys.begin(), keys.end(),
|
||||
[this](const auto& pair) {
|
||||
const auto [cbuf, offset] = pair.first;
|
||||
const auto value = pair.second;
|
||||
return value == engine->AccessConstBuffer32(stage, cbuf, offset);
|
||||
}) &&
|
||||
std::all_of(bound_samplers.begin(), bound_samplers.end(),
|
||||
[this](const auto& sampler) {
|
||||
const auto [key, value] = sampler;
|
||||
return value == engine->AccessBoundSampler(stage, key);
|
||||
}) &&
|
||||
std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
|
||||
[this](const auto& sampler) {
|
||||
const auto [cbuf, offset] = sampler.first;
|
||||
const auto value = sampler.second;
|
||||
return value == engine->AccessBindlessSampler(stage, cbuf, offset);
|
||||
});
|
||||
}
|
||||
|
||||
bool Registry::HasEqualKeys(const Registry& rhs) const {
|
||||
return std::tie(keys, bound_samplers, bindless_samplers) ==
|
||||
std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
|
||||
}
|
||||
|
||||
/// Returns the stored graphics state; asserts that this registry is not for a compute stage
const GraphicsInfo& Registry::GetGraphicsInfo() const {
    ASSERT(stage != ShaderType::Compute);
    return graphics_info;
}
|
||||
|
||||
/// Returns the stored compute state; asserts that this registry is for a compute stage
const ComputeInfo& Registry::GetComputeInfo() const {
    ASSERT(stage == ShaderType::Compute);
    return compute_info;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,172 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
struct SeparateSamplerKey {
|
||||
std::pair<u32, u32> buffers;
|
||||
std::pair<u32, u32> offsets;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<VideoCommon::Shader::SeparateSamplerKey> {
|
||||
std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
|
||||
return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
|
||||
key.offsets.second);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
|
||||
bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
|
||||
const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
|
||||
return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
|
||||
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
|
||||
using SeparateSamplerMap =
|
||||
std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
|
||||
using BindlessSamplerMap =
|
||||
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
||||
|
||||
struct GraphicsInfo {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
|
||||
tfb_layouts{};
|
||||
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
|
||||
Maxwell::PrimitiveTopology primitive_topology{};
|
||||
Maxwell::TessellationPrimitive tessellation_primitive{};
|
||||
Maxwell::TessellationSpacing tessellation_spacing{};
|
||||
bool tfb_enabled = false;
|
||||
bool tessellation_clockwise = false;
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
|
||||
std::is_standard_layout_v<GraphicsInfo>);
|
||||
|
||||
struct ComputeInfo {
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
u32 shared_memory_size_in_words = 0;
|
||||
u32 local_memory_size_in_words = 0;
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
|
||||
|
||||
struct SerializedRegistryInfo {
|
||||
VideoCore::GuestDriverProfile guest_driver_profile;
|
||||
u32 bound_buffer = 0;
|
||||
GraphicsInfo graphics;
|
||||
ComputeInfo compute;
|
||||
};
|
||||
|
||||
/**
|
||||
* The Registry is a class use to interface the 3D and compute engines with the shader compiler.
|
||||
* With it, the shader can obtain required data from GPU state and store it for disk shader
|
||||
* compilation.
|
||||
*/
|
||||
class Registry {
|
||||
public:
|
||||
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
|
||||
|
||||
explicit Registry(Tegra::Engines::ShaderType shader_stage,
|
||||
Tegra::Engines::ConstBufferEngineInterface& engine_);
|
||||
|
||||
~Registry();
|
||||
|
||||
/// Retrieves a key from the registry, if it's registered, it will give the registered value, if
|
||||
/// not it will obtain it from maxwell3d and register it.
|
||||
std::optional<u32> ObtainKey(u32 buffer, u32 offset);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
|
||||
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
||||
|
||||
/// Inserts a key.
|
||||
void InsertKey(u32 buffer, u32 offset, u32 value);
|
||||
|
||||
/// Inserts a bound sampler key.
|
||||
void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
|
||||
|
||||
/// Inserts a bindless sampler key.
|
||||
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
|
||||
|
||||
/// Checks keys and samplers against engine's current const buffers.
|
||||
/// Returns true if they are the same value, false otherwise.
|
||||
bool IsConsistent() const;
|
||||
|
||||
/// Returns true if the keys are equal to the other ones in the registry.
|
||||
bool HasEqualKeys(const Registry& rhs) const;
|
||||
|
||||
/// Returns graphics information from this shader
|
||||
const GraphicsInfo& GetGraphicsInfo() const;
|
||||
|
||||
/// Returns compute information from this shader
|
||||
const ComputeInfo& GetComputeInfo() const;
|
||||
|
||||
/// Gives an getter to the const buffer keys in the database.
|
||||
const KeyMap& GetKeys() const {
|
||||
return keys;
|
||||
}
|
||||
|
||||
/// Gets samplers database.
|
||||
const BoundSamplerMap& GetBoundSamplers() const {
|
||||
return bound_samplers;
|
||||
}
|
||||
|
||||
/// Gets bindless samplers database.
|
||||
const BindlessSamplerMap& GetBindlessSamplers() const {
|
||||
return bindless_samplers;
|
||||
}
|
||||
|
||||
/// Gets bound buffer used on this shader
|
||||
u32 GetBoundBuffer() const {
|
||||
return bound_buffer;
|
||||
}
|
||||
|
||||
/// Obtains access to the guest driver's profile.
|
||||
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
|
||||
return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
|
||||
}
|
||||
|
||||
private:
|
||||
const Tegra::Engines::ShaderType stage;
|
||||
VideoCore::GuestDriverProfile stored_guest_driver_profile;
|
||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||
KeyMap keys;
|
||||
BoundSamplerMap bound_samplers;
|
||||
SeparateSamplerMap separate_samplers;
|
||||
BindlessSamplerMap bindless_samplers;
|
||||
u32 bound_buffer;
|
||||
GraphicsInfo graphics_info;
|
||||
ComputeInfo compute_info;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,464 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/node.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::IpaMode;
|
||||
using Tegra::Shader::Pred;
|
||||
using Tegra::Shader::PredCondition;
|
||||
using Tegra::Shader::PredOperation;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
/// Stores the program, entry offset, settings and registry, then runs Decode() followed by
/// PostDecode().
ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
                   Registry& registry_)
    : program_code{program_code_},
      main_offset{main_offset_},
      settings{settings_},
      registry{registry_} {
    Decode();
    PostDecode();
}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
ShaderIR::~ShaderIR() = default;
|
||||
|
||||
/// Creates a general purpose register node; every register except Register::ZeroIndex is
/// recorded in used_registers.
Node ShaderIR::GetRegister(Register reg) {
    const bool is_zero_register = !(reg != Register::ZeroIndex);
    if (!is_zero_register) {
        used_registers.insert(static_cast<u32>(reg));
    }
    return MakeNode<GprNode>(reg);
}
|
||||
|
||||
/// Creates a node referring to the custom variable with the given id
Node ShaderIR::GetCustomVariable(u32 id) {
    Node variable = MakeNode<CustomVarNode>(id);
    return variable;
}
|
||||
|
||||
/// Creates an immediate node from the instruction's GetImm20_19() value
Node ShaderIR::GetImmediate19(Instruction instr) {
    const auto imm = instr.alu.GetImm20_19();
    return Immediate(imm);
}
|
||||
|
||||
/// Creates an immediate node from the instruction's GetImm20_32() value
Node ShaderIR::GetImmediate32(Instruction instr) {
    const auto imm = instr.alu.GetImm20_32();
    return Immediate(imm);
}
|
||||
|
||||
/// Creates a const buffer node with an immediate offset and marks that offset as used in the
/// buffer's usage entry (created on first use).
Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
    const u32 index = static_cast<u32>(index_);
    const u32 offset = static_cast<u32>(offset_);

    auto& cbuf_usage = used_cbufs.try_emplace(index).first->second;
    cbuf_usage.MarkAsUsed(offset);

    return MakeNode<CbufNode>(index, Immediate(offset));
}
|
||||
|
||||
/// Creates a const buffer node whose offset is `node + offset_`, marking the buffer as
/// indirectly used.
Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const u32 index = static_cast<u32>(index_);
    const u32 offset = static_cast<u32>(offset_);

    auto& cbuf_usage = used_cbufs.try_emplace(index).first->second;
    cbuf_usage.MarkAsUsedIndirect();

    // Attempt to inline constant buffer without a variable offset. This is done to allow
    // tracking LDC calls.
    const auto* const gpr = std::get_if<GprNode>(&*node);
    const bool uses_zero_register = gpr != nullptr && gpr->GetIndex() == Register::ZeroIndex;

    Node final_offset;
    if (uses_zero_register) {
        final_offset = Immediate(offset);
    } else {
        final_offset =
            Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
    }
    return MakeNode<CbufNode>(index, std::move(final_offset));
}
|
||||
|
||||
/// Creates a predicate node. Predicates other than Pred::UnusedIndex and Pred::NeverExecute are
/// recorded in used_predicates.
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const Pred pred = static_cast<Pred>(pred_);
    const bool is_special = pred == Pred::UnusedIndex || pred == Pred::NeverExecute;
    if (!is_special) {
        used_predicates.insert(pred);
    }
    return MakeNode<PredicateNode>(pred, negated);
}
|
||||
|
||||
/// Creates a constant predicate: Pred::UnusedIndex for true, Pred::NeverExecute for false
Node ShaderIR::GetPredicate(bool immediate) {
    const Pred constant_pred = immediate ? Pred::UnusedIndex : Pred::NeverExecute;
    return GetPredicate(static_cast<u64>(constant_pred));
}
|
||||
|
||||
/// Creates an input attribute node, recording the attribute usage and the input index
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
    MarkAttributeUsage(index, element);
    used_input_attributes.emplace(index);

    const u32 element_index = static_cast<u32>(element);
    return MakeNode<AbufNode>(index, element_index, std::move(buffer));
}
|
||||
|
||||
/// Creates an attribute node addressed through the given register and flags the shader as using
/// physical attributes.
Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
    uses_physical_attributes = true;
    // `buffer` is a by-value sink parameter: move it like the sibling attribute getters do
    // instead of copying the shared pointer.
    return MakeNode<AbufNode>(GetRegister(physical_address), std::move(buffer));
}
|
||||
|
||||
/// Creates an output attribute node, recording the attribute usage and the output index
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
    MarkAttributeUsage(index, element);
    used_output_attributes.insert(index);

    const u32 element_index = static_cast<u32>(element);
    return MakeNode<AbufNode>(index, element_index, std::move(buffer));
}
|
||||
|
||||
/// Creates an internal flag node, wrapped in a LogicalNegate operation when `negated` is set
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
    Node flag_node = MakeNode<InternalFlagNode>(flag);
    if (!negated) {
        return flag_node;
    }
    return Operation(OperationCode::LogicalNegate, std::move(flag_node));
}
|
||||
|
||||
/// Creates a local memory node for the given address node
Node ShaderIR::GetLocalMemory(Node address) {
    Node lmem = MakeNode<LmemNode>(std::move(address));
    return lmem;
}
|
||||
|
||||
/// Creates a shared memory node for the given address node
Node ShaderIR::GetSharedMemory(Node address) {
    Node smem = MakeNode<SmemNode>(std::move(address));
    return smem;
}
|
||||
|
||||
/// Returns the register node used for temporary `id`, located at Register::ZeroIndex + 1 + id
Node ShaderIR::GetTemporary(u32 id) {
    const auto temporary_index = Register::ZeroIndex + 1 + id;
    return GetRegister(temporary_index);
}
|
||||
|
||||
/// Optionally wraps a float operand in FAbsolute and then FNegate
Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
    Node result = std::move(value);
    if (absolute) {
        result = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(result));
    }
    if (negate) {
        result = Operation(OperationCode::FNegate, NO_PRECISE, std::move(result));
    }
    return result;
}
|
||||
|
||||
/// Clamps a float value between positive zero and one when `saturate` is set
Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
    if (saturate) {
        Node lower_bound = Immediate(std::copysignf(0, 1));
        Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value),
                         std::move(lower_bound), std::move(upper_bound));
    }
    return value;
}
|
||||
|
||||
/// Narrows an integer to the given register size by shifting left and then right by the same
/// amount (24 bits for bytes, 16 bits for shorts). Words are returned untouched.
Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
    // Shifts `input` left and then right by `shift` bits
    const auto shift_round_trip = [is_signed](Node input, int shift) {
        Node shifted = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
                                       std::move(input), Immediate(shift));
        return SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
                               std::move(shifted), Immediate(shift));
    };

    switch (size) {
    case Register::Size::Byte:
        return shift_round_trip(std::move(value), 24);
    case Register::Size::Short:
        return shift_round_trip(std::move(value), 16);
    case Register::Size::Word:
        // Default - do nothing
        return value;
    default:
        UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
        return value;
    }
}
|
||||
|
||||
/// Optionally wraps a signed integer operand in IAbsolute and then INegate.
/// Unsigned operands are returned unchanged.
Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
    // Absolute or negate on an unsigned is pointless
    if (is_signed) {
        if (absolute) {
            value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
        }
        if (negate) {
            value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
        }
    }
    return value;
}
|
||||
|
||||
/// Creates an immediate from the packed half immediates of the instruction, applying HNegate
/// with the instruction's per-half negate bits when `has_negation` is set.
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    Node packed = Immediate(instr.half_imm.PackImmediates());
    if (has_negation) {
        Node negate_first = GetPredicate(instr.half_imm.first_negate != 0);
        Node negate_second = GetPredicate(instr.half_imm.second_negate != 0);
        return Operation(OperationCode::HNegate, NO_PRECISE, std::move(packed),
                         std::move(negate_first), std::move(negate_second));
    }
    return packed;
}
|
||||
|
||||
/// Creates an HUnpack operation on `value`, passing the half type as the operation's metadata
Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
    Node unpacked = Operation(OperationCode::HUnpack, type, std::move(value));
    return unpacked;
}
|
||||
|
||||
/// Combines `src` into `dest` according to the requested half merge mode
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    using Tegra::Shader::HalfMerge;

    if (merge == HalfMerge::H0_H1) {
        return src;
    }
    if (merge == HalfMerge::F32) {
        return Operation(OperationCode::HMergeF32, std::move(src));
    }
    if (merge == HalfMerge::Mrg_H0) {
        return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
    }
    if (merge == HalfMerge::Mrg_H1) {
        return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
    }
    UNREACHABLE();
    return src;
}
|
||||
|
||||
/// Optionally wraps a half operand in HAbsolute and then HNegate (with both negate predicates
/// set to true)
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    Node result = std::move(value);
    if (absolute) {
        result = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(result));
    }
    if (negate) {
        result = Operation(OperationCode::HNegate, NO_PRECISE, std::move(result),
                           GetPredicate(true), GetPredicate(true));
    }
    return result;
}
|
||||
|
||||
/// Clamps a half float value between positive zero and one when `saturate` is set
Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
    if (saturate) {
        Node lower_bound = Immediate(std::copysignf(0, 1));
        Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value),
                         std::move(lower_bound), std::move(upper_bound));
    }
    return value;
}
|
||||
|
||||
/// Creates the float comparison node matching `condition`.
/// PredCondition::T and PredCondition::F yield constant predicates; every other condition is
/// looked up in a table indexed by the condition's numeric value.
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    if (condition == PredCondition::T) {
        return GetPredicate(true);
    } else if (condition == PredCondition::F) {
        return GetPredicate(false);
    }

    static constexpr std::array comparison_table{
        OperationCode(0),                         // Placeholder; F is handled above
        OperationCode::LogicalFOrdLessThan,       // LT
        OperationCode::LogicalFOrdEqual,          // EQ
        OperationCode::LogicalFOrdLessEqual,      // LE
        OperationCode::LogicalFOrdGreaterThan,    // GT
        OperationCode::LogicalFOrdNotEqual,       // NE
        OperationCode::LogicalFOrdGreaterEqual,   // GE
        OperationCode::LogicalFOrdered,           // NUM
        OperationCode::LogicalFUnordered,         // NAN
        OperationCode::LogicalFUnordLessThan,     // LTU
        OperationCode::LogicalFUnordEqual,        // EQU
        OperationCode::LogicalFUnordLessEqual,    // LEU
        OperationCode::LogicalFUnordGreaterThan,  // GTU
        OperationCode::LogicalFUnordNotEqual,     // NEU
        OperationCode::LogicalFUnordGreaterEqual, // GEU
    };
    const std::size_t index = static_cast<std::size_t>(condition);
    ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
    if (index >= comparison_table.size()) {
        // NOTE(review): other failure paths in this file keep running after the assertion
        // macros, so guard the table access instead of reading out of bounds. The fallback
        // mirrors the never-execute predicate returned for unimplemented condition codes.
        return GetPredicate(false);
    }

    // The operands are by-value sink parameters; move them instead of copying shared pointers
    return Operation(comparison_table[index], std::move(op_a), std::move(op_b));
}
|
||||
|
||||
/// Creates the integer comparison node matching `condition`, converted to its unsigned variant
/// when `is_signed` is false.
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                             Node op_b) {
    static constexpr std::array comparison_table{
        std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
        std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
        std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
        std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
        std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
        std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
    };

    const auto comparison =
        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
                     [condition](const auto& entry) { return condition == entry.first; });
    if (comparison == comparison_table.cend()) {
        // Dereferencing the end iterator is undefined behavior, so report the missing
        // condition and return the same never-execute predicate this file uses for
        // unimplemented condition codes.
        UNIMPLEMENTED_MSG("Unknown predicate comparison operation");
        return GetPredicate(false);
    }

    return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
                           std::move(op_b));
}
|
||||
|
||||
/// Creates the half float comparison node matching `condition`
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
                                          Node op_b) {
    static constexpr std::array comparison_table{
        std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
        std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
        std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
        std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
        std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
        std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
        std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
        std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
        std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
        std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
        std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
    };

    const auto comparison =
        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
                     [condition](const auto& entry) { return condition == entry.first; });
    if (comparison == comparison_table.cend()) {
        // Dereferencing the end iterator is undefined behavior, so report the missing
        // condition and return the same never-execute predicate this file uses for
        // unimplemented condition codes.
        UNIMPLEMENTED_MSG("Unknown predicate comparison operation");
        return GetPredicate(false);
    }

    return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
}
|
||||
|
||||
/// Maps a predicate operation to its logical operation code, using the operation's numeric value
/// as the table index. Out-of-range values are reported and yield a value-initialized code.
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
    static constexpr std::array operation_table{
        OperationCode::LogicalAnd,
        OperationCode::LogicalOr,
        OperationCode::LogicalXor,
    };

    const std::size_t table_index = static_cast<std::size_t>(operation);
    if (table_index < operation_table.size()) {
        return operation_table[table_index];
    }

    UNIMPLEMENTED_MSG("Unknown predicate operation.");
    return {};
}
|
||||
|
||||
/// Creates the node evaluating a condition code. Only NEU is implemented (the negated zero
/// flag); every other code is reported and yields a never-execute predicate.
Node ShaderIR::GetConditionCode(ConditionCode cc) const {
    if (cc == ConditionCode::NEU) {
        return GetInternalFlag(InternalFlag::Zero, true);
    }

    if (cc == ConditionCode::FCSM_TR) {
        UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
    } else {
        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
    }
    return MakeNode<PredicateNode>(Pred::NeverExecute, false);
}
|
||||
|
||||
/// Appends an Assign operation that writes `src` into register `dest`
void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
    Node assignment = Operation(OperationCode::Assign, GetRegister(dest), std::move(src));
    bb.push_back(std::move(assignment));
}
|
||||
|
||||
/// Appends a LogicalAssign operation that writes `src` into predicate `dest`
void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
    Node assignment = Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src));
    bb.push_back(std::move(assignment));
}
|
||||
|
||||
/// Appends a LogicalAssign operation that writes `value` into the given internal flag
void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
    Node assignment =
        Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value));
    bb.push_back(std::move(assignment));
}
|
||||
|
||||
/// Appends an Assign operation that writes `value` to local memory at `address`
void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
    Node target = GetLocalMemory(std::move(address));
    bb.push_back(Operation(OperationCode::Assign, std::move(target), std::move(value)));
}
|
||||
|
||||
/// Appends an Assign operation that writes `value` to shared memory at `address`
void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
    Node target = GetSharedMemory(std::move(address));
    bb.push_back(Operation(OperationCode::Assign, std::move(target), std::move(value)));
}
|
||||
|
||||
/// Writes `value` into the register used for temporary `id` (Register::ZeroIndex + 1 + id)
void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
    const auto temporary_index = Register::ZeroIndex + 1 + id;
    SetRegister(bb, temporary_index, std::move(value));
}
|
||||
|
||||
/// When `sets_cc` is set, assigns the Zero flag from `value == 0.0f` and logs that the remaining
/// condition codes are not implemented.
void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        Node is_zero =
            Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
        SetInternalFlag(bb, InternalFlag::Zero, std::move(is_zero));
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
|
||||
|
||||
/// When `sets_cc` is set, assigns the Zero flag from `value == 0` and logs that the remaining
/// condition codes are not implemented.
void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        Node is_zero = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
        SetInternalFlag(bb, InternalFlag::Zero, std::move(is_zero));
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
|
||||
|
||||
/// Creates a UBitfieldExtract operation taking `bits` bits of `value` starting at `offset`
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
    Node extracted = Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
                               Immediate(offset), Immediate(bits));
    return extracted;
}
|
||||
|
||||
/// Creates a UBitfieldInsert operation placing `bits` bits of `insert` into `base` at `offset`
Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
    // Both nodes are by-value sink parameters; move them as BitfieldExtract does instead of
    // copying the shared pointers.
    return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, std::move(base),
                     std::move(insert), Immediate(offset), Immediate(bits));
}
|
||||
|
||||
/// Updates the usage flags implied by an access to element `element` of attribute `index`
void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
    switch (index) {
    case Attribute::Index::LayerViewportPointSize:
        if (element == 0) {
            UNIMPLEMENTED();
        } else if (element == 1) {
            uses_layer = true;
        } else if (element == 2) {
            uses_viewport_index = true;
        } else if (element == 3) {
            uses_point_size = true;
        }
        return;
    case Attribute::Index::TessCoordInstanceIDVertexID:
        if (element == 2) {
            uses_instance_id = true;
        } else if (element == 3) {
            uses_vertex_id = true;
        }
        return;
    case Attribute::Index::ClipDistances0123:
    case Attribute::Index::ClipDistances4567: {
        // The second clip distance attribute covers entries 4 to 7
        const u64 base = index == Attribute::Index::ClipDistances4567 ? 4 : 0;
        used_clip_distances.at(base + element) = true;
        return;
    }
    case Attribute::Index::FrontColor:
    case Attribute::Index::FrontSecondaryColor:
    case Attribute::Index::BackColor:
    case Attribute::Index::BackSecondaryColor:
        uses_legacy_varyings = true;
        return;
    default: {
        const bool is_tex_coord =
            index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7;
        if (is_tex_coord) {
            uses_legacy_varyings = true;
        }
        return;
    }
    }
}
|
||||
|
||||
/// Appends new_amend to the amend code list and returns the index it was stored at.
std::size_t ShaderIR::DeclareAmend(Node new_amend) {
    const std::size_t index = amend_code.size();
    amend_code.emplace_back(std::move(new_amend));
    return index;
}
|
||||
|
||||
/// Returns the current custom variable count as a new id and then increments the count.
u32 ShaderIR::NewCustomVariable() {
    const u32 id = num_custom_variables;
    ++num_custom_variables;
    return id;
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,479 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_header.h"
|
||||
#include "video_core/shader/ast.h"
|
||||
#include "video_core/shader/compiler_settings.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/node.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
struct ShaderBlock;
|
||||
|
||||
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
|
||||
|
||||
struct ConstBuffer {
|
||||
constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
|
||||
: max_offset{max_offset_}, is_indirect{is_indirect_} {}
|
||||
|
||||
constexpr ConstBuffer() = default;
|
||||
|
||||
void MarkAsUsed(u64 offset) {
|
||||
max_offset = std::max(max_offset, static_cast<u32>(offset));
|
||||
}
|
||||
|
||||
void MarkAsUsedIndirect() {
|
||||
is_indirect = true;
|
||||
}
|
||||
|
||||
bool IsIndirect() const {
|
||||
return is_indirect;
|
||||
}
|
||||
|
||||
u32 GetSize() const {
|
||||
return max_offset + static_cast<u32>(sizeof(float));
|
||||
}
|
||||
|
||||
u32 GetMaxOffset() const {
|
||||
return max_offset;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 max_offset = 0;
|
||||
bool is_indirect = false;
|
||||
};
|
||||
|
||||
/// Read/write usage flags of a global memory region; both default to false.
struct GlobalMemoryUsage {
    bool is_read = false;
    bool is_written = false;
};
|
||||
|
||||
class ShaderIR final {
|
||||
public:
|
||||
explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
|
||||
CompilerSettings settings_, Registry& registry_);
|
||||
~ShaderIR();
|
||||
|
||||
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
||||
return basic_blocks;
|
||||
}
|
||||
|
||||
const std::set<u32>& GetRegisters() const {
|
||||
return used_registers;
|
||||
}
|
||||
|
||||
const std::set<Tegra::Shader::Pred>& GetPredicates() const {
|
||||
return used_predicates;
|
||||
}
|
||||
|
||||
const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
|
||||
return used_input_attributes;
|
||||
}
|
||||
|
||||
const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
|
||||
return used_output_attributes;
|
||||
}
|
||||
|
||||
const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
|
||||
return used_cbufs;
|
||||
}
|
||||
|
||||
const std::list<SamplerEntry>& GetSamplers() const {
|
||||
return used_samplers;
|
||||
}
|
||||
|
||||
const std::list<ImageEntry>& GetImages() const {
|
||||
return used_images;
|
||||
}
|
||||
|
||||
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
|
||||
const {
|
||||
return used_clip_distances;
|
||||
}
|
||||
|
||||
const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
|
||||
return used_global_memory;
|
||||
}
|
||||
|
||||
std::size_t GetLength() const {
|
||||
return static_cast<std::size_t>(coverage_end * sizeof(u64));
|
||||
}
|
||||
|
||||
bool UsesLayer() const {
|
||||
return uses_layer;
|
||||
}
|
||||
|
||||
bool UsesViewportIndex() const {
|
||||
return uses_viewport_index;
|
||||
}
|
||||
|
||||
bool UsesPointSize() const {
|
||||
return uses_point_size;
|
||||
}
|
||||
|
||||
bool UsesInstanceId() const {
|
||||
return uses_instance_id;
|
||||
}
|
||||
|
||||
bool UsesVertexId() const {
|
||||
return uses_vertex_id;
|
||||
}
|
||||
|
||||
bool UsesLegacyVaryings() const {
|
||||
return uses_legacy_varyings;
|
||||
}
|
||||
|
||||
bool UsesYNegate() const {
|
||||
return uses_y_negate;
|
||||
}
|
||||
|
||||
bool UsesWarps() const {
|
||||
return uses_warps;
|
||||
}
|
||||
|
||||
bool HasPhysicalAttributes() const {
|
||||
return uses_physical_attributes;
|
||||
}
|
||||
|
||||
const Tegra::Shader::Header& GetHeader() const {
|
||||
return header;
|
||||
}
|
||||
|
||||
bool IsFlowStackDisabled() const {
|
||||
return disable_flow_stack;
|
||||
}
|
||||
|
||||
bool IsDecompiled() const {
|
||||
return decompiled;
|
||||
}
|
||||
|
||||
const ASTManager& GetASTManager() const {
|
||||
return program_manager;
|
||||
}
|
||||
|
||||
ASTNode GetASTProgram() const {
|
||||
return program_manager.GetProgram();
|
||||
}
|
||||
|
||||
u32 GetASTNumVariables() const {
|
||||
return program_manager.GetVariables();
|
||||
}
|
||||
|
||||
u32 ConvertAddressToNvidiaSpace(u32 address) const {
|
||||
return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
|
||||
}
|
||||
|
||||
/// Returns a condition code evaluated from internal flags
|
||||
Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
|
||||
|
||||
const Node& GetAmendNode(std::size_t index) const {
|
||||
return amend_code[index];
|
||||
}
|
||||
|
||||
u32 GetNumCustomVariables() const {
|
||||
return num_custom_variables;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ASTDecoder;
|
||||
|
||||
struct SamplerInfo {
|
||||
std::optional<Tegra::Shader::TextureType> type;
|
||||
std::optional<bool> is_array;
|
||||
std::optional<bool> is_shadow;
|
||||
std::optional<bool> is_buffer;
|
||||
|
||||
constexpr bool IsComplete() const noexcept {
|
||||
return type && is_array && is_shadow && is_buffer;
|
||||
}
|
||||
};
|
||||
|
||||
void Decode();
|
||||
void PostDecode();
|
||||
|
||||
NodeBlock DecodeRange(u32 begin, u32 end);
|
||||
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
||||
void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
|
||||
|
||||
/**
|
||||
* Decodes a single instruction from Tegra to IR.
|
||||
* @param bb Basic block where the nodes will be written to.
|
||||
* @param pc Program counter. Offset to decode.
|
||||
* @return Next address to decode.
|
||||
*/
|
||||
u32 DecodeInstr(NodeBlock& bb, u32 pc);
|
||||
|
||||
u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeBfe(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeBfi(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeShift(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFfma(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeConversion(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeWarp(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeMemory(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeTexture(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeImage(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
|
||||
u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeVideo(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeXmad(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeOther(NodeBlock& bb, u32 pc);
|
||||
|
||||
/// Generates a node for a passed register.
|
||||
Node GetRegister(Tegra::Shader::Register reg);
|
||||
/// Generates a node for a custom variable
|
||||
Node GetCustomVariable(u32 id);
|
||||
/// Generates a node representing a 19-bit immediate value
|
||||
Node GetImmediate19(Tegra::Shader::Instruction instr);
|
||||
/// Generates a node representing a 32-bit immediate value
|
||||
Node GetImmediate32(Tegra::Shader::Instruction instr);
|
||||
/// Generates a node representing a constant buffer
|
||||
Node GetConstBuffer(u64 index, u64 offset);
|
||||
/// Generates a node representing a constant buffer with a variadic offset
|
||||
Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
|
||||
/// Generates a node for a passed predicate. It can be optionally negated
|
||||
Node GetPredicate(u64 pred, bool negated = false);
|
||||
/// Generates a predicate node for an immediate true or false value
|
||||
Node GetPredicate(bool immediate);
|
||||
/// Generates a node representing an input attribute. Keeps track of used attributes.
|
||||
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
|
||||
/// Generates a node representing a physical input attribute.
|
||||
Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
|
||||
/// Generates a node representing an output attribute. Keeps track of used attributes.
|
||||
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
|
||||
/// Generates a node representing an internal flag
|
||||
Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
|
||||
/// Generates a node representing a local memory address
|
||||
Node GetLocalMemory(Node address);
|
||||
/// Generates a node representing a shared memory address
|
||||
Node GetSharedMemory(Node address);
|
||||
/// Generates a temporary, internally it uses a post-RZ register
|
||||
Node GetTemporary(u32 id);
|
||||
|
||||
/// Sets a register. src value must be a number-evaluated node.
|
||||
void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
|
||||
/// Sets a predicate. src value must be a bool-evaluated node
|
||||
void SetPredicate(NodeBlock& bb, u64 dest, Node src);
|
||||
/// Sets an internal flag. src value must be a bool-evaluated node
|
||||
void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
|
||||
/// Sets a local memory address with a value.
|
||||
void SetLocalMemory(NodeBlock& bb, Node address, Node value);
|
||||
/// Sets a shared memory address with a value.
|
||||
void SetSharedMemory(NodeBlock& bb, Node address, Node value);
|
||||
/// Sets a temporary. Internally it uses a post-RZ register
|
||||
void SetTemporary(NodeBlock& bb, u32 id, Node value);
|
||||
|
||||
/// Sets internal flags from a float
|
||||
void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
|
||||
/// Sets internal flags from an integer
|
||||
void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
|
||||
|
||||
/// Conditionally absolute/negated float. Absolute is applied first
|
||||
Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
|
||||
/// Conditionally saturates a float
|
||||
Node GetSaturatedFloat(Node value, bool saturate = true);
|
||||
|
||||
/// Converts an integer to different sizes.
|
||||
Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
|
||||
/// Conditionally absolute/negated integer. Absolute is applied first
|
||||
Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
|
||||
|
||||
/// Unpacks a half immediate from an instruction
|
||||
Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
|
||||
/// Unpacks a binary value into a half float pair with a type format
|
||||
Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
|
||||
/// Merges a half pair into another value
|
||||
Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
|
||||
/// Conditionally absolute/negated half float pair. Absolute is applied first
|
||||
Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
|
||||
/// Conditionally saturates a half float pair
|
||||
Node GetSaturatedHalfFloat(Node value, bool saturate = true);
|
||||
|
||||
/// Get image component value by type and size
|
||||
std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
|
||||
u32 component_size, Node original_value);
|
||||
|
||||
/// Returns a predicate comparing two floats
|
||||
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
|
||||
/// Returns a predicate comparing two integers
|
||||
Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
|
||||
Node op_a, Node op_b);
|
||||
/// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
|
||||
Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
|
||||
|
||||
/// Returns a predicate combiner operation
|
||||
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
|
||||
|
||||
/// Queries the missing sampler info from the execution context.
|
||||
SamplerInfo GetSamplerInfo(SamplerInfo info,
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> sampler);
|
||||
|
||||
/// Accesses a texture sampler.
|
||||
std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
|
||||
|
||||
/// Accesses a texture sampler for a bindless texture.
|
||||
std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
|
||||
Node& index_var);
|
||||
|
||||
/// Accesses an image.
|
||||
ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
|
||||
|
||||
/// Access a bindless image sampler.
|
||||
ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
|
||||
|
||||
/// Extracts a sequence of bits from a node
|
||||
Node BitfieldExtract(Node value, u32 offset, u32 bits);
|
||||
|
||||
/// Inserts a sequence of bits from a node
|
||||
Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
|
||||
|
||||
/// Marks the usage of a input or output attribute.
|
||||
void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
|
||||
|
||||
/// Decodes VMNMX instruction and inserts its code into the passed basic block.
|
||||
void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
|
||||
|
||||
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||
const Node4& components);
|
||||
|
||||
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||
const Node4& components, bool ignore_mask = false);
|
||||
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
|
||||
const Node4& components, bool ignore_mask = false);
|
||||
|
||||
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||
bool is_array, bool is_aoffi,
|
||||
std::optional<Tegra::Shader::Register> bindless_reg);
|
||||
|
||||
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||
bool is_array);
|
||||
|
||||
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
|
||||
bool is_bindless);
|
||||
|
||||
Node4 GetTldCode(Tegra::Shader::Instruction instr);
|
||||
|
||||
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
bool is_array);
|
||||
|
||||
std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
|
||||
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
|
||||
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
|
||||
|
||||
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
|
||||
|
||||
std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
|
||||
|
||||
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
|
||||
std::optional<Tegra::Shader::Register> bindless_reg);
|
||||
|
||||
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
||||
u64 byte_height);
|
||||
|
||||
void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
|
||||
Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
|
||||
Tegra::Shader::PredicateResultMode predicate_mode,
|
||||
Tegra::Shader::Pred predicate, bool sets_cc);
|
||||
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
|
||||
Node op_c, Node imm_lut, bool sets_cc);
|
||||
|
||||
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
|
||||
std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
|
||||
std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
|
||||
const OperationNode& operation,
|
||||
Node gpr, Node base_offset,
|
||||
Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
|
||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
|
||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||
s64 cursor) const;
|
||||
|
||||
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
|
||||
Tegra::Shader::Instruction instr,
|
||||
bool is_read, bool is_write);
|
||||
|
||||
/// Register new amending code and obtain the reference id.
|
||||
std::size_t DeclareAmend(Node new_amend);
|
||||
|
||||
u32 NewCustomVariable();
|
||||
|
||||
const ProgramCode& program_code;
|
||||
const u32 main_offset;
|
||||
const CompilerSettings settings;
|
||||
Registry& registry;
|
||||
|
||||
bool decompiled{};
|
||||
bool disable_flow_stack{};
|
||||
|
||||
u32 coverage_begin{};
|
||||
u32 coverage_end{};
|
||||
|
||||
std::map<u32, NodeBlock> basic_blocks;
|
||||
NodeBlock global_code;
|
||||
ASTManager program_manager{true, true};
|
||||
std::vector<Node> amend_code;
|
||||
u32 num_custom_variables{};
|
||||
|
||||
std::set<u32> used_registers;
|
||||
std::set<Tegra::Shader::Pred> used_predicates;
|
||||
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
|
||||
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
|
||||
std::map<u32, ConstBuffer> used_cbufs;
|
||||
std::list<SamplerEntry> used_samplers;
|
||||
std::list<ImageEntry> used_images;
|
||||
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
|
||||
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
|
||||
bool uses_layer{};
|
||||
bool uses_viewport_index{};
|
||||
bool uses_point_size{};
|
||||
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
|
||||
bool uses_instance_id{};
|
||||
bool uses_vertex_id{};
|
||||
bool uses_legacy_varyings{};
|
||||
bool uses_y_negate{};
|
||||
bool uses_warps{};
|
||||
bool uses_indexed_samplers{};
|
||||
|
||||
Tegra::Shader::Header header;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,236 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/node.h"
|
||||
#include "video_core/shader/node_helper.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
namespace {
|
||||
|
||||
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
OperationCode operation_code) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
Node node = code.at(cursor);
|
||||
|
||||
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
||||
if (operation->GetCode() == operation_code) {
|
||||
return {std::move(node), cursor};
|
||||
}
|
||||
}
|
||||
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||
const auto& conditional_code = conditional->GetCode();
|
||||
auto result = FindOperation(
|
||||
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
|
||||
auto& found = result.first;
|
||||
if (found) {
|
||||
return {std::move(found), cursor};
|
||||
}
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
|
||||
if (operation.GetCode() != OperationCode::UAdd) {
|
||||
return std::nullopt;
|
||||
}
|
||||
Node gpr;
|
||||
Node offset;
|
||||
ASSERT(operation.GetOperandsCount() == 2);
|
||||
for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
|
||||
Node operand = operation[i];
|
||||
if (std::holds_alternative<ImmediateNode>(*operand)) {
|
||||
offset = operation[i];
|
||||
} else if (std::holds_alternative<GprNode>(*operand)) {
|
||||
gpr = operation[i];
|
||||
}
|
||||
}
|
||||
if (offset && gpr) {
|
||||
return std::make_pair(gpr, offset);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
bool AmendNodeCv(std::size_t amend_index, Node node) {
|
||||
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
||||
operation->SetAmendIndex(amend_index);
|
||||
return true;
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||
conditional->SetAmendIndex(amend_index);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Follows tracked backwards through code, starting at cursor, looking for the constant buffer
/// read(s) that a bindless sampler handle comes from.
/// @return The node the sampler was resolved from together with its tracking descriptor, or an
///         empty pair when nothing could be resolved.
std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
                                                             s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
        const u32 cbuf_index = cbuf->GetIndex();

        // Constant buffer found, test if it's an immediate
        const auto& offset = cbuf->GetOffset();
        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
            auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
            return {tracked, track};
        }
        if (const auto operation = std::get_if<OperationNode>(&*offset)) {
            // Computed offsets are only followed on the bound buffer.
            const u32 bound_buffer = registry.GetBoundBuffer();
            if (bound_buffer != cbuf_index) {
                return {};
            }
            if (const std::optional pair = DecoupleIndirectRead(*operation)) {
                auto [gpr, base_offset] = *pair;
                return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
                                                  code, cursor);
            }
        }
        return {};
    }
    if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return {};
        }
        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
        // register that it uses as operand
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (!source) {
            return {};
        }
        return TrackBindlessSampler(source, code, new_cursor);
    }
    if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
        const OperationNode& op = *operation;

        const OperationCode opcode = op.GetCode();
        if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
            // A bitwise OR of two constant buffer reads is reported as a separate sampler.
            ASSERT(op.GetOperandsCount() == 2);
            auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
            auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
            if (node_a && node_b) {
                auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
                                                                   std::pair{offset_a, offset_b});
                return {tracked, std::move(track)};
            }
        }
        // Visit every operand from last to first. The previous `while (i--)` form indexed
        // `op[i - 1]` after the decrement, which skipped the last operand and wrapped around to
        // an out-of-range index on the final iteration.
        for (std::size_t i = op.GetOperandsCount(); i > 0; --i) {
            if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
                // Constant buffer found in operand.
                return found;
            }
        }
        return {};
    }
    if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
        // NOTE(review): this recurses with the same conditional node as `tracked`, so it looks
        // like it would take this branch again without end - confirm whether it is reachable.
        const auto& conditional_code = conditional->GetCode();
        return TrackBindlessSampler(tracked, conditional_code,
                                    static_cast<s64>(conditional_code.size()));
    }
    return {};
}
|
||||
|
||||
std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
|
||||
const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
|
||||
const NodeBlock& code, s64 cursor) {
|
||||
const auto offset_imm = std::get<ImmediateNode>(*base_offset);
|
||||
const auto& gpu_driver = registry.AccessGuestDriverProfile();
|
||||
const u32 bindless_cv = NewCustomVariable();
|
||||
const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
|
||||
Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
|
||||
|
||||
Node cv_node = GetCustomVariable(bindless_cv);
|
||||
Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
|
||||
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
|
||||
AmendNodeCv(amend_index, code[cursor]);
|
||||
|
||||
// TODO: Implement bindless index custom variable
|
||||
auto track =
|
||||
MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
|
||||
return {tracked, track};
|
||||
}
|
||||
|
||||
/// Follows tracked backwards through code, starting at cursor, until a constant buffer read with
/// an immediate offset is found.
/// @return The constant buffer node with its index and immediate offset, or an empty tuple when
///         none is found.
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                               s64 cursor) const {
    if (const auto* const cbuf = std::get_if<CbufNode>(&*tracked)) {
        // Only constant buffers addressed by an immediate can be reported.
        const auto& offset = cbuf->GetOffset();
        const auto* const immediate = std::get_if<ImmediateNode>(&*offset);
        if (immediate == nullptr) {
            return {};
        }
        return {tracked, cbuf->GetIndex(), immediate->GetValue()};
    }

    if (const auto* const gpr = std::get_if<GprNode>(&*tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return {};
        }
        // Start one slot earlier so an instruction that writes the register it also reads does
        // not make the search loop forever.
        const auto [source, source_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (!source) {
            return {};
        }
        return TrackCbuf(source, code, source_cursor);
    }

    if (const auto* const operation = std::get_if<OperationNode>(&*tracked)) {
        // Operands are visited from last to first; the first hit wins.
        std::size_t remaining = operation->GetOperandsCount();
        while (remaining > 0) {
            --remaining;
            auto found = TrackCbuf((*operation)[remaining], code, cursor);
            if (std::get<0>(found)) {
                return found;
            }
        }
        return {};
    }

    if (const auto* const conditional = std::get_if<ConditionalNode>(&*tracked)) {
        const auto& inner_code = conditional->GetCode();
        return TrackCbuf(tracked, inner_code, static_cast<s64>(inner_code.size()));
    }

    return {};
}
|
||||
|
||||
/// Looks up the value last assigned to the tracked register before cursor and returns it when
/// that value is an immediate.
/// tracked must hold a GprNode; std::get throws otherwise.
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
    const GprNode& gpr = std::get<GprNode>(*tracked);
    // Start one slot earlier so an instruction that writes the register it also reads does not
    // make the search loop forever.
    const Node source = TrackRegister(&gpr, code, cursor - 1).first;
    if (!source) {
        return std::nullopt;
    }

    const auto* const immediate = std::get_if<ImmediateNode>(&*source);
    if (immediate == nullptr) {
        return std::nullopt;
    }
    return immediate->GetValue();
}
|
||||
|
||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||
s64 cursor) const {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
|
||||
if (!found_node) {
|
||||
return {};
|
||||
}
|
||||
const auto operation = std::get_if<OperationNode>(&*found_node);
|
||||
ASSERT(operation);
|
||||
|
||||
const auto& target = (*operation)[0];
|
||||
if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
|
||||
if (gpr_target->GetIndex() == tracked->GetIndex()) {
|
||||
return {(*operation)[1], new_cursor};
|
||||
}
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,115 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/transform_feedback.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
namespace {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
|
||||
|
||||
/// Attribute offsets that describe a vector
|
||||
// Each entry is the offset of the first component of a four-component attribute.
// The back color entries used to repeat the front color offsets (160/164); they follow the
// front colors at 168 and 172.
constexpr std::array VECTORS = {
    28,  // gl_Position
    32,  // Generic 0
    36,  // Generic 1
    40,  // Generic 2
    44,  // Generic 3
    48,  // Generic 4
    52,  // Generic 5
    56,  // Generic 6
    60,  // Generic 7
    64,  // Generic 8
    68,  // Generic 9
    72,  // Generic 10
    76,  // Generic 11
    80,  // Generic 12
    84,  // Generic 13
    88,  // Generic 14
    92,  // Generic 15
    96,  // Generic 16
    100, // Generic 17
    104, // Generic 18
    108, // Generic 19
    112, // Generic 20
    116, // Generic 21
    120, // Generic 22
    124, // Generic 23
    128, // Generic 24
    132, // Generic 25
    136, // Generic 26
    140, // Generic 27
    144, // Generic 28
    148, // Generic 29
    152, // Generic 30
    156, // Generic 31
    160, // gl_FrontColor
    164, // gl_FrontSecondaryColor
    168, // gl_BackColor
    172, // gl_BackSecondaryColor
    192, // gl_TexCoord[0]
    196, // gl_TexCoord[1]
    200, // gl_TexCoord[2]
    204, // gl_TexCoord[3]
    208, // gl_TexCoord[4]
    212, // gl_TexCoord[5]
    216, // gl_TexCoord[6]
    220, // gl_TexCoord[7]
};
|
||||
} // namespace
|
||||
|
||||
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
|
||||
|
||||
std::unordered_map<u8, VaryingTFB> tfb;
|
||||
|
||||
for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
|
||||
const auto& locations = info.tfb_varying_locs[buffer];
|
||||
const auto& layout = info.tfb_layouts[buffer];
|
||||
const std::size_t varying_count = layout.varying_count;
|
||||
|
||||
std::size_t highest = 0;
|
||||
|
||||
for (std::size_t offset = 0; offset < varying_count; ++offset) {
|
||||
const std::size_t base_offset = offset;
|
||||
const u8 location = locations[offset];
|
||||
|
||||
VaryingTFB varying;
|
||||
varying.buffer = layout.stream;
|
||||
varying.stride = layout.stride;
|
||||
varying.offset = offset * sizeof(u32);
|
||||
varying.components = 1;
|
||||
|
||||
if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
|
||||
UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
|
||||
|
||||
const u8 base_index = location / 4;
|
||||
while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
|
||||
++offset;
|
||||
++varying.components;
|
||||
}
|
||||
}
|
||||
|
||||
[[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
|
||||
UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
|
||||
|
||||
highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(highest != layout.stride);
|
||||
}
|
||||
return tfb;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,23 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
/// Description of one varying captured by transform feedback.
///
/// Every member is zero-initialised by default so that a default-constructed instance never
/// carries indeterminate values; the type remains an aggregate.
struct VaryingTFB {
    /// Value taken from the layout's stream field by BuildTransformFeedback.
    std::size_t buffer{};
    /// Stride of the transform feedback layout the varying belongs to.
    std::size_t stride{};
    /// Offset of the first component, in bytes (entry index * sizeof(u32)).
    std::size_t offset{};
    /// Number of consecutive components merged into this varying (at least one when built).
    std::size_t components{};
};
|
||||
|
||||
/// Builds the transform feedback varyings described by @p info.
/// @param info Graphics state whose tfb_layouts and tfb_varying_locs are read.
/// @return Map keyed by varying location holding the VaryingTFB of each captured varying.
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user