early-access version 1255
This commit is contained in:
103
src/video_core/engines/const_buffer_engine_interface.h
Executable file
103
src/video_core/engines/const_buffer_engine_interface.h
Executable file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
struct SamplerDescriptor {
|
||||
union {
|
||||
u32 raw = 0;
|
||||
BitField<0, 2, Tegra::Shader::TextureType> texture_type;
|
||||
BitField<2, 3, Tegra::Texture::ComponentType> r_type;
|
||||
BitField<5, 1, u32> is_array;
|
||||
BitField<6, 1, u32> is_buffer;
|
||||
BitField<7, 1, u32> is_shadow;
|
||||
BitField<8, 3, Tegra::Texture::ComponentType> g_type;
|
||||
BitField<11, 3, Tegra::Texture::ComponentType> b_type;
|
||||
BitField<14, 3, Tegra::Texture::ComponentType> a_type;
|
||||
BitField<17, 7, Tegra::Texture::TextureFormat> format;
|
||||
};
|
||||
|
||||
bool operator==(const SamplerDescriptor& rhs) const noexcept {
|
||||
return raw == rhs.raw;
|
||||
}
|
||||
|
||||
bool operator!=(const SamplerDescriptor& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
|
||||
using Tegra::Shader::TextureType;
|
||||
SamplerDescriptor result;
|
||||
|
||||
result.format.Assign(tic.format.Value());
|
||||
result.r_type.Assign(tic.r_type.Value());
|
||||
result.g_type.Assign(tic.g_type.Value());
|
||||
result.b_type.Assign(tic.b_type.Value());
|
||||
result.a_type.Assign(tic.a_type.Value());
|
||||
|
||||
switch (tic.texture_type.Value()) {
|
||||
case Tegra::Texture::TextureType::Texture1D:
|
||||
result.texture_type.Assign(TextureType::Texture1D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture2D:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture3D:
|
||||
result.texture_type.Assign(TextureType::Texture3D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::TextureCubemap:
|
||||
result.texture_type.Assign(TextureType::TextureCube);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture1DArray:
|
||||
result.texture_type.Assign(TextureType::Texture1D);
|
||||
result.is_array.Assign(1);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture2DArray:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
result.is_array.Assign(1);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture1DBuffer:
|
||||
result.texture_type.Assign(TextureType::Texture1D);
|
||||
result.is_buffer.Assign(1);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::Texture2DNoMipmap:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
return result;
|
||||
case Tegra::Texture::TextureType::TextureCubeArray:
|
||||
result.texture_type.Assign(TextureType::TextureCube);
|
||||
result.is_array.Assign(1);
|
||||
return result;
|
||||
default:
|
||||
result.texture_type.Assign(TextureType::Texture2D);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
|
||||
|
||||
class ConstBufferEngineInterface {
|
||||
public:
|
||||
virtual ~ConstBufferEngineInterface() = default;
|
||||
virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
|
||||
virtual u32 GetBoundBuffer() const = 0;
|
||||
|
||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||
virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
17
src/video_core/engines/const_buffer_info.h
Executable file
17
src/video_core/engines/const_buffer_info.h
Executable file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
struct ConstBufferInfo {
|
||||
GPUVAddr address;
|
||||
u32 size;
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
22
src/video_core/engines/engine_interface.h
Executable file
22
src/video_core/engines/engine_interface.h
Executable file
@@ -0,0 +1,22 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class EngineInterface {
|
||||
public:
|
||||
/// Write the value to the register identified by method.
|
||||
virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
52
src/video_core/engines/engine_upload.cpp
Executable file
52
src/video_core/engines/engine_upload.cpp
Executable file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
State::State(MemoryManager& memory_manager_, Registers& regs_)
|
||||
: regs{regs_}, memory_manager{memory_manager_} {}
|
||||
|
||||
State::~State() = default;
|
||||
|
||||
void State::ProcessExec(const bool is_linear_) {
|
||||
write_offset = 0;
|
||||
copy_size = regs.line_length_in * regs.line_count;
|
||||
inner_buffer.resize(copy_size);
|
||||
is_linear = is_linear_;
|
||||
}
|
||||
|
||||
void State::ProcessData(const u32 data, const bool is_last_call) {
|
||||
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
|
||||
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
|
||||
write_offset += sub_copy_size;
|
||||
if (!is_last_call) {
|
||||
return;
|
||||
}
|
||||
const GPUVAddr address{regs.dest.Address()};
|
||||
if (is_linear) {
|
||||
memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
|
||||
} else {
|
||||
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
||||
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
||||
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
|
||||
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
|
||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0);
|
||||
tmp_buffer.resize(dst_size);
|
||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
|
||||
regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
|
||||
tmp_buffer.data());
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
73
src/video_core/engines/engine_upload.h
Executable file
73
src/video_core/engines/engine_upload.h
Executable file
@@ -0,0 +1,73 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
struct Registers {
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 pitch;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 z;
|
||||
u32 x;
|
||||
u32 y;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
|
||||
}
|
||||
|
||||
u32 BlockWidth() const {
|
||||
return block_width.Value();
|
||||
}
|
||||
|
||||
u32 BlockHeight() const {
|
||||
return block_height.Value();
|
||||
}
|
||||
|
||||
u32 BlockDepth() const {
|
||||
return block_depth.Value();
|
||||
}
|
||||
} dest;
|
||||
};
|
||||
|
||||
class State {
|
||||
public:
|
||||
explicit State(MemoryManager& memory_manager_, Registers& regs_);
|
||||
~State();
|
||||
|
||||
void ProcessExec(bool is_linear_);
|
||||
void ProcessData(u32 data, bool is_last_call);
|
||||
|
||||
private:
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
std::vector<u8> tmp_buffer;
|
||||
bool is_linear = false;
|
||||
Registers& regs;
|
||||
MemoryManager& memory_manager;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
69
src/video_core/engines/fermi_2d.cpp
Executable file
69
src/video_core/engines/fermi_2d.cpp
Executable file
@@ -0,0 +1,69 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Fermi2D::Fermi2D() {
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
||||
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
}
|
||||
|
||||
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Fermi2D register, increase the size of the Regs structure");
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
|
||||
Blit();
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
|
||||
for (u32 i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
regs.src.Address(), regs.dst.Address());
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
|
||||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
const Config config{
|
||||
.operation = regs.operation,
|
||||
.filter = args.sample_mode.filter,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
.dst_y1 = args.dst_y0 + args.dst_height,
|
||||
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
|
||||
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
|
||||
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
||||
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
||||
};
|
||||
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
352
src/video_core/engines/fermi_2d.h
Executable file
352
src/video_core/engines/fermi_2d.h
Executable file
@@ -0,0 +1,352 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as G80_2D. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
|
||||
*/
|
||||
|
||||
#define FERMI2D_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class Fermi2D final : public EngineInterface {
|
||||
public:
|
||||
explicit Fermi2D();
|
||||
~Fermi2D();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
enum class Origin : u32 {
|
||||
Center = 0,
|
||||
Corner = 1,
|
||||
};
|
||||
|
||||
enum class Filter : u32 {
|
||||
Point = 0,
|
||||
Bilinear = 1,
|
||||
};
|
||||
|
||||
enum class Operation : u32 {
|
||||
SrcCopyAnd = 0,
|
||||
ROPAnd = 1,
|
||||
Blend = 2,
|
||||
SrcCopy = 3,
|
||||
ROP = 4,
|
||||
SrcCopyPremult = 5,
|
||||
BlendPremult = 6,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
BlockLinear = 0,
|
||||
Pitch = 1,
|
||||
};
|
||||
|
||||
enum class CpuIndexWrap : u32 {
|
||||
Wrap = 0,
|
||||
NoWrap = 1,
|
||||
};
|
||||
|
||||
struct Surface {
|
||||
RenderTargetFormat format;
|
||||
MemoryLayout linear;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 pitch;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 addr_upper;
|
||||
u32 addr_lower;
|
||||
|
||||
[[nodiscard]] constexpr GPUVAddr Address() const noexcept {
|
||||
return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
||||
|
||||
enum class SectorPromotion : u32 {
|
||||
NoPromotion = 0,
|
||||
PromoteTo2V = 1,
|
||||
PromoteTo2H = 2,
|
||||
PromoteTo4 = 3,
|
||||
};
|
||||
|
||||
enum class NumTpcs : u32 {
|
||||
All = 0,
|
||||
One = 1,
|
||||
};
|
||||
|
||||
enum class RenderEnableMode : u32 {
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
enum class ColorKeyFormat : u32 {
|
||||
A16R56G6B5 = 0,
|
||||
A1R5G55B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A2R10G10B10 = 3,
|
||||
Y8 = 4,
|
||||
Y16 = 5,
|
||||
Y32 = 6,
|
||||
};
|
||||
|
||||
union Beta4 {
|
||||
BitField<0, 8, u32> b;
|
||||
BitField<8, 8, u32> g;
|
||||
BitField<16, 8, u32> r;
|
||||
BitField<24, 8, u32> a;
|
||||
};
|
||||
|
||||
struct Point {
|
||||
u32 x;
|
||||
u32 y;
|
||||
};
|
||||
|
||||
enum class PatternSelect : u32 {
|
||||
MonoChrome8x8 = 0,
|
||||
MonoChrome64x1 = 1,
|
||||
MonoChrome1x64 = 2,
|
||||
Color = 3,
|
||||
};
|
||||
|
||||
enum class NotifyType : u32 {
|
||||
WriteOnly = 0,
|
||||
WriteThenAwaken = 1,
|
||||
};
|
||||
|
||||
enum class MonochromePatternColorFormat : u32 {
|
||||
A8X8R8G6B5 = 0,
|
||||
A1R5G5B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A8Y8 = 3,
|
||||
A8X8Y16 = 4,
|
||||
Y32 = 5,
|
||||
};
|
||||
|
||||
enum class MonochromePatternFormat : u32 {
|
||||
CGA6_M1 = 0,
|
||||
LE_M1 = 1,
|
||||
};
|
||||
|
||||
union Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0x258;
|
||||
struct {
|
||||
u32 object;
|
||||
INSERT_UNION_PADDING_WORDS(0x3F);
|
||||
u32 no_operation;
|
||||
NotifyType notify;
|
||||
INSERT_UNION_PADDING_WORDS(0x2);
|
||||
u32 wait_for_idle;
|
||||
INSERT_UNION_PADDING_WORDS(0xB);
|
||||
u32 pm_trigger;
|
||||
INSERT_UNION_PADDING_WORDS(0xF);
|
||||
u32 context_dma_notify;
|
||||
u32 dst_context_dma;
|
||||
u32 src_context_dma;
|
||||
u32 semaphore_context_dma;
|
||||
INSERT_UNION_PADDING_WORDS(0x1C);
|
||||
Surface dst;
|
||||
CpuIndexWrap pixels_from_cpu_index_wrap;
|
||||
u32 kind2d_check_enable;
|
||||
Surface src;
|
||||
SectorPromotion pixels_from_memory_sector_promotion;
|
||||
INSERT_UNION_PADDING_WORDS(0x1);
|
||||
NumTpcs num_tpcs;
|
||||
u32 render_enable_addr_upper;
|
||||
u32 render_enable_addr_lower;
|
||||
RenderEnableMode render_enable_mode;
|
||||
INSERT_UNION_PADDING_WORDS(0x4);
|
||||
u32 clip_x0;
|
||||
u32 clip_y0;
|
||||
u32 clip_width;
|
||||
u32 clip_height;
|
||||
BitField<0, 1, u32> clip_enable;
|
||||
BitField<0, 3, ColorKeyFormat> color_key_format;
|
||||
u32 color_key;
|
||||
BitField<0, 1, u32> color_key_enable;
|
||||
BitField<0, 8, u32> rop;
|
||||
u32 beta1;
|
||||
Beta4 beta4;
|
||||
Operation operation;
|
||||
union {
|
||||
BitField<0, 6, u32> x;
|
||||
BitField<8, 6, u32> y;
|
||||
} pattern_offset;
|
||||
BitField<0, 2, PatternSelect> pattern_select;
|
||||
INSERT_UNION_PADDING_WORDS(0xC);
|
||||
struct {
|
||||
BitField<0, 3, MonochromePatternColorFormat> color_format;
|
||||
BitField<0, 1, MonochromePatternFormat> format;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 pattern0;
|
||||
u32 pattern1;
|
||||
} monochrome_pattern;
|
||||
struct {
|
||||
std::array<u32, 0x40> X8R8G8B8;
|
||||
std::array<u32, 0x20> R5G6B5;
|
||||
std::array<u32, 0x20> X1R5G5B5;
|
||||
std::array<u32, 0x10> Y8;
|
||||
} color_pattern;
|
||||
INSERT_UNION_PADDING_WORDS(0x10);
|
||||
struct {
|
||||
u32 prim_mode;
|
||||
u32 prim_color_format;
|
||||
u32 prim_color;
|
||||
u32 line_tie_break_bits;
|
||||
INSERT_UNION_PADDING_WORDS(0x14);
|
||||
u32 prim_point_xy;
|
||||
INSERT_UNION_PADDING_WORDS(0x7);
|
||||
std::array<Point, 0x40> prim_point;
|
||||
} render_solid;
|
||||
struct {
|
||||
u32 data_type;
|
||||
u32 color_format;
|
||||
u32 index_format;
|
||||
u32 mono_format;
|
||||
u32 wrap;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 mono_opacity;
|
||||
INSERT_UNION_PADDING_WORDS(0x6);
|
||||
u32 src_width;
|
||||
u32 src_height;
|
||||
u32 dx_du_frac;
|
||||
u32 dx_du_int;
|
||||
u32 dx_dv_frac;
|
||||
u32 dy_dv_int;
|
||||
u32 dst_x0_frac;
|
||||
u32 dst_x0_int;
|
||||
u32 dst_y0_frac;
|
||||
u32 dst_y0_int;
|
||||
u32 data;
|
||||
} pixels_from_cpu;
|
||||
INSERT_UNION_PADDING_WORDS(0x3);
|
||||
u32 big_endian_control;
|
||||
INSERT_UNION_PADDING_WORDS(0x3);
|
||||
struct {
|
||||
BitField<0, 3, u32> block_shape;
|
||||
BitField<0, 5, u32> corral_size;
|
||||
BitField<0, 1, u32> safe_overlap;
|
||||
union {
|
||||
BitField<0, 1, Origin> origin;
|
||||
BitField<4, 1, Filter> filter;
|
||||
} sample_mode;
|
||||
INSERT_UNION_PADDING_WORDS(0x8);
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_width;
|
||||
s32 dst_height;
|
||||
s64 du_dx;
|
||||
s64 dv_dy;
|
||||
s64 src_x0;
|
||||
s64 src_y0;
|
||||
} pixels_from_memory;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
} regs{};
|
||||
|
||||
struct Config {
|
||||
Operation operation;
|
||||
Filter filter;
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_x1;
|
||||
s32 dst_y1;
|
||||
s32 src_x0;
|
||||
s32 src_y0;
|
||||
s32 src_x1;
|
||||
s32 src_y1;
|
||||
};
|
||||
|
||||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void Blit();
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(object, 0x0);
|
||||
ASSERT_REG_POSITION(no_operation, 0x100);
|
||||
ASSERT_REG_POSITION(notify, 0x104);
|
||||
ASSERT_REG_POSITION(wait_for_idle, 0x110);
|
||||
ASSERT_REG_POSITION(pm_trigger, 0x140);
|
||||
ASSERT_REG_POSITION(context_dma_notify, 0x180);
|
||||
ASSERT_REG_POSITION(dst_context_dma, 0x184);
|
||||
ASSERT_REG_POSITION(src_context_dma, 0x188);
|
||||
ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
|
||||
ASSERT_REG_POSITION(dst, 0x200);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
|
||||
ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
|
||||
ASSERT_REG_POSITION(src, 0x230);
|
||||
ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
|
||||
ASSERT_REG_POSITION(num_tpcs, 0x260);
|
||||
ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
|
||||
ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
|
||||
ASSERT_REG_POSITION(clip_x0, 0x280);
|
||||
ASSERT_REG_POSITION(clip_y0, 0x284);
|
||||
ASSERT_REG_POSITION(clip_width, 0x288);
|
||||
ASSERT_REG_POSITION(clip_height, 0x28c);
|
||||
ASSERT_REG_POSITION(clip_enable, 0x290);
|
||||
ASSERT_REG_POSITION(color_key_format, 0x294);
|
||||
ASSERT_REG_POSITION(color_key, 0x298);
|
||||
ASSERT_REG_POSITION(rop, 0x2A0);
|
||||
ASSERT_REG_POSITION(beta1, 0x2A4);
|
||||
ASSERT_REG_POSITION(beta4, 0x2A8);
|
||||
ASSERT_REG_POSITION(operation, 0x2AC);
|
||||
ASSERT_REG_POSITION(pattern_offset, 0x2B0);
|
||||
ASSERT_REG_POSITION(pattern_select, 0x2B4);
|
||||
ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
|
||||
ASSERT_REG_POSITION(color_pattern, 0x300);
|
||||
ASSERT_REG_POSITION(render_solid, 0x580);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
|
||||
ASSERT_REG_POSITION(big_endian_control, 0x870);
|
||||
ASSERT_REG_POSITION(pixels_from_memory, 0x880);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
127
src/video_core/engines/kepler_compute.cpp
Executable file
127
src/video_core/engines/kepler_compute.cpp
Executable file
@@ -0,0 +1,127 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bitset>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerCompute register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
|
||||
upload_state.ProcessExec(regs.exec_upload.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(launch):
|
||||
ProcessLaunch();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
const auto& buffer = launch_description.const_buffer_config[const_buffer];
|
||||
u32 result;
|
||||
std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
|
||||
return result;
|
||||
}
|
||||
|
||||
SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
|
||||
return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
|
||||
}
|
||||
|
||||
SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
||||
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
||||
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||
}
|
||||
|
||||
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
|
||||
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
|
||||
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
|
||||
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
|
||||
return result;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
|
||||
return rasterizer->AccessGuestDriverProfile();
|
||||
}
|
||||
|
||||
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
|
||||
return rasterizer->AccessGuestDriverProfile();
|
||||
}
|
||||
|
||||
void KeplerCompute::ProcessLaunch() {
|
||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
|
||||
|
||||
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
|
||||
LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
|
||||
|
||||
rasterizer->DispatchCompute(code_addr);
|
||||
}
|
||||
|
||||
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
|
||||
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
|
||||
|
||||
Texture::TICEntry tic_entry;
|
||||
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
|
||||
|
||||
return tic_entry;
|
||||
}
|
||||
|
||||
Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
|
||||
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
|
||||
|
||||
Texture::TSCEntry tsc_entry;
|
||||
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
|
||||
return tsc_entry;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
269
src/video_core/engines/kepler_compute.h
Executable file
269
src/video_core/engines/kepler_compute.h
Executable file
@@ -0,0 +1,269 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as GK104_Compute. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
|
||||
public:
|
||||
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
|
||||
~KeplerCompute();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
|
||||
|
||||
static constexpr std::size_t NumConstBuffers = 8;
|
||||
|
||||
struct Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0xCF8;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_UNION_PADDING_WORDS(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec_upload;
|
||||
|
||||
u32 data_upload;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x3F);
|
||||
|
||||
struct {
|
||||
u32 address;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
|
||||
}
|
||||
} launch_desc_loc;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x1);
|
||||
|
||||
u32 launch;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x4A7);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tsc;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x3);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tic;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x22);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} code_loc;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x3FE);
|
||||
|
||||
u32 tex_cb_index;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x374);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
struct LaunchParams {
|
||||
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
u32 program_start;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
|
||||
BitField<30, 1, u32> linked_tsc;
|
||||
|
||||
BitField<0, 31, u32> grid_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> grid_dim_y;
|
||||
BitField<16, 16, u32> grid_dim_z;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
BitField<0, 18, u32> shared_alloc;
|
||||
|
||||
BitField<16, 16, u32> block_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> block_dim_y;
|
||||
BitField<16, 16, u32> block_dim_z;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 8, u32> const_buffer_enable_mask;
|
||||
BitField<29, 2, u32> cache_layout;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
struct ConstBufferConfig {
|
||||
u32 address_low;
|
||||
union {
|
||||
BitField<0, 8, u32> address_high;
|
||||
BitField<15, 17, u32> size;
|
||||
};
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
|
||||
address_low);
|
||||
}
|
||||
};
|
||||
std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config;
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_pos_alloc;
|
||||
BitField<27, 5, u32> barrier_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_neg_alloc;
|
||||
BitField<24, 5, u32> gpr_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_crs_alloc;
|
||||
BitField<24, 5, u32> sass_version;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x10);
|
||||
} launch_description{};
|
||||
|
||||
struct {
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
} state{};
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
|
||||
"KeplerCompute Regs has wrong size");
|
||||
|
||||
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
|
||||
"KeplerCompute LaunchParams has wrong size");
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||
|
||||
u32 GetBoundBuffer() const override {
|
||||
return regs.tex_cb_index;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||
|
||||
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
||||
|
||||
private:
|
||||
void ProcessLaunch();
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
/// Retrieves information about a specific TSC entry from the TSC buffer.
|
||||
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
|
||||
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(launch, 0xAF);
|
||||
ASSERT_REG_POSITION(tsc, 0x557);
|
||||
ASSERT_REG_POSITION(tic, 0x55D);
|
||||
ASSERT_REG_POSITION(code_loc, 0x582);
|
||||
ASSERT_REG_POSITION(tex_cb_index, 0x982);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
50
src/video_core/engines/kepler_memory.cpp
Executable file
50
src/video_core/engines/kepler_memory.cpp
Executable file
@@ -0,0 +1,50 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
|
||||
: system{system_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerMemory::~KeplerMemory() = default;
|
||||
|
||||
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerMemory register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||
upload_state.ProcessExec(regs.exec.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLERMEMORY_REG_INDEX(data): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
85
src/video_core/engines/kepler_memory.h
Executable file
85
src/video_core/engines/kepler_memory.h
Executable file
@@ -0,0 +1,85 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as P2MF. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLERMEMORY_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerMemory final : public EngineInterface {
|
||||
public:
|
||||
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
|
||||
~KeplerMemory();
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x7F;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_UNION_PADDING_WORDS(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec;
|
||||
|
||||
u32 data;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x11);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec, 0x6C);
|
||||
ASSERT_REG_POSITION(data, 0x6D);
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
708
src/video_core/engines/maxwell_3d.cpp
Executable file
708
src/video_core/engines/maxwell_3d.cpp
Executable file
@@ -0,0 +1,708 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
using VideoCore::QueryType;
|
||||
|
||||
/// First register id that is actually a Macro call.
|
||||
constexpr u32 MacroRegistersStart = 0xE00;
|
||||
|
||||
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
|
||||
upload_state{memory_manager, regs.upload} {
|
||||
dirty.flags.flip();
|
||||
InitializeRegisterDefaults();
|
||||
}
|
||||
|
||||
Maxwell3D::~Maxwell3D() = default;
|
||||
|
||||
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
}
|
||||
|
||||
void Maxwell3D::InitializeRegisterDefaults() {
|
||||
// Initializes registers to their default values - what games expect them to be at boot. This is
|
||||
// for certain registers that may not be explicitly set by games.
|
||||
|
||||
// Reset all registers to zero
|
||||
std::memset(®s, 0, sizeof(regs));
|
||||
|
||||
// Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
|
||||
// needed for ARMS.
|
||||
for (auto& viewport : regs.viewports) {
|
||||
viewport.depth_range_near = 0.0f;
|
||||
viewport.depth_range_far = 1.0f;
|
||||
}
|
||||
for (auto& viewport : regs.viewport_transform) {
|
||||
viewport.swizzle.x.Assign(Regs::ViewportSwizzle::PositiveX);
|
||||
viewport.swizzle.y.Assign(Regs::ViewportSwizzle::PositiveY);
|
||||
viewport.swizzle.z.Assign(Regs::ViewportSwizzle::PositiveZ);
|
||||
viewport.swizzle.w.Assign(Regs::ViewportSwizzle::PositiveW);
|
||||
}
|
||||
|
||||
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
|
||||
// so initialize blend registers with sane values
|
||||
regs.blend.equation_rgb = Regs::Blend::Equation::Add;
|
||||
regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
|
||||
regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
|
||||
regs.blend.equation_a = Regs::Blend::Equation::Add;
|
||||
regs.blend.factor_source_a = Regs::Blend::Factor::One;
|
||||
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
|
||||
for (auto& blend : regs.independent_blend) {
|
||||
blend.equation_rgb = Regs::Blend::Equation::Add;
|
||||
blend.factor_source_rgb = Regs::Blend::Factor::One;
|
||||
blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
|
||||
blend.equation_a = Regs::Blend::Equation::Add;
|
||||
blend.factor_source_a = Regs::Blend::Factor::One;
|
||||
blend.factor_dest_a = Regs::Blend::Factor::Zero;
|
||||
}
|
||||
regs.stencil_front_op_fail = Regs::StencilOp::Keep;
|
||||
regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
|
||||
regs.stencil_front_op_zpass = Regs::StencilOp::Keep;
|
||||
regs.stencil_front_func_func = Regs::ComparisonOp::Always;
|
||||
regs.stencil_front_func_mask = 0xFFFFFFFF;
|
||||
regs.stencil_front_mask = 0xFFFFFFFF;
|
||||
regs.stencil_two_side_enable = 1;
|
||||
regs.stencil_back_op_fail = Regs::StencilOp::Keep;
|
||||
regs.stencil_back_op_zfail = Regs::StencilOp::Keep;
|
||||
regs.stencil_back_op_zpass = Regs::StencilOp::Keep;
|
||||
regs.stencil_back_func_func = Regs::ComparisonOp::Always;
|
||||
regs.stencil_back_func_mask = 0xFFFFFFFF;
|
||||
regs.stencil_back_mask = 0xFFFFFFFF;
|
||||
|
||||
regs.depth_test_func = Regs::ComparisonOp::Always;
|
||||
regs.front_face = Regs::FrontFace::CounterClockWise;
|
||||
regs.cull_face = Regs::CullFace::Back;
|
||||
|
||||
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
|
||||
// register carrying a default value. Assume it's OpenGL's default (1).
|
||||
regs.point_size = 1.0f;
|
||||
|
||||
// TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
|
||||
// default of enabled fixes rendering here.
|
||||
for (auto& color_mask : regs.color_mask) {
|
||||
color_mask.R.Assign(1);
|
||||
color_mask.G.Assign(1);
|
||||
color_mask.B.Assign(1);
|
||||
color_mask.A.Assign(1);
|
||||
}
|
||||
|
||||
for (auto& format : regs.vertex_attrib_format) {
|
||||
format.constant.Assign(1);
|
||||
}
|
||||
|
||||
// NVN games expect these values to be enabled at boot
|
||||
regs.rasterize_enable = 1;
|
||||
regs.rt_separate_frag_data = 1;
|
||||
regs.framebuffer_srgb = 1;
|
||||
regs.line_width_aliased = 1.0f;
|
||||
regs.line_width_smooth = 1.0f;
|
||||
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
|
||||
regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
|
||||
regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
|
||||
|
||||
shadow_state = regs;
|
||||
|
||||
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
|
||||
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
|
||||
mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
|
||||
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
|
||||
if (executing_macro == 0) {
|
||||
// A macro call must begin by writing the macro method's register, not its argument.
|
||||
ASSERT_MSG((method % 2) == 0,
|
||||
"Can't start macro execution by writing to the ARGS register");
|
||||
executing_macro = method;
|
||||
}
|
||||
|
||||
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (is_last_call) {
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
}
|
||||
}
|
||||
|
||||
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||
// Keep track of the register value in shadow_state when requested.
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
if (control == Regs::ShadowRamControl::Track ||
|
||||
control == Regs::ShadowRamControl::TrackWithFilter) {
|
||||
shadow_state.reg_array[method] = argument;
|
||||
return argument;
|
||||
}
|
||||
if (control == Regs::ShadowRamControl::Replay) {
|
||||
return shadow_state.reg_array[method];
|
||||
}
|
||||
return argument;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
|
||||
if (regs.reg_array[method] == argument) {
|
||||
return;
|
||||
}
|
||||
regs.reg_array[method] = argument;
|
||||
|
||||
for (const auto& table : dirty.tables) {
|
||||
dirty.flags[table[method]] = true;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
|
||||
bool is_last_call) {
|
||||
switch (method) {
|
||||
case MAXWELL3D_REG_INDEX(wait_for_idle):
|
||||
return rasterizer->WaitForIdle();
|
||||
case MAXWELL3D_REG_INDEX(shadow_ram_control):
|
||||
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
|
||||
return;
|
||||
case MAXWELL3D_REG_INDEX(macros.data):
|
||||
return macro_engine->AddCode(regs.macros.upload_address, argument);
|
||||
case MAXWELL3D_REG_INDEX(macros.bind):
|
||||
return ProcessMacroBind(argument);
|
||||
case MAXWELL3D_REG_INDEX(firmware[4]):
|
||||
return ProcessFirmwareCall4();
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
|
||||
return StartCBData(method);
|
||||
case MAXWELL3D_REG_INDEX(cb_bind[0]):
|
||||
return ProcessCBBind(0);
|
||||
case MAXWELL3D_REG_INDEX(cb_bind[1]):
|
||||
return ProcessCBBind(1);
|
||||
case MAXWELL3D_REG_INDEX(cb_bind[2]):
|
||||
return ProcessCBBind(2);
|
||||
case MAXWELL3D_REG_INDEX(cb_bind[3]):
|
||||
return ProcessCBBind(3);
|
||||
case MAXWELL3D_REG_INDEX(cb_bind[4]):
|
||||
return ProcessCBBind(4);
|
||||
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
|
||||
return DrawArrays();
|
||||
case MAXWELL3D_REG_INDEX(clear_buffers):
|
||||
return ProcessClearBuffers();
|
||||
case MAXWELL3D_REG_INDEX(query.query_get):
|
||||
return ProcessQueryGet();
|
||||
case MAXWELL3D_REG_INDEX(condition.mode):
|
||||
return ProcessQueryCondition();
|
||||
case MAXWELL3D_REG_INDEX(counter_reset):
|
||||
return ProcessCounterReset();
|
||||
case MAXWELL3D_REG_INDEX(sync_info):
|
||||
return ProcessSyncPoint();
|
||||
case MAXWELL3D_REG_INDEX(exec_upload):
|
||||
return upload_state.ProcessExec(regs.exec_upload.linear != 0);
|
||||
case MAXWELL3D_REG_INDEX(data_upload):
|
||||
upload_state.ProcessData(argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
OnMemoryWrite();
|
||||
}
|
||||
return;
|
||||
case MAXWELL3D_REG_INDEX(fragment_barrier):
|
||||
return rasterizer->FragmentBarrier();
|
||||
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
|
||||
return rasterizer->TiledCacheBarrier();
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
|
||||
// Reset the current macro.
|
||||
executing_macro = 0;
|
||||
|
||||
// Lookup the macro offset
|
||||
const u32 entry =
|
||||
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
|
||||
|
||||
// Execute the current macro.
|
||||
macro_engine->Execute(*this, macro_positions[entry], parameters);
|
||||
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
|
||||
FlushMMEInlineDraw();
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
if (method == cb_data_state.current) {
|
||||
regs.reg_array[method] = method_argument;
|
||||
ProcessCBData(method_argument);
|
||||
return;
|
||||
} else if (cb_data_state.current != null_cb_data) {
|
||||
FinishCBData();
|
||||
}
|
||||
|
||||
// It is an error to write to a register other than the current macro's ARG register before it
|
||||
// has finished execution.
|
||||
if (executing_macro != 0) {
|
||||
ASSERT(method == executing_macro + 1);
|
||||
}
|
||||
|
||||
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
|
||||
// uploaded to the GPU during initialization.
|
||||
if (method >= MacroRegistersStart) {
|
||||
ProcessMacro(method, &method_argument, 1, is_last_call);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Maxwell3D register, increase the size of the Regs structure");
|
||||
|
||||
const u32 argument = ProcessShadowRam(method, method_argument);
|
||||
ProcessDirtyRegisters(method, argument);
|
||||
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
||||
}
|
||||
|
||||
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
|
||||
// uploaded to the GPU during initialization.
|
||||
if (method >= MacroRegistersStart) {
|
||||
ProcessMacro(method, base_start, amount, amount == methods_pending);
|
||||
return;
|
||||
}
|
||||
switch (method) {
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
|
||||
ProcessCBMultiData(method, base_start, amount);
|
||||
break;
|
||||
default:
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
|
||||
if (mme_draw.current_mode == MMEDrawMode::Undefined) {
|
||||
if (mme_draw.gl_begin_consume) {
|
||||
mme_draw.current_mode = expected_mode;
|
||||
mme_draw.current_count = count;
|
||||
mme_draw.instance_count = 1;
|
||||
mme_draw.gl_begin_consume = false;
|
||||
mme_draw.gl_end_count = 0;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count &&
|
||||
mme_draw.instance_mode && mme_draw.gl_begin_consume) {
|
||||
mme_draw.instance_count++;
|
||||
mme_draw.gl_begin_consume = false;
|
||||
return;
|
||||
} else {
|
||||
FlushMMEInlineDraw();
|
||||
}
|
||||
}
|
||||
// Tail call in case it needs to retry.
|
||||
StepInstance(expected_mode, count);
|
||||
}
|
||||
|
||||
void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
|
||||
if (mme_inline[method]) {
|
||||
regs.reg_array[method] = method_argument;
|
||||
if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
|
||||
method == MAXWELL3D_REG_INDEX(index_array.count)) {
|
||||
const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
|
||||
? MMEDrawMode::Array
|
||||
: MMEDrawMode::Indexed;
|
||||
StepInstance(expected_mode, method_argument);
|
||||
} else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) {
|
||||
mme_draw.instance_mode =
|
||||
(regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0);
|
||||
mme_draw.gl_begin_consume = true;
|
||||
} else {
|
||||
mme_draw.gl_end_count++;
|
||||
}
|
||||
} else {
|
||||
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
|
||||
FlushMMEInlineDraw();
|
||||
}
|
||||
CallMethod(method, method_argument, true);
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::FlushMMEInlineDraw() {
|
||||
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
|
||||
regs.vertex_buffer.count);
|
||||
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
|
||||
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
|
||||
|
||||
// Both instance configuration registers can not be set at the same time.
|
||||
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
|
||||
"Illegal combination of instancing parameters");
|
||||
|
||||
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
|
||||
if (ShouldExecute()) {
|
||||
rasterizer->Draw(is_indexed, true);
|
||||
}
|
||||
|
||||
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
|
||||
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
|
||||
// it's possible that it is incorrect and that there is some other register used to specify the
|
||||
// drawing mode.
|
||||
if (is_indexed) {
|
||||
regs.index_array.count = 0;
|
||||
} else {
|
||||
regs.vertex_buffer.count = 0;
|
||||
}
|
||||
mme_draw.current_mode = MMEDrawMode::Undefined;
|
||||
mme_draw.current_count = 0;
|
||||
mme_draw.instance_count = 0;
|
||||
mme_draw.instance_mode = false;
|
||||
mme_draw.gl_begin_consume = false;
|
||||
mme_draw.gl_end_count = 0;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
||||
macro_engine->AddCode(regs.macros.upload_address++, data);
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroBind(u32 data) {
|
||||
macro_positions[regs.macros.entry++] = data;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessFirmwareCall4() {
|
||||
LOG_WARNING(HW_GPU, "(STUBBED) called");
|
||||
|
||||
// Firmware call 4 is a blob that changes some registers depending on its parameters.
|
||||
// These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
|
||||
regs.reg_array[0xd00] = 1;
|
||||
}
|
||||
|
||||
void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||
struct LongQueryResult {
|
||||
u64_le value;
|
||||
u64_le timestamp;
|
||||
};
|
||||
static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
|
||||
const GPUVAddr sequence_address{regs.query.QueryAddress()};
|
||||
if (long_query) {
|
||||
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
|
||||
// GPU, this command may actually take a while to complete in real hardware due to GPU
|
||||
// wait queues.
|
||||
LongQueryResult query_result{payload, system.GPU().GetTicks()};
|
||||
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
|
||||
} else {
|
||||
memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
|
||||
LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
|
||||
}
|
||||
|
||||
switch (regs.query.query_get.operation) {
|
||||
case Regs::QueryOperation::Release:
|
||||
if (regs.query.query_get.fence == 1) {
|
||||
rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
|
||||
} else {
|
||||
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
|
||||
}
|
||||
break;
|
||||
case Regs::QueryOperation::Acquire:
|
||||
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
|
||||
// matches the current payload.
|
||||
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
||||
break;
|
||||
case Regs::QueryOperation::Counter:
|
||||
if (const std::optional<u64> result = GetQueryResult()) {
|
||||
// If the query returns an empty optional it means it's cached and deferred.
|
||||
// In this case we have a non-empty result, so we stamp it immediately.
|
||||
StampQueryResult(*result, regs.query.query_get.short_query == 0);
|
||||
}
|
||||
break;
|
||||
case Regs::QueryOperation::Trap:
|
||||
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unknown query operation");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryCondition() {
|
||||
const GPUVAddr condition_address{regs.condition.Address()};
|
||||
switch (regs.condition.mode) {
|
||||
case Regs::ConditionMode::Always: {
|
||||
execute_on = true;
|
||||
break;
|
||||
}
|
||||
case Regs::ConditionMode::Never: {
|
||||
execute_on = false;
|
||||
break;
|
||||
}
|
||||
case Regs::ConditionMode::ResNonZero: {
|
||||
Regs::QueryCompare cmp;
|
||||
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
|
||||
break;
|
||||
}
|
||||
case Regs::ConditionMode::Equal: {
|
||||
Regs::QueryCompare cmp;
|
||||
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||
execute_on =
|
||||
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
|
||||
break;
|
||||
}
|
||||
case Regs::ConditionMode::NotEqual: {
|
||||
Regs::QueryCompare cmp;
|
||||
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||
execute_on =
|
||||
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNIMPLEMENTED_MSG("Uninplemented Condition Mode!");
|
||||
execute_on = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCounterReset() {
|
||||
switch (regs.counter_reset) {
|
||||
case Regs::CounterReset::SampleCnt:
|
||||
rasterizer->ResetCounter(QueryType::SamplesPassed);
|
||||
break;
|
||||
default:
|
||||
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessSyncPoint() {
|
||||
const u32 sync_point = regs.sync_info.sync_point.Value();
|
||||
const u32 increment = regs.sync_info.increment.Value();
|
||||
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
|
||||
if (increment) {
|
||||
rasterizer->SignalSyncPoint(sync_point);
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::DrawArrays() {
|
||||
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
|
||||
regs.vertex_buffer.count);
|
||||
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
|
||||
|
||||
// Both instance configuration registers can not be set at the same time.
|
||||
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
|
||||
"Illegal combination of instancing parameters");
|
||||
|
||||
if (regs.draw.instance_next) {
|
||||
// Increment the current instance *before* drawing.
|
||||
state.current_instance += 1;
|
||||
} else if (!regs.draw.instance_cont) {
|
||||
// Reset the current instance to 0.
|
||||
state.current_instance = 0;
|
||||
}
|
||||
|
||||
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
|
||||
if (ShouldExecute()) {
|
||||
rasterizer->Draw(is_indexed, false);
|
||||
}
|
||||
|
||||
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
|
||||
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
|
||||
// it's possible that it is incorrect and that there is some other register used to specify the
|
||||
// drawing mode.
|
||||
if (is_indexed) {
|
||||
regs.index_array.count = 0;
|
||||
} else {
|
||||
regs.vertex_buffer.count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||
switch (regs.query.query_get.select) {
|
||||
case Regs::QuerySelect::Zero:
|
||||
return 0;
|
||||
case Regs::QuerySelect::SamplesPassed:
|
||||
// Deferred.
|
||||
rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
|
||||
system.GPU().GetTicks());
|
||||
return std::nullopt;
|
||||
default:
|
||||
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
|
||||
regs.query.query_get.select.Value());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||
auto& shader = state.shader_stages[stage_index];
|
||||
auto& bind_data = regs.cb_bind[stage_index];
|
||||
|
||||
ASSERT(bind_data.index < Regs::MaxConstBuffers);
|
||||
auto& buffer = shader.const_buffers[bind_data.index];
|
||||
|
||||
buffer.enabled = bind_data.valid.Value() != 0;
|
||||
buffer.address = regs.const_buffer.BufferAddress();
|
||||
buffer.size = regs.const_buffer.cb_size;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBData(u32 value) {
|
||||
const u32 id = cb_data_state.id;
|
||||
cb_data_state.buffer[id][cb_data_state.counter] = value;
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
|
||||
cb_data_state.counter++;
|
||||
}
|
||||
|
||||
void Maxwell3D::StartCBData(u32 method) {
|
||||
constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
|
||||
cb_data_state.start_pos = regs.const_buffer.cb_pos;
|
||||
cb_data_state.id = method - first_cb_data;
|
||||
cb_data_state.current = method;
|
||||
cb_data_state.counter = 0;
|
||||
ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
|
||||
if (cb_data_state.current != method) {
|
||||
if (cb_data_state.current != null_cb_data) {
|
||||
FinishCBData();
|
||||
}
|
||||
constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
|
||||
cb_data_state.start_pos = regs.const_buffer.cb_pos;
|
||||
cb_data_state.id = method - first_cb_data;
|
||||
cb_data_state.current = method;
|
||||
cb_data_state.counter = 0;
|
||||
}
|
||||
const std::size_t id = cb_data_state.id;
|
||||
const std::size_t size = amount;
|
||||
std::size_t i = 0;
|
||||
for (; i < size; i++) {
|
||||
cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
|
||||
cb_data_state.counter++;
|
||||
}
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
|
||||
}
|
||||
|
||||
void Maxwell3D::FinishCBData() {
|
||||
// Write the input value to the current const buffer at the current position.
|
||||
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
|
||||
ASSERT(buffer_address != 0);
|
||||
|
||||
// Don't allow writing past the end of the buffer.
|
||||
ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
|
||||
|
||||
const GPUVAddr address{buffer_address + cb_data_state.start_pos};
|
||||
const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
|
||||
|
||||
const u32 id = cb_data_state.id;
|
||||
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
|
||||
OnMemoryWrite();
|
||||
|
||||
cb_data_state.id = null_cb_data;
|
||||
cb_data_state.current = null_cb_data;
|
||||
}
|
||||
|
||||
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
|
||||
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
|
||||
|
||||
Texture::TICEntry tic_entry;
|
||||
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
|
||||
|
||||
return tic_entry;
|
||||
}
|
||||
|
||||
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
|
||||
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
|
||||
|
||||
Texture::TSCEntry tsc_entry;
|
||||
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
|
||||
return tsc_entry;
|
||||
}
|
||||
|
||||
u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
|
||||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessClearBuffers() {
|
||||
rasterizer->Clear();
|
||||
}
|
||||
|
||||
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
|
||||
ASSERT(stage != ShaderType::Compute);
|
||||
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||
const auto& buffer = shader_stage.const_buffers[const_buffer];
|
||||
return memory_manager.Read<u32>(buffer.address + offset);
|
||||
}
|
||||
|
||||
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
|
||||
return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
|
||||
}
|
||||
|
||||
SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const {
|
||||
ASSERT(stage != ShaderType::Compute);
|
||||
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
||||
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
||||
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||
}
|
||||
|
||||
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
|
||||
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
|
||||
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
|
||||
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
|
||||
return result;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
|
||||
return rasterizer->AccessGuestDriverProfile();
|
||||
}
|
||||
|
||||
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
|
||||
return rasterizer->AccessGuestDriverProfile();
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
1728
src/video_core/engines/maxwell_3d.h
Executable file
1728
src/video_core/engines/maxwell_3d.h
Executable file
File diff suppressed because it is too large
Load Diff
219
src/video_core/engines/maxwell_dma.cpp
Executable file
219
src/video_core/engines/maxwell_dma.cpp
Executable file
@@ -0,0 +1,219 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
using namespace Texture;
|
||||
|
||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_} {}
|
||||
|
||||
MaxwellDMA::~MaxwellDMA() = default;
|
||||
|
||||
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
|
||||
Launch();
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
for (size_t i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::Launch() {
|
||||
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
|
||||
static_cast<GPUVAddr>(regs.offset_out));
|
||||
|
||||
// TODO(Subv): Perform more research and implement all features of this engine.
|
||||
const LaunchDMA& launch = regs.launch_dma;
|
||||
ASSERT(launch.remap_enable == 0);
|
||||
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
|
||||
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
|
||||
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
|
||||
ASSERT(regs.dst_params.origin.x == 0);
|
||||
ASSERT(regs.dst_params.origin.y == 0);
|
||||
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
|
||||
return;
|
||||
}
|
||||
|
||||
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||
system.GPU().Maxwell3D().OnMemoryWrite();
|
||||
|
||||
if (is_src_pitch && is_dst_pitch) {
|
||||
CopyPitchToPitch();
|
||||
} else {
|
||||
ASSERT(launch.multi_line_enable == 1);
|
||||
|
||||
if (!is_src_pitch && is_dst_pitch) {
|
||||
CopyBlockLinearToPitch();
|
||||
} else {
|
||||
CopyPitchToBlockLinear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToPitch() {
|
||||
// When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D
|
||||
// buffer of length `line_length_in`.
|
||||
// Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
|
||||
if (!regs.launch_dma.multi_line_enable) {
|
||||
memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in);
|
||||
return;
|
||||
}
|
||||
|
||||
// Perform a line-by-line copy.
|
||||
// We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
|
||||
// There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
|
||||
for (u32 line = 0; line < regs.line_count; ++line) {
|
||||
const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
|
||||
const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
|
||||
memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
|
||||
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
|
||||
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
|
||||
|
||||
// Optimized path for micro copies.
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
|
||||
FastCopyBlockLinearToPitch();
|
||||
return;
|
||||
}
|
||||
|
||||
// Deswizzle the input and copy it over.
|
||||
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
|
||||
const Parameters& src_params = regs.src_params;
|
||||
const u32 width = src_params.width;
|
||||
const u32 height = src_params.height;
|
||||
const u32 depth = src_params.depth;
|
||||
const u32 block_height = src_params.block_size.height;
|
||||
const u32 block_depth = src_params.block_size.depth;
|
||||
const size_t src_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
|
||||
UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
|
||||
block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
|
||||
read_buffer.data());
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
|
||||
|
||||
const auto& dst_params = regs.dst_params;
|
||||
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
|
||||
const u32 width = dst_params.width;
|
||||
const u32 height = dst_params.height;
|
||||
const u32 depth = dst_params.depth;
|
||||
const u32 block_height = dst_params.block_size.height;
|
||||
const u32 block_depth = dst_params.block_size.depth;
|
||||
const size_t dst_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
const size_t dst_layer_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
|
||||
|
||||
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
// If the input is linear and the output is tiled, swizzle the input and copy it over.
|
||||
if (regs.dst_params.block_size.depth > 0) {
|
||||
ASSERT(dst_params.layer == 0);
|
||||
SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height,
|
||||
bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
|
||||
dst_params.origin.y, write_buffer.data(), read_buffer.data());
|
||||
} else {
|
||||
SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
|
||||
write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
|
||||
block_height, dst_params.origin.x, dst_params.origin.y);
|
||||
}
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
|
||||
const size_t src_size = GOB_SIZE;
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
u32 pos_x = regs.src_params.origin.x;
|
||||
u32 pos_y = regs.src_params.origin.y;
|
||||
const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
|
||||
regs.src_params.block_size.height, bytes_per_pixel);
|
||||
const u32 x_in_gob = 64 / bytes_per_pixel;
|
||||
pos_x = pos_x % x_in_gob;
|
||||
pos_y = pos_y % 8;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
|
||||
bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
|
||||
write_buffer.data(), read_buffer.data());
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
274
src/video_core/engines/maxwell_dma.h
Executable file
274
src/video_core/engines/maxwell_dma.h
Executable file
@@ -0,0 +1,274 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This engine is known as gk104_copy. Documentation can be found in:
|
||||
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
|
||||
*/
|
||||
|
||||
class MaxwellDMA final : public EngineInterface {
|
||||
public:
|
||||
struct PackedGPUVAddr {
|
||||
u32 upper;
|
||||
u32 lower;
|
||||
|
||||
constexpr operator GPUVAddr() const noexcept {
|
||||
return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
|
||||
}
|
||||
};
|
||||
|
||||
union BlockSize {
|
||||
BitField<0, 4, u32> width;
|
||||
BitField<4, 4, u32> height;
|
||||
BitField<8, 4, u32> depth;
|
||||
BitField<12, 4, u32> gob_height;
|
||||
};
|
||||
static_assert(sizeof(BlockSize) == 4);
|
||||
|
||||
union Origin {
|
||||
BitField<0, 16, u32> x;
|
||||
BitField<16, 16, u32> y;
|
||||
};
|
||||
static_assert(sizeof(Origin) == 4);
|
||||
|
||||
struct Parameters {
|
||||
BlockSize block_size;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
Origin origin;
|
||||
};
|
||||
static_assert(sizeof(Parameters) == 24);
|
||||
|
||||
struct Semaphore {
|
||||
PackedGPUVAddr address;
|
||||
u32 payload;
|
||||
};
|
||||
static_assert(sizeof(Semaphore) == 12);
|
||||
|
||||
struct RenderEnable {
|
||||
enum class Mode : u32 {
|
||||
// Note: This uses Pascal case in order to avoid the identifiers
|
||||
// FALSE and TRUE, which are reserved on Darwin.
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
PackedGPUVAddr address;
|
||||
BitField<0, 3, Mode> mode;
|
||||
};
|
||||
static_assert(sizeof(RenderEnable) == 12);
|
||||
|
||||
enum class PhysModeTarget : u32 {
|
||||
LOCAL_FB = 0,
|
||||
COHERENT_SYSMEM = 1,
|
||||
NONCOHERENT_SYSMEM = 2,
|
||||
};
|
||||
using PhysMode = BitField<0, 2, PhysModeTarget>;
|
||||
|
||||
union LaunchDMA {
|
||||
enum class DataTransferType : u32 {
|
||||
NONE = 0,
|
||||
PIPELINED = 1,
|
||||
NON_PIPELINED = 2,
|
||||
};
|
||||
|
||||
enum class SemaphoreType : u32 {
|
||||
NONE = 0,
|
||||
RELEASE_ONE_WORD_SEMAPHORE = 1,
|
||||
RELEASE_FOUR_WORD_SEMAPHORE = 2,
|
||||
};
|
||||
|
||||
enum class InterruptType : u32 {
|
||||
NONE = 0,
|
||||
BLOCKING = 1,
|
||||
NON_BLOCKING = 2,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
BLOCKLINEAR = 0,
|
||||
PITCH = 1,
|
||||
};
|
||||
|
||||
enum class Type : u32 {
|
||||
VIRTUAL = 0,
|
||||
PHYSICAL = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReduction : u32 {
|
||||
IMIN = 0,
|
||||
IMAX = 1,
|
||||
IXOR = 2,
|
||||
IAND = 3,
|
||||
IOR = 4,
|
||||
IADD = 5,
|
||||
INC = 6,
|
||||
DEC = 7,
|
||||
FADD = 0xA,
|
||||
};
|
||||
|
||||
enum class SemaphoreReductionSign : u32 {
|
||||
SIGNED = 0,
|
||||
UNSIGNED = 1,
|
||||
};
|
||||
|
||||
enum class BypassL2 : u32 {
|
||||
USE_PTE_SETTING = 0,
|
||||
FORCE_VOLATILE = 1,
|
||||
};
|
||||
|
||||
BitField<0, 2, DataTransferType> data_transfer_type;
|
||||
BitField<2, 1, u32> flush_enable;
|
||||
BitField<3, 2, SemaphoreType> semaphore_type;
|
||||
BitField<5, 2, InterruptType> interrupt_type;
|
||||
BitField<7, 1, MemoryLayout> src_memory_layout;
|
||||
BitField<8, 1, MemoryLayout> dst_memory_layout;
|
||||
BitField<9, 1, u32> multi_line_enable;
|
||||
BitField<10, 1, u32> remap_enable;
|
||||
BitField<11, 1, u32> rmwdisable;
|
||||
BitField<12, 1, Type> src_type;
|
||||
BitField<13, 1, Type> dst_type;
|
||||
BitField<14, 4, SemaphoreReduction> semaphore_reduction;
|
||||
BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
|
||||
BitField<19, 1, u32> reduction_enable;
|
||||
BitField<20, 1, BypassL2> bypass_l2;
|
||||
};
|
||||
static_assert(sizeof(LaunchDMA) == 4);
|
||||
|
||||
struct RemapConst {
|
||||
enum Swizzle : u32 {
|
||||
SRC_X = 0,
|
||||
SRC_Y = 1,
|
||||
SRC_Z = 2,
|
||||
SRC_W = 3,
|
||||
CONST_A = 4,
|
||||
CONST_B = 5,
|
||||
NO_WRITE = 6,
|
||||
};
|
||||
|
||||
PackedGPUVAddr address;
|
||||
|
||||
union {
|
||||
BitField<0, 3, Swizzle> dst_x;
|
||||
BitField<4, 3, Swizzle> dst_y;
|
||||
BitField<8, 3, Swizzle> dst_z;
|
||||
BitField<12, 3, Swizzle> dst_w;
|
||||
BitField<16, 2, u32> component_size_minus_one;
|
||||
BitField<20, 2, u32> num_src_components_minus_one;
|
||||
BitField<24, 2, u32> num_dst_components_minus_one;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(RemapConst) == 12);
|
||||
|
||||
explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
|
||||
~MaxwellDMA();
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
private:
|
||||
/// Performs the copy from the source buffer to the destination buffer as configured in the
|
||||
/// registers.
|
||||
void Launch();
|
||||
|
||||
void CopyPitchToPitch();
|
||||
|
||||
void CopyBlockLinearToPitch();
|
||||
|
||||
void CopyPitchToBlockLinear();
|
||||
|
||||
void FastCopyBlockLinearToPitch();
|
||||
|
||||
Core::System& system;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
std::vector<u8> read_buffer;
|
||||
std::vector<u8> write_buffer;
|
||||
|
||||
static constexpr std::size_t NUM_REGS = 0x800;
|
||||
struct Regs {
|
||||
union {
|
||||
struct {
|
||||
u32 reserved[0x40];
|
||||
u32 nop;
|
||||
u32 reserved01[0xf];
|
||||
u32 pm_trigger;
|
||||
u32 reserved02[0x3f];
|
||||
Semaphore semaphore;
|
||||
u32 reserved03[0x2];
|
||||
RenderEnable render_enable;
|
||||
PhysMode src_phys_mode;
|
||||
PhysMode dst_phys_mode;
|
||||
u32 reserved04[0x26];
|
||||
LaunchDMA launch_dma;
|
||||
u32 reserved05[0x3f];
|
||||
PackedGPUVAddr offset_in;
|
||||
PackedGPUVAddr offset_out;
|
||||
u32 pitch_in;
|
||||
u32 pitch_out;
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
u32 reserved06[0xb8];
|
||||
RemapConst remap_const;
|
||||
Parameters dst_params;
|
||||
u32 reserved07[0x1];
|
||||
Parameters src_params;
|
||||
u32 reserved08[0x275];
|
||||
u32 pm_trigger_end;
|
||||
u32 reserved09[0x3ba];
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(launch_dma, 0xC0);
|
||||
ASSERT_REG_POSITION(offset_in, 0x100);
|
||||
ASSERT_REG_POSITION(offset_out, 0x102);
|
||||
ASSERT_REG_POSITION(pitch_in, 0x104);
|
||||
ASSERT_REG_POSITION(pitch_out, 0x105);
|
||||
ASSERT_REG_POSITION(line_length_in, 0x106);
|
||||
ASSERT_REG_POSITION(line_count, 0x107);
|
||||
ASSERT_REG_POSITION(remap_const, 0x1C0);
|
||||
ASSERT_REG_POSITION(dst_params, 0x1C3);
|
||||
ASSERT_REG_POSITION(src_params, 0x1CA);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
2298
src/video_core/engines/shader_bytecode.h
Executable file
2298
src/video_core/engines/shader_bytecode.h
Executable file
File diff suppressed because it is too large
Load Diff
158
src/video_core/engines/shader_header.h
Executable file
158
src/video_core/engines/shader_header.h
Executable file
@@ -0,0 +1,158 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Shader {
|
||||
|
||||
enum class OutputTopology : u32 {
|
||||
PointList = 1,
|
||||
LineStrip = 6,
|
||||
TriangleStrip = 7,
|
||||
};
|
||||
|
||||
enum class PixelImap : u8 {
|
||||
Unused = 0,
|
||||
Constant = 1,
|
||||
Perspective = 2,
|
||||
ScreenLinear = 3,
|
||||
};
|
||||
|
||||
// Documentation in:
|
||||
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
|
||||
struct Header {
|
||||
union {
|
||||
BitField<0, 5, u32> sph_type;
|
||||
BitField<5, 5, u32> version;
|
||||
BitField<10, 4, u32> shader_type;
|
||||
BitField<14, 1, u32> mrt_enable;
|
||||
BitField<15, 1, u32> kills_pixels;
|
||||
BitField<16, 1, u32> does_global_store;
|
||||
BitField<17, 4, u32> sass_version;
|
||||
BitField<21, 5, u32> reserved;
|
||||
BitField<26, 1, u32> does_load_or_store;
|
||||
BitField<27, 1, u32> does_fp64;
|
||||
BitField<28, 4, u32> stream_out_mask;
|
||||
} common0;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_low_size;
|
||||
BitField<24, 8, u32> per_patch_attribute_count;
|
||||
} common1;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_high_size;
|
||||
BitField<24, 8, u32> threads_per_input_primitive;
|
||||
} common2;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_crs_size;
|
||||
BitField<24, 4, OutputTopology> output_topology;
|
||||
BitField<28, 4, u32> reserved;
|
||||
} common3;
|
||||
|
||||
union {
|
||||
BitField<0, 12, u32> max_output_vertices;
|
||||
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
|
||||
BitField<20, 4, u32> reserved;
|
||||
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
|
||||
} common4;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
|
||||
INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
|
||||
INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
|
||||
INSERT_UNION_PADDING_BYTES(2); // ImapColor
|
||||
union {
|
||||
BitField<0, 8, u16> clip_distances;
|
||||
BitField<8, 1, u16> point_sprite_s;
|
||||
BitField<9, 1, u16> point_sprite_t;
|
||||
BitField<10, 1, u16> fog_coordinate;
|
||||
BitField<12, 1, u16> tessellation_eval_point_u;
|
||||
BitField<13, 1, u16> tessellation_eval_point_v;
|
||||
BitField<14, 1, u16> instance_id;
|
||||
BitField<15, 1, u16> vertex_id;
|
||||
};
|
||||
INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10]
|
||||
INSERT_UNION_PADDING_BYTES(1); // ImapReserved
|
||||
INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA
|
||||
INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB
|
||||
INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
|
||||
INSERT_UNION_PADDING_BYTES(2); // OmapColor
|
||||
INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC
|
||||
INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10]
|
||||
INSERT_UNION_PADDING_BYTES(1); // OmapReserved
|
||||
} vtg;
|
||||
|
||||
struct {
|
||||
INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
|
||||
INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
|
||||
|
||||
union {
|
||||
BitField<0, 2, PixelImap> x;
|
||||
BitField<2, 2, PixelImap> y;
|
||||
BitField<4, 2, PixelImap> z;
|
||||
BitField<6, 2, PixelImap> w;
|
||||
u8 raw;
|
||||
} imap_generic_vector[32];
|
||||
|
||||
INSERT_UNION_PADDING_BYTES(2); // ImapColor
|
||||
INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
|
||||
INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
|
||||
INSERT_UNION_PADDING_BYTES(2); // ImapReserved
|
||||
|
||||
struct {
|
||||
u32 target;
|
||||
union {
|
||||
BitField<0, 1, u32> sample_mask;
|
||||
BitField<1, 1, u32> depth;
|
||||
BitField<2, 30, u32> reserved;
|
||||
};
|
||||
} omap;
|
||||
|
||||
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
|
||||
const u32 bit = render_target * 4 + component;
|
||||
return omap.target & (1 << bit);
|
||||
}
|
||||
|
||||
PixelImap GetPixelImap(u32 attribute) const {
|
||||
const auto get_index = [this, attribute](u32 index) {
|
||||
return static_cast<PixelImap>(
|
||||
(imap_generic_vector[attribute].raw >> (index * 2)) & 3);
|
||||
};
|
||||
|
||||
std::optional<PixelImap> result;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
const PixelImap index = get_index(component);
|
||||
if (index == PixelImap::Unused) {
|
||||
continue;
|
||||
}
|
||||
if (result && result != index) {
|
||||
LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
|
||||
}
|
||||
result = index;
|
||||
}
|
||||
return result.value_or(PixelImap::Unused);
|
||||
}
|
||||
} ps;
|
||||
|
||||
std::array<u32, 0xF> raw;
|
||||
};
|
||||
|
||||
u64 GetLocalMemorySize() const {
|
||||
return (common1.shader_local_memory_low_size |
|
||||
(common2.shader_local_memory_high_size << 24));
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
|
||||
|
||||
} // namespace Tegra::Shader
|
||||
21
src/video_core/engines/shader_type.h
Executable file
21
src/video_core/engines/shader_type.h
Executable file
@@ -0,0 +1,21 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
enum class ShaderType : u32 {
|
||||
Vertex = 0,
|
||||
TesselationControl = 1,
|
||||
TesselationEval = 2,
|
||||
Geometry = 3,
|
||||
Fragment = 4,
|
||||
Compute = 5,
|
||||
};
|
||||
static constexpr std::size_t MaxShaderTypes = 6;
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
Reference in New Issue
Block a user