another try
This commit is contained in:
@@ -1,16 +1,16 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
struct ConstBufferInfo {
|
||||
GPUVAddr address;
|
||||
u32 size;
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
struct ConstBufferInfo {
|
||||
GPUVAddr address;
|
||||
u32 size;
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,22 +1,22 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class EngineInterface {
|
||||
public:
|
||||
virtual ~EngineInterface() = default;
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class EngineInterface {
|
||||
public:
|
||||
virtual ~EngineInterface() = default;
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,84 +1,84 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/algorithm.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
State::State(MemoryManager& memory_manager_, Registers& regs_)
|
||||
: regs{regs_}, memory_manager{memory_manager_} {}
|
||||
|
||||
State::~State() = default;
|
||||
|
||||
void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void State::ProcessExec(const bool is_linear_) {
|
||||
write_offset = 0;
|
||||
copy_size = regs.line_length_in * regs.line_count;
|
||||
inner_buffer.resize(copy_size);
|
||||
is_linear = is_linear_;
|
||||
}
|
||||
|
||||
void State::ProcessData(const u32 data, const bool is_last_call) {
|
||||
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
|
||||
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
|
||||
write_offset += sub_copy_size;
|
||||
if (!is_last_call) {
|
||||
return;
|
||||
}
|
||||
ProcessData(inner_buffer);
|
||||
}
|
||||
|
||||
void State::ProcessData(const u32* data, size_t num_data) {
|
||||
std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32));
|
||||
ProcessData(read_buffer);
|
||||
}
|
||||
|
||||
void State::ProcessData(std::span<const u8> read_buffer) {
|
||||
const GPUVAddr address{regs.dest.Address()};
|
||||
if (is_linear) {
|
||||
if (regs.line_count == 1) {
|
||||
rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
|
||||
} else {
|
||||
for (u32 line = 0; line < regs.line_count; ++line) {
|
||||
const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
|
||||
memory_manager.WriteBlockUnsafe(
|
||||
dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
|
||||
regs.line_length_in);
|
||||
}
|
||||
memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
|
||||
}
|
||||
} else {
|
||||
u32 width = regs.dest.width;
|
||||
u32 x_elements = regs.line_length_in;
|
||||
u32 x_offset = regs.dest.x;
|
||||
const u32 bpp_shift = Common::FoldRight(
|
||||
4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
|
||||
width, x_elements, x_offset, static_cast<u32>(address));
|
||||
width >>= bpp_shift;
|
||||
x_elements >>= bpp_shift;
|
||||
x_offset >>= bpp_shift;
|
||||
const u32 bytes_per_pixel = 1U << bpp_shift;
|
||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
|
||||
regs.dest.BlockHeight(), regs.dest.BlockDepth());
|
||||
tmp_buffer.resize(dst_size);
|
||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||
Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
|
||||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||
regs.dest.BlockDepth(), regs.line_length_in);
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/algorithm.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
State::State(MemoryManager& memory_manager_, Registers& regs_)
|
||||
: regs{regs_}, memory_manager{memory_manager_} {}
|
||||
|
||||
State::~State() = default;
|
||||
|
||||
void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void State::ProcessExec(const bool is_linear_) {
|
||||
write_offset = 0;
|
||||
copy_size = regs.line_length_in * regs.line_count;
|
||||
inner_buffer.resize(copy_size);
|
||||
is_linear = is_linear_;
|
||||
}
|
||||
|
||||
void State::ProcessData(const u32 data, const bool is_last_call) {
|
||||
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
|
||||
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
|
||||
write_offset += sub_copy_size;
|
||||
if (!is_last_call) {
|
||||
return;
|
||||
}
|
||||
ProcessData(inner_buffer);
|
||||
}
|
||||
|
||||
void State::ProcessData(const u32* data, size_t num_data) {
|
||||
std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32));
|
||||
ProcessData(read_buffer);
|
||||
}
|
||||
|
||||
void State::ProcessData(std::span<const u8> read_buffer) {
|
||||
const GPUVAddr address{regs.dest.Address()};
|
||||
if (is_linear) {
|
||||
if (regs.line_count == 1) {
|
||||
rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
|
||||
} else {
|
||||
for (u32 line = 0; line < regs.line_count; ++line) {
|
||||
const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
|
||||
memory_manager.WriteBlockUnsafe(
|
||||
dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
|
||||
regs.line_length_in);
|
||||
}
|
||||
memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
|
||||
}
|
||||
} else {
|
||||
u32 width = regs.dest.width;
|
||||
u32 x_elements = regs.line_length_in;
|
||||
u32 x_offset = regs.dest.x;
|
||||
const u32 bpp_shift = Common::FoldRight(
|
||||
4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
|
||||
width, x_elements, x_offset, static_cast<u32>(address));
|
||||
width >>= bpp_shift;
|
||||
x_elements >>= bpp_shift;
|
||||
x_offset >>= bpp_shift;
|
||||
const u32 bytes_per_pixel = 1U << bpp_shift;
|
||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
|
||||
regs.dest.BlockHeight(), regs.dest.BlockDepth());
|
||||
tmp_buffer.resize(dst_size);
|
||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||
Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
|
||||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||
regs.dest.BlockDepth(), regs.line_length_in);
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
|
@@ -1,84 +1,84 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
struct Registers {
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 pitch;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 x;
|
||||
u32 y;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
|
||||
}
|
||||
|
||||
u32 BlockWidth() const {
|
||||
return block_width.Value();
|
||||
}
|
||||
|
||||
u32 BlockHeight() const {
|
||||
return block_height.Value();
|
||||
}
|
||||
|
||||
u32 BlockDepth() const {
|
||||
return block_depth.Value();
|
||||
}
|
||||
} dest;
|
||||
};
|
||||
|
||||
class State {
|
||||
public:
|
||||
explicit State(MemoryManager& memory_manager_, Registers& regs_);
|
||||
~State();
|
||||
|
||||
void ProcessExec(bool is_linear_);
|
||||
void ProcessData(u32 data, bool is_last_call);
|
||||
void ProcessData(const u32* data, size_t num_data);
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
private:
|
||||
void ProcessData(std::span<const u8> read_buffer);
|
||||
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
std::vector<u8> tmp_buffer;
|
||||
bool is_linear = false;
|
||||
Registers& regs;
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
struct Registers {
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 pitch;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 x;
|
||||
u32 y;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
|
||||
}
|
||||
|
||||
u32 BlockWidth() const {
|
||||
return block_width.Value();
|
||||
}
|
||||
|
||||
u32 BlockHeight() const {
|
||||
return block_height.Value();
|
||||
}
|
||||
|
||||
u32 BlockDepth() const {
|
||||
return block_depth.Value();
|
||||
}
|
||||
} dest;
|
||||
};
|
||||
|
||||
class State {
|
||||
public:
|
||||
explicit State(MemoryManager& memory_manager_, Registers& regs_);
|
||||
~State();
|
||||
|
||||
void ProcessExec(bool is_linear_);
|
||||
void ProcessData(u32 data, bool is_last_call);
|
||||
void ProcessData(const u32* data, size_t num_data);
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
private:
|
||||
void ProcessData(std::span<const u8> read_buffer);
|
||||
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
std::vector<u8> tmp_buffer;
|
||||
bool is_linear = false;
|
||||
Registers& regs;
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
|
@@ -1,86 +1,86 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Fermi2D::Fermi2D() {
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
||||
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Fermi2D register, increase the size of the Regs structure");
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
|
||||
Blit();
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
|
||||
for (u32 i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
regs.src.Address(), regs.dst.Address());
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
|
||||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
Config config{
|
||||
.operation = regs.operation,
|
||||
.filter = args.sample_mode.filter,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
.dst_y1 = args.dst_y0 + args.dst_height,
|
||||
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
|
||||
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
|
||||
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
||||
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
||||
};
|
||||
Surface src = regs.src;
|
||||
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
||||
const auto need_align_to_pitch =
|
||||
src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
|
||||
static_cast<s32>(src.width) == config.src_x1 &&
|
||||
config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
|
||||
if (need_align_to_pitch) {
|
||||
auto address = src.Address() + config.src_x0 * bytes_per_pixel;
|
||||
src.addr_upper = static_cast<u32>(address >> 32);
|
||||
src.addr_lower = static_cast<u32>(address);
|
||||
src.width -= config.src_x0;
|
||||
config.src_x1 -= config.src_x0;
|
||||
config.src_x0 = 0;
|
||||
}
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Fermi2D::Fermi2D() {
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
||||
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Fermi2D register, increase the size of the Regs structure");
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
|
||||
Blit();
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
|
||||
for (u32 i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
regs.src.Address(), regs.dst.Address());
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
|
||||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
Config config{
|
||||
.operation = regs.operation,
|
||||
.filter = args.sample_mode.filter,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
.dst_y1 = args.dst_y0 + args.dst_height,
|
||||
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
|
||||
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
|
||||
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
||||
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
||||
};
|
||||
Surface src = regs.src;
|
||||
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
||||
const auto need_align_to_pitch =
|
||||
src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
|
||||
static_cast<s32>(src.width) == config.src_x1 &&
|
||||
config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
|
||||
if (need_align_to_pitch) {
|
||||
auto address = src.Address() + config.src_x0 * bytes_per_pixel;
|
||||
src.addr_upper = static_cast<u32>(address >> 32);
|
||||
src.addr_lower = static_cast<u32>(address);
|
||||
src.width -= config.src_x0;
|
||||
config.src_x1 -= config.src_x0;
|
||||
config.src_x0 = 0;
|
||||
}
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,350 +1,350 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as G80_2D. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
|
||||
*/
|
||||
|
||||
#define FERMI2D_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class Fermi2D final : public EngineInterface {
|
||||
public:
|
||||
explicit Fermi2D();
|
||||
~Fermi2D() override;
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
enum class Origin : u32 {
|
||||
Center = 0,
|
||||
Corner = 1,
|
||||
};
|
||||
|
||||
enum class Filter : u32 {
|
||||
Point = 0,
|
||||
Bilinear = 1,
|
||||
};
|
||||
|
||||
enum class Operation : u32 {
|
||||
SrcCopyAnd = 0,
|
||||
ROPAnd = 1,
|
||||
Blend = 2,
|
||||
SrcCopy = 3,
|
||||
ROP = 4,
|
||||
SrcCopyPremult = 5,
|
||||
BlendPremult = 6,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
BlockLinear = 0,
|
||||
Pitch = 1,
|
||||
};
|
||||
|
||||
enum class CpuIndexWrap : u32 {
|
||||
Wrap = 0,
|
||||
NoWrap = 1,
|
||||
};
|
||||
|
||||
struct Surface {
|
||||
RenderTargetFormat format;
|
||||
MemoryLayout linear;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 pitch;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 addr_upper;
|
||||
u32 addr_lower;
|
||||
|
||||
[[nodiscard]] constexpr GPUVAddr Address() const noexcept {
|
||||
return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
||||
|
||||
enum class SectorPromotion : u32 {
|
||||
NoPromotion = 0,
|
||||
PromoteTo2V = 1,
|
||||
PromoteTo2H = 2,
|
||||
PromoteTo4 = 3,
|
||||
};
|
||||
|
||||
enum class NumTpcs : u32 {
|
||||
All = 0,
|
||||
One = 1,
|
||||
};
|
||||
|
||||
enum class RenderEnableMode : u32 {
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
enum class ColorKeyFormat : u32 {
|
||||
A16R56G6B5 = 0,
|
||||
A1R5G55B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A2R10G10B10 = 3,
|
||||
Y8 = 4,
|
||||
Y16 = 5,
|
||||
Y32 = 6,
|
||||
};
|
||||
|
||||
union Beta4 {
|
||||
BitField<0, 8, u32> b;
|
||||
BitField<8, 8, u32> g;
|
||||
BitField<16, 8, u32> r;
|
||||
BitField<24, 8, u32> a;
|
||||
};
|
||||
|
||||
struct Point {
|
||||
u32 x;
|
||||
u32 y;
|
||||
};
|
||||
|
||||
enum class PatternSelect : u32 {
|
||||
MonoChrome8x8 = 0,
|
||||
MonoChrome64x1 = 1,
|
||||
MonoChrome1x64 = 2,
|
||||
Color = 3,
|
||||
};
|
||||
|
||||
enum class NotifyType : u32 {
|
||||
WriteOnly = 0,
|
||||
WriteThenAwaken = 1,
|
||||
};
|
||||
|
||||
enum class MonochromePatternColorFormat : u32 {
|
||||
A8X8R8G6B5 = 0,
|
||||
A1R5G5B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A8Y8 = 3,
|
||||
A8X8Y16 = 4,
|
||||
Y32 = 5,
|
||||
};
|
||||
|
||||
enum class MonochromePatternFormat : u32 {
|
||||
CGA6_M1 = 0,
|
||||
LE_M1 = 1,
|
||||
};
|
||||
|
||||
union Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0x258;
|
||||
struct {
|
||||
u32 object;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3F);
|
||||
u32 no_operation;
|
||||
NotifyType notify;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x2);
|
||||
u32 wait_for_idle;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xB);
|
||||
u32 pm_trigger;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xF);
|
||||
u32 context_dma_notify;
|
||||
u32 dst_context_dma;
|
||||
u32 src_context_dma;
|
||||
u32 semaphore_context_dma;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x1C);
|
||||
Surface dst;
|
||||
CpuIndexWrap pixels_from_cpu_index_wrap;
|
||||
u32 kind2d_check_enable;
|
||||
Surface src;
|
||||
SectorPromotion pixels_from_memory_sector_promotion;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x1);
|
||||
NumTpcs num_tpcs;
|
||||
u32 render_enable_addr_upper;
|
||||
u32 render_enable_addr_lower;
|
||||
RenderEnableMode render_enable_mode;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
u32 clip_x0;
|
||||
u32 clip_y0;
|
||||
u32 clip_width;
|
||||
u32 clip_height;
|
||||
BitField<0, 1, u32> clip_enable;
|
||||
BitField<0, 3, ColorKeyFormat> color_key_format;
|
||||
u32 color_key;
|
||||
BitField<0, 1, u32> color_key_enable;
|
||||
BitField<0, 8, u32> rop;
|
||||
u32 beta1;
|
||||
Beta4 beta4;
|
||||
Operation operation;
|
||||
union {
|
||||
BitField<0, 6, u32> x;
|
||||
BitField<8, 6, u32> y;
|
||||
} pattern_offset;
|
||||
BitField<0, 2, PatternSelect> pattern_select;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC);
|
||||
struct {
|
||||
BitField<0, 3, MonochromePatternColorFormat> color_format;
|
||||
BitField<0, 1, MonochromePatternFormat> format;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 pattern0;
|
||||
u32 pattern1;
|
||||
} monochrome_pattern;
|
||||
struct {
|
||||
std::array<u32, 0x40> X8R8G8B8;
|
||||
std::array<u32, 0x20> R5G6B5;
|
||||
std::array<u32, 0x20> X1R5G5B5;
|
||||
std::array<u32, 0x10> Y8;
|
||||
} color_pattern;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x10);
|
||||
struct {
|
||||
u32 prim_mode;
|
||||
u32 prim_color_format;
|
||||
u32 prim_color;
|
||||
u32 line_tie_break_bits;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x14);
|
||||
u32 prim_point_xy;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x7);
|
||||
std::array<Point, 0x40> prim_point;
|
||||
} render_solid;
|
||||
struct {
|
||||
u32 data_type;
|
||||
u32 color_format;
|
||||
u32 index_format;
|
||||
u32 mono_format;
|
||||
u32 wrap;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 mono_opacity;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x6);
|
||||
u32 src_width;
|
||||
u32 src_height;
|
||||
u32 dx_du_frac;
|
||||
u32 dx_du_int;
|
||||
u32 dx_dv_frac;
|
||||
u32 dy_dv_int;
|
||||
u32 dst_x0_frac;
|
||||
u32 dst_x0_int;
|
||||
u32 dst_y0_frac;
|
||||
u32 dst_y0_int;
|
||||
u32 data;
|
||||
} pixels_from_cpu;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3);
|
||||
u32 big_endian_control;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3);
|
||||
struct {
|
||||
BitField<0, 3, u32> block_shape;
|
||||
BitField<0, 5, u32> corral_size;
|
||||
BitField<0, 1, u32> safe_overlap;
|
||||
union {
|
||||
BitField<0, 1, Origin> origin;
|
||||
BitField<4, 1, Filter> filter;
|
||||
} sample_mode;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x8);
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_width;
|
||||
s32 dst_height;
|
||||
s64 du_dx;
|
||||
s64 dv_dy;
|
||||
s64 src_x0;
|
||||
s64 src_y0;
|
||||
} pixels_from_memory;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
} regs{};
|
||||
|
||||
struct Config {
|
||||
Operation operation;
|
||||
Filter filter;
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_x1;
|
||||
s32 dst_y1;
|
||||
s32 src_x0;
|
||||
s32 src_y0;
|
||||
s32 src_x1;
|
||||
s32 src_y1;
|
||||
};
|
||||
|
||||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void Blit();
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(object, 0x0);
|
||||
ASSERT_REG_POSITION(no_operation, 0x100);
|
||||
ASSERT_REG_POSITION(notify, 0x104);
|
||||
ASSERT_REG_POSITION(wait_for_idle, 0x110);
|
||||
ASSERT_REG_POSITION(pm_trigger, 0x140);
|
||||
ASSERT_REG_POSITION(context_dma_notify, 0x180);
|
||||
ASSERT_REG_POSITION(dst_context_dma, 0x184);
|
||||
ASSERT_REG_POSITION(src_context_dma, 0x188);
|
||||
ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
|
||||
ASSERT_REG_POSITION(dst, 0x200);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
|
||||
ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
|
||||
ASSERT_REG_POSITION(src, 0x230);
|
||||
ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
|
||||
ASSERT_REG_POSITION(num_tpcs, 0x260);
|
||||
ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
|
||||
ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
|
||||
ASSERT_REG_POSITION(clip_x0, 0x280);
|
||||
ASSERT_REG_POSITION(clip_y0, 0x284);
|
||||
ASSERT_REG_POSITION(clip_width, 0x288);
|
||||
ASSERT_REG_POSITION(clip_height, 0x28c);
|
||||
ASSERT_REG_POSITION(clip_enable, 0x290);
|
||||
ASSERT_REG_POSITION(color_key_format, 0x294);
|
||||
ASSERT_REG_POSITION(color_key, 0x298);
|
||||
ASSERT_REG_POSITION(rop, 0x2A0);
|
||||
ASSERT_REG_POSITION(beta1, 0x2A4);
|
||||
ASSERT_REG_POSITION(beta4, 0x2A8);
|
||||
ASSERT_REG_POSITION(operation, 0x2AC);
|
||||
ASSERT_REG_POSITION(pattern_offset, 0x2B0);
|
||||
ASSERT_REG_POSITION(pattern_select, 0x2B4);
|
||||
ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
|
||||
ASSERT_REG_POSITION(color_pattern, 0x300);
|
||||
ASSERT_REG_POSITION(render_solid, 0x580);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
|
||||
ASSERT_REG_POSITION(big_endian_control, 0x870);
|
||||
ASSERT_REG_POSITION(pixels_from_memory, 0x880);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as G80_2D. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
|
||||
*/
|
||||
|
||||
#define FERMI2D_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class Fermi2D final : public EngineInterface {
|
||||
public:
|
||||
explicit Fermi2D();
|
||||
~Fermi2D() override;
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
enum class Origin : u32 {
|
||||
Center = 0,
|
||||
Corner = 1,
|
||||
};
|
||||
|
||||
enum class Filter : u32 {
|
||||
Point = 0,
|
||||
Bilinear = 1,
|
||||
};
|
||||
|
||||
enum class Operation : u32 {
|
||||
SrcCopyAnd = 0,
|
||||
ROPAnd = 1,
|
||||
Blend = 2,
|
||||
SrcCopy = 3,
|
||||
ROP = 4,
|
||||
SrcCopyPremult = 5,
|
||||
BlendPremult = 6,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
BlockLinear = 0,
|
||||
Pitch = 1,
|
||||
};
|
||||
|
||||
enum class CpuIndexWrap : u32 {
|
||||
Wrap = 0,
|
||||
NoWrap = 1,
|
||||
};
|
||||
|
||||
struct Surface {
|
||||
RenderTargetFormat format;
|
||||
MemoryLayout linear;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 pitch;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 addr_upper;
|
||||
u32 addr_lower;
|
||||
|
||||
[[nodiscard]] constexpr GPUVAddr Address() const noexcept {
|
||||
return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
||||
|
||||
enum class SectorPromotion : u32 {
|
||||
NoPromotion = 0,
|
||||
PromoteTo2V = 1,
|
||||
PromoteTo2H = 2,
|
||||
PromoteTo4 = 3,
|
||||
};
|
||||
|
||||
enum class NumTpcs : u32 {
|
||||
All = 0,
|
||||
One = 1,
|
||||
};
|
||||
|
||||
enum class RenderEnableMode : u32 {
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
enum class ColorKeyFormat : u32 {
|
||||
A16R56G6B5 = 0,
|
||||
A1R5G55B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A2R10G10B10 = 3,
|
||||
Y8 = 4,
|
||||
Y16 = 5,
|
||||
Y32 = 6,
|
||||
};
|
||||
|
||||
union Beta4 {
|
||||
BitField<0, 8, u32> b;
|
||||
BitField<8, 8, u32> g;
|
||||
BitField<16, 8, u32> r;
|
||||
BitField<24, 8, u32> a;
|
||||
};
|
||||
|
||||
struct Point {
|
||||
u32 x;
|
||||
u32 y;
|
||||
};
|
||||
|
||||
enum class PatternSelect : u32 {
|
||||
MonoChrome8x8 = 0,
|
||||
MonoChrome64x1 = 1,
|
||||
MonoChrome1x64 = 2,
|
||||
Color = 3,
|
||||
};
|
||||
|
||||
enum class NotifyType : u32 {
|
||||
WriteOnly = 0,
|
||||
WriteThenAwaken = 1,
|
||||
};
|
||||
|
||||
enum class MonochromePatternColorFormat : u32 {
|
||||
A8X8R8G6B5 = 0,
|
||||
A1R5G5B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A8Y8 = 3,
|
||||
A8X8Y16 = 4,
|
||||
Y32 = 5,
|
||||
};
|
||||
|
||||
enum class MonochromePatternFormat : u32 {
|
||||
CGA6_M1 = 0,
|
||||
LE_M1 = 1,
|
||||
};
|
||||
|
||||
union Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0x258;
|
||||
struct {
|
||||
u32 object;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3F);
|
||||
u32 no_operation;
|
||||
NotifyType notify;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x2);
|
||||
u32 wait_for_idle;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xB);
|
||||
u32 pm_trigger;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xF);
|
||||
u32 context_dma_notify;
|
||||
u32 dst_context_dma;
|
||||
u32 src_context_dma;
|
||||
u32 semaphore_context_dma;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x1C);
|
||||
Surface dst;
|
||||
CpuIndexWrap pixels_from_cpu_index_wrap;
|
||||
u32 kind2d_check_enable;
|
||||
Surface src;
|
||||
SectorPromotion pixels_from_memory_sector_promotion;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x1);
|
||||
NumTpcs num_tpcs;
|
||||
u32 render_enable_addr_upper;
|
||||
u32 render_enable_addr_lower;
|
||||
RenderEnableMode render_enable_mode;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
u32 clip_x0;
|
||||
u32 clip_y0;
|
||||
u32 clip_width;
|
||||
u32 clip_height;
|
||||
BitField<0, 1, u32> clip_enable;
|
||||
BitField<0, 3, ColorKeyFormat> color_key_format;
|
||||
u32 color_key;
|
||||
BitField<0, 1, u32> color_key_enable;
|
||||
BitField<0, 8, u32> rop;
|
||||
u32 beta1;
|
||||
Beta4 beta4;
|
||||
Operation operation;
|
||||
union {
|
||||
BitField<0, 6, u32> x;
|
||||
BitField<8, 6, u32> y;
|
||||
} pattern_offset;
|
||||
BitField<0, 2, PatternSelect> pattern_select;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC);
|
||||
struct {
|
||||
BitField<0, 3, MonochromePatternColorFormat> color_format;
|
||||
BitField<0, 1, MonochromePatternFormat> format;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 pattern0;
|
||||
u32 pattern1;
|
||||
} monochrome_pattern;
|
||||
struct {
|
||||
std::array<u32, 0x40> X8R8G8B8;
|
||||
std::array<u32, 0x20> R5G6B5;
|
||||
std::array<u32, 0x20> X1R5G5B5;
|
||||
std::array<u32, 0x10> Y8;
|
||||
} color_pattern;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x10);
|
||||
struct {
|
||||
u32 prim_mode;
|
||||
u32 prim_color_format;
|
||||
u32 prim_color;
|
||||
u32 line_tie_break_bits;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x14);
|
||||
u32 prim_point_xy;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x7);
|
||||
std::array<Point, 0x40> prim_point;
|
||||
} render_solid;
|
||||
struct {
|
||||
u32 data_type;
|
||||
u32 color_format;
|
||||
u32 index_format;
|
||||
u32 mono_format;
|
||||
u32 wrap;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 mono_opacity;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x6);
|
||||
u32 src_width;
|
||||
u32 src_height;
|
||||
u32 dx_du_frac;
|
||||
u32 dx_du_int;
|
||||
u32 dx_dv_frac;
|
||||
u32 dy_dv_int;
|
||||
u32 dst_x0_frac;
|
||||
u32 dst_x0_int;
|
||||
u32 dst_y0_frac;
|
||||
u32 dst_y0_int;
|
||||
u32 data;
|
||||
} pixels_from_cpu;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3);
|
||||
u32 big_endian_control;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3);
|
||||
struct {
|
||||
BitField<0, 3, u32> block_shape;
|
||||
BitField<0, 5, u32> corral_size;
|
||||
BitField<0, 1, u32> safe_overlap;
|
||||
union {
|
||||
BitField<0, 1, Origin> origin;
|
||||
BitField<4, 1, Filter> filter;
|
||||
} sample_mode;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x8);
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_width;
|
||||
s32 dst_height;
|
||||
s64 du_dx;
|
||||
s64 dv_dy;
|
||||
s64 src_x0;
|
||||
s64 src_y0;
|
||||
} pixels_from_memory;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
} regs{};
|
||||
|
||||
struct Config {
|
||||
Operation operation;
|
||||
Filter filter;
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_x1;
|
||||
s32 dst_y1;
|
||||
s32 src_x0;
|
||||
s32 src_y0;
|
||||
s32 src_x1;
|
||||
s32 src_y1;
|
||||
};
|
||||
|
||||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void Blit();
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(object, 0x0);
|
||||
ASSERT_REG_POSITION(no_operation, 0x100);
|
||||
ASSERT_REG_POSITION(notify, 0x104);
|
||||
ASSERT_REG_POSITION(wait_for_idle, 0x110);
|
||||
ASSERT_REG_POSITION(pm_trigger, 0x140);
|
||||
ASSERT_REG_POSITION(context_dma_notify, 0x180);
|
||||
ASSERT_REG_POSITION(dst_context_dma, 0x184);
|
||||
ASSERT_REG_POSITION(src_context_dma, 0x188);
|
||||
ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
|
||||
ASSERT_REG_POSITION(dst, 0x200);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
|
||||
ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
|
||||
ASSERT_REG_POSITION(src, 0x230);
|
||||
ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
|
||||
ASSERT_REG_POSITION(num_tpcs, 0x260);
|
||||
ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
|
||||
ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
|
||||
ASSERT_REG_POSITION(clip_x0, 0x280);
|
||||
ASSERT_REG_POSITION(clip_y0, 0x284);
|
||||
ASSERT_REG_POSITION(clip_width, 0x288);
|
||||
ASSERT_REG_POSITION(clip_height, 0x28c);
|
||||
ASSERT_REG_POSITION(clip_enable, 0x290);
|
||||
ASSERT_REG_POSITION(color_key_format, 0x294);
|
||||
ASSERT_REG_POSITION(color_key, 0x298);
|
||||
ASSERT_REG_POSITION(rop, 0x2A0);
|
||||
ASSERT_REG_POSITION(beta1, 0x2A4);
|
||||
ASSERT_REG_POSITION(beta4, 0x2A8);
|
||||
ASSERT_REG_POSITION(operation, 0x2AC);
|
||||
ASSERT_REG_POSITION(pattern_offset, 0x2B0);
|
||||
ASSERT_REG_POSITION(pattern_select, 0x2B4);
|
||||
ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
|
||||
ASSERT_REG_POSITION(color_pattern, 0x300);
|
||||
ASSERT_REG_POSITION(render_solid, 0x580);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
|
||||
ASSERT_REG_POSITION(big_endian_control, 0x870);
|
||||
ASSERT_REG_POSITION(pixels_from_memory, 0x880);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,87 +1,87 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <bitset>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
upload_state.BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerCompute register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
|
||||
upload_state.ProcessExec(regs.exec_upload.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(launch):
|
||||
ProcessLaunch();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
switch (method) {
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload):
|
||||
upload_state.ProcessData(base_start, static_cast<size_t>(amount));
|
||||
return;
|
||||
default:
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerCompute::ProcessLaunch() {
|
||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
|
||||
rasterizer->DispatchCompute();
|
||||
}
|
||||
|
||||
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
|
||||
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
|
||||
|
||||
Texture::TICEntry tic_entry;
|
||||
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
|
||||
|
||||
return tic_entry;
|
||||
}
|
||||
|
||||
Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
|
||||
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
|
||||
|
||||
Texture::TSCEntry tsc_entry;
|
||||
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
|
||||
return tsc_entry;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <bitset>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
upload_state.BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerCompute register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
|
||||
upload_state.ProcessExec(regs.exec_upload.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(launch):
|
||||
ProcessLaunch();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
switch (method) {
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload):
|
||||
upload_state.ProcessData(base_start, static_cast<size_t>(amount));
|
||||
return;
|
||||
default:
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerCompute::ProcessLaunch() {
|
||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
|
||||
rasterizer->DispatchCompute();
|
||||
}
|
||||
|
||||
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
|
||||
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
|
||||
|
||||
Texture::TICEntry tic_entry;
|
||||
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
|
||||
|
||||
return tic_entry;
|
||||
}
|
||||
|
||||
Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
|
||||
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
|
||||
|
||||
Texture::TSCEntry tsc_entry;
|
||||
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
|
||||
return tsc_entry;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,248 +1,248 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as GK104_Compute. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerCompute final : public EngineInterface {
|
||||
public:
|
||||
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
|
||||
~KeplerCompute();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
static constexpr std::size_t NumConstBuffers = 8;
|
||||
|
||||
struct Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0xCF8;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec_upload;
|
||||
|
||||
u32 data_upload;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3F);
|
||||
|
||||
struct {
|
||||
u32 address;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
|
||||
}
|
||||
} launch_desc_loc;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x1);
|
||||
|
||||
u32 launch;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4A7);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tsc;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tic;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x22);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} code_loc;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3FE);
|
||||
|
||||
u32 tex_cb_index;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x374);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
struct LaunchParams {
|
||||
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
u32 program_start;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
|
||||
BitField<30, 1, u32> linked_tsc;
|
||||
|
||||
BitField<0, 31, u32> grid_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> grid_dim_y;
|
||||
BitField<16, 16, u32> grid_dim_z;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
BitField<0, 18, u32> shared_alloc;
|
||||
|
||||
BitField<16, 16, u32> block_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> block_dim_y;
|
||||
BitField<16, 16, u32> block_dim_z;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 8, u32> const_buffer_enable_mask;
|
||||
BitField<29, 2, u32> cache_layout;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
struct ConstBufferConfig {
|
||||
u32 address_low;
|
||||
union {
|
||||
BitField<0, 8, u32> address_high;
|
||||
BitField<15, 17, u32> size;
|
||||
};
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
|
||||
address_low);
|
||||
}
|
||||
};
|
||||
std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config;
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_pos_alloc;
|
||||
BitField<27, 5, u32> barrier_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_neg_alloc;
|
||||
BitField<24, 5, u32> gpr_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_crs_alloc;
|
||||
BitField<24, 5, u32> sass_version;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x10);
|
||||
} launch_description{};
|
||||
|
||||
struct {
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
} state{};
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
|
||||
"KeplerCompute Regs has wrong size");
|
||||
|
||||
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
|
||||
"KeplerCompute LaunchParams has wrong size");
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
private:
|
||||
void ProcessLaunch();
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
/// Retrieves information about a specific TSC entry from the TSC buffer.
|
||||
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
|
||||
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(launch, 0xAF);
|
||||
ASSERT_REG_POSITION(tsc, 0x557);
|
||||
ASSERT_REG_POSITION(tic, 0x55D);
|
||||
ASSERT_REG_POSITION(code_loc, 0x582);
|
||||
ASSERT_REG_POSITION(tex_cb_index, 0x982);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as GK104_Compute. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerCompute final : public EngineInterface {
|
||||
public:
|
||||
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
|
||||
~KeplerCompute();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
static constexpr std::size_t NumConstBuffers = 8;
|
||||
|
||||
struct Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0xCF8;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec_upload;
|
||||
|
||||
u32 data_upload;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3F);
|
||||
|
||||
struct {
|
||||
u32 address;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
|
||||
}
|
||||
} launch_desc_loc;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x1);
|
||||
|
||||
u32 launch;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4A7);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tsc;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tic;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x22);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} code_loc;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3FE);
|
||||
|
||||
u32 tex_cb_index;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x374);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
struct LaunchParams {
|
||||
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
u32 program_start;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
|
||||
BitField<30, 1, u32> linked_tsc;
|
||||
|
||||
BitField<0, 31, u32> grid_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> grid_dim_y;
|
||||
BitField<16, 16, u32> grid_dim_z;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
BitField<0, 18, u32> shared_alloc;
|
||||
|
||||
BitField<16, 16, u32> block_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> block_dim_y;
|
||||
BitField<16, 16, u32> block_dim_z;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 8, u32> const_buffer_enable_mask;
|
||||
BitField<29, 2, u32> cache_layout;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
struct ConstBufferConfig {
|
||||
u32 address_low;
|
||||
union {
|
||||
BitField<0, 8, u32> address_high;
|
||||
BitField<15, 17, u32> size;
|
||||
};
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
|
||||
address_low);
|
||||
}
|
||||
};
|
||||
std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config;
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_pos_alloc;
|
||||
BitField<27, 5, u32> barrier_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_neg_alloc;
|
||||
BitField<24, 5, u32> gpr_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_crs_alloc;
|
||||
BitField<24, 5, u32> sass_version;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x10);
|
||||
} launch_description{};
|
||||
|
||||
struct {
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
} state{};
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
|
||||
"KeplerCompute Regs has wrong size");
|
||||
|
||||
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
|
||||
"KeplerCompute LaunchParams has wrong size");
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
private:
|
||||
void ProcessLaunch();
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
/// Retrieves information about a specific TSC entry from the TSC buffer.
|
||||
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
|
||||
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(launch, 0xAF);
|
||||
ASSERT_REG_POSITION(tsc, 0x557);
|
||||
ASSERT_REG_POSITION(tic, 0x55D);
|
||||
ASSERT_REG_POSITION(code_loc, 0x582);
|
||||
ASSERT_REG_POSITION(tex_cb_index, 0x982);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,55 +1,55 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
|
||||
: system{system_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerMemory::~KeplerMemory() = default;
|
||||
|
||||
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
upload_state.BindRasterizer(rasterizer_);
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerMemory register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||
upload_state.ProcessExec(regs.exec.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLERMEMORY_REG_INDEX(data): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
switch (method) {
|
||||
case KEPLERMEMORY_REG_INDEX(data):
|
||||
upload_state.ProcessData(base_start, static_cast<size_t>(amount));
|
||||
return;
|
||||
default:
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
|
||||
: system{system_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerMemory::~KeplerMemory() = default;
|
||||
|
||||
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
upload_state.BindRasterizer(rasterizer_);
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerMemory register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||
upload_state.ProcessExec(regs.exec.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLERMEMORY_REG_INDEX(data): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
switch (method) {
|
||||
case KEPLERMEMORY_REG_INDEX(data):
|
||||
upload_state.ProcessData(base_start, static_cast<size_t>(amount));
|
||||
return;
|
||||
default:
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,89 +1,89 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as P2MF. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLERMEMORY_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerMemory final : public EngineInterface {
|
||||
public:
|
||||
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
|
||||
~KeplerMemory() override;
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x7F;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec;
|
||||
|
||||
u32 data;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x11);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec, 0x6C);
|
||||
ASSERT_REG_POSITION(data, 0x6D);
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as P2MF. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLERMEMORY_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerMemory final : public EngineInterface {
|
||||
public:
|
||||
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
|
||||
~KeplerMemory() override;
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x7F;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec;
|
||||
|
||||
u32 data;
|
||||
|
||||
INSERT_PADDING_WORDS_NOINIT(0x11);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec, 0x6C);
|
||||
ASSERT_REG_POSITION(data, 0x6D);
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,317 +1,317 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/algorithm.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
MICROPROFILE_DECLARE(GPU_DMAEngine);
|
||||
MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
using namespace Texture;
|
||||
|
||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_} {}
|
||||
|
||||
MaxwellDMA::~MaxwellDMA() = default;
|
||||
|
||||
void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
|
||||
Launch();
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
for (size_t i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::Launch() {
|
||||
MICROPROFILE_SCOPE(GPU_DMAEngine);
|
||||
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
|
||||
static_cast<GPUVAddr>(regs.offset_out));
|
||||
|
||||
// TODO(Subv): Perform more research and implement all features of this engine.
|
||||
const LaunchDMA& launch = regs.launch_dma;
|
||||
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
|
||||
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
|
||||
|
||||
if (launch.multi_line_enable) {
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_src_pitch && is_dst_pitch) {
|
||||
for (u32 line = 0; line < regs.line_count; ++line) {
|
||||
const GPUVAddr source_line =
|
||||
regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
|
||||
const GPUVAddr dest_line =
|
||||
regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
|
||||
memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
|
||||
}
|
||||
} else {
|
||||
if (!is_src_pitch && is_dst_pitch) {
|
||||
CopyBlockLinearToPitch();
|
||||
} else {
|
||||
CopyPitchToBlockLinear();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: allow multisized components.
|
||||
auto& accelerate = rasterizer->AccessAccelerateDMA();
|
||||
const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
|
||||
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
|
||||
ASSERT(regs.remap_const.component_size_minus_one == 3);
|
||||
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
|
||||
std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
|
||||
memory_manager.WriteBlockUnsafe(regs.offset_out,
|
||||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||
};
|
||||
auto src_kind = memory_manager.GetPageKind(regs.offset_in);
|
||||
auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
|
||||
const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
|
||||
const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
|
||||
if (!is_src_pitch && is_dst_pitch) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
std::vector<u8> dst_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
|
||||
dst_buffer[offset] =
|
||||
tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
|
||||
regs.offset_in];
|
||||
}
|
||||
memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
std::vector<u8> dst_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
|
||||
dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
|
||||
regs.offset_out] = tmp_buffer[offset];
|
||||
}
|
||||
memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
|
||||
} else {
|
||||
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseSemaphore();
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
|
||||
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
|
||||
|
||||
const bool is_remapping = regs.launch_dma.remap_enable != 0;
|
||||
|
||||
// Optimized path for micro copies.
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
|
||||
regs.src_params.height > GOB_SIZE_Y) {
|
||||
FastCopyBlockLinearToPitch();
|
||||
return;
|
||||
}
|
||||
|
||||
// Deswizzle the input and copy it over.
|
||||
const Parameters& src_params = regs.src_params;
|
||||
|
||||
const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
|
||||
const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
|
||||
|
||||
const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
|
||||
|
||||
u32 width = src_params.width;
|
||||
u32 x_elements = regs.line_length_in;
|
||||
u32 x_offset = src_params.origin.x;
|
||||
u32 bpp_shift = 0U;
|
||||
if (!is_remapping) {
|
||||
bpp_shift = Common::FoldRight(
|
||||
4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
|
||||
width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
|
||||
width >>= bpp_shift;
|
||||
x_elements >>= bpp_shift;
|
||||
x_offset >>= bpp_shift;
|
||||
}
|
||||
|
||||
const u32 bytes_per_pixel = base_bpp << bpp_shift;
|
||||
const u32 height = src_params.height;
|
||||
const u32 depth = src_params.depth;
|
||||
const u32 block_height = src_params.block_size.height;
|
||||
const u32 block_depth = src_params.block_size.depth;
|
||||
const size_t src_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
|
||||
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
|
||||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
|
||||
UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
|
||||
|
||||
const bool is_remapping = regs.launch_dma.remap_enable != 0;
|
||||
const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
|
||||
const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
|
||||
|
||||
const auto& dst_params = regs.dst_params;
|
||||
|
||||
const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
|
||||
|
||||
u32 width = dst_params.width;
|
||||
u32 x_elements = regs.line_length_in;
|
||||
u32 x_offset = dst_params.origin.x;
|
||||
u32 bpp_shift = 0U;
|
||||
if (!is_remapping) {
|
||||
bpp_shift = Common::FoldRight(
|
||||
4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
|
||||
width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
|
||||
width >>= bpp_shift;
|
||||
x_elements >>= bpp_shift;
|
||||
x_offset >>= bpp_shift;
|
||||
}
|
||||
|
||||
const u32 bytes_per_pixel = base_bpp << bpp_shift;
|
||||
const u32 height = dst_params.height;
|
||||
const u32 depth = dst_params.depth;
|
||||
const u32 block_height = dst_params.block_size.height;
|
||||
const u32 block_depth = dst_params.block_size.depth;
|
||||
const size_t dst_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
// If the input is linear and the output is tiled, swizzle the input and copy it over.
|
||||
SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
|
||||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_in);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
const u32 bytes_per_pixel = 1U;
|
||||
const size_t src_size = GOB_SIZE;
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
u32 pos_x = regs.src_params.origin.x;
|
||||
u32 pos_y = regs.src_params.origin.y;
|
||||
const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
|
||||
regs.src_params.block_size.height, bytes_per_pixel);
|
||||
const u32 x_in_gob = 64 / bytes_per_pixel;
|
||||
pos_x = pos_x % x_in_gob;
|
||||
pos_y = pos_y % 8;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
|
||||
regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
|
||||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::ReleaseSemaphore() {
|
||||
const auto type = regs.launch_dma.semaphore_type;
|
||||
const GPUVAddr address = regs.semaphore.address;
|
||||
const u32 payload = regs.semaphore.payload;
|
||||
switch (type) {
|
||||
case LaunchDMA::SemaphoreType::NONE:
|
||||
break;
|
||||
case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: {
|
||||
std::function<void()> operation(
|
||||
[this, address, payload] { memory_manager.Write<u32>(address, payload); });
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
break;
|
||||
}
|
||||
case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: {
|
||||
std::function<void()> operation([this, address, payload] {
|
||||
memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks());
|
||||
memory_manager.Write<u64>(address, payload);
|
||||
});
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value()));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/algorithm.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
MICROPROFILE_DECLARE(GPU_DMAEngine);
|
||||
MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
using namespace Texture;
|
||||
|
||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_} {}
|
||||
|
||||
MaxwellDMA::~MaxwellDMA() = default;
|
||||
|
||||
void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
|
||||
Launch();
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
for (size_t i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDMA::Launch() {
|
||||
MICROPROFILE_SCOPE(GPU_DMAEngine);
|
||||
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
|
||||
static_cast<GPUVAddr>(regs.offset_out));
|
||||
|
||||
// TODO(Subv): Perform more research and implement all features of this engine.
|
||||
const LaunchDMA& launch = regs.launch_dma;
|
||||
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
|
||||
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
|
||||
|
||||
if (launch.multi_line_enable) {
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_src_pitch && is_dst_pitch) {
|
||||
for (u32 line = 0; line < regs.line_count; ++line) {
|
||||
const GPUVAddr source_line =
|
||||
regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
|
||||
const GPUVAddr dest_line =
|
||||
regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
|
||||
memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
|
||||
}
|
||||
} else {
|
||||
if (!is_src_pitch && is_dst_pitch) {
|
||||
CopyBlockLinearToPitch();
|
||||
} else {
|
||||
CopyPitchToBlockLinear();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: allow multisized components.
|
||||
auto& accelerate = rasterizer->AccessAccelerateDMA();
|
||||
const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
|
||||
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
|
||||
ASSERT(regs.remap_const.component_size_minus_one == 3);
|
||||
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
|
||||
std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
|
||||
memory_manager.WriteBlockUnsafe(regs.offset_out,
|
||||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||
};
|
||||
auto src_kind = memory_manager.GetPageKind(regs.offset_in);
|
||||
auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
|
||||
const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
|
||||
const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
|
||||
if (!is_src_pitch && is_dst_pitch) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
std::vector<u8> dst_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
|
||||
dst_buffer[offset] =
|
||||
tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
|
||||
regs.offset_in];
|
||||
}
|
||||
memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
std::vector<u8> dst_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
|
||||
dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
|
||||
regs.offset_out] = tmp_buffer[offset];
|
||||
}
|
||||
memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
|
||||
} else {
|
||||
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseSemaphore();
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
|
||||
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
|
||||
|
||||
const bool is_remapping = regs.launch_dma.remap_enable != 0;
|
||||
|
||||
// Optimized path for micro copies.
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
|
||||
regs.src_params.height > GOB_SIZE_Y) {
|
||||
FastCopyBlockLinearToPitch();
|
||||
return;
|
||||
}
|
||||
|
||||
// Deswizzle the input and copy it over.
|
||||
const Parameters& src_params = regs.src_params;
|
||||
|
||||
const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
|
||||
const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
|
||||
|
||||
const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
|
||||
|
||||
u32 width = src_params.width;
|
||||
u32 x_elements = regs.line_length_in;
|
||||
u32 x_offset = src_params.origin.x;
|
||||
u32 bpp_shift = 0U;
|
||||
if (!is_remapping) {
|
||||
bpp_shift = Common::FoldRight(
|
||||
4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
|
||||
width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
|
||||
width >>= bpp_shift;
|
||||
x_elements >>= bpp_shift;
|
||||
x_offset >>= bpp_shift;
|
||||
}
|
||||
|
||||
const u32 bytes_per_pixel = base_bpp << bpp_shift;
|
||||
const u32 height = src_params.height;
|
||||
const u32 depth = src_params.depth;
|
||||
const u32 block_height = src_params.block_size.height;
|
||||
const u32 block_depth = src_params.block_size.depth;
|
||||
const size_t src_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
|
||||
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
|
||||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
|
||||
UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
|
||||
|
||||
const bool is_remapping = regs.launch_dma.remap_enable != 0;
|
||||
const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
|
||||
const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
|
||||
|
||||
const auto& dst_params = regs.dst_params;
|
||||
|
||||
const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
|
||||
|
||||
u32 width = dst_params.width;
|
||||
u32 x_elements = regs.line_length_in;
|
||||
u32 x_offset = dst_params.origin.x;
|
||||
u32 bpp_shift = 0U;
|
||||
if (!is_remapping) {
|
||||
bpp_shift = Common::FoldRight(
|
||||
4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
|
||||
width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
|
||||
width >>= bpp_shift;
|
||||
x_elements >>= bpp_shift;
|
||||
x_offset >>= bpp_shift;
|
||||
}
|
||||
|
||||
const u32 bytes_per_pixel = base_bpp << bpp_shift;
|
||||
const u32 height = dst_params.height;
|
||||
const u32 depth = dst_params.depth;
|
||||
const u32 block_height = dst_params.block_size.height;
|
||||
const u32 block_depth = dst_params.block_size.depth;
|
||||
const size_t dst_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
// If the input is linear and the output is tiled, swizzle the input and copy it over.
|
||||
SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
|
||||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_in);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
const u32 bytes_per_pixel = 1U;
|
||||
const size_t src_size = GOB_SIZE;
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
u32 pos_x = regs.src_params.origin.x;
|
||||
u32 pos_y = regs.src_params.origin.y;
|
||||
const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
|
||||
regs.src_params.block_size.height, bytes_per_pixel);
|
||||
const u32 x_in_gob = 64 / bytes_per_pixel;
|
||||
pos_x = pos_x % x_in_gob;
|
||||
pos_y = pos_y % 8;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
|
||||
regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
|
||||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::ReleaseSemaphore() {
|
||||
const auto type = regs.launch_dma.semaphore_type;
|
||||
const GPUVAddr address = regs.semaphore.address;
|
||||
const u32 payload = regs.semaphore.payload;
|
||||
switch (type) {
|
||||
case LaunchDMA::SemaphoreType::NONE:
|
||||
break;
|
||||
case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: {
|
||||
std::function<void()> operation(
|
||||
[this, address, payload] { memory_manager.Write<u32>(address, payload); });
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
break;
|
||||
}
|
||||
case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: {
|
||||
std::function<void()> operation([this, address, payload] {
|
||||
memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks());
|
||||
memory_manager.Write<u64>(address, payload);
|
||||
});
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value()));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,294 +1,294 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class AccelerateDMAInterface {
|
||||
public:
|
||||
/// Write the value to the register identified by method.
|
||||
virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
|
||||
|
||||
virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* This engine is known as gk104_copy. Documentation can be found in:
|
||||
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
|
||||
*/
|
||||
|
||||
class MaxwellDMA final : public EngineInterface {
|
||||
public:
|
||||
struct PackedGPUVAddr {
|
||||
u32 upper;
|
||||
u32 lower;
|
||||
|
||||
constexpr operator GPUVAddr() const noexcept {
|
||||
return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
|
||||
}
|
||||
};
|
||||
|
||||
union BlockSize {
|
||||
BitField<0, 4, u32> width;
|
||||
BitField<4, 4, u32> height;
|
||||
BitField<8, 4, u32> depth;
|
||||
BitField<12, 4, u32> gob_height;
|
||||
};
|
||||
static_assert(sizeof(BlockSize) == 4);
|
||||
|
||||
union Origin {
|
||||
BitField<0, 16, u32> x;
|
||||
BitField<16, 16, u32> y;
|
||||
};
|
||||
static_assert(sizeof(Origin) == 4);
|
||||
|
||||
struct Parameters {
|
||||
BlockSize block_size;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
Origin origin;
|
||||
};
|
||||
static_assert(sizeof(Parameters) == 24);
|
||||
|
||||
struct Semaphore {
|
||||
PackedGPUVAddr address;
|
||||
u32 payload;
|
||||
};
|
||||
static_assert(sizeof(Semaphore) == 12);
|
||||
|
||||
struct RenderEnable {
|
||||
enum class Mode : u32 {
|
||||
// Note: This uses Pascal case in order to avoid the identifiers
|
||||
// FALSE and TRUE, which are reserved on Darwin.
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
PackedGPUVAddr address;
|
||||
BitField<0, 3, Mode> mode;
|
||||
};
|
||||
static_assert(sizeof(RenderEnable) == 12);
|
||||
|
||||
enum class PhysModeTarget : u32 {
|
||||
LOCAL_FB = 0,
|
||||
COHERENT_SYSMEM = 1,
|
||||
NONCOHERENT_SYSMEM = 2,
|
||||
};
|
||||
using PhysMode = BitField<0, 2, PhysModeTarget>;
|
||||
|
||||
union LaunchDMA {
|
||||
enum class DataTransferType : u32 {
|
||||
NONE = 0,
|
||||
PIPELINED = 1,
|
||||
NON_PIPELINED = 2,
|
||||
};
|
||||
|
||||
enum class SemaphoreType : u32 {
|
||||
NONE = 0,
|
||||
RELEASE_ONE_WORD_SEMAPHORE = 1,
|
||||
RELEASE_FOUR_WORD_SEMAPHORE = 2,
|
||||
};
|
||||
|
||||
enum class InterruptType : u32 {
|
||||
NONE = 0,
|
||||
BLOCKING = 1,
|
||||
NON_BLOCKING = 2,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
BLOCKLINEAR = 0,
|
||||
PITCH = 1,
|
||||
};
|
||||
|
||||
enum class Type : u32 {
|
||||
VIRTUAL = 0,
|
||||
PHYSICAL = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReduction : u32 {
|
||||
IMIN = 0,
|
||||
IMAX = 1,
|
||||
IXOR = 2,
|
||||
IAND = 3,
|
||||
IOR = 4,
|
||||
IADD = 5,
|
||||
INC = 6,
|
||||
DEC = 7,
|
||||
FADD = 0xA,
|
||||
};
|
||||
|
||||
enum class SemaphoreReductionSign : u32 {
|
||||
SIGNED = 0,
|
||||
UNSIGNED = 1,
|
||||
};
|
||||
|
||||
enum class BypassL2 : u32 {
|
||||
USE_PTE_SETTING = 0,
|
||||
FORCE_VOLATILE = 1,
|
||||
};
|
||||
|
||||
BitField<0, 2, DataTransferType> data_transfer_type;
|
||||
BitField<2, 1, u32> flush_enable;
|
||||
BitField<3, 2, SemaphoreType> semaphore_type;
|
||||
BitField<5, 2, InterruptType> interrupt_type;
|
||||
BitField<7, 1, MemoryLayout> src_memory_layout;
|
||||
BitField<8, 1, MemoryLayout> dst_memory_layout;
|
||||
BitField<9, 1, u32> multi_line_enable;
|
||||
BitField<10, 1, u32> remap_enable;
|
||||
BitField<11, 1, u32> rmwdisable;
|
||||
BitField<12, 1, Type> src_type;
|
||||
BitField<13, 1, Type> dst_type;
|
||||
BitField<14, 4, SemaphoreReduction> semaphore_reduction;
|
||||
BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
|
||||
BitField<19, 1, u32> reduction_enable;
|
||||
BitField<20, 1, BypassL2> bypass_l2;
|
||||
};
|
||||
static_assert(sizeof(LaunchDMA) == 4);
|
||||
|
||||
struct RemapConst {
|
||||
enum class Swizzle : u32 {
|
||||
SRC_X = 0,
|
||||
SRC_Y = 1,
|
||||
SRC_Z = 2,
|
||||
SRC_W = 3,
|
||||
CONST_A = 4,
|
||||
CONST_B = 5,
|
||||
NO_WRITE = 6,
|
||||
};
|
||||
|
||||
PackedGPUVAddr address;
|
||||
|
||||
union {
|
||||
BitField<0, 3, Swizzle> dst_x;
|
||||
BitField<4, 3, Swizzle> dst_y;
|
||||
BitField<8, 3, Swizzle> dst_z;
|
||||
BitField<12, 3, Swizzle> dst_w;
|
||||
BitField<0, 12, u32> dst_components_raw;
|
||||
BitField<16, 2, u32> component_size_minus_one;
|
||||
BitField<20, 2, u32> num_src_components_minus_one;
|
||||
BitField<24, 2, u32> num_dst_components_minus_one;
|
||||
};
|
||||
|
||||
Swizzle GetComponent(size_t i) const {
|
||||
const u32 raw = dst_components_raw;
|
||||
return static_cast<Swizzle>((raw >> (i * 3)) & 0x7);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(RemapConst) == 12);
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
|
||||
~MaxwellDMA() override;
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
private:
|
||||
/// Performs the copy from the source buffer to the destination buffer as configured in the
|
||||
/// registers.
|
||||
void Launch();
|
||||
|
||||
void CopyBlockLinearToPitch();
|
||||
|
||||
void CopyPitchToBlockLinear();
|
||||
|
||||
void FastCopyBlockLinearToPitch();
|
||||
|
||||
void ReleaseSemaphore();
|
||||
|
||||
Core::System& system;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
std::vector<u8> read_buffer;
|
||||
std::vector<u8> write_buffer;
|
||||
|
||||
static constexpr std::size_t NUM_REGS = 0x800;
|
||||
struct Regs {
|
||||
union {
|
||||
struct {
|
||||
u32 reserved[0x40];
|
||||
u32 nop;
|
||||
u32 reserved01[0xf];
|
||||
u32 pm_trigger;
|
||||
u32 reserved02[0x3f];
|
||||
Semaphore semaphore;
|
||||
u32 reserved03[0x2];
|
||||
RenderEnable render_enable;
|
||||
PhysMode src_phys_mode;
|
||||
PhysMode dst_phys_mode;
|
||||
u32 reserved04[0x26];
|
||||
LaunchDMA launch_dma;
|
||||
u32 reserved05[0x3f];
|
||||
PackedGPUVAddr offset_in;
|
||||
PackedGPUVAddr offset_out;
|
||||
u32 pitch_in;
|
||||
u32 pitch_out;
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
u32 reserved06[0xb6];
|
||||
u32 remap_consta_value;
|
||||
u32 remap_constb_value;
|
||||
RemapConst remap_const;
|
||||
Parameters dst_params;
|
||||
u32 reserved07[0x1];
|
||||
Parameters src_params;
|
||||
u32 reserved08[0x275];
|
||||
u32 pm_trigger_end;
|
||||
u32 reserved09[0x3ba];
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(launch_dma, 0xC0);
|
||||
ASSERT_REG_POSITION(offset_in, 0x100);
|
||||
ASSERT_REG_POSITION(offset_out, 0x102);
|
||||
ASSERT_REG_POSITION(pitch_in, 0x104);
|
||||
ASSERT_REG_POSITION(pitch_out, 0x105);
|
||||
ASSERT_REG_POSITION(line_length_in, 0x106);
|
||||
ASSERT_REG_POSITION(line_count, 0x107);
|
||||
ASSERT_REG_POSITION(remap_const, 0x1C0);
|
||||
ASSERT_REG_POSITION(dst_params, 0x1C3);
|
||||
ASSERT_REG_POSITION(src_params, 0x1CA);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class AccelerateDMAInterface {
|
||||
public:
|
||||
/// Write the value to the register identified by method.
|
||||
virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
|
||||
|
||||
virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* This engine is known as gk104_copy. Documentation can be found in:
|
||||
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
|
||||
*/
|
||||
|
||||
class MaxwellDMA final : public EngineInterface {
|
||||
public:
|
||||
struct PackedGPUVAddr {
|
||||
u32 upper;
|
||||
u32 lower;
|
||||
|
||||
constexpr operator GPUVAddr() const noexcept {
|
||||
return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
|
||||
}
|
||||
};
|
||||
|
||||
union BlockSize {
|
||||
BitField<0, 4, u32> width;
|
||||
BitField<4, 4, u32> height;
|
||||
BitField<8, 4, u32> depth;
|
||||
BitField<12, 4, u32> gob_height;
|
||||
};
|
||||
static_assert(sizeof(BlockSize) == 4);
|
||||
|
||||
union Origin {
|
||||
BitField<0, 16, u32> x;
|
||||
BitField<16, 16, u32> y;
|
||||
};
|
||||
static_assert(sizeof(Origin) == 4);
|
||||
|
||||
struct Parameters {
|
||||
BlockSize block_size;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
Origin origin;
|
||||
};
|
||||
static_assert(sizeof(Parameters) == 24);
|
||||
|
||||
struct Semaphore {
|
||||
PackedGPUVAddr address;
|
||||
u32 payload;
|
||||
};
|
||||
static_assert(sizeof(Semaphore) == 12);
|
||||
|
||||
struct RenderEnable {
|
||||
enum class Mode : u32 {
|
||||
// Note: This uses Pascal case in order to avoid the identifiers
|
||||
// FALSE and TRUE, which are reserved on Darwin.
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
PackedGPUVAddr address;
|
||||
BitField<0, 3, Mode> mode;
|
||||
};
|
||||
static_assert(sizeof(RenderEnable) == 12);
|
||||
|
||||
enum class PhysModeTarget : u32 {
|
||||
LOCAL_FB = 0,
|
||||
COHERENT_SYSMEM = 1,
|
||||
NONCOHERENT_SYSMEM = 2,
|
||||
};
|
||||
using PhysMode = BitField<0, 2, PhysModeTarget>;
|
||||
|
||||
union LaunchDMA {
|
||||
enum class DataTransferType : u32 {
|
||||
NONE = 0,
|
||||
PIPELINED = 1,
|
||||
NON_PIPELINED = 2,
|
||||
};
|
||||
|
||||
enum class SemaphoreType : u32 {
|
||||
NONE = 0,
|
||||
RELEASE_ONE_WORD_SEMAPHORE = 1,
|
||||
RELEASE_FOUR_WORD_SEMAPHORE = 2,
|
||||
};
|
||||
|
||||
enum class InterruptType : u32 {
|
||||
NONE = 0,
|
||||
BLOCKING = 1,
|
||||
NON_BLOCKING = 2,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
BLOCKLINEAR = 0,
|
||||
PITCH = 1,
|
||||
};
|
||||
|
||||
enum class Type : u32 {
|
||||
VIRTUAL = 0,
|
||||
PHYSICAL = 1,
|
||||
};
|
||||
|
||||
enum class SemaphoreReduction : u32 {
|
||||
IMIN = 0,
|
||||
IMAX = 1,
|
||||
IXOR = 2,
|
||||
IAND = 3,
|
||||
IOR = 4,
|
||||
IADD = 5,
|
||||
INC = 6,
|
||||
DEC = 7,
|
||||
FADD = 0xA,
|
||||
};
|
||||
|
||||
enum class SemaphoreReductionSign : u32 {
|
||||
SIGNED = 0,
|
||||
UNSIGNED = 1,
|
||||
};
|
||||
|
||||
enum class BypassL2 : u32 {
|
||||
USE_PTE_SETTING = 0,
|
||||
FORCE_VOLATILE = 1,
|
||||
};
|
||||
|
||||
BitField<0, 2, DataTransferType> data_transfer_type;
|
||||
BitField<2, 1, u32> flush_enable;
|
||||
BitField<3, 2, SemaphoreType> semaphore_type;
|
||||
BitField<5, 2, InterruptType> interrupt_type;
|
||||
BitField<7, 1, MemoryLayout> src_memory_layout;
|
||||
BitField<8, 1, MemoryLayout> dst_memory_layout;
|
||||
BitField<9, 1, u32> multi_line_enable;
|
||||
BitField<10, 1, u32> remap_enable;
|
||||
BitField<11, 1, u32> rmwdisable;
|
||||
BitField<12, 1, Type> src_type;
|
||||
BitField<13, 1, Type> dst_type;
|
||||
BitField<14, 4, SemaphoreReduction> semaphore_reduction;
|
||||
BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
|
||||
BitField<19, 1, u32> reduction_enable;
|
||||
BitField<20, 1, BypassL2> bypass_l2;
|
||||
};
|
||||
static_assert(sizeof(LaunchDMA) == 4);
|
||||
|
||||
struct RemapConst {
|
||||
enum class Swizzle : u32 {
|
||||
SRC_X = 0,
|
||||
SRC_Y = 1,
|
||||
SRC_Z = 2,
|
||||
SRC_W = 3,
|
||||
CONST_A = 4,
|
||||
CONST_B = 5,
|
||||
NO_WRITE = 6,
|
||||
};
|
||||
|
||||
PackedGPUVAddr address;
|
||||
|
||||
union {
|
||||
BitField<0, 3, Swizzle> dst_x;
|
||||
BitField<4, 3, Swizzle> dst_y;
|
||||
BitField<8, 3, Swizzle> dst_z;
|
||||
BitField<12, 3, Swizzle> dst_w;
|
||||
BitField<0, 12, u32> dst_components_raw;
|
||||
BitField<16, 2, u32> component_size_minus_one;
|
||||
BitField<20, 2, u32> num_src_components_minus_one;
|
||||
BitField<24, 2, u32> num_dst_components_minus_one;
|
||||
};
|
||||
|
||||
Swizzle GetComponent(size_t i) const {
|
||||
const u32 raw = dst_components_raw;
|
||||
return static_cast<Swizzle>((raw >> (i * 3)) & 0x7);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(RemapConst) == 12);
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
|
||||
~MaxwellDMA() override;
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
||||
/// Write multiple values to the register identified by method.
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
private:
|
||||
/// Performs the copy from the source buffer to the destination buffer as configured in the
|
||||
/// registers.
|
||||
void Launch();
|
||||
|
||||
void CopyBlockLinearToPitch();
|
||||
|
||||
void CopyPitchToBlockLinear();
|
||||
|
||||
void FastCopyBlockLinearToPitch();
|
||||
|
||||
void ReleaseSemaphore();
|
||||
|
||||
Core::System& system;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
std::vector<u8> read_buffer;
|
||||
std::vector<u8> write_buffer;
|
||||
|
||||
static constexpr std::size_t NUM_REGS = 0x800;
|
||||
struct Regs {
|
||||
union {
|
||||
struct {
|
||||
u32 reserved[0x40];
|
||||
u32 nop;
|
||||
u32 reserved01[0xf];
|
||||
u32 pm_trigger;
|
||||
u32 reserved02[0x3f];
|
||||
Semaphore semaphore;
|
||||
u32 reserved03[0x2];
|
||||
RenderEnable render_enable;
|
||||
PhysMode src_phys_mode;
|
||||
PhysMode dst_phys_mode;
|
||||
u32 reserved04[0x26];
|
||||
LaunchDMA launch_dma;
|
||||
u32 reserved05[0x3f];
|
||||
PackedGPUVAddr offset_in;
|
||||
PackedGPUVAddr offset_out;
|
||||
u32 pitch_in;
|
||||
u32 pitch_out;
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
u32 reserved06[0xb6];
|
||||
u32 remap_consta_value;
|
||||
u32 remap_constb_value;
|
||||
RemapConst remap_const;
|
||||
Parameters dst_params;
|
||||
u32 reserved07[0x1];
|
||||
Parameters src_params;
|
||||
u32 reserved08[0x275];
|
||||
u32 pm_trigger_end;
|
||||
u32 reserved09[0x3ba];
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(launch_dma, 0xC0);
|
||||
ASSERT_REG_POSITION(offset_in, 0x100);
|
||||
ASSERT_REG_POSITION(offset_out, 0x102);
|
||||
ASSERT_REG_POSITION(pitch_in, 0x104);
|
||||
ASSERT_REG_POSITION(pitch_out, 0x105);
|
||||
ASSERT_REG_POSITION(line_length_in, 0x106);
|
||||
ASSERT_REG_POSITION(line_count, 0x107);
|
||||
ASSERT_REG_POSITION(remap_const, 0x1C0);
|
||||
ASSERT_REG_POSITION(dst_params, 0x1C3);
|
||||
ASSERT_REG_POSITION(src_params, 0x1CA);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,305 +1,305 @@
|
||||
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/engines/puller.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
|
||||
Control::ChannelState& channel_state_)
|
||||
: gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
|
||||
channel_state_} {}
|
||||
|
||||
Puller::~Puller() = default;
|
||||
|
||||
void Puller::ProcessBindMethod(const MethodCall& method_call) {
|
||||
// Bind the current subchannel to the desired engine id.
|
||||
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
|
||||
method_call.argument);
|
||||
const auto engine_id = static_cast<EngineID>(method_call.argument);
|
||||
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
|
||||
switch (engine_id) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessFenceActionMethod() {
|
||||
switch (regs.fence_action.op) {
|
||||
case Puller::FenceOperation::Acquire:
|
||||
// UNIMPLEMENTED_MSG("Channel Scheduling pending.");
|
||||
// WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
||||
rasterizer->ReleaseFences();
|
||||
break;
|
||||
case Puller::FenceOperation::Increment:
|
||||
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreTriggerMethod() {
|
||||
const auto semaphoreOperationMask = 0xF;
|
||||
const auto op =
|
||||
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
|
||||
if (op == GpuSemaphoreOperation::WriteLong) {
|
||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||
const u32 payload = regs.semaphore_sequence;
|
||||
[this, sequence_address, payload] {
|
||||
memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
|
||||
memory_manager.Write<u64>(sequence_address, payload);
|
||||
}();
|
||||
} else {
|
||||
do {
|
||||
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||
regs.acquire_source = true;
|
||||
regs.acquire_value = regs.semaphore_sequence;
|
||||
if (op == GpuSemaphoreOperation::AcquireEqual) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = false;
|
||||
if (word != regs.acquire_value) {
|
||||
rasterizer->ReleaseFences();
|
||||
continue;
|
||||
}
|
||||
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = true;
|
||||
if (word < regs.acquire_value) {
|
||||
rasterizer->ReleaseFences();
|
||||
continue;
|
||||
}
|
||||
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
||||
if (word && regs.semaphore_sequence == 0) {
|
||||
rasterizer->ReleaseFences();
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
|
||||
}
|
||||
} while (false);
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreRelease() {
|
||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||
const u32 payload = regs.semaphore_release;
|
||||
std::function<void()> operation([this, sequence_address, payload] {
|
||||
memory_manager.Write<u32>(sequence_address, payload);
|
||||
});
|
||||
rasterizer->SyncOperation(std::move(operation));
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreAcquire() {
|
||||
u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
const auto value = regs.semaphore_acquire;
|
||||
while (word != value) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_value = value;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
rasterizer->ReleaseFences();
|
||||
word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
// TODO(kemathe73) figure out how to do the acquire_timeout
|
||||
regs.acquire_mode = false;
|
||||
regs.acquire_source = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU puller method.
|
||||
void Puller::CallPullerMethod(const MethodCall& method_call) {
|
||||
regs.reg_array[method_call.method] = method_call.argument;
|
||||
const auto method = static_cast<BufferMethods>(method_call.method);
|
||||
|
||||
switch (method) {
|
||||
case BufferMethods::BindObject: {
|
||||
ProcessBindMethod(method_call);
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Nop:
|
||||
case BufferMethods::SemaphoreAddressHigh:
|
||||
case BufferMethods::SemaphoreAddressLow:
|
||||
case BufferMethods::SemaphoreSequencePayload:
|
||||
case BufferMethods::SyncpointPayload:
|
||||
break;
|
||||
case BufferMethods::WrcacheFlush:
|
||||
case BufferMethods::RefCnt:
|
||||
rasterizer->SignalReference();
|
||||
break;
|
||||
case BufferMethods::SyncpointOperation:
|
||||
ProcessFenceActionMethod();
|
||||
break;
|
||||
case BufferMethods::WaitForIdle:
|
||||
rasterizer->WaitForIdle();
|
||||
break;
|
||||
case BufferMethods::SemaphoreOperation: {
|
||||
ProcessSemaphoreTriggerMethod();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::NonStallInterrupt: {
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpA: {
|
||||
LOG_ERROR(HW_GPU, "Memory Operation A");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpB: {
|
||||
// Implement this better.
|
||||
rasterizer->InvalidateGPUCache();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpC:
|
||||
case BufferMethods::MemOpD: {
|
||||
LOG_ERROR(HW_GPU, "Memory Operation C,D");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreAcquire: {
|
||||
ProcessSemaphoreAcquire();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreRelease: {
|
||||
ProcessSemaphoreRelease();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Yield: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine method.
|
||||
void Puller::CallEngineMethod(const MethodCall& method_call) {
|
||||
const EngineID engine = bound_engines[method_call.subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine multivalue method.
|
||||
void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
const EngineID engine = bound_engines[subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU method.
|
||||
void Puller::CallMethod(const MethodCall& method_call) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
|
||||
method_call.subchannel);
|
||||
|
||||
ASSERT(method_call.subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method_call.method)) {
|
||||
CallEngineMethod(method_call);
|
||||
} else {
|
||||
CallPullerMethod(method_call);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU multivalue method.
|
||||
void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
|
||||
|
||||
ASSERT(subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method)) {
|
||||
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
|
||||
} else {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallPullerMethod(MethodCall{
|
||||
method,
|
||||
base_start[i],
|
||||
subchannel,
|
||||
methods_pending - static_cast<u32>(i),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
/// Determines where the method should be executed.
|
||||
[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
|
||||
const auto buffer_method = static_cast<BufferMethods>(method);
|
||||
return buffer_method >= BufferMethods::NonPullerMethods;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/engines/puller.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
|
||||
Control::ChannelState& channel_state_)
|
||||
: gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
|
||||
channel_state_} {}
|
||||
|
||||
Puller::~Puller() = default;
|
||||
|
||||
void Puller::ProcessBindMethod(const MethodCall& method_call) {
|
||||
// Bind the current subchannel to the desired engine id.
|
||||
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
|
||||
method_call.argument);
|
||||
const auto engine_id = static_cast<EngineID>(method_call.argument);
|
||||
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
|
||||
switch (engine_id) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessFenceActionMethod() {
|
||||
switch (regs.fence_action.op) {
|
||||
case Puller::FenceOperation::Acquire:
|
||||
// UNIMPLEMENTED_MSG("Channel Scheduling pending.");
|
||||
// WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
||||
rasterizer->ReleaseFences();
|
||||
break;
|
||||
case Puller::FenceOperation::Increment:
|
||||
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreTriggerMethod() {
|
||||
const auto semaphoreOperationMask = 0xF;
|
||||
const auto op =
|
||||
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
|
||||
if (op == GpuSemaphoreOperation::WriteLong) {
|
||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||
const u32 payload = regs.semaphore_sequence;
|
||||
[this, sequence_address, payload] {
|
||||
memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
|
||||
memory_manager.Write<u64>(sequence_address, payload);
|
||||
}();
|
||||
} else {
|
||||
do {
|
||||
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||
regs.acquire_source = true;
|
||||
regs.acquire_value = regs.semaphore_sequence;
|
||||
if (op == GpuSemaphoreOperation::AcquireEqual) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = false;
|
||||
if (word != regs.acquire_value) {
|
||||
rasterizer->ReleaseFences();
|
||||
continue;
|
||||
}
|
||||
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = true;
|
||||
if (word < regs.acquire_value) {
|
||||
rasterizer->ReleaseFences();
|
||||
continue;
|
||||
}
|
||||
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
||||
if (word && regs.semaphore_sequence == 0) {
|
||||
rasterizer->ReleaseFences();
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
|
||||
}
|
||||
} while (false);
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreRelease() {
|
||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||
const u32 payload = regs.semaphore_release;
|
||||
std::function<void()> operation([this, sequence_address, payload] {
|
||||
memory_manager.Write<u32>(sequence_address, payload);
|
||||
});
|
||||
rasterizer->SyncOperation(std::move(operation));
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreAcquire() {
|
||||
u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
const auto value = regs.semaphore_acquire;
|
||||
while (word != value) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_value = value;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
rasterizer->ReleaseFences();
|
||||
word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
// TODO(kemathe73) figure out how to do the acquire_timeout
|
||||
regs.acquire_mode = false;
|
||||
regs.acquire_source = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU puller method.
|
||||
void Puller::CallPullerMethod(const MethodCall& method_call) {
|
||||
regs.reg_array[method_call.method] = method_call.argument;
|
||||
const auto method = static_cast<BufferMethods>(method_call.method);
|
||||
|
||||
switch (method) {
|
||||
case BufferMethods::BindObject: {
|
||||
ProcessBindMethod(method_call);
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Nop:
|
||||
case BufferMethods::SemaphoreAddressHigh:
|
||||
case BufferMethods::SemaphoreAddressLow:
|
||||
case BufferMethods::SemaphoreSequencePayload:
|
||||
case BufferMethods::SyncpointPayload:
|
||||
break;
|
||||
case BufferMethods::WrcacheFlush:
|
||||
case BufferMethods::RefCnt:
|
||||
rasterizer->SignalReference();
|
||||
break;
|
||||
case BufferMethods::SyncpointOperation:
|
||||
ProcessFenceActionMethod();
|
||||
break;
|
||||
case BufferMethods::WaitForIdle:
|
||||
rasterizer->WaitForIdle();
|
||||
break;
|
||||
case BufferMethods::SemaphoreOperation: {
|
||||
ProcessSemaphoreTriggerMethod();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::NonStallInterrupt: {
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpA: {
|
||||
LOG_ERROR(HW_GPU, "Memory Operation A");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpB: {
|
||||
// Implement this better.
|
||||
rasterizer->InvalidateGPUCache();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpC:
|
||||
case BufferMethods::MemOpD: {
|
||||
LOG_ERROR(HW_GPU, "Memory Operation C,D");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreAcquire: {
|
||||
ProcessSemaphoreAcquire();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreRelease: {
|
||||
ProcessSemaphoreRelease();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Yield: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine method.
|
||||
void Puller::CallEngineMethod(const MethodCall& method_call) {
|
||||
const EngineID engine = bound_engines[method_call.subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine multivalue method.
|
||||
void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
const EngineID engine = bound_engines[subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU method.
|
||||
void Puller::CallMethod(const MethodCall& method_call) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
|
||||
method_call.subchannel);
|
||||
|
||||
ASSERT(method_call.subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method_call.method)) {
|
||||
CallEngineMethod(method_call);
|
||||
} else {
|
||||
CallPullerMethod(method_call);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU multivalue method.
|
||||
void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
|
||||
|
||||
ASSERT(subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method)) {
|
||||
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
|
||||
} else {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallPullerMethod(MethodCall{
|
||||
method,
|
||||
base_start[i],
|
||||
subchannel,
|
||||
methods_pending - static_cast<u32>(i),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
/// Determines where the method should be executed.
|
||||
[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
|
||||
const auto buffer_method = static_cast<BufferMethods>(method);
|
||||
return buffer_method >= BufferMethods::NonPullerMethods;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
@@ -1,177 +1,177 @@
|
||||
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
class DmaPusher;
|
||||
|
||||
enum class EngineID {
|
||||
FERMI_TWOD_A = 0x902D, // 2D Engine
|
||||
MAXWELL_B = 0xB197, // 3D Engine
|
||||
KEPLER_COMPUTE_B = 0xB1C0,
|
||||
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
|
||||
MAXWELL_DMA_COPY_A = 0xB0B5,
|
||||
};
|
||||
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class Puller final {
|
||||
public:
|
||||
struct MethodCall {
|
||||
u32 method{};
|
||||
u32 argument{};
|
||||
u32 subchannel{};
|
||||
u32 method_count{};
|
||||
|
||||
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
|
||||
: method(method_), argument(argument_), subchannel(subchannel_),
|
||||
method_count(method_count_) {}
|
||||
|
||||
[[nodiscard]] bool IsLastCall() const {
|
||||
return method_count <= 1;
|
||||
}
|
||||
};
|
||||
|
||||
enum class FenceOperation : u32 {
|
||||
Acquire = 0,
|
||||
Increment = 1,
|
||||
};
|
||||
|
||||
union FenceAction {
|
||||
u32 raw;
|
||||
BitField<0, 1, FenceOperation> op;
|
||||
BitField<8, 24, u32> syncpoint_id;
|
||||
};
|
||||
|
||||
explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
|
||||
Control::ChannelState& channel_state);
|
||||
~Puller();
|
||||
|
||||
void CallMethod(const MethodCall& method_call);
|
||||
|
||||
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
|
||||
void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
private:
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
DmaPusher& dma_pusher;
|
||||
Control::ChannelState& channel_state;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
static constexpr std::size_t NUM_REGS = 0x800;
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x40;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
|
||||
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} semaphore_address;
|
||||
|
||||
u32 semaphore_sequence;
|
||||
u32 semaphore_trigger;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC);
|
||||
|
||||
// The pusher and the puller share the reference counter, the pusher only has read
|
||||
// access
|
||||
u32 reference_count;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x5);
|
||||
|
||||
u32 semaphore_acquire;
|
||||
u32 semaphore_release;
|
||||
u32 fence_value;
|
||||
FenceAction fence_action;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xE2);
|
||||
|
||||
// Puller state
|
||||
u32 acquire_mode;
|
||||
u32 acquire_source;
|
||||
u32 acquire_active;
|
||||
u32 acquire_timeout;
|
||||
u32 acquire_value;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessFenceActionMethod();
|
||||
void ProcessSemaphoreAcquire();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
||||
|
||||
/// Mapping of command subchannels to their bound engine ids
|
||||
std::array<EngineID, 8> bound_engines{};
|
||||
|
||||
enum class GpuSemaphoreOperation {
|
||||
AcquireEqual = 0x1,
|
||||
WriteLong = 0x2,
|
||||
AcquireGequal = 0x4,
|
||||
AcquireMask = 0x8,
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(semaphore_address, 0x4);
|
||||
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
|
||||
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
|
||||
ASSERT_REG_POSITION(reference_count, 0x14);
|
||||
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
|
||||
ASSERT_REG_POSITION(semaphore_release, 0x1B);
|
||||
ASSERT_REG_POSITION(fence_value, 0x1C);
|
||||
ASSERT_REG_POSITION(fence_action, 0x1D);
|
||||
|
||||
ASSERT_REG_POSITION(acquire_mode, 0x100);
|
||||
ASSERT_REG_POSITION(acquire_source, 0x101);
|
||||
ASSERT_REG_POSITION(acquire_active, 0x102);
|
||||
ASSERT_REG_POSITION(acquire_timeout, 0x103);
|
||||
ASSERT_REG_POSITION(acquire_value, 0x104);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
class DmaPusher;
|
||||
|
||||
enum class EngineID {
|
||||
FERMI_TWOD_A = 0x902D, // 2D Engine
|
||||
MAXWELL_B = 0xB197, // 3D Engine
|
||||
KEPLER_COMPUTE_B = 0xB1C0,
|
||||
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
|
||||
MAXWELL_DMA_COPY_A = 0xB0B5,
|
||||
};
|
||||
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class Puller final {
|
||||
public:
|
||||
struct MethodCall {
|
||||
u32 method{};
|
||||
u32 argument{};
|
||||
u32 subchannel{};
|
||||
u32 method_count{};
|
||||
|
||||
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
|
||||
: method(method_), argument(argument_), subchannel(subchannel_),
|
||||
method_count(method_count_) {}
|
||||
|
||||
[[nodiscard]] bool IsLastCall() const {
|
||||
return method_count <= 1;
|
||||
}
|
||||
};
|
||||
|
||||
enum class FenceOperation : u32 {
|
||||
Acquire = 0,
|
||||
Increment = 1,
|
||||
};
|
||||
|
||||
union FenceAction {
|
||||
u32 raw;
|
||||
BitField<0, 1, FenceOperation> op;
|
||||
BitField<8, 24, u32> syncpoint_id;
|
||||
};
|
||||
|
||||
explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
|
||||
Control::ChannelState& channel_state);
|
||||
~Puller();
|
||||
|
||||
void CallMethod(const MethodCall& method_call);
|
||||
|
||||
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
|
||||
void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
private:
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
DmaPusher& dma_pusher;
|
||||
Control::ChannelState& channel_state;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
static constexpr std::size_t NUM_REGS = 0x800;
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x40;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
|
||||
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} semaphore_address;
|
||||
|
||||
u32 semaphore_sequence;
|
||||
u32 semaphore_trigger;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC);
|
||||
|
||||
// The pusher and the puller share the reference counter, the pusher only has read
|
||||
// access
|
||||
u32 reference_count;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x5);
|
||||
|
||||
u32 semaphore_acquire;
|
||||
u32 semaphore_release;
|
||||
u32 fence_value;
|
||||
FenceAction fence_action;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xE2);
|
||||
|
||||
// Puller state
|
||||
u32 acquire_mode;
|
||||
u32 acquire_source;
|
||||
u32 acquire_active;
|
||||
u32 acquire_timeout;
|
||||
u32 acquire_value;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessFenceActionMethod();
|
||||
void ProcessSemaphoreAcquire();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
||||
|
||||
/// Mapping of command subchannels to their bound engine ids
|
||||
std::array<EngineID, 8> bound_engines{};
|
||||
|
||||
enum class GpuSemaphoreOperation {
|
||||
AcquireEqual = 0x1,
|
||||
WriteLong = 0x2,
|
||||
AcquireGequal = 0x4,
|
||||
AcquireMask = 0x8,
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(semaphore_address, 0x4);
|
||||
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
|
||||
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
|
||||
ASSERT_REG_POSITION(reference_count, 0x14);
|
||||
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
|
||||
ASSERT_REG_POSITION(semaphore_release, 0x1B);
|
||||
ASSERT_REG_POSITION(fence_value, 0x1C);
|
||||
ASSERT_REG_POSITION(fence_action, 0x1D);
|
||||
|
||||
ASSERT_REG_POSITION(acquire_mode, 0x100);
|
||||
ASSERT_REG_POSITION(acquire_source, 0x101);
|
||||
ASSERT_REG_POSITION(acquire_active, 0x102);
|
||||
ASSERT_REG_POSITION(acquire_timeout, 0x103);
|
||||
ASSERT_REG_POSITION(acquire_value, 0x104);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
Reference in New Issue
Block a user