another try

2022-11-05 13:58:44 +01:00
parent 4a9f2bbf2a
commit 9f63fbe700
2002 changed files with 671171 additions and 671092 deletions
--- a/src/video_core/engines/const_buffer_info.h
+++ b/src/video_core/engines/const_buffer_info.h
@@ -1,16 +1,16 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-
-struct ConstBufferInfo {
-    GPUVAddr address;
-    u32 size;
-    bool enabled;
-};
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra::Engines {
+
+struct ConstBufferInfo {
+    GPUVAddr address;
+    u32 size;
+    bool enabled;
+};
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -1,22 +1,22 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-
-class EngineInterface {
-public:
-    virtual ~EngineInterface() = default;
-
-    /// Write the value to the register identified by method.
-    virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
-
-    /// Write multiple values to the register identified by method.
-    virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                 u32 methods_pending) = 0;
-};
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra::Engines {
+
+class EngineInterface {
+public:
+    virtual ~EngineInterface() = default;
+
+    /// Write the value to the register identified by method.
+    virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
+
+    /// Write multiple values to the register identified by method.
+    virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                 u32 methods_pending) = 0;
+};
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -1,84 +1,84 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <cstring>
-
-#include "common/algorithm.h"
-#include "common/assert.h"
-#include "video_core/engines/engine_upload.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"
-
-namespace Tegra::Engines::Upload {
-
-State::State(MemoryManager& memory_manager_, Registers& regs_)
-    : regs{regs_}, memory_manager{memory_manager_} {}
-
-State::~State() = default;
-
-void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
-}
-
-void State::ProcessExec(const bool is_linear_) {
-    write_offset = 0;
-    copy_size = regs.line_length_in * regs.line_count;
-    inner_buffer.resize(copy_size);
-    is_linear = is_linear_;
-}
-
-void State::ProcessData(const u32 data, const bool is_last_call) {
-    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
-    std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
-    write_offset += sub_copy_size;
-    if (!is_last_call) {
-        return;
-    }
-    ProcessData(inner_buffer);
-}
-
-void State::ProcessData(const u32* data, size_t num_data) {
-    std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32));
-    ProcessData(read_buffer);
-}
-
-void State::ProcessData(std::span<const u8> read_buffer) {
-    const GPUVAddr address{regs.dest.Address()};
-    if (is_linear) {
-        if (regs.line_count == 1) {
-            rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
-        } else {
-            for (u32 line = 0; line < regs.line_count; ++line) {
-                const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
-                memory_manager.WriteBlockUnsafe(
-                    dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
-                    regs.line_length_in);
-            }
-            memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
-        }
-    } else {
-        u32 width = regs.dest.width;
-        u32 x_elements = regs.line_length_in;
-        u32 x_offset = regs.dest.x;
-        const u32 bpp_shift = Common::FoldRight(
-            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
-            width, x_elements, x_offset, static_cast<u32>(address));
-        width >>= bpp_shift;
-        x_elements >>= bpp_shift;
-        x_offset >>= bpp_shift;
-        const u32 bytes_per_pixel = 1U << bpp_shift;
-        const std::size_t dst_size = Tegra::Texture::CalculateSize(
-            true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
-            regs.dest.BlockHeight(), regs.dest.BlockDepth());
-        tmp_buffer.resize(dst_size);
-        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-        Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
-                                       regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
-                                       x_elements, regs.line_count, regs.dest.BlockHeight(),
-                                       regs.dest.BlockDepth(), regs.line_length_in);
-        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
-    }
-}
-
-} // namespace Tegra::Engines::Upload
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <cstring>
+
+#include "common/algorithm.h"
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+State::State(MemoryManager& memory_manager_, Registers& regs_)
+    : regs{regs_}, memory_manager{memory_manager_} {}
+
+State::~State() = default;
+
+void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
+}
+
+void State::ProcessExec(const bool is_linear_) {
+    write_offset = 0;
+    copy_size = regs.line_length_in * regs.line_count;
+    inner_buffer.resize(copy_size);
+    is_linear = is_linear_;
+}
+
+void State::ProcessData(const u32 data, const bool is_last_call) {
+    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+    std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
+    write_offset += sub_copy_size;
+    if (!is_last_call) {
+        return;
+    }
+    ProcessData(inner_buffer);
+}
+
+void State::ProcessData(const u32* data, size_t num_data) {
+    std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32));
+    ProcessData(read_buffer);
+}
+
+void State::ProcessData(std::span<const u8> read_buffer) {
+    const GPUVAddr address{regs.dest.Address()};
+    if (is_linear) {
+        if (regs.line_count == 1) {
+            rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
+        } else {
+            for (u32 line = 0; line < regs.line_count; ++line) {
+                const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
+                memory_manager.WriteBlockUnsafe(
+                    dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
+                    regs.line_length_in);
+            }
+            memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
+        }
+    } else {
+        u32 width = regs.dest.width;
+        u32 x_elements = regs.line_length_in;
+        u32 x_offset = regs.dest.x;
+        const u32 bpp_shift = Common::FoldRight(
+            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+            width, x_elements, x_offset, static_cast<u32>(address));
+        width >>= bpp_shift;
+        x_elements >>= bpp_shift;
+        x_offset >>= bpp_shift;
+        const u32 bytes_per_pixel = 1U << bpp_shift;
+        const std::size_t dst_size = Tegra::Texture::CalculateSize(
+            true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
+            regs.dest.BlockHeight(), regs.dest.BlockDepth());
+        tmp_buffer.resize(dst_size);
+        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+        Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
+                                       regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
+                                       x_elements, regs.line_count, regs.dest.BlockHeight(),
+                                       regs.dest.BlockDepth(), regs.line_length_in);
+        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+    }
+}
+
+} // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -1,84 +1,84 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <span>
-#include <vector>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace Tegra::Engines::Upload {
-
-struct Registers {
-    u32 line_length_in;
-    u32 line_count;
-
-    struct {
-        u32 address_high;
-        u32 address_low;
-        u32 pitch;
-        union {
-            BitField<0, 4, u32> block_width;
-            BitField<4, 4, u32> block_height;
-            BitField<8, 4, u32> block_depth;
-        };
-        u32 width;
-        u32 height;
-        u32 depth;
-        u32 layer;
-        u32 x;
-        u32 y;
-
-        GPUVAddr Address() const {
-            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
-        }
-
-        u32 BlockWidth() const {
-            return block_width.Value();
-        }
-
-        u32 BlockHeight() const {
-            return block_height.Value();
-        }
-
-        u32 BlockDepth() const {
-            return block_depth.Value();
-        }
-    } dest;
-};
-
-class State {
-public:
-    explicit State(MemoryManager& memory_manager_, Registers& regs_);
-    ~State();
-
-    void ProcessExec(bool is_linear_);
-    void ProcessData(u32 data, bool is_last_call);
-    void ProcessData(const u32* data, size_t num_data);
-
-    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-private:
-    void ProcessData(std::span<const u8> read_buffer);
-
-    u32 write_offset = 0;
-    u32 copy_size = 0;
-    std::vector<u8> inner_buffer;
-    std::vector<u8> tmp_buffer;
-    bool is_linear = false;
-    Registers& regs;
-    MemoryManager& memory_manager;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-};
-
-} // namespace Tegra::Engines::Upload
+// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <span>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines::Upload {
+
+struct Registers {
+    u32 line_length_in;
+    u32 line_count;
+
+    struct {
+        u32 address_high;
+        u32 address_low;
+        u32 pitch;
+        union {
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 width;
+        u32 height;
+        u32 depth;
+        u32 layer;
+        u32 x;
+        u32 y;
+
+        GPUVAddr Address() const {
+            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+        }
+
+        u32 BlockWidth() const {
+            return block_width.Value();
+        }
+
+        u32 BlockHeight() const {
+            return block_height.Value();
+        }
+
+        u32 BlockDepth() const {
+            return block_depth.Value();
+        }
+    } dest;
+};
+
+class State {
+public:
+    explicit State(MemoryManager& memory_manager_, Registers& regs_);
+    ~State();
+
+    void ProcessExec(bool is_linear_);
+    void ProcessData(u32 data, bool is_last_call);
+    void ProcessData(const u32* data, size_t num_data);
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+private:
+    void ProcessData(std::span<const u8> read_buffer);
+
+    u32 write_offset = 0;
+    u32 copy_size = 0;
+    std::vector<u8> inner_buffer;
+    std::vector<u8> tmp_buffer;
+    bool is_linear = false;
+    Registers& regs;
+    MemoryManager& memory_manager;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+};
+
+} // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -1,86 +1,86 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-#include "video_core/surface.h"
-
-using VideoCore::Surface::BytesPerBlock;
-using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
-
-namespace Tegra::Engines {
-
-Fermi2D::Fermi2D() {
-    // Nvidia's OpenGL driver seems to assume these values
-    regs.src.depth = 1;
-    regs.dst.depth = 1;
-}
-
-Fermi2D::~Fermi2D() = default;
-
-void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
-}
-
-void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
-    ASSERT_MSG(method < Regs::NUM_REGS,
-               "Invalid Fermi2D register, increase the size of the Regs structure");
-    regs.reg_array[method] = method_argument;
-
-    if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
-        Blit();
-    }
-}
-
-void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
-    for (u32 i = 0; i < amount; ++i) {
-        CallMethod(method, base_start[i], methods_pending - i <= 1);
-    }
-}
-
-void Fermi2D::Blit() {
-    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
-              regs.src.Address(), regs.dst.Address());
-
-    UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
-    UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
-    UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
-    UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
-    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
-
-    const auto& args = regs.pixels_from_memory;
-    Config config{
-        .operation = regs.operation,
-        .filter = args.sample_mode.filter,
-        .dst_x0 = args.dst_x0,
-        .dst_y0 = args.dst_y0,
-        .dst_x1 = args.dst_x0 + args.dst_width,
-        .dst_y1 = args.dst_y0 + args.dst_height,
-        .src_x0 = static_cast<s32>(args.src_x0 >> 32),
-        .src_y0 = static_cast<s32>(args.src_y0 >> 32),
-        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
-        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
-    };
-    Surface src = regs.src;
-    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
-    const auto need_align_to_pitch =
-        src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
-        static_cast<s32>(src.width) == config.src_x1 &&
-        config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
-    if (need_align_to_pitch) {
-        auto address = src.Address() + config.src_x0 * bytes_per_pixel;
-        src.addr_upper = static_cast<u32>(address >> 32);
-        src.addr_lower = static_cast<u32>(address);
-        src.width -= config.src_x0;
-        config.src_x1 -= config.src_x0;
-        config.src_x0 = 0;
-    }
-    if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
-        UNIMPLEMENTED();
-    }
-}
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
+namespace Tegra::Engines {
+
+Fermi2D::Fermi2D() {
+    // Nvidia's OpenGL driver seems to assume these values
+    regs.src.depth = 1;
+    regs.dst.depth = 1;
+}
+
+Fermi2D::~Fermi2D() = default;
+
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
+}
+
+void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid Fermi2D register, increase the size of the Regs structure");
+    regs.reg_array[method] = method_argument;
+
+    if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
+        Blit();
+    }
+}
+
+void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
+    for (u32 i = 0; i < amount; ++i) {
+        CallMethod(method, base_start[i], methods_pending - i <= 1);
+    }
+}
+
+void Fermi2D::Blit() {
+    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
+              regs.src.Address(), regs.dst.Address());
+
+    UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
+    UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
+    UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
+    UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
+    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
+
+    const auto& args = regs.pixels_from_memory;
+    Config config{
+        .operation = regs.operation,
+        .filter = args.sample_mode.filter,
+        .dst_x0 = args.dst_x0,
+        .dst_y0 = args.dst_y0,
+        .dst_x1 = args.dst_x0 + args.dst_width,
+        .dst_y1 = args.dst_y0 + args.dst_height,
+        .src_x0 = static_cast<s32>(args.src_x0 >> 32),
+        .src_y0 = static_cast<s32>(args.src_y0 >> 32),
+        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
+        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
+    };
+    Surface src = regs.src;
+    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+    const auto need_align_to_pitch =
+        src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
+        static_cast<s32>(src.width) == config.src_x1 &&
+        config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
+    if (need_align_to_pitch) {
+        auto address = src.Address() + config.src_x0 * bytes_per_pixel;
+        src.addr_upper = static_cast<u32>(address >> 32);
+        src.addr_lower = static_cast<u32>(address);
+        src.width -= config.src_x0;
+        config.src_x1 -= config.src_x0;
+        config.src_x0 = 0;
+    }
+    if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
+        UNIMPLEMENTED();
+    }
+}
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -1,350 +1,350 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "video_core/engines/engine_interface.h"
-#include "video_core/gpu.h"
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace Tegra::Engines {
-
-/**
- * This Engine is known as G80_2D. Documentation can be found in:
- * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
- * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
- */
-
-#define FERMI2D_REG_INDEX(field_name)                                                              \
-    (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
-
-class Fermi2D final : public EngineInterface {
-public:
-    explicit Fermi2D();
-    ~Fermi2D() override;
-
-    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-    /// Write the value to the register identified by method.
-    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
-
-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                         u32 methods_pending) override;
-
-    enum class Origin : u32 {
-        Center = 0,
-        Corner = 1,
-    };
-
-    enum class Filter : u32 {
-        Point = 0,
-        Bilinear = 1,
-    };
-
-    enum class Operation : u32 {
-        SrcCopyAnd = 0,
-        ROPAnd = 1,
-        Blend = 2,
-        SrcCopy = 3,
-        ROP = 4,
-        SrcCopyPremult = 5,
-        BlendPremult = 6,
-    };
-
-    enum class MemoryLayout : u32 {
-        BlockLinear = 0,
-        Pitch = 1,
-    };
-
-    enum class CpuIndexWrap : u32 {
-        Wrap = 0,
-        NoWrap = 1,
-    };
-
-    struct Surface {
-        RenderTargetFormat format;
-        MemoryLayout linear;
-        union {
-            BitField<0, 4, u32> block_width;
-            BitField<4, 4, u32> block_height;
-            BitField<8, 4, u32> block_depth;
-        };
-        u32 depth;
-        u32 layer;
-        u32 pitch;
-        u32 width;
-        u32 height;
-        u32 addr_upper;
-        u32 addr_lower;
-
-        [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
-            return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
-        }
-    };
-    static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
-
-    enum class SectorPromotion : u32 {
-        NoPromotion = 0,
-        PromoteTo2V = 1,
-        PromoteTo2H = 2,
-        PromoteTo4 = 3,
-    };
-
-    enum class NumTpcs : u32 {
-        All = 0,
-        One = 1,
-    };
-
-    enum class RenderEnableMode : u32 {
-        False = 0,
-        True = 1,
-        Conditional = 2,
-        RenderIfEqual = 3,
-        RenderIfNotEqual = 4,
-    };
-
-    enum class ColorKeyFormat : u32 {
-        A16R56G6B5 = 0,
-        A1R5G55B5 = 1,
-        A8R8G8B8 = 2,
-        A2R10G10B10 = 3,
-        Y8 = 4,
-        Y16 = 5,
-        Y32 = 6,
-    };
-
-    union Beta4 {
-        BitField<0, 8, u32> b;
-        BitField<8, 8, u32> g;
-        BitField<16, 8, u32> r;
-        BitField<24, 8, u32> a;
-    };
-
-    struct Point {
-        u32 x;
-        u32 y;
-    };
-
-    enum class PatternSelect : u32 {
-        MonoChrome8x8 = 0,
-        MonoChrome64x1 = 1,
-        MonoChrome1x64 = 2,
-        Color = 3,
-    };
-
-    enum class NotifyType : u32 {
-        WriteOnly = 0,
-        WriteThenAwaken = 1,
-    };
-
-    enum class MonochromePatternColorFormat : u32 {
-        A8X8R8G6B5 = 0,
-        A1R5G5B5 = 1,
-        A8R8G8B8 = 2,
-        A8Y8 = 3,
-        A8X8Y16 = 4,
-        Y32 = 5,
-    };
-
-    enum class MonochromePatternFormat : u32 {
-        CGA6_M1 = 0,
-        LE_M1 = 1,
-    };
-
-    union Regs {
-        static constexpr std::size_t NUM_REGS = 0x258;
-        struct {
-            u32 object;
-            INSERT_PADDING_WORDS_NOINIT(0x3F);
-            u32 no_operation;
-            NotifyType notify;
-            INSERT_PADDING_WORDS_NOINIT(0x2);
-            u32 wait_for_idle;
-            INSERT_PADDING_WORDS_NOINIT(0xB);
-            u32 pm_trigger;
-            INSERT_PADDING_WORDS_NOINIT(0xF);
-            u32 context_dma_notify;
-            u32 dst_context_dma;
-            u32 src_context_dma;
-            u32 semaphore_context_dma;
-            INSERT_PADDING_WORDS_NOINIT(0x1C);
-            Surface dst;
-            CpuIndexWrap pixels_from_cpu_index_wrap;
-            u32 kind2d_check_enable;
-            Surface src;
-            SectorPromotion pixels_from_memory_sector_promotion;
-            INSERT_PADDING_WORDS_NOINIT(0x1);
-            NumTpcs num_tpcs;
-            u32 render_enable_addr_upper;
-            u32 render_enable_addr_lower;
-            RenderEnableMode render_enable_mode;
-            INSERT_PADDING_WORDS_NOINIT(0x4);
-            u32 clip_x0;
-            u32 clip_y0;
-            u32 clip_width;
-            u32 clip_height;
-            BitField<0, 1, u32> clip_enable;
-            BitField<0, 3, ColorKeyFormat> color_key_format;
-            u32 color_key;
-            BitField<0, 1, u32> color_key_enable;
-            BitField<0, 8, u32> rop;
-            u32 beta1;
-            Beta4 beta4;
-            Operation operation;
-            union {
-                BitField<0, 6, u32> x;
-                BitField<8, 6, u32> y;
-            } pattern_offset;
-            BitField<0, 2, PatternSelect> pattern_select;
-            INSERT_PADDING_WORDS_NOINIT(0xC);
-            struct {
-                BitField<0, 3, MonochromePatternColorFormat> color_format;
-                BitField<0, 1, MonochromePatternFormat> format;
-                u32 color0;
-                u32 color1;
-                u32 pattern0;
-                u32 pattern1;
-            } monochrome_pattern;
-            struct {
-                std::array<u32, 0x40> X8R8G8B8;
-                std::array<u32, 0x20> R5G6B5;
-                std::array<u32, 0x20> X1R5G5B5;
-                std::array<u32, 0x10> Y8;
-            } color_pattern;
-            INSERT_PADDING_WORDS_NOINIT(0x10);
-            struct {
-                u32 prim_mode;
-                u32 prim_color_format;
-                u32 prim_color;
-                u32 line_tie_break_bits;
-                INSERT_PADDING_WORDS_NOINIT(0x14);
-                u32 prim_point_xy;
-                INSERT_PADDING_WORDS_NOINIT(0x7);
-                std::array<Point, 0x40> prim_point;
-            } render_solid;
-            struct {
-                u32 data_type;
-                u32 color_format;
-                u32 index_format;
-                u32 mono_format;
-                u32 wrap;
-                u32 color0;
-                u32 color1;
-                u32 mono_opacity;
-                INSERT_PADDING_WORDS_NOINIT(0x6);
-                u32 src_width;
-                u32 src_height;
-                u32 dx_du_frac;
-                u32 dx_du_int;
-                u32 dx_dv_frac;
-                u32 dy_dv_int;
-                u32 dst_x0_frac;
-                u32 dst_x0_int;
-                u32 dst_y0_frac;
-                u32 dst_y0_int;
-                u32 data;
-            } pixels_from_cpu;
-            INSERT_PADDING_WORDS_NOINIT(0x3);
-            u32 big_endian_control;
-            INSERT_PADDING_WORDS_NOINIT(0x3);
-            struct {
-                BitField<0, 3, u32> block_shape;
-                BitField<0, 5, u32> corral_size;
-                BitField<0, 1, u32> safe_overlap;
-                union {
-                    BitField<0, 1, Origin> origin;
-                    BitField<4, 1, Filter> filter;
-                } sample_mode;
-                INSERT_PADDING_WORDS_NOINIT(0x8);
-                s32 dst_x0;
-                s32 dst_y0;
-                s32 dst_width;
-                s32 dst_height;
-                s64 du_dx;
-                s64 dv_dy;
-                s64 src_x0;
-                s64 src_y0;
-            } pixels_from_memory;
-        };
-        std::array<u32, NUM_REGS> reg_array;
-    } regs{};
-
-    struct Config {
-        Operation operation;
-        Filter filter;
-        s32 dst_x0;
-        s32 dst_y0;
-        s32 dst_x1;
-        s32 dst_y1;
-        s32 src_x0;
-        s32 src_y0;
-        s32 src_x1;
-        s32 src_y1;
-    };
-
-private:
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-
-    /// Performs the copy from the source surface to the destination surface as configured in the
-    /// registers.
-    void Blit();
-};
-
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Fermi2D::Regs, field_name) == position,                                 \
-                  "Field " #field_name " has invalid position")
-
-ASSERT_REG_POSITION(object, 0x0);
-ASSERT_REG_POSITION(no_operation, 0x100);
-ASSERT_REG_POSITION(notify, 0x104);
-ASSERT_REG_POSITION(wait_for_idle, 0x110);
-ASSERT_REG_POSITION(pm_trigger, 0x140);
-ASSERT_REG_POSITION(context_dma_notify, 0x180);
-ASSERT_REG_POSITION(dst_context_dma, 0x184);
-ASSERT_REG_POSITION(src_context_dma, 0x188);
-ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
-ASSERT_REG_POSITION(dst, 0x200);
-ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
-ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
-ASSERT_REG_POSITION(src, 0x230);
-ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
-ASSERT_REG_POSITION(num_tpcs, 0x260);
-ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
-ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
-ASSERT_REG_POSITION(clip_x0, 0x280);
-ASSERT_REG_POSITION(clip_y0, 0x284);
-ASSERT_REG_POSITION(clip_width, 0x288);
-ASSERT_REG_POSITION(clip_height, 0x28c);
-ASSERT_REG_POSITION(clip_enable, 0x290);
-ASSERT_REG_POSITION(color_key_format, 0x294);
-ASSERT_REG_POSITION(color_key, 0x298);
-ASSERT_REG_POSITION(rop, 0x2A0);
-ASSERT_REG_POSITION(beta1, 0x2A4);
-ASSERT_REG_POSITION(beta4, 0x2A8);
-ASSERT_REG_POSITION(operation, 0x2AC);
-ASSERT_REG_POSITION(pattern_offset, 0x2B0);
-ASSERT_REG_POSITION(pattern_select, 0x2B4);
-ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
-ASSERT_REG_POSITION(color_pattern, 0x300);
-ASSERT_REG_POSITION(render_solid, 0x580);
-ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
-ASSERT_REG_POSITION(big_endian_control, 0x870);
-ASSERT_REG_POSITION(pixels_from_memory, 0x880);
-
-#undef ASSERT_REG_POSITION
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
+#define FERMI2D_REG_INDEX(field_name)                                                              \
+    (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) // byte offset -> u32 index
+
+class Fermi2D final : public EngineInterface {
+public:
+    explicit Fermi2D();
+    ~Fermi2D() override;
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    /// Write the value to the register identified by method.
+    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
+
+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                         u32 methods_pending) override;
+
+    enum class Origin : u32 { // Type of pixels_from_memory.sample_mode.origin.
+        Center = 0,
+        Corner = 1,
+    };
+
+    enum class Filter : u32 { // Type of pixels_from_memory.sample_mode.filter and Config::filter.
+        Point = 0,
+        Bilinear = 1,
+    };
+
+    enum class Operation : u32 { // Type of Regs::operation and Config::operation.
+        SrcCopyAnd = 0,
+        ROPAnd = 1,
+        Blend = 2,
+        SrcCopy = 3,
+        ROP = 4,
+        SrcCopyPremult = 5,
+        BlendPremult = 6,
+    };
+
+    enum class MemoryLayout : u32 { // Type of Surface::linear.
+        BlockLinear = 0,
+        Pitch = 1,
+    };
+
+    enum class CpuIndexWrap : u32 { // Type of Regs::pixels_from_cpu_index_wrap.
+        Wrap = 0,
+        NoWrap = 1,
+    };
+
+    struct Surface { // Used for both Regs::dst and Regs::src; size is pinned to 0x28 bytes below.
+        RenderTargetFormat format;
+        MemoryLayout linear;
+        union { // The three block dimensions are bit fields of one shared 32-bit word.
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 depth;
+        u32 layer;
+        u32 pitch;
+        u32 width;
+        u32 height;
+        u32 addr_upper;
+        u32 addr_lower;
+
+        [[nodiscard]] constexpr GPUVAddr Address() const noexcept { // upper:lower as one value
+            return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+        }
+    };
+    static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+
+    enum class SectorPromotion : u32 { // Type of Regs::pixels_from_memory_sector_promotion.
+        NoPromotion = 0,
+        PromoteTo2V = 1,
+        PromoteTo2H = 2,
+        PromoteTo4 = 3,
+    };
+
+    enum class NumTpcs : u32 { // Type of Regs::num_tpcs.
+        All = 0,
+        One = 1,
+    };
+
+    enum class RenderEnableMode : u32 { // Type of Regs::render_enable_mode.
+        False = 0,
+        True = 1,
+        Conditional = 2,
+        RenderIfEqual = 3,
+        RenderIfNotEqual = 4,
+    };
+
+    enum class ColorKeyFormat : u32 { // Type of the Regs::color_key_format bit field.
+        A16R56G6B5 = 0,
+        A1R5G55B5 = 1,
+        A8R8G8B8 = 2,
+        A2R10G10B10 = 3,
+        Y8 = 4,
+        Y16 = 5,
+        Y32 = 6,
+    };
+
+    union Beta4 { // Four 8-bit channels packed into one 32-bit word; type of Regs::beta4.
+        BitField<0, 8, u32> b;
+        BitField<8, 8, u32> g;
+        BitField<16, 8, u32> r;
+        BitField<24, 8, u32> a;
+    };
+
+    struct Point { // Element type of render_solid.prim_point.
+        u32 x;
+        u32 y;
+    };
+
+    enum class PatternSelect : u32 { // Type of the Regs::pattern_select bit field.
+        MonoChrome8x8 = 0,
+        MonoChrome64x1 = 1,
+        MonoChrome1x64 = 2,
+        Color = 3,
+    };
+
+    enum class NotifyType : u32 { // Type of Regs::notify.
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    };
+
+    enum class MonochromePatternColorFormat : u32 { // Type of monochrome_pattern.color_format.
+        A8X8R8G6B5 = 0,
+        A1R5G5B5 = 1,
+        A8R8G8B8 = 2,
+        A8Y8 = 3,
+        A8X8Y16 = 4,
+        Y32 = 5,
+    };
+
+    enum class MonochromePatternFormat : u32 { // Type of monochrome_pattern.format.
+        CGA6_M1 = 0,
+        LE_M1 = 1,
+    };
+
+    union Regs { // Named registers and reg_array alias the same storage.
+        static constexpr std::size_t NUM_REGS = 0x258; // Number of u32 entries in reg_array.
+        struct { // Trailing hex values are the byte offsets checked by the static_asserts below.
+            u32 object; // 0x000
+            INSERT_PADDING_WORDS_NOINIT(0x3F);
+            u32 no_operation; // 0x100
+            NotifyType notify; // 0x104
+            INSERT_PADDING_WORDS_NOINIT(0x2);
+            u32 wait_for_idle; // 0x110
+            INSERT_PADDING_WORDS_NOINIT(0xB);
+            u32 pm_trigger; // 0x140
+            INSERT_PADDING_WORDS_NOINIT(0xF);
+            u32 context_dma_notify; // 0x180
+            u32 dst_context_dma; // 0x184
+            u32 src_context_dma; // 0x188
+            u32 semaphore_context_dma; // 0x18C
+            INSERT_PADDING_WORDS_NOINIT(0x1C);
+            Surface dst; // 0x200
+            CpuIndexWrap pixels_from_cpu_index_wrap; // 0x228
+            u32 kind2d_check_enable; // 0x22C
+            Surface src; // 0x230
+            SectorPromotion pixels_from_memory_sector_promotion; // 0x258
+            INSERT_PADDING_WORDS_NOINIT(0x1);
+            NumTpcs num_tpcs; // 0x260
+            u32 render_enable_addr_upper; // 0x264
+            u32 render_enable_addr_lower; // 0x268
+            RenderEnableMode render_enable_mode;
+            INSERT_PADDING_WORDS_NOINIT(0x4);
+            u32 clip_x0; // 0x280
+            u32 clip_y0; // 0x284
+            u32 clip_width; // 0x288
+            u32 clip_height; // 0x28C
+            BitField<0, 1, u32> clip_enable; // 0x290
+            BitField<0, 3, ColorKeyFormat> color_key_format; // 0x294
+            u32 color_key; // 0x298
+            BitField<0, 1, u32> color_key_enable;
+            BitField<0, 8, u32> rop; // 0x2A0
+            u32 beta1; // 0x2A4
+            Beta4 beta4; // 0x2A8
+            Operation operation; // 0x2AC
+            union {
+                BitField<0, 6, u32> x;
+                BitField<8, 6, u32> y;
+            } pattern_offset; // 0x2B0
+            BitField<0, 2, PatternSelect> pattern_select; // 0x2B4
+            INSERT_PADDING_WORDS_NOINIT(0xC);
+            struct {
+                BitField<0, 3, MonochromePatternColorFormat> color_format;
+                BitField<0, 1, MonochromePatternFormat> format;
+                u32 color0;
+                u32 color1;
+                u32 pattern0;
+                u32 pattern1;
+            } monochrome_pattern; // 0x2E8
+            struct {
+                std::array<u32, 0x40> X8R8G8B8;
+                std::array<u32, 0x20> R5G6B5;
+                std::array<u32, 0x20> X1R5G5B5;
+                std::array<u32, 0x10> Y8;
+            } color_pattern; // 0x300
+            INSERT_PADDING_WORDS_NOINIT(0x10);
+            struct {
+                u32 prim_mode;
+                u32 prim_color_format;
+                u32 prim_color;
+                u32 line_tie_break_bits;
+                INSERT_PADDING_WORDS_NOINIT(0x14);
+                u32 prim_point_xy;
+                INSERT_PADDING_WORDS_NOINIT(0x7);
+                std::array<Point, 0x40> prim_point;
+            } render_solid; // 0x580
+            struct {
+                u32 data_type;
+                u32 color_format;
+                u32 index_format;
+                u32 mono_format;
+                u32 wrap;
+                u32 color0;
+                u32 color1;
+                u32 mono_opacity;
+                INSERT_PADDING_WORDS_NOINIT(0x6);
+                u32 src_width;
+                u32 src_height;
+                u32 dx_du_frac;
+                u32 dx_du_int;
+                u32 dx_dv_frac;
+                u32 dy_dv_int;
+                u32 dst_x0_frac;
+                u32 dst_x0_int;
+                u32 dst_y0_frac;
+                u32 dst_y0_int;
+                u32 data;
+            } pixels_from_cpu; // 0x800
+            INSERT_PADDING_WORDS_NOINIT(0x3);
+            u32 big_endian_control; // 0x870
+            INSERT_PADDING_WORDS_NOINIT(0x3);
+            struct {
+                BitField<0, 3, u32> block_shape;
+                BitField<0, 5, u32> corral_size;
+                BitField<0, 1, u32> safe_overlap;
+                union {
+                    BitField<0, 1, Origin> origin;
+                    BitField<4, 1, Filter> filter;
+                } sample_mode;
+                INSERT_PADDING_WORDS_NOINIT(0x8);
+                s32 dst_x0;
+                s32 dst_y0;
+                s32 dst_width;
+                s32 dst_height;
+                s64 du_dx;
+                s64 dv_dy;
+                s64 src_x0;
+                s64 src_y0;
+            } pixels_from_memory; // 0x880
+        };
+        std::array<u32, NUM_REGS> reg_array; // Word-indexed view of the same registers.
+    } regs{};
+
+    struct Config { // NOTE(review): not referenced elsewhere in this header; confirm its users.
+        Operation operation;
+        Filter filter;
+        s32 dst_x0;
+        s32 dst_y0;
+        s32 dst_x1;
+        s32 dst_y1;
+        s32 src_x0;
+        s32 src_y0;
+        s32 src_x1;
+        s32 src_y1;
+    };
+
+private:
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+    /// Performs the copy from the source surface to the destination surface as configured in the
+    /// registers.
+    void Blit();
+};
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Fermi2D::Regs, field_name) == position,                                 \
+                  "Field " #field_name " has invalid position") // position is a byte offset
+
+ASSERT_REG_POSITION(object, 0x0);
+ASSERT_REG_POSITION(no_operation, 0x100);
+ASSERT_REG_POSITION(notify, 0x104);
+ASSERT_REG_POSITION(wait_for_idle, 0x110);
+ASSERT_REG_POSITION(pm_trigger, 0x140);
+ASSERT_REG_POSITION(context_dma_notify, 0x180);
+ASSERT_REG_POSITION(dst_context_dma, 0x184);
+ASSERT_REG_POSITION(src_context_dma, 0x188);
+ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
+ASSERT_REG_POSITION(dst, 0x200);
+ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
+ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
+ASSERT_REG_POSITION(src, 0x230);
+ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
+ASSERT_REG_POSITION(num_tpcs, 0x260);
+ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
+ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
+ASSERT_REG_POSITION(clip_x0, 0x280);
+ASSERT_REG_POSITION(clip_y0, 0x284);
+ASSERT_REG_POSITION(clip_width, 0x288);
+ASSERT_REG_POSITION(clip_height, 0x28c);
+ASSERT_REG_POSITION(clip_enable, 0x290);
+ASSERT_REG_POSITION(color_key_format, 0x294);
+ASSERT_REG_POSITION(color_key, 0x298);
+ASSERT_REG_POSITION(rop, 0x2A0);
+ASSERT_REG_POSITION(beta1, 0x2A4);
+ASSERT_REG_POSITION(beta4, 0x2A8);
+ASSERT_REG_POSITION(operation, 0x2AC);
+ASSERT_REG_POSITION(pattern_offset, 0x2B0);
+ASSERT_REG_POSITION(pattern_select, 0x2B4);
+ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
+ASSERT_REG_POSITION(color_pattern, 0x300);
+ASSERT_REG_POSITION(render_solid, 0x580);
+ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
+ASSERT_REG_POSITION(big_endian_control, 0x870);
+ASSERT_REG_POSITION(pixels_from_memory, 0x880);
+
+#undef ASSERT_REG_POSITION // The helper is only needed for the checks above.
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -1,87 +1,87 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <bitset>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "video_core/engines/kepler_compute.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"
-
-namespace Tegra::Engines {
-
-KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
-    : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
-
-KeplerCompute::~KeplerCompute() = default;
-
-void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
-    upload_state.BindRasterizer(rasterizer);
-}
-
-void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
-    ASSERT_MSG(method < Regs::NUM_REGS,
-               "Invalid KeplerCompute register, increase the size of the Regs structure");
-
-    regs.reg_array[method] = method_argument;
-
-    switch (method) {
-    case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
-        upload_state.ProcessExec(regs.exec_upload.linear != 0);
-        break;
-    }
-    case KEPLER_COMPUTE_REG_INDEX(data_upload): {
-        upload_state.ProcessData(method_argument, is_last_call);
-        break;
-    }
-    case KEPLER_COMPUTE_REG_INDEX(launch):
-        ProcessLaunch();
-        break;
-    default:
-        break;
-    }
-}
-
-void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                    u32 methods_pending) {
-    switch (method) {
-    case KEPLER_COMPUTE_REG_INDEX(data_upload):
-        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
-        return;
-    default:
-        for (std::size_t i = 0; i < amount; i++) {
-            CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
-        }
-        break;
-    }
-}
-
-void KeplerCompute::ProcessLaunch() {
-    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
-    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
-                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
-    rasterizer->DispatchCompute();
-}
-
-Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
-
-    Texture::TICEntry tic_entry;
-    memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
-
-    return tic_entry;
-}
-
-Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
-    const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
-
-    Texture::TSCEntry tsc_entry;
-    memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
-    return tsc_entry;
-}
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <bitset>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines {
+
+/// Keeps references to the system and the memory manager, and builds upload_state from the
+/// memory manager and this engine's regs.upload register block.
+KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
+    : system{system_}, memory_manager{memory_manager_},
+      upload_state{memory_manager_, regs.upload} {}
+
+KeplerCompute::~KeplerCompute() = default; // Defaulted here, out of line from the header.
+
+/// Hands the rasterizer to upload_state and keeps the same pointer in this engine, where
+/// ProcessLaunch() uses it to dispatch compute work.
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    upload_state.BindRasterizer(rasterizer_);
+    rasterizer = rasterizer_;
+}
+
+/// Stores method_argument in the register selected by method and then runs the action tied to
+/// that register: exec_upload and data_upload are forwarded to upload_state, launch calls
+/// ProcessLaunch(). Writes to any other register are only stored.
+///
+/// @param method          Register index in 32-bit words; must be below Regs::NUM_REGS.
+/// @param method_argument Value to store in the register.
+/// @param is_last_call    Passed through to upload_state.ProcessData for data_upload writes.
+void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid KeplerCompute register, increase the size of the Regs structure");
+    // NOTE(review): ASSERT_MSG is defined outside this file. If it can return after a failed
+    // check, the store below would write past the end of reg_array, so reject the index here
+    // as well. Confirm against the assert implementation.
+    if (method >= Regs::NUM_REGS) {
+        return;
+    }
+
+    regs.reg_array[method] = method_argument;
+
+    switch (method) {
+    case KEPLER_COMPUTE_REG_INDEX(exec_upload):
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    case KEPLER_COMPUTE_REG_INDEX(data_upload):
+        upload_state.ProcessData(method_argument, is_last_call);
+        break;
+    case KEPLER_COMPUTE_REG_INDEX(launch):
+        ProcessLaunch();
+        break;
+    default:
+        // No side effect; the value was already stored above.
+        break;
+    }
+}
+
+/// Writes `amount` values from base_start to the register selected by method. A data_upload
+/// burst is passed to upload_state in a single call; every other register goes through
+/// CallMethod() one value at a time.
+void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                    u32 methods_pending) {
+    if (method == KEPLER_COMPUTE_REG_INDEX(data_upload)) {
+        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+        return;
+    }
+
+    for (u32 index = 0; index < amount; ++index) {
+        // The write counts as the last one when at most one method is still pending.
+        const bool is_last_call = methods_pending - index <= 1;
+        CallMethod(method, base_start[index], is_last_call);
+    }
+}
+
+/// Reads the launch descriptor from the GPU address in regs.launch_desc_loc into
+/// launch_description, then asks the bound rasterizer to dispatch compute.
+void KeplerCompute::ProcessLaunch() {
+    constexpr std::size_t descriptor_size = LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32);
+    const GPUVAddr descriptor_address = regs.launch_desc_loc.Address();
+
+    memory_manager.ReadBlockUnsafe(descriptor_address, &launch_description, descriptor_size);
+    rasterizer->DispatchCompute();
+}
+
+/// Reads entry number tic_index from the TIC table that starts at regs.tic.Address() and
+/// returns it by value.
+Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
+    const GPUVAddr table_base = regs.tic.Address();
+    const GPUVAddr entry_address = table_base + tic_index * sizeof(Texture::TICEntry);
+
+    Texture::TICEntry entry;
+    memory_manager.ReadBlockUnsafe(entry_address, &entry, sizeof(entry));
+    return entry;
+}
+
+/// Reads entry number tsc_index from the TSC table that starts at regs.tsc.Address() and
+/// returns it by value.
+Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
+    const GPUVAddr table_base = regs.tsc.Address();
+    const GPUVAddr entry_address = table_base + tsc_index * sizeof(Texture::TSCEntry);
+
+    Texture::TSCEntry entry;
+    memory_manager.ReadBlockUnsafe(entry_address, &entry, sizeof(entry));
+    return entry;
+}
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -1,248 +1,248 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <vector>
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "video_core/engines/engine_interface.h"
-#include "video_core/engines/engine_upload.h"
-#include "video_core/textures/texture.h"
-
-namespace Core {
-class System;
-}
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace Tegra::Engines {
-
-/**
- * This Engine is known as GK104_Compute. Documentation can be found in:
- * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
- * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
- */
-
-#define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
-    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-
-class KeplerCompute final : public EngineInterface {
-public:
-    explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
-    ~KeplerCompute();
-
-    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-    static constexpr std::size_t NumConstBuffers = 8;
-
-    struct Regs {
-        static constexpr std::size_t NUM_REGS = 0xCF8;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x60);
-
-                Upload::Registers upload;
-
-                struct {
-                    union {
-                        BitField<0, 1, u32> linear;
-                    };
-                } exec_upload;
-
-                u32 data_upload;
-
-                INSERT_PADDING_WORDS_NOINIT(0x3F);
-
-                struct {
-                    u32 address;
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
-                    }
-                } launch_desc_loc;
-
-                INSERT_PADDING_WORDS_NOINIT(0x1);
-
-                u32 launch;
-
-                INSERT_PADDING_WORDS_NOINIT(0x4A7);
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 limit;
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } tsc;
-
-                INSERT_PADDING_WORDS_NOINIT(0x3);
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 limit;
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } tic;
-
-                INSERT_PADDING_WORDS_NOINIT(0x22);
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } code_loc;
-
-                INSERT_PADDING_WORDS_NOINIT(0x3FE);
-
-                u32 tex_cb_index;
-
-                INSERT_PADDING_WORDS_NOINIT(0x374);
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
-    struct LaunchParams {
-        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
-
-        INSERT_PADDING_WORDS(0x8);
-
-        u32 program_start;
-
-        INSERT_PADDING_WORDS(0x2);
-
-        BitField<30, 1, u32> linked_tsc;
-
-        BitField<0, 31, u32> grid_dim_x;
-        union {
-            BitField<0, 16, u32> grid_dim_y;
-            BitField<16, 16, u32> grid_dim_z;
-        };
-
-        INSERT_PADDING_WORDS(0x3);
-
-        BitField<0, 18, u32> shared_alloc;
-
-        BitField<16, 16, u32> block_dim_x;
-        union {
-            BitField<0, 16, u32> block_dim_y;
-            BitField<16, 16, u32> block_dim_z;
-        };
-
-        union {
-            BitField<0, 8, u32> const_buffer_enable_mask;
-            BitField<29, 2, u32> cache_layout;
-        };
-
-        INSERT_PADDING_WORDS(0x8);
-
-        struct ConstBufferConfig {
-            u32 address_low;
-            union {
-                BitField<0, 8, u32> address_high;
-                BitField<15, 17, u32> size;
-            };
-            GPUVAddr Address() const {
-                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
-                                             address_low);
-            }
-        };
-        std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config;
-
-        union {
-            BitField<0, 20, u32> local_pos_alloc;
-            BitField<27, 5, u32> barrier_alloc;
-        };
-
-        union {
-            BitField<0, 20, u32> local_neg_alloc;
-            BitField<24, 5, u32> gpr_alloc;
-        };
-
-        union {
-            BitField<0, 20, u32> local_crs_alloc;
-            BitField<24, 5, u32> sass_version;
-        };
-
-        INSERT_PADDING_WORDS(0x10);
-    } launch_description{};
-
-    struct {
-        u32 write_offset = 0;
-        u32 copy_size = 0;
-        std::vector<u8> inner_buffer;
-    } state{};
-
-    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
-                  "KeplerCompute Regs has wrong size");
-
-    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
-                  "KeplerCompute LaunchParams has wrong size");
-
-    /// Write the value to the register identified by method.
-    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
-
-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                         u32 methods_pending) override;
-
-private:
-    void ProcessLaunch();
-
-    /// Retrieves information about a specific TIC entry from the TIC buffer.
-    Texture::TICEntry GetTICEntry(u32 tic_index) const;
-
-    /// Retrieves information about a specific TSC entry from the TSC buffer.
-    Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
-
-    Core::System& system;
-    MemoryManager& memory_manager;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-    Upload::State upload_state;
-};
-
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
-                  "Field " #field_name " has invalid position")
-
-#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
-    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \
-                  "Field " #field_name " has invalid position")
-
-ASSERT_REG_POSITION(upload, 0x60);
-ASSERT_REG_POSITION(exec_upload, 0x6C);
-ASSERT_REG_POSITION(data_upload, 0x6D);
-ASSERT_REG_POSITION(launch, 0xAF);
-ASSERT_REG_POSITION(tsc, 0x557);
-ASSERT_REG_POSITION(tic, 0x55D);
-ASSERT_REG_POSITION(code_loc, 0x582);
-ASSERT_REG_POSITION(tex_cb_index, 0x982);
-ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
-ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
-ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
-ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
-ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
-ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
-
-#undef ASSERT_REG_POSITION
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/textures/texture.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
+#define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
+    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) // u32 index
+
+/// Engine front end for the compute class. Register writes arrive through CallMethod() and
+/// CallMultiMethod() and are mirrored into `regs`.
+class KeplerCompute final : public EngineInterface {
+public:
+    explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
+    /// Overrides EngineInterface's virtual destructor; `override` lets the compiler verify that.
+    ~KeplerCompute() override;
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    /// Number of entries in LaunchParams::const_buffer_config.
+    static constexpr std::size_t NumConstBuffers = 8;
+
+    /// Register file. The named fields and reg_array share the same storage, so a register can
+    /// be reached either by name or by its 32-bit word index.
+    struct Regs {
+        /// Number of 32-bit registers; sizeof(Regs) is asserted to match below.
+        static constexpr std::size_t NUM_REGS = 0xCF8;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS_NOINIT(0x60);
+
+                Upload::Registers upload;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS_NOINIT(0x3F);
+
+                struct {
+                    u32 address;
+                    /// The register holds the address shifted right by 8 bits.
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+                    }
+                } launch_desc_loc;
+
+                INSERT_PADDING_WORDS_NOINIT(0x1);
+
+                u32 launch;
+
+                INSERT_PADDING_WORDS_NOINIT(0x4A7);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    /// Joins address_high (upper 32 bits) and address_low into one address.
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tsc;
+
+                INSERT_PADDING_WORDS_NOINIT(0x3);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    /// Joins address_high (upper 32 bits) and address_low into one address.
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tic;
+
+                INSERT_PADDING_WORDS_NOINIT(0x22);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    /// Joins address_high (upper 32 bits) and address_low into one address.
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } code_loc;
+
+                INSERT_PADDING_WORDS_NOINIT(0x3FE);
+
+                u32 tex_cb_index;
+
+                INSERT_PADDING_WORDS_NOINIT(0x374);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    /// Launch descriptor layout. Its size is asserted below to be NUM_LAUNCH_PARAMETERS words.
+    struct LaunchParams {
+        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        u32 program_start;
+
+        INSERT_PADDING_WORDS(0x2);
+
+        BitField<30, 1, u32> linked_tsc;
+
+        BitField<0, 31, u32> grid_dim_x;
+        union {
+            BitField<0, 16, u32> grid_dim_y;
+            BitField<16, 16, u32> grid_dim_z;
+        };
+
+        INSERT_PADDING_WORDS(0x3);
+
+        BitField<0, 18, u32> shared_alloc;
+
+        BitField<16, 16, u32> block_dim_x;
+        union {
+            BitField<0, 16, u32> block_dim_y;
+            BitField<16, 16, u32> block_dim_z;
+        };
+
+        union {
+            BitField<0, 8, u32> const_buffer_enable_mask;
+            BitField<29, 2, u32> cache_layout;
+        };
+
+        INSERT_PADDING_WORDS(0x8);
+
+        struct ConstBufferConfig {
+            u32 address_low;
+            union {
+                BitField<0, 8, u32> address_high;
+                BitField<15, 17, u32> size;
+            };
+            /// Joins the 8-bit address_high field (placed at bit 32) with address_low.
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+                                             address_low);
+            }
+        };
+        std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config;
+
+        union {
+            BitField<0, 20, u32> local_pos_alloc;
+            BitField<27, 5, u32> barrier_alloc;
+        };
+
+        union {
+            BitField<0, 20, u32> local_neg_alloc;
+            BitField<24, 5, u32> gpr_alloc;
+        };
+
+        union {
+            BitField<0, 20, u32> local_crs_alloc;
+            BitField<24, 5, u32> sass_version;
+        };
+
+        INSERT_PADDING_WORDS(0x10);
+    } launch_description{};
+
+    // NOTE(review): nothing in this header reads or writes `state`; confirm it is still used.
+    struct {
+        u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
+    } state{};
+
+    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
+                  "KeplerCompute Regs has wrong size");
+
+    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+                  "KeplerCompute LaunchParams has wrong size");
+
+    /// Write the value to the register identified by method.
+    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
+
+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                         u32 methods_pending) override;
+
+private:
+    /// Handles a write to the `launch` register.
+    void ProcessLaunch();
+
+    /// Retrieves information about a specific TIC entry from the TIC buffer.
+    Texture::TICEntry GetTICEntry(u32 tic_index) const;
+
+    /// Retrieves information about a specific TSC entry from the TSC buffer.
+    Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
+    Core::System& system;
+    MemoryManager& memory_manager;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+    Upload::State upload_state;
+};
+
+// Compile-time layout checks. Positions are given in 32-bit words, hence the `* 4` when they
+// are compared against offsetof(), which yields bytes.
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
+                  "Field " #field_name " has invalid position")
+
+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
+    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
+ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(tex_cb_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
+
+// Both helpers are local to this header; undefine them so they do not leak into includers.
+#undef ASSERT_REG_POSITION
+#undef ASSERT_LAUNCH_PARAM_POSITION
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -1,55 +1,55 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "video_core/engines/kepler_memory.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-
-namespace Tegra::Engines {
-
-KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
-    : system{system_}, upload_state{memory_manager, regs.upload} {}
-
-KeplerMemory::~KeplerMemory() = default;
-
-void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    upload_state.BindRasterizer(rasterizer_);
-}
-
-void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
-    ASSERT_MSG(method < Regs::NUM_REGS,
-               "Invalid KeplerMemory register, increase the size of the Regs structure");
-
-    regs.reg_array[method] = method_argument;
-
-    switch (method) {
-    case KEPLERMEMORY_REG_INDEX(exec): {
-        upload_state.ProcessExec(regs.exec.linear != 0);
-        break;
-    }
-    case KEPLERMEMORY_REG_INDEX(data): {
-        upload_state.ProcessData(method_argument, is_last_call);
-        break;
-    }
-    }
-}
-
-void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                   u32 methods_pending) {
-    switch (method) {
-    case KEPLERMEMORY_REG_INDEX(data):
-        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
-        return;
-    default:
-        for (std::size_t i = 0; i < amount; i++) {
-            CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
-        }
-        break;
-    }
-}
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra::Engines {
+
+KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
+    : system{system_}, upload_state{memory_manager, regs.upload} {}
+
+KeplerMemory::~KeplerMemory() = default;
+
+void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    upload_state.BindRasterizer(rasterizer_);
+}
+
+void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid KeplerMemory register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = method_argument;
+
+    switch (method) {
+    case KEPLERMEMORY_REG_INDEX(exec): {
+        upload_state.ProcessExec(regs.exec.linear != 0);
+        break;
+    }
+    case KEPLERMEMORY_REG_INDEX(data): {
+        upload_state.ProcessData(method_argument, is_last_call);
+        break;
+    }
+    }
+}
+
+void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                   u32 methods_pending) {
+    switch (method) {
+    case KEPLERMEMORY_REG_INDEX(data):
+        upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+        return;
+    default:
+        for (std::size_t i = 0; i < amount; i++) {
+            CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+        }
+        break;
+    }
+}
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -1,89 +1,89 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "video_core/engines/engine_interface.h"
-#include "video_core/engines/engine_upload.h"
-
-namespace Core {
-class System;
-}
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace Tegra::Engines {
-
-/**
- * This Engine is known as P2MF. Documentation can be found in:
- * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
- * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
- */
-
-#define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
-    (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
-
-class KeplerMemory final : public EngineInterface {
-public:
-    explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
-    ~KeplerMemory() override;
-
-    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-    /// Write the value to the register identified by method.
-    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
-
-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                         u32 methods_pending) override;
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x7F;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x60);
-
-                Upload::Registers upload;
-
-                struct {
-                    union {
-                        BitField<0, 1, u32> linear;
-                    };
-                } exec;
-
-                u32 data;
-
-                INSERT_PADDING_WORDS_NOINIT(0x11);
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
-private:
-    Core::System& system;
-    Upload::State upload_state;
-};
-
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
-                  "Field " #field_name " has invalid position")
-
-ASSERT_REG_POSITION(upload, 0x60);
-ASSERT_REG_POSITION(exec, 0x6C);
-ASSERT_REG_POSITION(data, 0x6D);
-#undef ASSERT_REG_POSITION
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+#include "video_core/engines/engine_upload.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
+#define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
+    (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
+
+class KeplerMemory final : public EngineInterface {
+public:
+    explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
+    ~KeplerMemory() override;
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    /// Write the value to the register identified by method.
+    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
+
+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                         u32 methods_pending) override;
+
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x7F;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS_NOINIT(0x60);
+
+                Upload::Registers upload;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec;
+
+                u32 data;
+
+                INSERT_PADDING_WORDS_NOINIT(0x11);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+private:
+    Core::System& system;
+    Upload::State upload_state;
+};
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec, 0x6C);
+ASSERT_REG_POSITION(data, 0x6D);
+#undef ASSERT_REG_POSITION
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -1,317 +1,317 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include "common/algorithm.h"
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/microprofile.h"
-#include "common/settings.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/maxwell_dma.h"
-#include "video_core/memory_manager.h"
-#include "video_core/renderer_base.h"
-#include "video_core/textures/decoders.h"
-
-MICROPROFILE_DECLARE(GPU_DMAEngine);
-MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
-
-namespace Tegra::Engines {
-
-using namespace Texture;
-
-MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
-    : system{system_}, memory_manager{memory_manager_} {}
-
-MaxwellDMA::~MaxwellDMA() = default;
-
-void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
-}
-
-void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
-    ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
-
-    regs.reg_array[method] = method_argument;
-
-    if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
-        Launch();
-    }
-}
-
-void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                 u32 methods_pending) {
-    for (size_t i = 0; i < amount; ++i) {
-        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
-    }
-}
-
-void MaxwellDMA::Launch() {
-    MICROPROFILE_SCOPE(GPU_DMAEngine);
-    LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
-              static_cast<GPUVAddr>(regs.offset_out));
-
-    // TODO(Subv): Perform more research and implement all features of this engine.
-    const LaunchDMA& launch = regs.launch_dma;
-    ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
-    ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
-
-    if (launch.multi_line_enable) {
-        const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-        const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
-        if (!is_src_pitch && !is_dst_pitch) {
-            // If both the source and the destination are in block layout, assert.
-            UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
-            return;
-        }
-
-        if (is_src_pitch && is_dst_pitch) {
-            for (u32 line = 0; line < regs.line_count; ++line) {
-                const GPUVAddr source_line =
-                    regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
-                const GPUVAddr dest_line =
-                    regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
-                memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
-            }
-        } else {
-            if (!is_src_pitch && is_dst_pitch) {
-                CopyBlockLinearToPitch();
-            } else {
-                CopyPitchToBlockLinear();
-            }
-        }
-    } else {
-        // TODO: allow multisized components.
-        auto& accelerate = rasterizer->AccessAccelerateDMA();
-        const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
-        if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
-            ASSERT(regs.remap_const.component_size_minus_one == 3);
-            accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
-            std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
-            memory_manager.WriteBlockUnsafe(regs.offset_out,
-                                            reinterpret_cast<u8*>(tmp_buffer.data()),
-                                            regs.line_length_in * sizeof(u32));
-        } else {
-            auto convert_linear_2_blocklinear_addr = [](u64 address) {
-                return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
-                       ((address & 0x180) >> 1) | ((address & 0x20) << 3);
-            };
-            auto src_kind = memory_manager.GetPageKind(regs.offset_in);
-            auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
-            const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
-            const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
-            if (!is_src_pitch && is_dst_pitch) {
-                std::vector<u8> tmp_buffer(regs.line_length_in);
-                std::vector<u8> dst_buffer(regs.line_length_in);
-                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
-                                               regs.line_length_in);
-                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
-                    dst_buffer[offset] =
-                        tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
-                                   regs.offset_in];
-                }
-                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
-            } else if (is_src_pitch && !is_dst_pitch) {
-                std::vector<u8> tmp_buffer(regs.line_length_in);
-                std::vector<u8> dst_buffer(regs.line_length_in);
-                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
-                                               regs.line_length_in);
-                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
-                    dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
-                               regs.offset_out] = tmp_buffer[offset];
-                }
-                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
-            } else {
-                if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
-                    std::vector<u8> tmp_buffer(regs.line_length_in);
-                    memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
-                                                   regs.line_length_in);
-                    memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
-                                              regs.line_length_in);
-                }
-            }
-        }
-    }
-
-    ReleaseSemaphore();
-}
-
-void MaxwellDMA::CopyBlockLinearToPitch() {
-    UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
-    UNIMPLEMENTED_IF(regs.src_params.layer != 0);
-
-    const bool is_remapping = regs.launch_dma.remap_enable != 0;
-
-    // Optimized path for micro copies.
-    const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
-    if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
-        regs.src_params.height > GOB_SIZE_Y) {
-        FastCopyBlockLinearToPitch();
-        return;
-    }
-
-    // Deswizzle the input and copy it over.
-    const Parameters& src_params = regs.src_params;
-
-    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
-    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
-
-    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
-
-    u32 width = src_params.width;
-    u32 x_elements = regs.line_length_in;
-    u32 x_offset = src_params.origin.x;
-    u32 bpp_shift = 0U;
-    if (!is_remapping) {
-        bpp_shift = Common::FoldRight(
-            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
-            width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
-        width >>= bpp_shift;
-        x_elements >>= bpp_shift;
-        x_offset >>= bpp_shift;
-    }
-
-    const u32 bytes_per_pixel = base_bpp << bpp_shift;
-    const u32 height = src_params.height;
-    const u32 depth = src_params.depth;
-    const u32 block_height = src_params.block_size.height;
-    const u32 block_depth = src_params.block_size.depth;
-    const size_t src_size =
-        CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
-
-    if (read_buffer.size() < src_size) {
-        read_buffer.resize(src_size);
-    }
-    if (write_buffer.size() < dst_size) {
-        write_buffer.resize(dst_size);
-    }
-
-    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
-    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
-
-    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
-                     src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                     regs.pitch_out);
-
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
-}
-
-void MaxwellDMA::CopyPitchToBlockLinear() {
-    UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
-    UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
-
-    const bool is_remapping = regs.launch_dma.remap_enable != 0;
-    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
-    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
-
-    const auto& dst_params = regs.dst_params;
-
-    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
-
-    u32 width = dst_params.width;
-    u32 x_elements = regs.line_length_in;
-    u32 x_offset = dst_params.origin.x;
-    u32 bpp_shift = 0U;
-    if (!is_remapping) {
-        bpp_shift = Common::FoldRight(
-            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
-            width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
-        width >>= bpp_shift;
-        x_elements >>= bpp_shift;
-        x_offset >>= bpp_shift;
-    }
-
-    const u32 bytes_per_pixel = base_bpp << bpp_shift;
-    const u32 height = dst_params.height;
-    const u32 depth = dst_params.depth;
-    const u32 block_height = dst_params.block_size.height;
-    const u32 block_depth = dst_params.block_size.depth;
-    const size_t dst_size =
-        CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
-    const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
-
-    if (read_buffer.size() < src_size) {
-        read_buffer.resize(src_size);
-    }
-    if (write_buffer.size() < dst_size) {
-        write_buffer.resize(dst_size);
-    }
-
-    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
-    if (Settings::IsGPULevelExtreme()) {
-        memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
-    } else {
-        memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
-    }
-
-    // If the input is linear and the output is tiled, swizzle the input and copy it over.
-    SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
-                   dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                   regs.pitch_in);
-
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
-}
-
-void MaxwellDMA::FastCopyBlockLinearToPitch() {
-    const u32 bytes_per_pixel = 1U;
-    const size_t src_size = GOB_SIZE;
-    const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
-    u32 pos_x = regs.src_params.origin.x;
-    u32 pos_y = regs.src_params.origin.y;
-    const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
-                                    regs.src_params.block_size.height, bytes_per_pixel);
-    const u32 x_in_gob = 64 / bytes_per_pixel;
-    pos_x = pos_x % x_in_gob;
-    pos_y = pos_y % 8;
-
-    if (read_buffer.size() < src_size) {
-        read_buffer.resize(src_size);
-    }
-    if (write_buffer.size() < dst_size) {
-        write_buffer.resize(dst_size);
-    }
-
-    if (Settings::IsGPULevelExtreme()) {
-        memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
-    } else {
-        memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
-        memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
-    }
-
-    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
-                     regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
-                     regs.src_params.block_size.height, regs.src_params.block_size.depth,
-                     regs.pitch_out);
-
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
-}
-
-void MaxwellDMA::ReleaseSemaphore() {
-    const auto type = regs.launch_dma.semaphore_type;
-    const GPUVAddr address = regs.semaphore.address;
-    const u32 payload = regs.semaphore.payload;
-    switch (type) {
-    case LaunchDMA::SemaphoreType::NONE:
-        break;
-    case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: {
-        std::function<void()> operation(
-            [this, address, payload] { memory_manager.Write<u32>(address, payload); });
-        rasterizer->SignalFence(std::move(operation));
-        break;
-    }
-    case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: {
-        std::function<void()> operation([this, address, payload] {
-            memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks());
-            memory_manager.Write<u64>(address, payload);
-        });
-        rasterizer->SignalFence(std::move(operation));
-        break;
-    }
-    default:
-        ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value()));
-    }
-}
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/algorithm.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/settings.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
+
+MICROPROFILE_DECLARE(GPU_DMAEngine);
+MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
+
+namespace Tegra::Engines {
+
+using namespace Texture;
+
+MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
+    : system{system_}, memory_manager{memory_manager_} {}
+
+MaxwellDMA::~MaxwellDMA() = default;
+
+void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
+}
+
+void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
+    ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
+
+    regs.reg_array[method] = method_argument;
+
+    if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
+        Launch();
+    }
+}
+
+void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                 u32 methods_pending) {
+    for (size_t i = 0; i < amount; ++i) {
+        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+    }
+}
+
+void MaxwellDMA::Launch() {
+    MICROPROFILE_SCOPE(GPU_DMAEngine);
+    LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
+              static_cast<GPUVAddr>(regs.offset_out));
+
+    // TODO(Subv): Perform more research and implement all features of this engine.
+    const LaunchDMA& launch = regs.launch_dma;
+    ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
+    ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
+
+    if (launch.multi_line_enable) {
+        const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+        const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
+        if (!is_src_pitch && !is_dst_pitch) {
+            // If both the source and the destination are in block layout, assert.
+            UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+            return;
+        }
+
+        if (is_src_pitch && is_dst_pitch) {
+            for (u32 line = 0; line < regs.line_count; ++line) {
+                const GPUVAddr source_line =
+                    regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+                const GPUVAddr dest_line =
+                    regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+                memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+            }
+        } else {
+            if (!is_src_pitch && is_dst_pitch) {
+                CopyBlockLinearToPitch();
+            } else {
+                CopyPitchToBlockLinear();
+            }
+        }
+    } else {
+        // TODO: allow multisized components.
+        auto& accelerate = rasterizer->AccessAccelerateDMA();
+        const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+        if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
+            ASSERT(regs.remap_const.component_size_minus_one == 3);
+            accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+            std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+            memory_manager.WriteBlockUnsafe(regs.offset_out,
+                                            reinterpret_cast<u8*>(tmp_buffer.data()),
+                                            regs.line_length_in * sizeof(u32));
+        } else {
+            auto convert_linear_2_blocklinear_addr = [](u64 address) {
+                return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
+                       ((address & 0x180) >> 1) | ((address & 0x20) << 3);
+            };
+            auto src_kind = memory_manager.GetPageKind(regs.offset_in);
+            auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
+            const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
+            const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
+            if (!is_src_pitch && is_dst_pitch) {
+                std::vector<u8> tmp_buffer(regs.line_length_in);
+                std::vector<u8> dst_buffer(regs.line_length_in);
+                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                               regs.line_length_in);
+                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+                    dst_buffer[offset] =
+                        tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
+                                   regs.offset_in];
+                }
+                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+            } else if (is_src_pitch && !is_dst_pitch) {
+                std::vector<u8> tmp_buffer(regs.line_length_in);
+                std::vector<u8> dst_buffer(regs.line_length_in);
+                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                               regs.line_length_in);
+                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+                    dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
+                               regs.offset_out] = tmp_buffer[offset];
+                }
+                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+            } else {
+                if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+                    std::vector<u8> tmp_buffer(regs.line_length_in);
+                    memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                                   regs.line_length_in);
+                    memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
+                                              regs.line_length_in);
+                }
+            }
+        }
+    }
+
+    ReleaseSemaphore();
+}
+
+void MaxwellDMA::CopyBlockLinearToPitch() { // Block-linear source -> pitch-linear destination.
+    UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
+    UNIMPLEMENTED_IF(regs.src_params.layer != 0);
+
+    const bool is_remapping = regs.launch_dma.remap_enable != 0;
+
+    // Optimized path for micro copies: no remapping and a destination smaller than GOB_SIZE.
+    const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+    if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
+        regs.src_params.height > GOB_SIZE_Y) {
+        FastCopyBlockLinearToPitch();
+        return;
+    }
+
+    // General path: deswizzle the input and copy it over.
+    const Parameters& src_params = regs.src_params;
+
+    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+    // Element size: 1 byte, or components * component size when remapping is enabled.
+    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+    u32 width = src_params.width;
+    u32 x_elements = regs.line_length_in;
+    u32 x_offset = src_params.origin.x;
+    u32 bpp_shift = 0U;
+    if (!is_remapping) { // bpp_shift: presumably min(4, countr_zero of each value below).
+        bpp_shift = Common::FoldRight(
+            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+            width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
+        width >>= bpp_shift;
+        x_elements >>= bpp_shift;
+        x_offset >>= bpp_shift;
+    }
+
+    const u32 bytes_per_pixel = base_bpp << bpp_shift;
+    const u32 height = src_params.height;
+    const u32 depth = src_params.depth;
+    const u32 block_height = src_params.block_size.height;
+    const u32 block_depth = src_params.block_size.depth;
+    const size_t src_size =
+        CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
+
+    if (read_buffer.size() < src_size) { // Scratch buffers are only grown here, never shrunk.
+        read_buffer.resize(src_size);
+    }
+    if (write_buffer.size() < dst_size) {
+        write_buffer.resize(dst_size);
+    }
+    // Read the source and the current destination; all dst_size bytes are written back below.
+    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+
+    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
+                     src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
+                     regs.pitch_out);
+
+    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
+
+void MaxwellDMA::CopyPitchToBlockLinear() { // Pitch-linear source -> block-linear destination.
+    UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+    UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
+
+    const bool is_remapping = regs.launch_dma.remap_enable != 0;
+    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+
+    const auto& dst_params = regs.dst_params;
+    // Element size: 1 byte, or components * component size when remapping is enabled.
+    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+    u32 width = dst_params.width;
+    u32 x_elements = regs.line_length_in;
+    u32 x_offset = dst_params.origin.x;
+    u32 bpp_shift = 0U;
+    if (!is_remapping) { // bpp_shift: presumably min(4, countr_zero of each value below).
+        bpp_shift = Common::FoldRight(
+            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+            width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
+        width >>= bpp_shift;
+        x_elements >>= bpp_shift;
+        x_offset >>= bpp_shift;
+    }
+
+    const u32 bytes_per_pixel = base_bpp << bpp_shift;
+    const u32 height = dst_params.height;
+    const u32 depth = dst_params.depth;
+    const u32 block_height = dst_params.block_size.height;
+    const u32 block_depth = dst_params.block_size.depth;
+    const size_t dst_size =
+        CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
+    const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
+
+    if (read_buffer.size() < src_size) { // Scratch buffers are only grown here, never shrunk.
+        read_buffer.resize(src_size);
+    }
+    if (write_buffer.size() < dst_size) {
+        write_buffer.resize(dst_size);
+    }
+    // Load source and current destination (ReadBlockUnsafe unless GPU level is extreme).
+    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+    if (Settings::IsGPULevelExtreme()) {
+        memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+    } else {
+        memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
+    }
+
+    // If the input is linear and the output is tiled, swizzle the input and copy it over.
+    SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
+                   dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
+                   regs.pitch_in);
+
+    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
+
+void MaxwellDMA::FastCopyBlockLinearToPitch() { // Micro-copy path: reads GOB_SIZE source bytes.
+    const u32 bytes_per_pixel = 1U;
+    const size_t src_size = GOB_SIZE;
+    const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+    u32 pos_x = regs.src_params.origin.x;
+    u32 pos_y = regs.src_params.origin.y;
+    const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
+                                    regs.src_params.block_size.height, bytes_per_pixel);
+    const u32 x_in_gob = 64 / bytes_per_pixel; // NOTE(review): 64 looks like GOB_SIZE_X - confirm.
+    pos_x = pos_x % x_in_gob;
+    pos_y = pos_y % 8; // NOTE(review): 8 looks like GOB_SIZE_Y - confirm.
+
+    if (read_buffer.size() < src_size) { // Scratch buffers are only grown here, never shrunk.
+        read_buffer.resize(src_size);
+    }
+    if (write_buffer.size() < dst_size) {
+        write_buffer.resize(dst_size);
+    }
+    // Load the source starting at the computed offset, plus the current destination.
+    if (Settings::IsGPULevelExtreme()) {
+        memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+    } else {
+        memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
+        memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
+    }
+
+    UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
+                     regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
+                     regs.src_params.block_size.height, regs.src_params.block_size.depth,
+                     regs.pitch_out);
+
+    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
+
+void MaxwellDMA::ReleaseSemaphore() { // Submits the configured semaphore write via SignalFence.
+    const auto type = regs.launch_dma.semaphore_type;
+    const GPUVAddr address = regs.semaphore.address;
+    const u32 payload = regs.semaphore.payload;
+    switch (type) {
+    case LaunchDMA::SemaphoreType::NONE: // Nothing to write.
+        break;
+    case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { // Writes the u32 payload.
+        std::function<void()> operation(
+            [this, address, payload] { memory_manager.Write<u32>(address, payload); });
+        rasterizer->SignalFence(std::move(operation)); // NOTE(review): callback captures this.
+        break;
+    }
+    case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { // Ticks + payload.
+        std::function<void()> operation([this, address, payload] {
+            memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks());
+            memory_manager.Write<u64>(address, payload); // u32 payload widened to u64.
+        });
+        rasterizer->SignalFence(std::move(operation)); // NOTE(review): callback captures this.
+        break;
+    }
+    default:
+        ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+    }
+}
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -1,294 +1,294 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <vector>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "video_core/engines/engine_interface.h"
-
-namespace Core {
-class System;
-}
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace Tegra::Engines {
-
-class AccelerateDMAInterface {
-public:
-    /// Write the value to the register identified by method.
-    virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
-
-    virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
-};
-
-/**
- * This engine is known as gk104_copy. Documentation can be found in:
- * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
- * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
- */
-
-class MaxwellDMA final : public EngineInterface {
-public:
-    struct PackedGPUVAddr {
-        u32 upper;
-        u32 lower;
-
-        constexpr operator GPUVAddr() const noexcept {
-            return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
-        }
-    };
-
-    union BlockSize {
-        BitField<0, 4, u32> width;
-        BitField<4, 4, u32> height;
-        BitField<8, 4, u32> depth;
-        BitField<12, 4, u32> gob_height;
-    };
-    static_assert(sizeof(BlockSize) == 4);
-
-    union Origin {
-        BitField<0, 16, u32> x;
-        BitField<16, 16, u32> y;
-    };
-    static_assert(sizeof(Origin) == 4);
-
-    struct Parameters {
-        BlockSize block_size;
-        u32 width;
-        u32 height;
-        u32 depth;
-        u32 layer;
-        Origin origin;
-    };
-    static_assert(sizeof(Parameters) == 24);
-
-    struct Semaphore {
-        PackedGPUVAddr address;
-        u32 payload;
-    };
-    static_assert(sizeof(Semaphore) == 12);
-
-    struct RenderEnable {
-        enum class Mode : u32 {
-            // Note: This uses Pascal case in order to avoid the identifiers
-            // FALSE and TRUE, which are reserved on Darwin.
-            False = 0,
-            True = 1,
-            Conditional = 2,
-            RenderIfEqual = 3,
-            RenderIfNotEqual = 4,
-        };
-
-        PackedGPUVAddr address;
-        BitField<0, 3, Mode> mode;
-    };
-    static_assert(sizeof(RenderEnable) == 12);
-
-    enum class PhysModeTarget : u32 {
-        LOCAL_FB = 0,
-        COHERENT_SYSMEM = 1,
-        NONCOHERENT_SYSMEM = 2,
-    };
-    using PhysMode = BitField<0, 2, PhysModeTarget>;
-
-    union LaunchDMA {
-        enum class DataTransferType : u32 {
-            NONE = 0,
-            PIPELINED = 1,
-            NON_PIPELINED = 2,
-        };
-
-        enum class SemaphoreType : u32 {
-            NONE = 0,
-            RELEASE_ONE_WORD_SEMAPHORE = 1,
-            RELEASE_FOUR_WORD_SEMAPHORE = 2,
-        };
-
-        enum class InterruptType : u32 {
-            NONE = 0,
-            BLOCKING = 1,
-            NON_BLOCKING = 2,
-        };
-
-        enum class MemoryLayout : u32 {
-            BLOCKLINEAR = 0,
-            PITCH = 1,
-        };
-
-        enum class Type : u32 {
-            VIRTUAL = 0,
-            PHYSICAL = 1,
-        };
-
-        enum class SemaphoreReduction : u32 {
-            IMIN = 0,
-            IMAX = 1,
-            IXOR = 2,
-            IAND = 3,
-            IOR = 4,
-            IADD = 5,
-            INC = 6,
-            DEC = 7,
-            FADD = 0xA,
-        };
-
-        enum class SemaphoreReductionSign : u32 {
-            SIGNED = 0,
-            UNSIGNED = 1,
-        };
-
-        enum class BypassL2 : u32 {
-            USE_PTE_SETTING = 0,
-            FORCE_VOLATILE = 1,
-        };
-
-        BitField<0, 2, DataTransferType> data_transfer_type;
-        BitField<2, 1, u32> flush_enable;
-        BitField<3, 2, SemaphoreType> semaphore_type;
-        BitField<5, 2, InterruptType> interrupt_type;
-        BitField<7, 1, MemoryLayout> src_memory_layout;
-        BitField<8, 1, MemoryLayout> dst_memory_layout;
-        BitField<9, 1, u32> multi_line_enable;
-        BitField<10, 1, u32> remap_enable;
-        BitField<11, 1, u32> rmwdisable;
-        BitField<12, 1, Type> src_type;
-        BitField<13, 1, Type> dst_type;
-        BitField<14, 4, SemaphoreReduction> semaphore_reduction;
-        BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
-        BitField<19, 1, u32> reduction_enable;
-        BitField<20, 1, BypassL2> bypass_l2;
-    };
-    static_assert(sizeof(LaunchDMA) == 4);
-
-    struct RemapConst {
-        enum class Swizzle : u32 {
-            SRC_X = 0,
-            SRC_Y = 1,
-            SRC_Z = 2,
-            SRC_W = 3,
-            CONST_A = 4,
-            CONST_B = 5,
-            NO_WRITE = 6,
-        };
-
-        PackedGPUVAddr address;
-
-        union {
-            BitField<0, 3, Swizzle> dst_x;
-            BitField<4, 3, Swizzle> dst_y;
-            BitField<8, 3, Swizzle> dst_z;
-            BitField<12, 3, Swizzle> dst_w;
-            BitField<0, 12, u32> dst_components_raw;
-            BitField<16, 2, u32> component_size_minus_one;
-            BitField<20, 2, u32> num_src_components_minus_one;
-            BitField<24, 2, u32> num_dst_components_minus_one;
-        };
-
-        Swizzle GetComponent(size_t i) const {
-            const u32 raw = dst_components_raw;
-            return static_cast<Swizzle>((raw >> (i * 3)) & 0x7);
-        }
-    };
-    static_assert(sizeof(RemapConst) == 12);
-
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-    explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
-    ~MaxwellDMA() override;
-
-    /// Write the value to the register identified by method.
-    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
-
-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                         u32 methods_pending) override;
-
-private:
-    /// Performs the copy from the source buffer to the destination buffer as configured in the
-    /// registers.
-    void Launch();
-
-    void CopyBlockLinearToPitch();
-
-    void CopyPitchToBlockLinear();
-
-    void FastCopyBlockLinearToPitch();
-
-    void ReleaseSemaphore();
-
-    Core::System& system;
-
-    MemoryManager& memory_manager;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-
-    std::vector<u8> read_buffer;
-    std::vector<u8> write_buffer;
-
-    static constexpr std::size_t NUM_REGS = 0x800;
-    struct Regs {
-        union {
-            struct {
-                u32 reserved[0x40];
-                u32 nop;
-                u32 reserved01[0xf];
-                u32 pm_trigger;
-                u32 reserved02[0x3f];
-                Semaphore semaphore;
-                u32 reserved03[0x2];
-                RenderEnable render_enable;
-                PhysMode src_phys_mode;
-                PhysMode dst_phys_mode;
-                u32 reserved04[0x26];
-                LaunchDMA launch_dma;
-                u32 reserved05[0x3f];
-                PackedGPUVAddr offset_in;
-                PackedGPUVAddr offset_out;
-                u32 pitch_in;
-                u32 pitch_out;
-                u32 line_length_in;
-                u32 line_count;
-                u32 reserved06[0xb6];
-                u32 remap_consta_value;
-                u32 remap_constb_value;
-                RemapConst remap_const;
-                Parameters dst_params;
-                u32 reserved07[0x1];
-                Parameters src_params;
-                u32 reserved08[0x275];
-                u32 pm_trigger_end;
-                u32 reserved09[0x3ba];
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4,                          \
-                  "Field " #field_name " has invalid position")
-
-    ASSERT_REG_POSITION(launch_dma, 0xC0);
-    ASSERT_REG_POSITION(offset_in, 0x100);
-    ASSERT_REG_POSITION(offset_out, 0x102);
-    ASSERT_REG_POSITION(pitch_in, 0x104);
-    ASSERT_REG_POSITION(pitch_out, 0x105);
-    ASSERT_REG_POSITION(line_length_in, 0x106);
-    ASSERT_REG_POSITION(line_count, 0x107);
-    ASSERT_REG_POSITION(remap_const, 0x1C0);
-    ASSERT_REG_POSITION(dst_params, 0x1C3);
-    ASSERT_REG_POSITION(src_params, 0x1CA);
-
-#undef ASSERT_REG_POSITION
-};
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+class AccelerateDMAInterface { // Pure-virtual interface for buffer copy and clear requests.
+public:
+    virtual ~AccelerateDMAInterface() = default; // Allows safe destruction via a base pointer.
+    /// NOTE(review): units of amount and the meaning of the bool results are not visible here.
+    virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
+    virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
+};
+
+/**
+ * This engine is known as gk104_copy. Documentation can be found in:
+ * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
+class MaxwellDMA final : public EngineInterface {
+public:
+    struct PackedGPUVAddr { // Address stored as two u32 words, convertible to GPUVAddr.
+        u32 upper;
+        u32 lower;
+        /// Combines lower with the low 8 bits of upper, which are placed at bits 32-39.
+        constexpr operator GPUVAddr() const noexcept {
+            return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
+        }
+    };
+
+    union BlockSize {
+        BitField<0, 4, u32> width;
+        BitField<4, 4, u32> height;
+        BitField<8, 4, u32> depth;
+        BitField<12, 4, u32> gob_height;
+    };
+    static_assert(sizeof(BlockSize) == 4);
+
+    union Origin {
+        BitField<0, 16, u32> x;
+        BitField<16, 16, u32> y;
+    };
+    static_assert(sizeof(Origin) == 4);
+
+    struct Parameters {
+        BlockSize block_size;
+        u32 width;
+        u32 height;
+        u32 depth;
+        u32 layer;
+        Origin origin;
+    };
+    static_assert(sizeof(Parameters) == 24);
+
+    struct Semaphore {
+        PackedGPUVAddr address;
+        u32 payload;
+    };
+    static_assert(sizeof(Semaphore) == 12);
+
+    struct RenderEnable {
+        enum class Mode : u32 {
+            // Note: This uses Pascal case in order to avoid the identifiers
+            // FALSE and TRUE, which are reserved on Darwin.
+            False = 0,
+            True = 1,
+            Conditional = 2,
+            RenderIfEqual = 3,
+            RenderIfNotEqual = 4,
+        };
+
+        PackedGPUVAddr address;
+        BitField<0, 3, Mode> mode;
+    };
+    static_assert(sizeof(RenderEnable) == 12);
+
+    enum class PhysModeTarget : u32 {
+        LOCAL_FB = 0,
+        COHERENT_SYSMEM = 1,
+        NONCOHERENT_SYSMEM = 2,
+    };
+    using PhysMode = BitField<0, 2, PhysModeTarget>;
+
+    union LaunchDMA {
+        enum class DataTransferType : u32 {
+            NONE = 0,
+            PIPELINED = 1,
+            NON_PIPELINED = 2,
+        };
+
+        enum class SemaphoreType : u32 {
+            NONE = 0,
+            RELEASE_ONE_WORD_SEMAPHORE = 1,
+            RELEASE_FOUR_WORD_SEMAPHORE = 2,
+        };
+
+        enum class InterruptType : u32 {
+            NONE = 0,
+            BLOCKING = 1,
+            NON_BLOCKING = 2,
+        };
+
+        enum class MemoryLayout : u32 {
+            BLOCKLINEAR = 0,
+            PITCH = 1,
+        };
+
+        enum class Type : u32 {
+            VIRTUAL = 0,
+            PHYSICAL = 1,
+        };
+
+        enum class SemaphoreReduction : u32 {
+            IMIN = 0,
+            IMAX = 1,
+            IXOR = 2,
+            IAND = 3,
+            IOR = 4,
+            IADD = 5,
+            INC = 6,
+            DEC = 7,
+            FADD = 0xA,
+        };
+
+        enum class SemaphoreReductionSign : u32 {
+            SIGNED = 0,
+            UNSIGNED = 1,
+        };
+
+        enum class BypassL2 : u32 {
+            USE_PTE_SETTING = 0,
+            FORCE_VOLATILE = 1,
+        };
+
+        BitField<0, 2, DataTransferType> data_transfer_type;
+        BitField<2, 1, u32> flush_enable;
+        BitField<3, 2, SemaphoreType> semaphore_type;
+        BitField<5, 2, InterruptType> interrupt_type;
+        BitField<7, 1, MemoryLayout> src_memory_layout;
+        BitField<8, 1, MemoryLayout> dst_memory_layout;
+        BitField<9, 1, u32> multi_line_enable;
+        BitField<10, 1, u32> remap_enable;
+        BitField<11, 1, u32> rmwdisable;
+        BitField<12, 1, Type> src_type;
+        BitField<13, 1, Type> dst_type;
+        BitField<14, 4, SemaphoreReduction> semaphore_reduction;
+        BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
+        BitField<19, 1, u32> reduction_enable;
+        BitField<20, 1, BypassL2> bypass_l2;
+    };
+    static_assert(sizeof(LaunchDMA) == 4);
+
+    struct RemapConst {
+        enum class Swizzle : u32 {
+            SRC_X = 0,
+            SRC_Y = 1,
+            SRC_Z = 2,
+            SRC_W = 3,
+            CONST_A = 4,
+            CONST_B = 5,
+            NO_WRITE = 6,
+        };
+
+        PackedGPUVAddr address;
+
+        union {
+            BitField<0, 3, Swizzle> dst_x;
+            BitField<4, 3, Swizzle> dst_y;
+            BitField<8, 3, Swizzle> dst_z;
+            BitField<12, 3, Swizzle> dst_w;
+            BitField<0, 15, u32> dst_components_raw; // Spans dst_x through dst_w (bits 0-14).
+            BitField<16, 2, u32> component_size_minus_one;
+            BitField<20, 2, u32> num_src_components_minus_one;
+            BitField<24, 2, u32> num_dst_components_minus_one;
+        };
+        /// Returns destination selector i (0 = dst_x ... 3 = dst_w); selectors sit 4 bits apart.
+        Swizzle GetComponent(size_t i) const {
+            const u32 raw = dst_components_raw;
+            return static_cast<Swizzle>((raw >> (i * 4)) & 0x7);
+        }
+    };
+    static_assert(sizeof(RemapConst) == 12);
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
+    ~MaxwellDMA() override;
+
+    /// Write the value to the register identified by method.
+    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
+
+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                         u32 methods_pending) override;
+
+private:
+    /// Performs the copy from the source buffer to the destination buffer as configured in the
+    /// registers.
+    void Launch();
+    /// Copies a block-linear source into a pitch-linear destination.
+    void CopyBlockLinearToPitch();
+    /// Copies a pitch-linear source into a block-linear destination.
+    void CopyPitchToBlockLinear();
+    /// Fast path of CopyBlockLinearToPitch, taken for destinations smaller than GOB_SIZE.
+    void FastCopyBlockLinearToPitch();
+    /// Hands the semaphore write selected by launch_dma.semaphore_type to the rasterizer.
+    void ReleaseSemaphore();
+
+    Core::System& system;
+
+    MemoryManager& memory_manager;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+    std::vector<u8> read_buffer; // Scratch buffers reused by the copy routines.
+    std::vector<u8> write_buffer;
+
+    static constexpr std::size_t NUM_REGS = 0x800;
+    struct Regs { // Register file: the named fields alias reg_array through the union.
+        union {
+            struct {
+                u32 reserved[0x40];
+                u32 nop;
+                u32 reserved01[0xf];
+                u32 pm_trigger;
+                u32 reserved02[0x3f];
+                Semaphore semaphore;
+                u32 reserved03[0x2];
+                RenderEnable render_enable;
+                PhysMode src_phys_mode;
+                PhysMode dst_phys_mode;
+                u32 reserved04[0x26];
+                LaunchDMA launch_dma;
+                u32 reserved05[0x3f];
+                PackedGPUVAddr offset_in;
+                PackedGPUVAddr offset_out;
+                u32 pitch_in;
+                u32 pitch_out;
+                u32 line_length_in;
+                u32 line_count;
+                u32 reserved06[0xb6];
+                u32 remap_consta_value;
+                u32 remap_constb_value;
+                RemapConst remap_const;
+                Parameters dst_params;
+                u32 reserved07[0x1];
+                Parameters src_params;
+                u32 reserved08[0x275];
+                u32 pm_trigger_end;
+                u32 reserved09[0x3ba];
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+    // Checks at compile time that a field sits at the given register index (u32 units).
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4,                          \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_REG_POSITION(launch_dma, 0xC0);
+    ASSERT_REG_POSITION(offset_in, 0x100);
+    ASSERT_REG_POSITION(offset_out, 0x102);
+    ASSERT_REG_POSITION(pitch_in, 0x104);
+    ASSERT_REG_POSITION(pitch_out, 0x105);
+    ASSERT_REG_POSITION(line_length_in, 0x106);
+    ASSERT_REG_POSITION(line_count, 0x107);
+    ASSERT_REG_POSITION(remap_const, 0x1C0);
+    ASSERT_REG_POSITION(dst_params, 0x1C3);
+    ASSERT_REG_POSITION(src_params, 0x1CA);
+
+#undef ASSERT_REG_POSITION
+};
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -1,305 +1,305 @@
-// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "core/core.h"
-#include "video_core/control/channel_state.h"
-#include "video_core/dma_pusher.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/kepler_compute.h"
-#include "video_core/engines/kepler_memory.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/maxwell_dma.h"
-#include "video_core/engines/puller.h"
-#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-
-namespace Tegra::Engines {
-
-Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
-               Control::ChannelState& channel_state_)
-    : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
-                                                                               channel_state_} {}
-
-Puller::~Puller() = default;
-
-void Puller::ProcessBindMethod(const MethodCall& method_call) {
-    // Bind the current subchannel to the desired engine id.
-    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-              method_call.argument);
-    const auto engine_id = static_cast<EngineID>(method_call.argument);
-    bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
-    switch (engine_id) {
-    case EngineID::FERMI_TWOD_A:
-        dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
-        break;
-    case EngineID::MAXWELL_B:
-        dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
-        break;
-    case EngineID::KEPLER_COMPUTE_B:
-        dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
-        break;
-    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
-    }
-}
-
-void Puller::ProcessFenceActionMethod() {
-    switch (regs.fence_action.op) {
-    case Puller::FenceOperation::Acquire:
-        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
-        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-        rasterizer->ReleaseFences();
-        break;
-    case Puller::FenceOperation::Increment:
-        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
-    }
-}
-
-void Puller::ProcessSemaphoreTriggerMethod() {
-    const auto semaphoreOperationMask = 0xF;
-    const auto op =
-        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
-    if (op == GpuSemaphoreOperation::WriteLong) {
-        const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
-        const u32 payload = regs.semaphore_sequence;
-        [this, sequence_address, payload] {
-            memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
-            memory_manager.Write<u64>(sequence_address, payload);
-        }();
-    } else {
-        do {
-            const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
-            regs.acquire_source = true;
-            regs.acquire_value = regs.semaphore_sequence;
-            if (op == GpuSemaphoreOperation::AcquireEqual) {
-                regs.acquire_active = true;
-                regs.acquire_mode = false;
-                if (word != regs.acquire_value) {
-                    rasterizer->ReleaseFences();
-                    continue;
-                }
-            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
-                regs.acquire_active = true;
-                regs.acquire_mode = true;
-                if (word < regs.acquire_value) {
-                    rasterizer->ReleaseFences();
-                    continue;
-                }
-            } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                if (word && regs.semaphore_sequence == 0) {
-                    rasterizer->ReleaseFences();
-                    continue;
-                }
-            } else {
-                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
-            }
-        } while (false);
-    }
-}
-
-void Puller::ProcessSemaphoreRelease() {
-    const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
-    const u32 payload = regs.semaphore_release;
-    std::function<void()> operation([this, sequence_address, payload] {
-        memory_manager.Write<u32>(sequence_address, payload);
-    });
-    rasterizer->SyncOperation(std::move(operation));
-}
-
-void Puller::ProcessSemaphoreAcquire() {
-    u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
-    const auto value = regs.semaphore_acquire;
-    while (word != value) {
-        regs.acquire_active = true;
-        regs.acquire_value = value;
-        std::this_thread::sleep_for(std::chrono::milliseconds(1));
-        rasterizer->ReleaseFences();
-        word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
-        // TODO(kemathe73) figure out how to do the acquire_timeout
-        regs.acquire_mode = false;
-        regs.acquire_source = false;
-    }
-}
-
-/// Calls a GPU puller method.
-void Puller::CallPullerMethod(const MethodCall& method_call) {
-    regs.reg_array[method_call.method] = method_call.argument;
-    const auto method = static_cast<BufferMethods>(method_call.method);
-
-    switch (method) {
-    case BufferMethods::BindObject: {
-        ProcessBindMethod(method_call);
-        break;
-    }
-    case BufferMethods::Nop:
-    case BufferMethods::SemaphoreAddressHigh:
-    case BufferMethods::SemaphoreAddressLow:
-    case BufferMethods::SemaphoreSequencePayload:
-    case BufferMethods::SyncpointPayload:
-        break;
-    case BufferMethods::WrcacheFlush:
-    case BufferMethods::RefCnt:
-        rasterizer->SignalReference();
-        break;
-    case BufferMethods::SyncpointOperation:
-        ProcessFenceActionMethod();
-        break;
-    case BufferMethods::WaitForIdle:
-        rasterizer->WaitForIdle();
-        break;
-    case BufferMethods::SemaphoreOperation: {
-        ProcessSemaphoreTriggerMethod();
-        break;
-    }
-    case BufferMethods::NonStallInterrupt: {
-        LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
-        break;
-    }
-    case BufferMethods::MemOpA: {
-        LOG_ERROR(HW_GPU, "Memory Operation A");
-        break;
-    }
-    case BufferMethods::MemOpB: {
-        // Implement this better.
-        rasterizer->InvalidateGPUCache();
-        break;
-    }
-    case BufferMethods::MemOpC:
-    case BufferMethods::MemOpD: {
-        LOG_ERROR(HW_GPU, "Memory Operation C,D");
-        break;
-    }
-    case BufferMethods::SemaphoreAcquire: {
-        ProcessSemaphoreAcquire();
-        break;
-    }
-    case BufferMethods::SemaphoreRelease: {
-        ProcessSemaphoreRelease();
-        break;
-    }
-    case BufferMethods::Yield: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
-        break;
-    }
-    default:
-        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
-        break;
-    }
-}
-
-/// Calls a GPU engine method.
-void Puller::CallEngineMethod(const MethodCall& method_call) {
-    const EngineID engine = bound_engines[method_call.subchannel];
-
-    switch (engine) {
-    case EngineID::FERMI_TWOD_A:
-        channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
-                                           method_call.IsLastCall());
-        break;
-    case EngineID::MAXWELL_B:
-        channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
-                                             method_call.IsLastCall());
-        break;
-    case EngineID::KEPLER_COMPUTE_B:
-        channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
-                                                 method_call.IsLastCall());
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
-                                              method_call.IsLastCall());
-        break;
-    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
-                                                method_call.IsLastCall());
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine");
-    }
-}
-
-/// Calls a GPU engine multivalue method.
-void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                                   u32 methods_pending) {
-    const EngineID engine = bound_engines[subchannel];
-
-    switch (engine) {
-    case EngineID::FERMI_TWOD_A:
-        channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::MAXWELL_B:
-        channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::KEPLER_COMPUTE_B:
-        channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine");
-    }
-}
-
-/// Calls a GPU method.
-void Puller::CallMethod(const MethodCall& method_call) {
-    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
-              method_call.subchannel);
-
-    ASSERT(method_call.subchannel < bound_engines.size());
-
-    if (ExecuteMethodOnEngine(method_call.method)) {
-        CallEngineMethod(method_call);
-    } else {
-        CallPullerMethod(method_call);
-    }
-}
-
-/// Calls a GPU multivalue method.
-void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                             u32 methods_pending) {
-    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
-
-    ASSERT(subchannel < bound_engines.size());
-
-    if (ExecuteMethodOnEngine(method)) {
-        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
-    } else {
-        for (std::size_t i = 0; i < amount; i++) {
-            CallPullerMethod(MethodCall{
-                method,
-                base_start[i],
-                subchannel,
-                methods_pending - static_cast<u32>(i),
-            });
-        }
-    }
-}
-
-void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
-}
-
-/// Determines where the method should be executed.
-[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
-    const auto buffer_method = static_cast<BufferMethods>(method);
-    return buffer_method >= BufferMethods::NonPullerMethods;
-}
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/core.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra::Engines {
+
+Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
+               Control::ChannelState& channel_state_)
+    : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_},
+      channel_state{channel_state_} {} // Binds references only; no other setup.
+
+Puller::~Puller() = default; // Defaulted: the rasterizer pointer is not deleted by the puller.
+
+/// Stores the requested engine id for the subchannel, then binds that engine in the DMA pusher.
+void Puller::ProcessBindMethod(const MethodCall& method_call) {
+    const u32 subchannel = method_call.subchannel;
+    const auto engine_id = static_cast<EngineID>(method_call.argument);
+    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, method_call.argument);
+    bound_engines[subchannel] = engine_id;
+    switch (engine_id) {
+    case EngineID::FERMI_TWOD_A:
+        dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), subchannel);
+        break;
+    case EngineID::MAXWELL_B:
+        dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), subchannel);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), subchannel);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), subchannel);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), subchannel);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+    }
+}
+
+/// Runs the fence operation currently held in regs.fence_action.
+void Puller::ProcessFenceActionMethod() {
+    const FenceOperation fence_op = regs.fence_action.op;
+    if (fence_op == FenceOperation::Acquire) {
+        // The syncpoint wait is left commented out here; only ReleaseFences() is called.
+        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
+        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        rasterizer->ReleaseFences();
+    } else if (fence_op == FenceOperation::Increment) {
+        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
+    } else {
+        UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+    }
+}
+
+/// Executes the semaphore operation selected by the low four bits of regs.semaphore_trigger.
+///
+/// WriteLong stores the GPU tick count at address + 8 and then the sequence payload at the
+/// semaphore address. The acquire variants read the semaphore word once, latch the acquire
+/// state into regs, and call ReleaseFences() when their condition does not hold.
+void Puller::ProcessSemaphoreTriggerMethod() {
+    constexpr u32 operation_mask = 0xF;
+    const auto op = static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & operation_mask);
+    const GPUVAddr semaphore_address{regs.semaphore_address.SemaphoreAddress()};
+    if (op == GpuSemaphoreOperation::WriteLong) {
+        memory_manager.Write<u64>(semaphore_address + sizeof(u64), gpu.GetTicks());
+        memory_manager.Write<u64>(semaphore_address, regs.semaphore_sequence);
+        return;
+    }
+    const u32 word{memory_manager.Read<u32>(semaphore_address)};
+    regs.acquire_source = true;
+    regs.acquire_value = regs.semaphore_sequence;
+    bool condition_met = true;
+    switch (op) {
+    case GpuSemaphoreOperation::AcquireEqual:
+        regs.acquire_active = true;
+        regs.acquire_mode = false;
+        condition_met = word == regs.acquire_value;
+        break;
+    case GpuSemaphoreOperation::AcquireGequal:
+        regs.acquire_active = true;
+        regs.acquire_mode = true;
+        condition_met = word >= regs.acquire_value;
+        break;
+    case GpuSemaphoreOperation::AcquireMask:
+        // Parenthesised on purpose: "word && sequence == 0" tested truthiness, not the mask.
+        condition_met = (word & regs.semaphore_sequence) != 0;
+        break;
+    default:
+        LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+        break;
+    }
+    if (!condition_met) {
+        rasterizer->ReleaseFences();
+    }
+}
+
+/// Hands a write of regs.semaphore_release to the semaphore address over to SyncOperation.
+void Puller::ProcessSemaphoreRelease() {
+    // Captured by value, so the callable uses these values whenever SyncOperation invokes it.
+    const u32 release_value = regs.semaphore_release;
+    const GPUVAddr target{regs.semaphore_address.SemaphoreAddress()};
+    rasterizer->SyncOperation(std::function<void()>(
+        [this, target, release_value] { memory_manager.Write<u32>(target, release_value); }));
+}
+
+/// Re-reads the semaphore word, sleeping 1 ms per try, until it equals regs.semaphore_acquire.
+void Puller::ProcessSemaphoreAcquire() {
+    const auto expected = regs.semaphore_acquire;
+    // There is no timeout yet: the loop only ends once the word matches.
+    // TODO(kemathe73) figure out how to do the acquire_timeout
+    while (memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()) != expected) {
+        regs.acquire_active = true;
+        regs.acquire_value = expected;
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        rasterizer->ReleaseFences();
+        regs.acquire_mode = false;
+        regs.acquire_source = false;
+    }
+}
+
+/// Handles a method addressed to the puller itself rather than to a bound engine.
+///
+/// The argument is first stored in regs.reg_array at the method's index; afterwards the
+/// handler for that method, if there is one, runs. Unhandled methods only log an error.
+/// The register index is not range-checked here; CallMethod and CallMultiMethod only route
+/// ids below BufferMethods::NonPullerMethods to this function.
+void Puller::CallPullerMethod(const MethodCall& method_call) {
+    // Store first: the fence and semaphore handlers below read their operands from regs.
+    regs.reg_array[method_call.method] = method_call.argument;
+    const auto method = static_cast<BufferMethods>(method_call.method);
+
+    switch (method) {
+    case BufferMethods::BindObject:
+        ProcessBindMethod(method_call);
+        break;
+    case BufferMethods::Nop:
+    case BufferMethods::SemaphoreAddressHigh:
+    case BufferMethods::SemaphoreAddressLow:
+    case BufferMethods::SemaphoreSequencePayload:
+    case BufferMethods::SyncpointPayload:
+        // Plain register writes; storing the argument above is all they need.
+        break;
+    // Both ids below end in the same rasterizer call.
+    case BufferMethods::WrcacheFlush:
+    case BufferMethods::RefCnt:
+        rasterizer->SignalReference();
+        break;
+    case BufferMethods::SyncpointOperation:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForIdle:
+        rasterizer->WaitForIdle();
+        break;
+    case BufferMethods::SemaphoreOperation:
+        ProcessSemaphoreTriggerMethod();
+        break;
+    case BufferMethods::NonStallInterrupt:
+        LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
+        break;
+    case BufferMethods::MemOpA:
+        LOG_ERROR(HW_GPU, "Memory Operation A");
+        break;
+    case BufferMethods::MemOpB:
+        // Implement this better: for now it only calls InvalidateGPUCache().
+        rasterizer->InvalidateGPUCache();
+        break;
+    case BufferMethods::MemOpC:
+    case BufferMethods::MemOpD:
+        LOG_ERROR(HW_GPU, "Memory Operation C,D");
+        break;
+    case BufferMethods::SemaphoreAcquire:
+        ProcessSemaphoreAcquire();
+        break;
+    case BufferMethods::SemaphoreRelease:
+        ProcessSemaphoreRelease();
+        break;
+    case BufferMethods::Yield:
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+        break;
+    default:
+        // No handler exists for any other id; the register write above still happened.
+        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
+        break;
+    }
+}
+
+/// Forwards a single method call to the engine bound to the call's subchannel.
+///
+/// When the id recorded for the subchannel matches none of the known engines,
+/// UNIMPLEMENTED_MSG is raised and no engine is called.
+void Puller::CallEngineMethod(const MethodCall& method_call) {
+    const auto forward = [&method_call](auto& engine) {
+        engine->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+    };
+
+    switch (bound_engines[method_call.subchannel]) {
+    case EngineID::FERMI_TWOD_A:
+        forward(channel_state.fermi_2d);
+        break;
+    case EngineID::MAXWELL_B:
+        forward(channel_state.maxwell_3d);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        forward(channel_state.kepler_compute);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        forward(channel_state.maxwell_dma);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        forward(channel_state.kepler_memory);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine");
+    }
+}
+
+/// Forwards a multi-value method to the engine bound to the given subchannel.
+///
+/// All arguments are passed through unchanged; no engine is called for an unknown engine id.
+void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                                   u32 methods_pending) {
+    switch (bound_engines[subchannel]) {
+    case EngineID::FERMI_TWOD_A:
+        channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_B:
+        channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine");
+    }
+}
+
+/// Routes one method call: engine methods go to the bound engine, the rest to the puller.
+void Puller::CallMethod(const MethodCall& method_call) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
+              method_call.subchannel);
+
+    ASSERT(method_call.subchannel < bound_engines.size());
+
+    if (!ExecuteMethodOnEngine(method_call.method)) {
+        CallPullerMethod(method_call);
+        return;
+    }
+    CallEngineMethod(method_call);
+}
+
+/// Routes a burst of `amount` arguments written to the same method.
+///
+/// Engine methods are forwarded in one call. Puller methods are replayed one argument at a time,
+/// with the pending count passed along decreasing by one per replayed argument.
+void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                             u32 methods_pending) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+    ASSERT(subchannel < bound_engines.size());
+
+    if (ExecuteMethodOnEngine(method)) {
+        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+        return;
+    }
+    for (u32 index = 0; index < amount; ++index) {
+        const MethodCall single_call{method, base_start[index], subchannel,
+                                     methods_pending - index};
+        CallPullerMethod(single_call);
+    }
+}
+
+void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_; // Stored as a raw pointer; other methods dereference it unchecked.
+}
+
+/// Returns true when the method id lies at or above BufferMethods::NonPullerMethods.
+[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
+    // Ids below that boundary are routed to CallPullerMethod by the callers of this check.
+    return static_cast<BufferMethods>(method) >= BufferMethods::NonPullerMethods;
+}
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -1,177 +1,177 @@
-// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <vector>
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "video_core/engines/engine_interface.h"
-
-namespace Core {
-class System;
-}
-
-namespace Tegra {
-class MemoryManager;
-class DmaPusher;
-
-enum class EngineID {
-    FERMI_TWOD_A = 0x902D, // 2D Engine
-    MAXWELL_B = 0xB197,    // 3D Engine
-    KEPLER_COMPUTE_B = 0xB1C0,
-    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
-    MAXWELL_DMA_COPY_A = 0xB0B5,
-};
-
-namespace Control {
-struct ChannelState;
-}
-} // namespace Tegra
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace Tegra::Engines {
-
-class Puller final {
-public:
-    struct MethodCall {
-        u32 method{};
-        u32 argument{};
-        u32 subchannel{};
-        u32 method_count{};
-
-        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
-            : method(method_), argument(argument_), subchannel(subchannel_),
-              method_count(method_count_) {}
-
-        [[nodiscard]] bool IsLastCall() const {
-            return method_count <= 1;
-        }
-    };
-
-    enum class FenceOperation : u32 {
-        Acquire = 0,
-        Increment = 1,
-    };
-
-    union FenceAction {
-        u32 raw;
-        BitField<0, 1, FenceOperation> op;
-        BitField<8, 24, u32> syncpoint_id;
-    };
-
-    explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
-                    Control::ChannelState& channel_state);
-    ~Puller();
-
-    void CallMethod(const MethodCall& method_call);
-
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending);
-
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-    void CallPullerMethod(const MethodCall& method_call);
-
-    void CallEngineMethod(const MethodCall& method_call);
-
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending);
-
-private:
-    Tegra::GPU& gpu;
-
-    MemoryManager& memory_manager;
-    DmaPusher& dma_pusher;
-    Control::ChannelState& channel_state;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-
-    static constexpr std::size_t NUM_REGS = 0x800;
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessFenceActionMethod();
-    void ProcessSemaphoreAcquire();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreTriggerMethod();
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
-
-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines{};
-
-    enum class GpuSemaphoreOperation {
-        AcquireEqual = 0x1,
-        WriteLong = 0x2,
-        AcquireGequal = 0x4,
-        AcquireMask = 0x8,
-    };
-
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
-                  "Field " #field_name " has invalid position")
-
-    ASSERT_REG_POSITION(semaphore_address, 0x4);
-    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-    ASSERT_REG_POSITION(reference_count, 0x14);
-    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-    ASSERT_REG_POSITION(semaphore_release, 0x1B);
-    ASSERT_REG_POSITION(fence_value, 0x1C);
-    ASSERT_REG_POSITION(fence_action, 0x1D);
-
-    ASSERT_REG_POSITION(acquire_mode, 0x100);
-    ASSERT_REG_POSITION(acquire_source, 0x101);
-    ASSERT_REG_POSITION(acquire_active, 0x102);
-    ASSERT_REG_POSITION(acquire_timeout, 0x103);
-    ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-};
-
-} // namespace Tegra::Engines
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+class DmaPusher; // NOTE(review): GPU is used by Puller below but not forward-declared here - confirm
+
+enum class EngineID { // Ids that a BindObject argument is cast to in Puller::ProcessBindMethod
+    FERMI_TWOD_A = 0x902D, // 2D Engine
+    MAXWELL_B = 0xB197,    // 3D Engine
+    KEPLER_COMPUTE_B = 0xB1C0,
+    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
+    MAXWELL_DMA_COPY_A = 0xB0B5,
+};
+
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+class Puller final { // Routes GPU methods either to its own registers or to a bound engine
+public:
+    struct MethodCall {
+        u32 method{};
+        u32 argument{};
+        u32 subchannel{};
+        u32 method_count{}; // IsLastCall() reports true once this is <= 1
+
+        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+            : method(method_), argument(argument_), subchannel(subchannel_),
+              method_count(method_count_) {}
+
+        [[nodiscard]] bool IsLastCall() const {
+            return method_count <= 1;
+        }
+    };
+
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+    };
+
+    explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
+                    Control::ChannelState& channel_state);
+    ~Puller();
+
+    void CallMethod(const MethodCall& method_call);
+
+    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                         u32 methods_pending);
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    void CallPullerMethod(const MethodCall& method_call);
+
+    void CallEngineMethod(const MethodCall& method_call);
+
+    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                               u32 methods_pending);
+
+private:
+    Tegra::GPU& gpu;
+
+    MemoryManager& memory_manager;
+    DmaPusher& dma_pusher;
+    Control::ChannelState& channel_state;
+    VideoCore::RasterizerInterface* rasterizer = nullptr; // Assigned by BindRasterizer()
+
+    static constexpr std::size_t NUM_REGS = 0x800; // NOTE(review): hidden inside Regs by Regs::NUM_REGS
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x40; // Element count of reg_array
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS_NOINIT(0x4);
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } semaphore_address;
+
+                u32 semaphore_sequence;
+                u32 semaphore_trigger;
+                INSERT_PADDING_WORDS_NOINIT(0xC);
+
+                // The pusher and the puller share the reference counter; the pusher only has
+                // read access.
+                u32 reference_count;
+                INSERT_PADDING_WORDS_NOINIT(0x5);
+
+                u32 semaphore_acquire;
+                u32 semaphore_release;
+                u32 fence_value;
+                FenceAction fence_action;
+                INSERT_PADDING_WORDS_NOINIT(0xE2);
+
+                // Puller state (word offsets 0x100-0x104, checked by the asserts below)
+                u32 acquire_mode;
+                u32 acquire_source;
+                u32 acquire_active;
+                u32 acquire_timeout;
+                u32 acquire_value;
+            };
+            std::array<u32, NUM_REGS> reg_array; // Indexed by method id in CallPullerMethod
+        };
+    } regs{};
+
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessSemaphoreAcquire();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreTriggerMethod();
+    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
+
+    /// Mapping of command subchannels to their bound engine ids (written by ProcessBindMethod)
+    std::array<EngineID, 8> bound_engines{};
+
+    enum class GpuSemaphoreOperation { // Decoded from regs.semaphore_trigger & 0xF
+        AcquireEqual = 0x1,
+        WriteLong = 0x2,
+        AcquireGequal = 0x4,
+        AcquireMask = 0x8,
+    };
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_REG_POSITION(semaphore_address, 0x4);
+    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+    ASSERT_REG_POSITION(reference_count, 0x14);
+    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+    ASSERT_REG_POSITION(semaphore_release, 0x1B);
+    ASSERT_REG_POSITION(fence_value, 0x1C);
+    ASSERT_REG_POSITION(fence_action, 0x1D);
+
+    ASSERT_REG_POSITION(acquire_mode, 0x100);
+    ASSERT_REG_POSITION(acquire_source, 0x101);
+    ASSERT_REG_POSITION(acquire_active, 0x102);
+    ASSERT_REG_POSITION(acquire_timeout, 0x103);
+    ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+};
+
+} // namespace Tegra::Engines