early-access version 1332

main
pineappleEA 2021-01-17 03:19:34 +01:00
parent 233493ad87
commit f70af7672d
126 changed files with 3856 additions and 3241 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 1331.
This is the source code for early-access 1332.
## Legal Notice

View File

@ -1,26 +1,5 @@
<RCC>
<qresource prefix="controller">
<file alias="dual_joycon">dual_joycon.png</file>
<file alias="dual_joycon_dark">dual_joycon_dark.png</file>
<file alias="dual_joycon_midnight">dual_joycon_midnight.png</file>
<file alias="handheld">handheld.png</file>
<file alias="handheld_dark">handheld_dark.png</file>
<file alias="handheld_midnight">handheld_midnight.png</file>
<file alias="pro_controller">pro_controller.png</file>
<file alias="pro_controller_dark">pro_controller_dark.png</file>
<file alias="pro_controller_midnight">pro_controller_midnight.png</file>
<file alias="single_joycon_left">single_joycon_left.png</file>
<file alias="single_joycon_left_dark">single_joycon_left_dark.png</file>
<file alias="single_joycon_left_midnight">single_joycon_left_midnight.png</file>
<file alias="single_joycon_right">single_joycon_right.png</file>
<file alias="single_joycon_right_dark">single_joycon_right_dark.png</file>
<file alias="single_joycon_right_midnight">single_joycon_right_midnight.png</file>
<file alias="single_joycon_left_vertical">single_joycon_left_vertical.png</file>
<file alias="single_joycon_left_vertical_dark">single_joycon_left_vertical_dark.png</file>
<file alias="single_joycon_left_vertical_midnight">single_joycon_left_vertical_midnight.png</file>
<file alias="single_joycon_right_vertical">single_joycon_right_vertical.png</file>
<file alias="single_joycon_right_vertical_dark">single_joycon_right_vertical_dark.png</file>
<file alias="single_joycon_right_vertical_midnight">single_joycon_right_vertical_midnight.png</file>
<file alias="applet_dual_joycon">applet_dual_joycon.png</file>
<file alias="applet_dual_joycon_dark">applet_dual_joycon_dark.png</file>
<file alias="applet_dual_joycon_midnight">applet_dual_joycon_midnight.png</file>

View File

@ -38,34 +38,4 @@ u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
#endif
u128 Multiply64Into128(u64 a, u64 b) {
u128 result;
#ifdef _MSC_VER
result[0] = _umul128(a, b, &result[1]);
#else
unsigned __int128 tmp = a;
tmp *= b;
std::memcpy(&result, &tmp, sizeof(u128));
#endif
return result;
}
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
u64 remainder = dividend[0] % divisor;
u64 accum = dividend[0] / divisor;
if (dividend[1] == 0)
return {accum, remainder};
// We ignore dividend[1] / divisor as that overflows
const u64 first_segment = (dividend[1] % divisor) << 32;
accum += (first_segment / divisor) << 32;
const u64 second_segment = (first_segment % divisor) << 32;
accum += (second_segment / divisor);
remainder += second_segment % divisor;
if (remainder >= divisor) {
accum++;
remainder -= divisor;
}
return {accum, remainder};
}
} // namespace Common

View File

@ -12,11 +12,4 @@ namespace Common {
// This function multiplies 2 u64 values and divides it by a u64 value.
[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
// This function multiplies 2 u64 values and produces a u128 value;
[[nodiscard]] u128 Multiply64Into128(u64 a, u64 b);
// This function divides a u128 by a u32 value and produces two u64 values:
// the result of division and the remainder
[[nodiscard]] std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
} // namespace Common

View File

@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/uint128.h"
#include "common/wall_clock.h"
#ifdef ARCHITECTURE_x86_64
@ -41,16 +40,11 @@ public:
}
u64 GetClockCycles() override {
std::chrono::nanoseconds time_now = GetTimeNS();
const u128 temporary =
Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
return Common::Divide128On32(temporary, 1000000000).first;
return GetTimeNS().count() * (emulated_clock_frequency / 1000) / 1000000;
}
u64 GetCPUCycles() override {
std::chrono::nanoseconds time_now = GetTimeNS();
const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
return Common::Divide128On32(temporary, 1000000000).first;
return GetTimeNS().count() * (emulated_cpu_frequency / 1000) / 1000000;
}
void Pause([[maybe_unused]] bool is_paused) override {

View File

@ -19,7 +19,6 @@ add_library(core STATIC
core.h
core_timing.cpp
core_timing.h
core_timing_util.cpp
core_timing_util.h
cpu_manager.cpp
cpu_manager.h

View File

@ -1,24 +1,59 @@
// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
// Licensed under GPLv2+
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <chrono>
#include "common/common_types.h"
#include "core/hardware_properties.h"
namespace Core::Timing {
s64 msToCycles(std::chrono::milliseconds ms);
s64 usToCycles(std::chrono::microseconds us);
s64 nsToCycles(std::chrono::nanoseconds ns);
u64 msToClockCycles(std::chrono::milliseconds ns);
u64 usToClockCycles(std::chrono::microseconds ns);
u64 nsToClockCycles(std::chrono::nanoseconds ns);
std::chrono::milliseconds CyclesToMs(s64 cycles);
std::chrono::nanoseconds CyclesToNs(s64 cycles);
std::chrono::microseconds CyclesToUs(s64 cycles);
namespace detail {
constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
} // namespace detail
u64 CpuCyclesToClockCycles(u64 ticks);
[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
}
[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000;
}
[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000;
}
[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
}
[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
return us.count() * detail::CNTFREQ_ADJUSTED / 1000;
}
[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000;
}
[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED;
}
[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
}
[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED);
}
[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED);
}
} // namespace Core::Timing

View File

@ -21,21 +21,18 @@ public:
std::mutex mutex;
bool touch_pressed = false; ///< True if touchpad area is currently pressed, otherwise false
float touch_x = 0.0f; ///< Touchpad X-position
float touch_y = 0.0f; ///< Touchpad Y-position
Input::TouchStatus status;
private:
class Device : public Input::TouchDevice {
public:
explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
std::tuple<float, float, bool> GetStatus() const override {
Input::TouchStatus GetStatus() const override {
if (auto state = touch_state.lock()) {
std::lock_guard guard{state->mutex};
return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
return state->status;
}
return std::make_tuple(0.0f, 0.0f, false);
return {};
}
private:
@ -79,36 +76,44 @@ std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsi
return std::make_tuple(new_x, new_y);
}
void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id) {
if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y)) {
return;
}
if (id >= touch_state->status.size()) {
return;
}
std::lock_guard guard{touch_state->mutex};
touch_state->touch_x =
const float x =
static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
static_cast<float>(framebuffer_layout.screen.right - framebuffer_layout.screen.left);
touch_state->touch_y =
const float y =
static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
static_cast<float>(framebuffer_layout.screen.bottom - framebuffer_layout.screen.top);
touch_state->touch_pressed = true;
touch_state->status[id] = std::make_tuple(x, y, true);
}
void EmuWindow::TouchReleased() {
void EmuWindow::TouchReleased(std::size_t id) {
if (id >= touch_state->status.size()) {
return;
}
std::lock_guard guard{touch_state->mutex};
touch_state->touch_pressed = false;
touch_state->touch_x = 0;
touch_state->touch_y = 0;
touch_state->status[id] = std::make_tuple(0.0f, 0.0f, false);
}
void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) {
if (!touch_state->touch_pressed)
void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id) {
if (id >= touch_state->status.size()) {
return;
}
if (!std::get<2>(touch_state->status[id]))
return;
if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
std::tie(framebuffer_x, framebuffer_y) = ClipToTouchScreen(framebuffer_x, framebuffer_y);
TouchPressed(framebuffer_x, framebuffer_y);
TouchPressed(framebuffer_x, framebuffer_y, id);
}
void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) {

View File

@ -117,18 +117,23 @@ public:
* Signal that a touch pressed event has occurred (e.g. mouse click pressed)
* @param framebuffer_x Framebuffer x-coordinate that was pressed
* @param framebuffer_y Framebuffer y-coordinate that was pressed
* @param id Touch event ID
*/
void TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y);
void TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id);
/// Signal that a touch released event has occurred (e.g. mouse click released)
void TouchReleased();
/**
* Signal that a touch released event has occurred (e.g. mouse click released)
* @param id Touch event ID
*/
void TouchReleased(std::size_t id);
/**
* Signal that a touch movement event has occurred (e.g. mouse was moved over the emu window)
* @param framebuffer_x Framebuffer x-coordinate
* @param framebuffer_y Framebuffer y-coordinate
* @param id Touch event ID
*/
void TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y);
void TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y, std::size_t id);
/**
* Returns currently active configuration.

View File

@ -21,6 +21,11 @@ enum class AnalogDirection : u8 {
UP,
DOWN,
};
struct AnalogProperties {
float deadzone;
float range;
float threshold;
};
/// An abstract class template for an input device (a button, an analog input, etc.).
template <typename StatusType>
@ -30,6 +35,12 @@ public:
virtual StatusType GetStatus() const {
return {};
}
virtual StatusType GetRawStatus() const {
return GetStatus();
}
virtual AnalogProperties GetAnalogProperties() const {
return {};
}
virtual bool GetAnalogDirectionStatus([[maybe_unused]] AnalogDirection direction) const {
return {};
}
@ -163,10 +174,11 @@ using MotionStatus = std::tuple<Common::Vec3<float>, Common::Vec3<float>, Common
using MotionDevice = InputDevice<MotionStatus>;
/**
* A touch status is an object that returns a tuple of two floats and a bool. The floats are
* x and y coordinates in the range 0.0 - 1.0, and the bool indicates whether it is pressed.
* A touch status is an object that returns an array of 16 tuple elements of two floats and a bool.
* The floats are x and y coordinates in the range 0.0 - 1.0, and the bool indicates whether it is
* pressed.
*/
using TouchStatus = std::tuple<float, float, bool>;
using TouchStatus = std::array<std::tuple<float, float, bool>, 16>;
/**
* A touch device is an input device that returns a touch status object

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include "common/common_types.h"
#include "core/core_timing.h"
@ -16,7 +17,13 @@ constexpr std::size_t SHARED_MEMORY_OFFSET = 0x400;
Controller_Touchscreen::Controller_Touchscreen(Core::System& system) : ControllerBase(system) {}
Controller_Touchscreen::~Controller_Touchscreen() = default;
void Controller_Touchscreen::OnInit() {}
void Controller_Touchscreen::OnInit() {
for (std::size_t id = 0; id < MAX_FINGERS; ++id) {
mouse_finger_id[id] = MAX_FINGERS;
keyboard_finger_id[id] = MAX_FINGERS;
udp_finger_id[id] = MAX_FINGERS;
}
}
void Controller_Touchscreen::OnRelease() {}
@ -40,38 +47,106 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin
cur_entry.sampling_number = last_entry.sampling_number + 1;
cur_entry.sampling_number2 = cur_entry.sampling_number;
bool pressed = false;
float x, y;
std::tie(x, y, pressed) = touch_device->GetStatus();
auto& touch_entry = cur_entry.states[0];
touch_entry.attribute.raw = 0;
if (!pressed && touch_btn_device) {
std::tie(x, y, pressed) = touch_btn_device->GetStatus();
}
if (pressed && Settings::values.touchscreen.enabled) {
touch_entry.x = static_cast<u16>(x * Layout::ScreenUndocked::Width);
touch_entry.y = static_cast<u16>(y * Layout::ScreenUndocked::Height);
touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
const u64 tick = core_timing.GetCPUTicks();
touch_entry.delta_time = tick - last_touch;
last_touch = tick;
touch_entry.finger = Settings::values.touchscreen.finger;
cur_entry.entry_count = 1;
} else {
cur_entry.entry_count = 0;
const Input::TouchStatus& mouse_status = touch_mouse_device->GetStatus();
const Input::TouchStatus& udp_status = touch_udp_device->GetStatus();
for (std::size_t id = 0; id < mouse_status.size(); ++id) {
mouse_finger_id[id] = UpdateTouchInputEvent(mouse_status[id], mouse_finger_id[id]);
udp_finger_id[id] = UpdateTouchInputEvent(udp_status[id], udp_finger_id[id]);
}
if (Settings::values.use_touch_from_button) {
const Input::TouchStatus& keyboard_status = touch_btn_device->GetStatus();
for (std::size_t id = 0; id < mouse_status.size(); ++id) {
keyboard_finger_id[id] =
UpdateTouchInputEvent(keyboard_status[id], keyboard_finger_id[id]);
}
}
std::array<Finger, 16> active_fingers;
const auto end_iter = std::copy_if(fingers.begin(), fingers.end(), active_fingers.begin(),
[](const auto& finger) { return finger.pressed; });
const auto active_fingers_count =
static_cast<std::size_t>(std::distance(active_fingers.begin(), end_iter));
const u64 tick = core_timing.GetCPUTicks();
cur_entry.entry_count = static_cast<s32_le>(active_fingers_count);
for (std::size_t id = 0; id < MAX_FINGERS; ++id) {
auto& touch_entry = cur_entry.states[id];
if (id < active_fingers_count) {
touch_entry.x = static_cast<u16>(active_fingers[id].x * Layout::ScreenUndocked::Width);
touch_entry.y = static_cast<u16>(active_fingers[id].y * Layout::ScreenUndocked::Height);
touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
touch_entry.delta_time = tick - active_fingers[id].last_touch;
fingers[active_fingers[id].id].last_touch = tick;
touch_entry.finger = active_fingers[id].id;
touch_entry.attribute.raw = active_fingers[id].attribute.raw;
} else {
// Clear touch entry
touch_entry.attribute.raw = 0;
touch_entry.x = 0;
touch_entry.y = 0;
touch_entry.diameter_x = 0;
touch_entry.diameter_y = 0;
touch_entry.rotation_angle = 0;
touch_entry.delta_time = 0;
touch_entry.finger = 0;
}
}
std::memcpy(data + SHARED_MEMORY_OFFSET, &shared_memory, sizeof(TouchScreenSharedMemory));
}
void Controller_Touchscreen::OnLoadInputDevices() {
touch_device = Input::CreateDevice<Input::TouchDevice>(Settings::values.touchscreen.device);
if (Settings::values.use_touch_from_button) {
touch_btn_device = Input::CreateDevice<Input::TouchDevice>("engine:touch_from_button");
} else {
touch_btn_device.reset();
}
touch_mouse_device = Input::CreateDevice<Input::TouchDevice>("engine:emu_window");
touch_udp_device = Input::CreateDevice<Input::TouchDevice>("engine:cemuhookudp");
touch_btn_device = Input::CreateDevice<Input::TouchDevice>("engine:touch_from_button");
}
std::optional<std::size_t> Controller_Touchscreen::GetUnusedFingerID() const {
std::size_t first_free_id = 0;
while (first_free_id < MAX_FINGERS) {
if (!fingers[first_free_id].pressed) {
return first_free_id;
} else {
first_free_id++;
}
}
return std::nullopt;
}
std::size_t Controller_Touchscreen::UpdateTouchInputEvent(
const std::tuple<float, float, bool>& touch_input, std::size_t finger_id) {
const auto& [x, y, pressed] = touch_input;
if (pressed) {
Attributes attribute{};
if (finger_id == MAX_FINGERS) {
const auto first_free_id = GetUnusedFingerID();
if (!first_free_id) {
// Invalid finger id do nothing
return MAX_FINGERS;
}
finger_id = first_free_id.value();
fingers[finger_id].pressed = true;
fingers[finger_id].id = static_cast<u32_le>(finger_id);
attribute.start_touch.Assign(1);
}
fingers[finger_id].x = x;
fingers[finger_id].y = y;
fingers[finger_id].attribute = attribute;
return finger_id;
}
if (finger_id != MAX_FINGERS) {
if (!fingers[finger_id].attribute.end_touch) {
fingers[finger_id].attribute.end_touch.Assign(1);
fingers[finger_id].attribute.start_touch.Assign(0);
return finger_id;
}
fingers[finger_id].pressed = false;
}
return MAX_FINGERS;
}
} // namespace Service::HID

View File

@ -30,6 +30,18 @@ public:
void OnLoadInputDevices() override;
private:
static constexpr std::size_t MAX_FINGERS = 16;
// Returns an unused finger id, if there is no fingers available std::nullopt will be returned
std::optional<std::size_t> GetUnusedFingerID() const;
// If the touch is new it tries to assing a new finger id, if there is no fingers avaliable no
// changes will be made. Updates the coordinates if the finger id it's already set. If the touch
// ends delays the output by one frame to set the end_touch flag before finally freeing the
// finger id
std::size_t UpdateTouchInputEvent(const std::tuple<float, float, bool>& touch_input,
std::size_t finger_id);
struct Attributes {
union {
u32 raw{};
@ -55,7 +67,7 @@ private:
s64_le sampling_number;
s64_le sampling_number2;
s32_le entry_count;
std::array<TouchState, 16> states;
std::array<TouchState, MAX_FINGERS> states;
};
static_assert(sizeof(TouchScreenEntry) == 0x298, "TouchScreenEntry is an invalid size");
@ -66,9 +78,23 @@ private:
};
static_assert(sizeof(TouchScreenSharedMemory) == 0x3000,
"TouchScreenSharedMemory is an invalid size");
struct Finger {
u64_le last_touch{};
float x{};
float y{};
u32_le id{};
bool pressed{};
Attributes attribute;
};
TouchScreenSharedMemory shared_memory{};
std::unique_ptr<Input::TouchDevice> touch_device;
std::unique_ptr<Input::TouchDevice> touch_mouse_device;
std::unique_ptr<Input::TouchDevice> touch_udp_device;
std::unique_ptr<Input::TouchDevice> touch_btn_device;
s64_le last_touch{};
std::array<std::size_t, MAX_FINGERS> mouse_finger_id;
std::array<std::size_t, MAX_FINGERS> keyboard_finger_id;
std::array<std::size_t, MAX_FINGERS> udp_finger_id;
std::array<Finger, MAX_FINGERS> fingers;
};
} // namespace Service::HID

View File

@ -37,6 +37,7 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
Tegra::ChCommandHeaderList cmdlist(1);
cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
system.GPU().PushCommandBuffer(cmdlist);
system.GPU().MemoryManager().InvalidateQueuedCaches();
}
return UnmapBuffer(input, output);
}

View File

@ -193,7 +193,13 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vec
return NvResult::InvalidState;
}
if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
if (vic_device) {
// UnmapVicFrame defers texture_cache invalidation of the frame address until
// the stream is over
gpu.MemoryManager().UnmapVicFrame(object->dma_map_addr, *size);
} else {
gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
}
} else {
// This occurs quite frequently, however does not seem to impact functionality
LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr,

View File

@ -160,6 +160,7 @@ protected:
s32_le nvmap_fd{};
u32_le submit_timeout{};
bool vic_device{};
std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
std::array<u32, MaxSyncPoints> device_syncpoints{};

View File

@ -12,8 +12,9 @@
namespace Service::Nvidia::Devices {
nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager)
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {
vic_device = true;
}
nvhost_vic::~nvhost_vic() = default;
NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {

View File

@ -195,8 +195,9 @@ struct Memory::Impl {
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {}) "
"at PC 0x{:08X}",
current_vaddr, src_addr, size, system.CurrentArmInterface().GetPC());
std::memset(dest_buffer, 0, copy_amount);
break;
}
@ -240,8 +241,9 @@ struct Memory::Impl {
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {}) "
"at PC 0x{:08X}",
current_vaddr, src_addr, size, system.CurrentArmInterface().GetPC());
std::memset(dest_buffer, 0, copy_amount);
break;
}
@ -291,8 +293,9 @@ struct Memory::Impl {
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {}) "
"at PC 0x{:08X}",
current_vaddr, dest_addr, size, system.CurrentArmInterface().GetPC());
break;
}
case Common::PageType::Memory: {
@ -334,8 +337,9 @@ struct Memory::Impl {
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {}) "
"at PC 0x{:08X}",
current_vaddr, dest_addr, size, system.CurrentArmInterface().GetPC());
break;
}
case Common::PageType::Memory: {
@ -383,8 +387,9 @@ struct Memory::Impl {
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {}) "
"at PC 0x{:08X}",
current_vaddr, dest_addr, size, system.CurrentArmInterface().GetPC());
break;
}
case Common::PageType::Memory: {
@ -429,8 +434,9 @@ struct Memory::Impl {
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
"Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {}) "
"at PC 0x{:08X}",
current_vaddr, src_addr, size, system.CurrentArmInterface().GetPC());
ZeroBlock(process, dest_addr, copy_amount);
break;
}
@ -601,7 +607,8 @@ struct Memory::Impl {
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X} at PC 0x{:08X}", sizeof(T) * 8, vaddr,
system.CurrentArmInterface().GetPC());
return 0;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
@ -638,8 +645,9 @@ struct Memory::Impl {
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X} at PC 0x{:08X}",
sizeof(data) * 8, static_cast<u32>(data), vaddr,
system.CurrentArmInterface().GetPC());
return;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);

View File

@ -185,6 +185,16 @@ public:
return {0.0f, 0.0f};
}
std::tuple<float, float> GetRawStatus() const override {
const float x = GetAxis(axis_x);
const float y = GetAxis(axis_y);
return {x, y};
}
Input::AnalogProperties GetAnalogProperties() const override {
return {deadzone, range, 0.5f};
}
bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override {
const auto [x, y] = GetStatus();
const float directional_deadzone = 0.5f;

View File

@ -373,6 +373,16 @@ public:
return {};
}
std::tuple<float, float> GetRawStatus() const override {
const float x = joystick->GetAxis(axis_x, range);
const float y = joystick->GetAxis(axis_y, range);
return {x, -y};
}
Input::AnalogProperties GetAnalogProperties() const override {
return {deadzone, range, 0.5f};
}
bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override {
const auto [x, y] = GetStatus();
const float directional_deadzone = 0.5f;

View File

@ -25,18 +25,19 @@ public:
}
}
std::tuple<float, float, bool> GetStatus() const override {
for (const auto& m : map) {
const bool state = std::get<0>(m)->GetStatus();
Input::TouchStatus GetStatus() const override {
Input::TouchStatus touch_status{};
for (std::size_t id = 0; id < map.size() && id < touch_status.size(); ++id) {
const bool state = std::get<0>(map[id])->GetStatus();
if (state) {
const float x = static_cast<float>(std::get<1>(m)) /
const float x = static_cast<float>(std::get<1>(map[id])) /
static_cast<int>(Layout::ScreenUndocked::Width);
const float y = static_cast<float>(std::get<2>(m)) /
const float y = static_cast<float>(std::get<2>(map[id])) /
static_cast<int>(Layout::ScreenUndocked::Height);
return {x, y, true};
touch_status[id] = {x, y, true};
}
}
return {};
return touch_status;
}
private:

View File

@ -136,6 +136,7 @@ static void SocketLoop(Socket* socket) {
Client::Client() {
LOG_INFO(Input, "Udp Initialization started");
finger_id.fill(MAX_TOUCH_FINGERS);
ReloadSockets();
}
@ -176,7 +177,7 @@ void Client::ReloadSockets() {
std::string server_token;
std::size_t client = 0;
while (std::getline(servers_ss, server_token, ',')) {
if (client == max_udp_clients) {
if (client == MAX_UDP_CLIENTS) {
break;
}
std::stringstream server_ss(server_token);
@ -194,7 +195,7 @@ void Client::ReloadSockets() {
for (std::size_t pad = 0; pad < 4; ++pad) {
const std::size_t client_number =
GetClientNumber(udp_input_address, udp_input_port, pad);
if (client_number != max_udp_clients) {
if (client_number != MAX_UDP_CLIENTS) {
LOG_ERROR(Input, "Duplicated UDP servers found");
continue;
}
@ -213,7 +214,7 @@ std::size_t Client::GetClientNumber(std::string_view host, u16 port, std::size_t
return client;
}
}
return max_udp_clients;
return MAX_UDP_CLIENTS;
}
void Client::OnVersion([[maybe_unused]] Response::Version data) {
@ -259,33 +260,14 @@ void Client::OnPadData(Response::PadData data, std::size_t client) {
std::lock_guard guard(clients[client].status.update_mutex);
clients[client].status.motion_status = clients[client].motion.GetMotion();
// TODO: add a setting for "click" touch. Click touch refers to a device that differentiates
// between a simple "tap" and a hard press that causes the touch screen to click.
const bool is_active = data.touch_1.is_active != 0;
float x = 0;
float y = 0;
if (is_active && clients[client].status.touch_calibration) {
const u16 min_x = clients[client].status.touch_calibration->min_x;
const u16 max_x = clients[client].status.touch_calibration->max_x;
const u16 min_y = clients[client].status.touch_calibration->min_y;
const u16 max_y = clients[client].status.touch_calibration->max_y;
x = static_cast<float>(std::clamp(static_cast<u16>(data.touch_1.x), min_x, max_x) -
min_x) /
static_cast<float>(max_x - min_x);
y = static_cast<float>(std::clamp(static_cast<u16>(data.touch_1.y), min_y, max_y) -
min_y) /
static_cast<float>(max_y - min_y);
for (std::size_t id = 0; id < data.touch.size(); ++id) {
UpdateTouchInput(data.touch[id], client, id);
}
clients[client].status.touch_status = {x, y, is_active};
if (configuring) {
const Common::Vec3f gyroscope = clients[client].motion.GetGyroscope();
const Common::Vec3f accelerometer = clients[client].motion.GetAcceleration();
UpdateYuzuSettings(client, accelerometer, gyroscope, is_active);
UpdateYuzuSettings(client, accelerometer, gyroscope);
}
}
}
@ -320,21 +302,17 @@ void Client::Reset() {
}
void Client::UpdateYuzuSettings(std::size_t client, const Common::Vec3<float>& acc,
const Common::Vec3<float>& gyro, bool touch) {
const Common::Vec3<float>& gyro) {
if (gyro.Length() > 0.2f) {
LOG_DEBUG(Input, "UDP Controller {}: gyro=({}, {}, {}), accel=({}, {}, {}), touch={}",
client, gyro[0], gyro[1], gyro[2], acc[0], acc[1], acc[2], touch);
LOG_DEBUG(Input, "UDP Controller {}: gyro=({}, {}, {}), accel=({}, {}, {})", client,
gyro[0], gyro[1], gyro[2], acc[0], acc[1], acc[2]);
}
UDPPadStatus pad{
.host = clients[client].host,
.port = clients[client].port,
.pad_index = clients[client].pad_index,
};
if (touch) {
pad.touch = PadTouch::Click;
pad_queue.Push(pad);
}
for (size_t i = 0; i < 3; ++i) {
for (std::size_t i = 0; i < 3; ++i) {
if (gyro[i] > 5.0f || gyro[i] < -5.0f) {
pad.motion = static_cast<PadMotion>(i);
pad.motion_value = gyro[i];
@ -348,6 +326,50 @@ void Client::UpdateYuzuSettings(std::size_t client, const Common::Vec3<float>& a
}
}
std::optional<std::size_t> Client::GetUnusedFingerID() const {
std::size_t first_free_id = 0;
while (first_free_id < MAX_TOUCH_FINGERS) {
if (!std::get<2>(touch_status[first_free_id])) {
return first_free_id;
} else {
first_free_id++;
}
}
return std::nullopt;
}
void Client::UpdateTouchInput(Response::TouchPad& touch_pad, std::size_t client, std::size_t id) {
// TODO: Use custom calibration per device
const Common::ParamPackage touch_param(Settings::values.touch_device);
const u16 min_x = static_cast<u16>(touch_param.Get("min_x", 100));
const u16 min_y = static_cast<u16>(touch_param.Get("min_y", 50));
const u16 max_x = static_cast<u16>(touch_param.Get("max_x", 1800));
const u16 max_y = static_cast<u16>(touch_param.Get("max_y", 850));
const std::size_t touch_id = client * 2 + id;
if (touch_pad.is_active) {
if (finger_id[touch_id] == MAX_TOUCH_FINGERS) {
const auto first_free_id = GetUnusedFingerID();
if (!first_free_id) {
// Invalid finger id skip to next input
return;
}
finger_id[touch_id] = *first_free_id;
}
auto& [x, y, pressed] = touch_status[finger_id[touch_id]];
x = static_cast<float>(std::clamp(static_cast<u16>(touch_pad.x), min_x, max_x) - min_x) /
static_cast<float>(max_x - min_x);
y = static_cast<float>(std::clamp(static_cast<u16>(touch_pad.y), min_y, max_y) - min_y) /
static_cast<float>(max_y - min_y);
pressed = true;
return;
}
if (finger_id[touch_id] != MAX_TOUCH_FINGERS) {
touch_status[finger_id[touch_id]] = {};
finger_id[touch_id] = MAX_TOUCH_FINGERS;
}
}
void Client::BeginConfiguration() {
pad_queue.Clear();
configuring = true;
@ -360,7 +382,7 @@ void Client::EndConfiguration() {
DeviceStatus& Client::GetPadState(const std::string& host, u16 port, std::size_t pad) {
const std::size_t client_number = GetClientNumber(host, port, pad);
if (client_number == max_udp_clients) {
if (client_number == MAX_UDP_CLIENTS) {
return clients[0].status;
}
return clients[client_number].status;
@ -368,12 +390,20 @@ DeviceStatus& Client::GetPadState(const std::string& host, u16 port, std::size_t
const DeviceStatus& Client::GetPadState(const std::string& host, u16 port, std::size_t pad) const {
const std::size_t client_number = GetClientNumber(host, port, pad);
if (client_number == max_udp_clients) {
if (client_number == MAX_UDP_CLIENTS) {
return clients[0].status;
}
return clients[client_number].status;
}
Input::TouchStatus& Client::GetTouchState() {
return touch_status;
}
const Input::TouchStatus& Client::GetTouchState() const {
return touch_status;
}
Common::SPSCQueue<UDPPadStatus>& Client::GetPadQueue() {
return pad_queue;
}
@ -426,24 +456,24 @@ CalibrationConfigurationJob::CalibrationConfigurationJob(
current_status = Status::Ready;
status_callback(current_status);
}
if (data.touch_1.is_active == 0) {
if (data.touch[0].is_active == 0) {
return;
}
LOG_DEBUG(Input, "Current touch: {} {}", data.touch_1.x,
data.touch_1.y);
min_x = std::min(min_x, static_cast<u16>(data.touch_1.x));
min_y = std::min(min_y, static_cast<u16>(data.touch_1.y));
LOG_DEBUG(Input, "Current touch: {} {}", data.touch[0].x,
data.touch[0].y);
min_x = std::min(min_x, static_cast<u16>(data.touch[0].x));
min_y = std::min(min_y, static_cast<u16>(data.touch[0].y));
if (current_status == Status::Ready) {
// First touch - min data (min_x/min_y)
current_status = Status::Stage1Completed;
status_callback(current_status);
}
if (data.touch_1.x - min_x > CALIBRATION_THRESHOLD &&
data.touch_1.y - min_y > CALIBRATION_THRESHOLD) {
if (data.touch[0].x - min_x > CALIBRATION_THRESHOLD &&
data.touch[0].y - min_y > CALIBRATION_THRESHOLD) {
// Set the current position as max value and finishes
// configuration
max_x = data.touch_1.x;
max_y = data.touch_1.y;
max_x = data.touch[0].x;
max_y = data.touch[0].y;
current_status = Status::Completed;
data_callback(min_x, min_y, max_x, max_y);
status_callback(current_status);

View File

@ -28,6 +28,7 @@ class Socket;
namespace Response {
struct PadData;
struct PortInfo;
struct TouchPad;
struct Version;
} // namespace Response
@ -50,7 +51,6 @@ struct UDPPadStatus {
std::string host{"127.0.0.1"};
u16 port{26760};
std::size_t pad_index{};
PadTouch touch{PadTouch::Undefined};
PadMotion motion{PadMotion::Undefined};
f32 motion_value{0.0f};
};
@ -93,6 +93,9 @@ public:
DeviceStatus& GetPadState(const std::string& host, u16 port, std::size_t pad);
const DeviceStatus& GetPadState(const std::string& host, u16 port, std::size_t pad) const;
Input::TouchStatus& GetTouchState();
const Input::TouchStatus& GetTouchState() const;
private:
struct ClientData {
std::string host{"127.0.0.1"};
@ -122,14 +125,25 @@ private:
void StartCommunication(std::size_t client, const std::string& host, u16 port,
std::size_t pad_index, u32 client_id);
void UpdateYuzuSettings(std::size_t client, const Common::Vec3<float>& acc,
const Common::Vec3<float>& gyro, bool touch);
const Common::Vec3<float>& gyro);
// Returns an unused finger id, if there is no fingers available std::nullopt will be
// returned
std::optional<std::size_t> GetUnusedFingerID() const;
// Merges and updates all touch inputs into the touch_status array
void UpdateTouchInput(Response::TouchPad& touch_pad, std::size_t client, std::size_t id);
bool configuring = false;
// Allocate clients for 8 udp servers
const std::size_t max_udp_clients = 32;
std::array<ClientData, 4 * 8> clients;
Common::SPSCQueue<UDPPadStatus> pad_queue;
static constexpr std::size_t MAX_UDP_CLIENTS = 4 * 8;
// Each client can have up 2 touch inputs
static constexpr std::size_t MAX_TOUCH_FINGERS = MAX_UDP_CLIENTS * 2;
std::array<ClientData, MAX_UDP_CLIENTS> clients{};
Common::SPSCQueue<UDPPadStatus> pad_queue{};
Input::TouchStatus touch_status{};
std::array<std::size_t, MAX_TOUCH_FINGERS> finger_id{};
};
/// An async job allowing configuration of the touchpad calibration.

View File

@ -140,6 +140,14 @@ static_assert(sizeof(PortInfo) == 12, "UDP Response PortInfo struct has wrong si
static_assert(std::is_trivially_copyable_v<PortInfo>,
"UDP Response PortInfo is not trivially copyable");
struct TouchPad {
u8 is_active{};
u8 id{};
u16_le x{};
u16_le y{};
};
static_assert(sizeof(TouchPad) == 6, "UDP Response TouchPad struct has wrong size ");
#pragma pack(push, 1)
struct PadData {
PortInfo info{};
@ -190,12 +198,7 @@ struct PadData {
u8 button_13{};
} analog_button;
struct TouchPad {
u8 is_active{};
u8 id{};
u16_le x{};
u16_le y{};
} touch_1, touch_2;
std::array<TouchPad, 2> touch;
u64_le motion_timestamp;
@ -222,7 +225,6 @@ static_assert(sizeof(Message<PadData>) == MAX_PACKET_SIZE,
static_assert(sizeof(PadData::AnalogButton) == 12,
"UDP Response AnalogButton struct has wrong size ");
static_assert(sizeof(PadData::TouchPad) == 6, "UDP Response TouchPad struct has wrong size ");
static_assert(sizeof(PadData::Accelerometer) == 12,
"UDP Response Accelerometer struct has wrong size ");
static_assert(sizeof(PadData::Gyroscope) == 12, "UDP Response Gyroscope struct has wrong size ");

View File

@ -78,8 +78,8 @@ public:
explicit UDPTouch(std::string ip_, u16 port_, u16 pad_, CemuhookUDP::Client* client_)
: ip(std::move(ip_)), port(port_), pad(pad_), client(client_) {}
std::tuple<float, float, bool> GetStatus() const override {
return client->GetPadState(ip, port, pad).touch_status;
Input::TouchStatus GetStatus() const override {
return client->GetTouchState();
}
private:
@ -107,32 +107,4 @@ std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamP
return std::make_unique<UDPTouch>(std::move(ip), port, pad, client.get());
}
void UDPTouchFactory::BeginConfiguration() {
polling = true;
client->BeginConfiguration();
}
void UDPTouchFactory::EndConfiguration() {
polling = false;
client->EndConfiguration();
}
Common::ParamPackage UDPTouchFactory::GetNextInput() {
Common::ParamPackage params;
CemuhookUDP::UDPPadStatus pad;
auto& queue = client->GetPadQueue();
while (queue.Pop(pad)) {
if (pad.touch == CemuhookUDP::PadTouch::Undefined) {
continue;
}
params.Set("engine", "cemuhookudp");
params.Set("ip", pad.host);
params.Set("port", static_cast<u16>(pad.port));
params.Set("pad_index", static_cast<u16>(pad.pad_index));
params.Set("touch", static_cast<u16>(pad.touch));
return params;
}
return params;
}
} // namespace InputCommon

View File

@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
}
TEST_CASE("BufferBase: Cached write") {
RasterizerInterface rasterizer;
BufferBase buffer(rasterizer, c, WORD);
buffer.UnmarkRegionAsCpuModified(c, WORD);
buffer.CachedCpuWrite(c + PAGE, PAGE);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
buffer.MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
}
TEST_CASE("BufferBase: Multiple cached write") {
RasterizerInterface rasterizer;
BufferBase buffer(rasterizer, c, WORD);
buffer.UnmarkRegionAsCpuModified(c, WORD);
buffer.CachedCpuWrite(c + PAGE, PAGE);
buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
buffer.FlushCachedWrites();
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
buffer.MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
}
TEST_CASE("BufferBase: Cached write unmarked") {
RasterizerInterface rasterizer;
BufferBase buffer(rasterizer, c, WORD);
buffer.UnmarkRegionAsCpuModified(c, WORD);
buffer.CachedCpuWrite(c + PAGE, PAGE);
buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
buffer.MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
}
TEST_CASE("BufferBase: Cached write iterated") {
RasterizerInterface rasterizer;
BufferBase buffer(rasterizer, c, WORD);
buffer.UnmarkRegionAsCpuModified(c, WORD);
buffer.CachedCpuWrite(c + PAGE, PAGE);
int num = 0;
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
REQUIRE(num == 0);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
buffer.MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
}
TEST_CASE("BufferBase: Cached write downloads") {
RasterizerInterface rasterizer;
BufferBase buffer(rasterizer, c, WORD);
buffer.UnmarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 64);
buffer.CachedCpuWrite(c + PAGE, PAGE);
REQUIRE(rasterizer.Count() == 63);
buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
int num = 0;
buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
REQUIRE(num == 0);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
buffer.MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
}

View File

@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
add_library(video_core STATIC
buffer_cache/buffer_base.h
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h
buffer_cache/map_interval.cpp
buffer_cache/map_interval.h
cdma_pusher.cpp
cdma_pusher.h
command_classes/codecs/codec.cpp
@ -154,8 +152,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_state_tracker.cpp
renderer_vulkan/vk_state_tracker.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_cache.cpp

View File

@ -19,6 +19,7 @@ namespace VideoCommon {
enum class BufferFlagBits {
Picked = 1 << 0,
CachedWrites = 1 << 1,
};
DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
@ -40,7 +41,7 @@ class BufferBase {
static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
/// Vector tracking modified pages tightly packed with small vector optimization
union WrittenWords {
union WordsArray {
/// Returns the pointer to the words state
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
return is_short ? &stack : heap;
@ -55,49 +56,59 @@ class BufferBase {
u64* heap; ///< Not-small buffers pointer to the storage
};
struct GpuCpuWords {
explicit GpuCpuWords() = default;
explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} {
struct Words {
explicit Words() = default;
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
if (IsShort()) {
cpu.stack = ~u64{0};
gpu.stack = 0;
cached_cpu.stack = 0;
untracked.stack = ~u64{0};
} else {
// Share allocation between CPU and GPU pages and set their default values
const size_t num_words = NumWords();
u64* const alloc = new u64[num_words * 2];
u64* const alloc = new u64[num_words * 4];
cpu.heap = alloc;
gpu.heap = alloc + num_words;
cached_cpu.heap = alloc + num_words * 2;
untracked.heap = alloc + num_words * 3;
std::fill_n(cpu.heap, num_words, ~u64{0});
std::fill_n(gpu.heap, num_words, 0);
std::fill_n(cached_cpu.heap, num_words, 0);
std::fill_n(untracked.heap, num_words, ~u64{0});
}
// Clean up tailing bits
const u64 last_local_page =
Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE);
const u64 last_word_size = size_bytes % BYTES_PER_WORD;
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1];
last_word = (last_word << shift) >> shift;
const u64 last_word = (~u64{0} << shift) >> shift;
cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
}
~GpuCpuWords() {
~Words() {
Release();
}
GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept {
Words& operator=(Words&& rhs) noexcept {
Release();
size_bytes = rhs.size_bytes;
cpu = rhs.cpu;
gpu = rhs.gpu;
cached_cpu = rhs.cached_cpu;
untracked = rhs.untracked;
rhs.cpu.heap = nullptr;
return *this;
}
GpuCpuWords(GpuCpuWords&& rhs) noexcept
: size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} {
Words(Words&& rhs) noexcept
: size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
rhs.cpu.heap = nullptr;
}
GpuCpuWords& operator=(const GpuCpuWords&) = delete;
GpuCpuWords(const GpuCpuWords&) = delete;
Words& operator=(const Words&) = delete;
Words(const Words&) = delete;
/// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept {
@ -118,8 +129,17 @@ class BufferBase {
}
u64 size_bytes = 0;
WrittenWords cpu;
WrittenWords gpu;
WordsArray cpu;
WordsArray gpu;
WordsArray cached_cpu;
WordsArray untracked;
};
enum class Type {
CPU,
GPU,
CachedCPU,
Untracked,
};
public:
@ -132,68 +152,93 @@ public:
BufferBase& operator=(const BufferBase&) = delete;
BufferBase(const BufferBase&) = delete;
BufferBase& operator=(BufferBase&&) = default;
BufferBase(BufferBase&&) = default;
/// Returns the inclusive CPU modified range in a begin end pair
[[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
return ModifiedRegion<false>(offset, query_size);
return ModifiedRegion<Type::CPU>(offset, query_size);
}
/// Returns the inclusive GPU modified range in a begin end pair
[[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
return ModifiedRegion<true>(offset, query_size);
return ModifiedRegion<Type::GPU>(offset, query_size);
}
/// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
return IsRegionModified<false>(offset, query_size);
return IsRegionModified<Type::CPU>(offset, query_size);
}
/// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
return IsRegionModified<true>(offset, query_size);
return IsRegionModified<Type::GPU>(offset, query_size);
}
/// Mark region as CPU modified, notifying the rasterizer about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
}
/// Unmark region as CPU modified, notifying the rasterizer about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
}
/// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
}
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
}
/// Mark region as modified from the CPU
/// but don't mark it as modified until FlusHCachedWrites is called.
void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
flags |= BufferFlagBits::CachedWrites;
ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
}
/// Flushes cached CPU writes, and notify the rasterizer about the deltas
void FlushCachedWrites() noexcept {
flags &= ~BufferFlagBits::CachedWrites;
const u64 num_words = NumWords();
const u64* const cached_words = Array<Type::CachedCPU>();
u64* const untracked_words = Array<Type::Untracked>();
u64* const cpu_words = Array<Type::CPU>();
for (u64 word_index = 0; word_index < num_words; ++word_index) {
const u64 cached_bits = cached_words[word_index];
NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
untracked_words[word_index] |= cached_bits;
cpu_words[word_index] |= cached_bits;
}
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
ForEachModifiedRange<false, true>(query_cpu_range, size, func);
ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
ForEachModifiedRange<true, false>(query_cpu_range, size, func);
ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
void ForEachDownloadRange(Func&& func) {
ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
}
/// Mark buffer as picked
@ -216,6 +261,11 @@ public:
return True(flags & BufferFlagBits::Picked);
}
/// Returns true when the buffer has pending cached writes
[[nodiscard]] bool HasCachedWrites() const noexcept {
return True(flags & BufferFlagBits::CachedWrites);
}
/// Returns the base CPU address of the buffer
[[nodiscard]] VAddr CpuAddr() const noexcept {
return cpu_addr;
@ -233,26 +283,48 @@ public:
}
private:
template <Type type>
u64* Array() noexcept {
if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::CachedCPU) {
return words.cached_cpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort());
}
}
template <Type type>
const u64* Array() const noexcept {
if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::CachedCPU) {
return words.cached_cpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort());
}
}
/**
* Change the state of a range of pages
*
* @param written_words Pages to be marked or unmarked as modified
* @param dirty_addr Base address to mark or unmark as modified
* @param size Size in bytes to mark or unmark as modified
*
* @tparam enable True when the bits will be set to one, false for zero
* @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
*/
template <bool enable, bool notify_rasterizer>
void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr,
s64 size) noexcept(!notify_rasterizer) {
template <Type type, bool enable>
void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
const s64 difference = dirty_addr - cpu_addr;
const u64 offset = std::max<s64>(difference, 0);
size += std::min<s64>(difference, 0);
if (offset >= SizeBytes() || size < 0) {
return;
}
u64* const state_words = written_words.Pointer(IsShort());
u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 offset_end = std::min(offset + size, SizeBytes());
const u64 begin_page_index = offset / BYTES_PER_PAGE;
const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@ -268,13 +340,19 @@ private:
u64 bits = ~u64{0};
bits = (bits >> right_offset) << right_offset;
bits = (bits << left_offset) >> left_offset;
if constexpr (notify_rasterizer) {
NotifyRasterizer<!enable>(word_index, state_words[word_index], bits);
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
}
if constexpr (enable) {
state_words[word_index] |= bits;
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
untracked_words[word_index] |= bits;
}
} else {
state_words[word_index] &= ~bits;
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
untracked_words[word_index] &= ~bits;
}
}
page_index = 0;
++word_index;
@ -291,7 +369,7 @@ private:
* @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
*/
template <bool add_to_rasterizer>
void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) {
void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
while (changed_bits != 0) {
@ -315,21 +393,20 @@ private:
* @param query_cpu_range Base CPU address to loop over
* @param size Size in bytes of the CPU range to loop over
* @param func Function to call for each turned off region
*
* @tparam gpu True for host GPU pages, false for CPU pages
* @tparam notify_rasterizer True when the rasterizer should be notified about state changes
*/
template <bool gpu, bool notify_rasterizer, typename Func>
template <Type type, typename Func>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
static_assert(type != Type::Untracked);
const s64 difference = query_cpu_range - cpu_addr;
const u64 query_begin = std::max<s64>(difference, 0);
size += std::min<s64>(difference, 0);
if (query_begin >= SizeBytes() || size < 0) {
return;
}
const u64* const cpu_words = words.cpu.Pointer(IsShort());
u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
@ -345,7 +422,8 @@ private:
const u64 word_index_end = std::distance(state_words, last_modified_word);
const unsigned local_page_begin = std::countr_zero(*first_modified_word);
const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]);
const unsigned local_page_end =
static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
@ -371,11 +449,13 @@ private:
const u64 current_word = state_words[word_index] & bits;
state_words[word_index] &= ~bits;
// Exclude CPU modified pages when visiting GPU pages
const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0);
if constexpr (notify_rasterizer) {
NotifyRasterizer<true>(word_index, word, ~u64{0});
if constexpr (type == Type::CPU) {
const u64 current_bits = untracked_words[word_index] & bits;
untracked_words[word_index] &= ~bits;
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
u64 page = page_begin;
page_begin = 0;
@ -416,17 +496,20 @@ private:
* @param offset Offset in bytes from the start of the buffer
* @param size Size in bytes of the region to query for modifications
*/
template <bool gpu>
template <Type type>
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
const u64* const cpu_words = words.cpu.Pointer(IsShort());
const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
const u64 word_end = std::min(word_begin + num_query_words, NumWords());
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
@ -445,13 +528,13 @@ private:
*
* @param offset Offset in bytes from the start of the buffer
* @param size Size in bytes of the region to query for modifications
*
* @tparam True to query GPU modified pages, false for CPU pages
*/
template <bool gpu>
template <Type type>
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
const u64* const cpu_words = words.cpu.Pointer(IsShort());
const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@ -460,7 +543,8 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
@ -488,7 +572,7 @@ private:
RasterizerInterface* rasterizer = nullptr;
VAddr cpu_addr = 0;
GpuCpuWords words;
Words words;
BufferFlagBits flags{};
};

View File

@ -0,0 +1,13 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/microprofile.h"
namespace VideoCommon {
MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
} // namespace VideoCommon

File diff suppressed because it is too large Load Diff

View File

@ -110,12 +110,10 @@ void Vic::Execute() {
converted_frame_buffer.get(), block_height, 0, 0);
gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
gpu.Maxwell3D().OnMemoryWrite();
} else {
// send pitch linear frame
gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
linear_size);
gpu.Maxwell3D().OnMemoryWrite();
}
break;
}
@ -163,7 +161,6 @@ void Vic::Execute() {
}
gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
chroma_buffer.size());
gpu.Maxwell3D().OnMemoryWrite();
break;
}
default:

View File

@ -12,13 +12,30 @@
#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace VideoCommon::Dirty {
namespace {
using Tegra::Engines::Maxwell3D;
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
static constexpr std::size_t num_array = 3;
for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
}
}
void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
}
void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
}
void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
} // Anonymous namespace
void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
SetupDirtyVertexBuffers(tables);
SetupIndexBuffer(tables);
SetupDirtyDescriptors(tables);
SetupDirtyRenderTargets(tables);
}
} // namespace VideoCommon::Dirty

View File

@ -30,6 +30,12 @@ enum : u8 {
ColorBuffer7,
ZetaBuffer,
VertexBuffers,
VertexBuffer0,
VertexBuffer31 = VertexBuffer0 + 31,
IndexBuffer,
LastCommonEntry,
};
@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
FillBlock(tables[1], begin, num, index_b);
}
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
} // namespace VideoCommon::Dirty

View File

@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
// On entering GPU code, assume all memory may be touched by the ARM core.
gpu.Maxwell3D().OnMemoryWrite();
dma_pushbuffer_subindex = 0;

View File

@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
Fermi2D::~Fermi2D() = default;
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
rasterizer = &rasterizer_;
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {

View File

@ -38,7 +38,7 @@ public:
~Fermi2D();
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;

View File

@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
KeplerCompute::~KeplerCompute() = default;
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
rasterizer = &rasterizer_;
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
upload_state.ProcessData(method_argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}

View File

@ -46,7 +46,7 @@ public:
~KeplerCompute();
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
static constexpr std::size_t NumConstBuffers = 8;

View File

@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
case KEPLERMEMORY_REG_INDEX(data): {
upload_state.ProcessData(method_argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}

View File

@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
Maxwell3D::~Maxwell3D() = default;
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
rasterizer = &rasterizer_;
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void Maxwell3D::InitializeRegisterDefaults() {
@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(data_upload):
upload_state.ProcessData(argument, is_last_call);
if (is_last_call) {
OnMemoryWrite();
}
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
}
}
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
void Maxwell3D::ProcessCBBind(size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[stage_index];
auto& bind_data = regs.cb_bind[stage_index];
ASSERT(bind_data.index < Regs::MaxConstBuffers);
auto& buffer = shader.const_buffers[bind_data.index];
const auto& bind_data = regs.cb_bind[stage_index];
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
buffer.enabled = bind_data.valid.Value() != 0;
buffer.address = regs.const_buffer.BufferAddress();
buffer.size = regs.const_buffer.cb_size;
const bool is_enabled = bind_data.valid.Value() != 0;
const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
void Maxwell3D::ProcessCBData(u32 value) {
@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
const u32 id = cb_data_state.id;
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
OnMemoryWrite();
cb_data_state.id = null_cb_data;
cb_data_state.current = null_cb_data;

View File

@ -55,7 +55,7 @@ public:
~Maxwell3D();
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@ -1314,8 +1314,7 @@ public:
GPUVAddr LimitAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
limit_low) +
1;
limit_low);
}
} vertex_array_limit[NumVertexArrays];
@ -1403,6 +1402,7 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
@ -1452,11 +1452,6 @@ public:
return *rasterizer;
}
/// Notify a memory write has happened.
void OnMemoryWrite() {
dirty.flags |= dirty.on_write_stores;
}
enum class MMEDrawMode : u32 {
Undefined,
Array,
@ -1478,7 +1473,6 @@ public:
using Tables = std::array<Table, 2>;
Flags flags;
Flags on_write_stores;
Tables tables{};
} dirty;
@ -1541,7 +1535,7 @@ private:
void FinishCBData();
/// Handles a write to the CB_BIND register.
void ProcessCBBind(std::size_t stage_index);
void ProcessCBBind(size_t stage_index);
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();

View File

@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
return;
}
// All copies here update the main memory, so mark all rasterizer states as invalid.
system.GPU().Maxwell3D().OnMemoryWrite();
if (is_src_pitch && is_dst_pitch) {
CopyPitchToPitch();
} else {

View File

@ -143,22 +143,26 @@ private:
}
bool ShouldWait() const {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
query_cache.ShouldWaitAsyncFlushes();
}
bool ShouldFlush() const {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes();
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes();
query_cache.CommitAsyncFlushes();

View File

@ -44,8 +44,8 @@ GPU::~GPU() = default;
void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
renderer = std::move(renderer_);
rasterizer = renderer->ReadRasterizer();
VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
@ -171,7 +171,7 @@ void GPU::TickWork() {
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
renderer->Rasterizer().FlushRegion(addr, size);
rasterizer->FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
@ -193,11 +193,11 @@ u64 GPU::GetTicks() const {
}
void GPU::FlushCommands() {
renderer->Rasterizer().FlushCommands();
rasterizer->FlushCommands();
}
void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
rasterizer->SyncGuestHost();
}
enum class GpuSemaphoreOperation {

View File

@ -366,6 +366,7 @@ protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
private:

View File

@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
}
auto current_context = context.Acquire();
VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
CommandDataContainer next;
while (state.is_running) {
@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
renderer.Rasterizer().ReleaseFences();
rasterizer->ReleaseFences();
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
rasterizer->FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
rasterizer = renderer.ReadRasterizer();
thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
}
@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
rasterizer->OnCPUWrite(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
rasterizer->OnCPUWrite(addr, size);
}
void ThreadManager::WaitIdle() const {

View File

@ -27,6 +27,7 @@ class System;
} // namespace Core
namespace VideoCore {
class RasterizerInterface;
class RendererBase;
} // namespace VideoCore
@ -151,11 +152,12 @@ private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
SynchState state;
Core::System& system;
std::thread thread;
std::thread::id thread_id;
const bool is_async;
VideoCore::RasterizerInterface* rasterizer = nullptr;
SynchState state;
std::thread thread;
};
} // namespace VideoCommon::GPUThread

View File

@ -12,7 +12,6 @@ set(SHADER_FILES
vulkan_blit_depth_stencil.frag
vulkan_present.frag
vulkan_present.vert
vulkan_quad_array.comp
vulkan_quad_indexed.comp
vulkan_uint8.comp
)

View File

@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint16_t output_indexes[];
};
uint AssembleIndex(uint id) {
// Most primitive restart indices are 0xFF
// Hardcode this to 0xFF for now
uint index = uint(input_indexes[id]);
return index == 0xFF ? 0xFFFF : index;
}
void main() {
uint id = gl_GlobalInvocationID.x;
if (id < input_indexes.length()) {
output_indexes[id] = uint16_t(input_indexes[id]);
output_indexes[id] = uint16_t(AssembleIndex(id));
}
}

View File

@ -20,8 +20,8 @@ MemoryManager::MemoryManager(Core::System& system_)
MemoryManager::~MemoryManager() = default;
void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
rasterizer = &rasterizer_;
void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
@ -101,6 +101,25 @@ void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
.IsSuccess());
}
void MemoryManager::UnmapVicFrame(GPUVAddr gpu_addr, std::size_t size) {
if (!size) {
return;
}
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
rasterizer->InvalidateExceptTextureCache(*cpu_addr, size);
cache_invalidate_queue.push_back({*cpu_addr, size});
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
void MemoryManager::InvalidateQueuedCaches() {
for (const auto& entry : cache_invalidate_queue) {
rasterizer->InvalidateTextureCache(entry.first, entry.second);
}
cache_invalidate_queue.clear();
}
PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
return page_table[PageEntryIndex(gpu_addr)];
}

View File

@ -72,7 +72,7 @@ public:
~MemoryManager();
/// Binds a renderer to the memory manager.
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
@ -121,6 +121,14 @@ public:
[[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
/**
* Some Decoded NVDEC frames require that texture cache does not get invalidated.
* UnmapVicFrame defers the texture cache invalidation until the stream ends
* by invoking InvalidateQueuedCaches to invalidate all frame texture caches.
*/
void UnmapVicFrame(GPUVAddr gpu_addr, std::size_t size);
void InvalidateQueuedCaches();
private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
@ -150,6 +158,7 @@ private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::vector<PageEntry> page_table;
std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
};
} // namespace Tegra

View File

@ -7,6 +7,7 @@
#include <atomic>
#include <functional>
#include <optional>
#include <span>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@ -49,6 +50,10 @@ public:
/// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
/// Signal an uniform buffer binding
virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) = 0;
/// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
@ -64,6 +69,12 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer to flush the texture cache to Switch memory
virtual void InvalidateExceptTextureCache(VAddr addr, u64 size) = 0;
/// Notify rasterizer to invalidate the texture cache
virtual void InvalidateTextureCache(VAddr addr, u64 size) = 0;
/// Check if the the specified memory area requires flushing to CPU Memory.
virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;

View File

@ -37,15 +37,11 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
/// Initialize the renderer
[[nodiscard]] virtual bool Init() = 0;
/// Shutdown the renderer
virtual void ShutDown() = 0;
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
// Getter/setter functions:
// ------------------------
@ -57,14 +53,6 @@ public:
return m_current_frame;
}
[[nodiscard]] RasterizerInterface& Rasterizer() {
return *rasterizer;
}
[[nodiscard]] const RasterizerInterface& Rasterizer() const {
return *rasterizer;
}
[[nodiscard]] Core::Frontend::GraphicsContext& Context() {
return *context;
}
@ -98,7 +86,6 @@ public:
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
std::unique_ptr<Core::Frontend::GraphicsContext> context;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer

View File

@ -2,98 +2,235 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace OpenGL {
namespace {
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
} // Anonymous namespace
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
buffer.Create();
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
if (runtime.device.UseAssemblyShaders()) {
CreateMemoryObjects(runtime);
glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(),
memory_commit.Offset());
} else {
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
}
if (runtime.has_unified_vertex_buffers) {
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
}
}
Buffer::~Buffer() = default;
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data_size), data);
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
const GLintptr gl_offset = static_cast<GLintptr>(offset);
if (read_buffer.handle == 0) {
read_buffer.Create();
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
GL_STREAM_READ);
}
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size) {
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
}
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, OGLStreamBuffer& stream_buffer_,
StateTracker& state_tracker)
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
void Buffer::MakeResident(GLenum access) noexcept {
// Abuse GLenum's order to exit early
// GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
if (access <= current_residency_access || buffer.handle == 0) {
return;
}
if (std::exchange(current_residency_access, access) != GL_NONE) {
// If the buffer is already resident, remove its residency before promoting it
glMakeNamedBufferNonResidentNV(buffer.handle);
}
glMakeNamedBufferResidentNV(buffer.handle, access);
}
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
GLuint Buffer::SubBuffer(u32 offset) {
if (offset == 0) {
return buffer.handle;
}
for (const auto& [sub_buffer, sub_offset] : subs) {
if (sub_offset == offset) {
return sub_buffer.handle;
}
}
OGLBuffer sub_buffer;
sub_buffer.Create();
glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset,
memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset);
return subs.emplace_back(std::move(sub_buffer), offset).first.handle;
}
void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) {
auto& allocator = runtime.vulkan_memory_allocator;
auto& device = runtime.vulkan_device->GetLogical();
auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = SizeBytes(),
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer);
memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal);
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
Vulkan::MemoryAllocator* vulkan_memory_allocator_)
: device{device_}, vulkan_device{vulkan_device_},
vulkan_memory_allocator{vulkan_memory_allocator_},
stream_buffer{device.HasFastBufferSubData() ? std::nullopt
: std::make_optional<StreamBuffer>()} {
GLint gl_max_attributes;
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
max_attributes = static_cast<u32>(gl_max_attributes);
use_assembly_shaders = device.UseAssemblyShaders();
has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory();
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
}
}
}
OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
}
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, cpu_addr, size);
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
static_cast<GLsizeiptr>(size));
} else {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
index_buffer_offset = offset;
}
}
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
return {0, 0, 0};
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
u32 stride) {
if (index >= max_attributes) {
return;
}
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
} else {
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizei>(stride));
}
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint cbuf = cbufs[cbuf_cursor++];
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size) {
if (use_assembly_shaders) {
const GLuint sub_buffer = buffer.SubBuffer(offset);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0,
static_cast<GLsizeiptr>(size));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {cbuf, 0, 0};
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size) {
if (use_assembly_shaders) {
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index,
buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
if (use_assembly_shaders) {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
if (use_assembly_shaders) {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else if (size == 0) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
u32 size) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
} // namespace OpenGL

View File

@ -5,79 +5,167 @@
#pragma once
#include <array>
#include <memory>
#include <span>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace Core {
class System;
}
namespace Vulkan {
class Device;
class MemoryAllocator;
} // namespace Vulkan
namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class StateTracker;
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBlock {
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
~Buffer();
explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
u64 size_bytes);
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
void Download(std::size_t offset, std::size_t data_size, u8* data);
void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size);
void MakeResident(GLenum access) noexcept;
GLuint Handle() const noexcept {
return gl_buffer.handle;
[[nodiscard]] GLuint SubBuffer(u32 offset);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
u64 Address() const noexcept {
return gpu_address;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
OGLBuffer gl_buffer;
OGLBuffer read_buffer;
u64 gpu_address = 0;
void CreateMemoryObjects(BufferCacheRuntime& runtime);
GLuint64EXT address = 0;
Vulkan::MemoryCommit memory_commit;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
std::vector<std::pair<OGLBuffer, u32>> subs;
};
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
class BufferCacheRuntime {
friend Buffer;
public:
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, OGLStreamBuffer& stream_buffer,
StateTracker& state_tracker);
~OGLBufferCache();
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
BufferInfo GetEmptyBuffer(std::size_t) override;
explicit BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
Vulkan::MemoryAllocator* vulkan_memory_allocator_);
void Acquire() noexcept {
cbuf_cursor = 0;
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
if (use_assembly_shaders) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(size));
}
}
protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
if (use_assembly_shaders) {
glProgramBufferParametersIuivNV(
PABO_LUT[stage], binding_index, 0,
static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
reinterpret_cast<const GLuint*>(data.data()));
} else {
glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
}
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
return mapped_span;
}
[[nodiscard]] const GLvoid* IndexOffset() const noexcept {
return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
}
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
return device.HasFastBufferSubData();
}
private:
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
};
const Device& device;
const Vulkan::Device* vulkan_device;
Vulkan::MemoryAllocator* vulkan_memory_allocator;
std::optional<StreamBuffer> stream_buffer;
std::size_t cbuf_cursor = 0;
std::array<GLuint, NUM_CBUFS> cbufs{};
u32 max_attributes = 0;
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
VideoCommon::NUM_STAGES>
fast_uniforms;
u32 index_buffer_offset = 0;
};
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
static constexpr bool IS_OPENGL = true;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
} // namespace OpenGL

View File

@ -21,9 +21,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
@ -198,10 +196,18 @@ bool IsASTCSupported() {
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
[[nodiscard]] std::string UuidString(std::span<const GLubyte, GL_UUID_SIZE_EXT> uuid) {
return fmt::format("{:x}{:x}{:x}{:x}-{:x}{:x}-{:x}{:x}-{:x}{:x}-{:x}{:x}{:x}{:x}{:x}", uuid[0],
uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8],
uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
}
} // Anonymous namespace
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
Device::Device(bool has_vulkan_instance) {
if (!GLAD_GL_VERSION_4_3) {
LOG_ERROR(Render_OpenGL, "OpenGL 4.3 is not available");
throw std::runtime_error{"Insufficient version"};
}
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
@ -217,6 +223,9 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
max_uniform_buffers = BuildMaxUniformBuffers();
base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@ -243,7 +252,8 @@ Device::Device()
use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2 &&
has_vulkan_instance;
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();

View File

@ -10,18 +10,16 @@
namespace OpenGL {
static constexpr u32 EmulationUniformBlockBinding = 0;
class Device final {
class Device {
public:
struct BaseBindings final {
struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
u32 image{};
};
explicit Device();
explicit Device(bool has_vulkan_instance);
explicit Device(std::nullptr_t);
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {

View File

@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {

View File

@ -32,14 +32,13 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
QueryCache& query_cache_);
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;

View File

@ -44,17 +44,10 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
@ -104,20 +97,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
if (buffer.size > Maxwell::MaxConstBufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
Maxwell::MaxConstBufferSize);
return Maxwell::MaxConstBufferSize;
}
return buffer.size;
}
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@ -150,14 +129,6 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
if (num_ssbos == 0) {
return;
}
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
reinterpret_cast<const GLuint*>(ssbos));
}
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::Buffer;
@ -199,49 +170,35 @@ ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
const Vulkan::Device* vulkan_device,
Vulkan::MemoryAllocator* vulkan_memory_allocator,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
stream_buffer(device, state_tracker),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device, vulkan_device, vulkan_memory_allocator),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
unified_uniform_buffer.Create();
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
if (device.UseAssemblyShaders()) {
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
for (const GLuint cbuf : staging_cbufs) {
glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
nullptr, 0);
}
}
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
}
RasterizerOpenGL::~RasterizerOpenGL() {
if (device.UseAssemblyShaders()) {
glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
}
}
RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SyncVertexFormats() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
flags[Dirty::VertexFormats] = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
// the first 16 vertex attributes always, as we don't know which ones are actually used until
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@ -277,55 +234,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
}
void RasterizerOpenGL::SetupVertexBuffer() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexBuffers]) {
return;
}
flags[Dirty::VertexBuffers] = false;
MICROPROFILE_SCOPE(OpenGL_VB);
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
// Upload all guest vertex arrays sequentially to our buffer
const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
flags[Dirty::VertexBuffer0 + index] = false;
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) {
continue;
}
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end >= start);
const GLuint gl_index = static_cast<GLuint>(index);
const u64 size = end - start;
if (size == 0) {
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
if (use_unified_memory) {
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
}
continue;
}
const auto info = buffer_cache.UploadMemory(start, size);
if (use_unified_memory) {
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
info.address + info.offset, size);
} else {
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
}
}
}
void RasterizerOpenGL::SetupVertexInstances() {
void RasterizerOpenGL::SyncVertexInstances() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
@ -346,17 +255,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
}
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = maxwell3d.regs;
const std::size_t size = CalculateIndexBufferSize();
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
return info.offset;
}
void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
u32 clip_distances = 0;
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@ -413,11 +312,19 @@ void RasterizerOpenGL::SetupShaders() {
const size_t stage = index == 0 ? 0 : index - 1;
shaders[stage] = shader;
SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader);
SetupDrawTextures(shader, stage);
SetupDrawImages(shader, stage);
buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
buffer_cache.UnbindGraphicsStorageBuffers(stage);
u32 ssbo_index = 0;
for (const auto& buffer : shader->GetEntries().global_memory_entries) {
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
buffer.cbuf_offset, buffer.is_written);
++ssbo_index;
}
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@ -433,43 +340,26 @@ void RasterizerOpenGL::SetupShaders() {
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
buffer_cache.UpdateGraphicsBuffers(is_indexed);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
buffer_cache.BindHostGeometryBuffers(is_indexed);
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
const Shader* const shader = shaders[stage];
if (shader) {
const auto base = device.GetBaseBindings(stage);
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
texture_index, image_index);
}
}
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
if (!shader) {
continue;
const GPUVAddr start = regs.vertex_array[index].StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
size += end - start;
ASSERT(end >= start);
}
buffer_cache.BindHostStageBuffers(stage);
const auto& base = device.GetBaseBindings(stage);
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
texture_index, image_index);
}
return size;
}
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@ -478,6 +368,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
}
void RasterizerOpenGL::Clear() {
MICROPROFILE_SCOPE(OpenGL_Clears);
if (!maxwell3d.ShouldExecute()) {
return;
}
@ -528,11 +419,9 @@ void RasterizerOpenGL::Clear() {
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
{
auto lock = texture_cache.AcquireLock();
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@ -544,7 +433,6 @@ void RasterizerOpenGL::Clear() {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
++num_queued_commands;
}
@ -553,75 +441,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncPointState();
SyncLineState();
SyncPolygonOffset();
SyncAlphaTest();
SyncFramebufferSRGB();
buffer_cache.Acquire();
current_cbuf = 0;
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer
if (is_indexed) {
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
// Uniform space for the 5 shader stages
buffer_size =
Common::AlignUp<std::size_t>(buffer_size, 4) +
(sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
buffer_cache.Map(buffer_size);
// Prepare vertex array format.
SetupVertexFormat();
// Upload vertex and index data.
SetupVertexBuffer();
SetupVertexInstances();
GLintptr index_buffer_offset = 0;
if (is_indexed) {
index_buffer_offset = SetupIndexBuffer();
}
// Setup emulation uniform buffer.
if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo;
ubo.SetFromRegs(maxwell3d);
const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
}
SyncState();
// Setup shaders and their used resources.
auto lock = texture_cache.AcquireLock();
SetupShaders();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
SetupShaders(is_indexed);
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
@ -635,7 +460,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
@ -675,22 +500,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Acquire();
current_cbuf = 0;
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
BindComputeTextures(kernel);
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
buffer_cache.Unmap();
const auto& entries = kernel->GetEntries();
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
buffer_cache.UnbindComputeStorageBuffers();
u32 ssbo_index = 0;
for (const auto& buffer : entries.global_memory_entries) {
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
buffer.is_written);
++ssbo_index;
}
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
const auto& launch_desc = kepler_compute.launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@ -706,6 +531,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@ -714,19 +545,43 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
return;
}
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
buffer_cache.FlushRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateExceptTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
shader_cache.InvalidateRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
return buffer_cache.IsRegionGpuModified(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
buffer_cache.MustFlushRegion(addr, size);
buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@ -735,11 +590,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
return;
}
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@ -748,26 +606,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
shader_cache.OnCPUWrite(addr, size);
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.FlushCachedWrites();
}
}
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
buffer_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.OnCPUWrite(addr, size);
}
@ -802,14 +669,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::WaitForIdle() {
// Place a barrier on everything that is not framebuffer related.
// This is related to another flag that is not currently implemented.
glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
void RasterizerOpenGL::FragmentBarrier() {
@ -834,18 +694,21 @@ void RasterizerOpenGL::TickFrame() {
num_queued_commands = 0;
fence_manager.TickFrame();
buffer_cache.TickFrame();
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.TickFrame();
}
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@ -857,7 +720,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
if (!image_view) {
return false;
@ -924,166 +787,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
}
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
static constexpr std::array PARAMETER_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
};
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
const auto& entries = shader->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
const auto base_bindings = device.GetBaseBindings(stage_index);
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
for (const auto& entry : entries.const_buffers) {
const u32 index = entry.GetIndex();
const auto& buffer = shader_stage.const_buffers[index];
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
base_unified_offset + index * Maxwell::MaxConstBufferSize);
++binding;
}
if (use_unified) {
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
entries.global_memory_entries.size());
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = kepler_compute.launch_description;
const auto& entries = kernel->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
u32 binding = 0;
for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
++binding;
}
if (use_unified) {
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry, bool use_unified,
std::size_t unified_offset) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
if (device.UseAssemblyShaders()) {
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
}
return;
}
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
// UBO alignment requirements.
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
const GPUVAddr gpu_addr = buffer.address;
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
if (device.UseAssemblyShaders()) {
UNIMPLEMENTED_IF(use_unified);
if (info.offset != 0) {
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
info.handle = staging_cbuf;
info.offset = 0;
}
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
return;
}
if (use_unified) {
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
unified_offset, size);
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
}
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
static constexpr std::array TARGET_LUT = {
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
std::array<BindlessSSBO, 32> ssbos;
ASSERT(entries.size() < ssbos.size());
const bool assembly_shaders = device.UseAssemblyShaders();
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (assembly_shaders) {
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
}
}
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
const auto& entries{kernel->GetEntries().global_memory_entries};
std::array<BindlessSSBO, 32> ssbos;
ASSERT(entries.size() < ssbos.size());
u32 binding = 0;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (device.UseAssemblyShaders()) {
UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
const size_t alignment{device.GetShaderStorageBufferAlignment()};
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
if (device.UseAssemblyShaders()) {
*ssbo = BindlessSSBO{
.address = static_cast<GLuint64EXT>(info.address + info.offset),
.length = static_cast<GLsizei>(size),
.padding = 0,
};
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
static_cast<GLsizeiptr>(size));
}
}
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@ -1131,6 +834,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
}
}
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncPointState();
SyncLineState();
SyncPolygonOffset();
SyncAlphaTest();
SyncFramebufferSRGB();
SyncVertexFormats();
SyncVertexInstances();
}
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@ -1166,9 +893,11 @@ void RasterizerOpenGL::SyncViewport() {
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
: GL_NEGATIVE_ONE_TO_ONE);
const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
state_tracker.ClipControl(origin, depth);
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
if (dirty_viewport) {
@ -1652,36 +1381,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
if (regs.tfb_enabled == 0) {
return;
}
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
if (enabled_transform_feedback_buffers[index]) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
0);
}
enabled_transform_feedback_buffers[index] = false;
continue;
}
enabled_transform_feedback_buffers[index] = true;
auto& tfb_buffer = transform_feedback_buffers[index];
tfb_buffer.Create();
const GLuint handle = tfb_buffer.handle;
const std::size_t size = binding.buffer_size;
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
static_cast<GLsizeiptr>(size));
}
UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
@ -1695,23 +1401,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
glEndTransformFeedback();
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
continue;
}
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
static_cast<GLsizeiptr>(size));
}
}
} // namespace OpenGL

View File

@ -30,7 +30,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@ -47,6 +46,11 @@ namespace Tegra {
class MemoryManager;
}
namespace Vulkan {
class Device;
class MemoryAllocator;
} // namespace Vulkan
namespace OpenGL {
struct ScreenInfo;
@ -63,6 +67,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
const Vulkan::Device* vulkan_device,
Vulkan::MemoryAllocator* vulkan_memory_allocator,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
@ -72,8 +78,11 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
@ -119,27 +128,6 @@ private:
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
/// Configures the current constbuffers to use for the kernel invocation.
void SetupComputeConstBuffers(Shader* kernel);
/// Configures a constant buffer.
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry, bool use_unified,
std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
/// Configures the current global memory entries to use for the kernel invocation.
void SetupComputeGlobalMemory(Shader* kernel);
/// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(const Shader* shader, size_t stage_index);
@ -152,6 +140,9 @@ private:
/// Configures images in a compute shader.
void SetupComputeImages(const Shader* shader);
/// Syncs state to match guest's
void SyncState();
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@ -215,6 +206,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Syncs vertex formats to match the guest state
void SyncVertexFormats();
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
@ -225,19 +222,7 @@ private:
/// End a transform feedback
void EndTransformFeedback();
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
/// Updates the current vertex format
void SetupVertexFormat();
void SetupVertexBuffer();
void SetupVertexInstances();
GLintptr SetupIndexBuffer();
void SetupShaders();
void SetupShaders(bool is_indexed);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@ -249,12 +234,12 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
OGLStreamBuffer stream_buffer;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
ShaderCacheOpenGL shader_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
@ -262,20 +247,8 @@ private:
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
std::array<GLuint, MAX_TEXTURES> texture_handles;
std::array<GLuint, MAX_IMAGES> image_handles;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
std::size_t current_cbuf = 0;
OGLBuffer unified_uniform_buffer;
std::array<GLuint, MAX_TEXTURES> texture_handles{};
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Resetted on flush.
std::size_t num_queued_commands = 0;

View File

@ -171,12 +171,6 @@ void OGLBuffer::Release() {
handle = 0;
}
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
}
void OGLSync::Create() {
if (handle != 0)
return;

View File

@ -234,9 +234,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
// Converts the buffer into a stream copy buffer with a fixed size
void MakeStreamCopy(std::size_t buffer_size);
GLuint handle = 0;
};

View File

@ -63,7 +63,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
@ -76,10 +76,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
layout (std140, binding = {}) uniform vs_config {{
float y_direction;
}};
)";
class ShaderWriter final {
@ -401,13 +397,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
// We waste one UBO for emulation
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
return num_ubos > num_available_ubos;
}
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@ -419,9 +408,8 @@ public:
explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
ShaderType stage_, std::string_view identifier_,
std::string_view suffix_)
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
UseUnifiedUniforms(device_, ir_, stage_)} {
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@ -515,7 +503,8 @@ private:
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
@ -541,7 +530,7 @@ private:
code.AddNewLine();
code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
code.AddLine(COMMON_DECLARATIONS);
}
void DeclareVertex() {
@ -864,17 +853,6 @@ private:
}
void DeclareConstantBuffers() {
if (use_unified_uniforms) {
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
static_cast<u32>(ir.GetGlobalMemory().size());
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
binding);
code.AddLine(" uint cbufs[];");
code.AddLine("}};");
code.AddNewLine();
return;
}
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
for (const auto& [index, info] : ir.GetConstantBuffers()) {
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
@ -1080,29 +1058,17 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
if (use_unified_uniforms) {
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
Type::Uint};
} else {
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
}
// Indirect access
if (use_unified_uniforms) {
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
Visit(offset).AsUint()),
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
// Indirect access
const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
@ -2292,7 +2258,6 @@ private:
}
}
}
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@ -2336,7 +2301,8 @@ private:
}
Expression YNegate(Operation operation) {
return {"y_direction", Type::Float};
// Y_NEGATE is mapped to this uniform value
return {"gl_FrontMaterial.ambient.a", Type::Float};
}
template <u32 element>
@ -2786,7 +2752,6 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@ -3002,8 +2967,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
for (const auto& buffer : entries.const_buffers) {
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
}
entries.shader_length = ir.GetLength();
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}

View File

@ -55,7 +55,7 @@ struct ShaderEntries {
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
bool use_unified_uniforms{};
u32 enabled_uniform_buffers{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,

View File

@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
void SetupDirtyVertexArrays(Tables& tables) {
static constexpr std::size_t num_array = 3;
void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyVertexArrays(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
auto& store = dirty.on_write_stores;
store[VertexBuffers] = true;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
store[VertexBuffer0 + i] = true;
}
}
void StateTracker::InvalidateStreamBuffer() {
flags[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
flags[index] = true;
}
}
} // namespace OpenGL

View File

@ -28,10 +28,6 @@ enum : u8 {
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
VertexBuffers,
VertexBuffer0,
VertexBuffer31 = VertexBuffer0 + 31,
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
@ -92,8 +88,6 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateStreamBuffer();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@ -110,13 +104,32 @@ public:
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
void ClipControl(GLenum new_origin, GLenum new_depth) {
if (new_origin == origin && new_depth == depth) {
return;
}
origin = new_origin;
depth = new_depth;
glClipControl(origin, depth);
}
void SetYNegate(bool new_y_negate) {
if (new_y_negate == y_negate) {
return;
}
// Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
y_negate = new_y_negate;
const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
}
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
flags[OpenGL::Dirty::VertexBuffers] = true;
flags[OpenGL::Dirty::VertexBuffer0] = true;
flags[VideoCommon::Dirty::VertexBuffers] = true;
flags[VideoCommon::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@ -202,6 +215,9 @@ private:
GLuint framebuffer = 0;
GLuint index_buffer = 0;
GLenum origin = GL_LOWER_LEFT;
GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
bool y_negate = false;
};
} // namespace OpenGL

View File

@ -1,70 +1,64 @@
// Copyright 2018 Citra Emulator Project
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include <vector>
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
MP_RGB(128, 128, 192));
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
: state_tracker{state_tracker_} {
gl_buffer.Create();
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
StreamBuffer::StreamBuffer() {
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
OGLStreamBuffer::~OGLStreamBuffer() {
glUnmapNamedBuffer(gl_buffer.handle);
gl_buffer.Release();
}
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= BUFFER_SIZE);
ASSERT(alignment <= BUFFER_SIZE);
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
if (buffer_pos + size > BUFFER_SIZE) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
state_tracker.InvalidateStreamBuffer();
buffer_pos = 0;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (size > 0) {
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
if (iterator + size > free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
buffer_pos += size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL

View File

@ -1,9 +1,12 @@
// Copyright 2018 Citra Emulator Project
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <span>
#include <utility>
#include <glad/glad.h>
@ -13,48 +16,35 @@
namespace OpenGL {
class Device;
class StateTracker;
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
~OGLStreamBuffer();
explicit StreamBuffer();
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, and the offset within the buffer.
* The actual used size must be specified on unmapping the chunk.
*/
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
void Unmap(GLsizeiptr size);
GLuint Handle() const {
return gl_buffer.handle;
}
u64 Address() const {
return gpu_address;
}
GLsizeiptr Size() const noexcept {
return BUFFER_SIZE;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
[[nodiscard]] static size_t Region(size_t offset) noexcept {
return offset / REGION_SIZE;
}
StateTracker& state_tracker;
OGLBuffer gl_buffer;
GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
size_t iterator = 0;
size_t used_iterator = 0;
size_t free_iterator = 0;
u8* mapped_pointer = nullptr;
OGLBuffer buffer;
std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL

View File

@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
} // Anonymous namespace
ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
: span(map, size), sync{sync_}, handle{handle_} {}
ImageBufferMap::~ImageBufferMap() {
if (sync) {
sync->Create();
@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
@ -596,7 +593,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
return ImageBufferMap{
.mapped_span = std::span(maps[index], requested_size),
.sync = sync,
.buffer = buffers[index].handle,
};
}
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@ -711,7 +712,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@ -735,7 +736,7 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset,
copy.dst_offset, copy.size);
}
}
@ -744,7 +745,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
u32 current_row_length = std::numeric_limits<u32>::max();

View File

@ -31,23 +31,12 @@ using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
class ImageBufferMap {
public:
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
struct ImageBufferMap {
~ImageBufferMap();
GLuint Handle() const noexcept {
return handle;
}
std::span<u8> Span() const noexcept {
return span;
}
private:
std::span<u8> span;
std::span<u8> mapped_span;
OGLSync* sync;
GLuint handle;
GLuint buffer;
};
struct FormatProperties {
@ -69,9 +58,9 @@ public:
void Finish();
ImageBufferMap MapUploadBuffer(size_t size);
ImageBufferMap UploadStagingBuffer(size_t size);
ImageBufferMap MapDownloadBuffer(size_t size);
ImageBufferMap DownloadStagingBuffer(size_t size);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);

View File

@ -27,11 +27,14 @@
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_debug_callback.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace OpenGL {
namespace {
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@ -125,14 +128,100 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
}
}
Vulkan::vk::PhysicalDevice FindPhysicalDevice(Vulkan::vk::Instance& instance) {
using namespace Vulkan;
using UUID = std::array<GLubyte, GL_UUID_SIZE_EXT>;
GLint num_device_uuids;
glGetIntegerv(GL_NUM_DEVICE_UUIDS_EXT, &num_device_uuids);
std::vector<UUID> device_uuids(num_device_uuids);
for (GLint index = 0; index < num_device_uuids; ++index) {
glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, 0, device_uuids[index].data());
}
UUID driver_uuid;
glGetUnsignedBytevEXT(GL_DRIVER_UUID_EXT, driver_uuid.data());
for (const VkPhysicalDevice raw_physical_device : instance.EnumeratePhysicalDevices()) {
VkPhysicalDeviceIDProperties device_id_properties{};
device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
VkPhysicalDeviceProperties2KHR properties{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
.pNext = &device_id_properties,
.properties{},
};
vk::PhysicalDevice physical_device(raw_physical_device, instance.Dispatch());
physical_device.GetProperties2KHR(properties);
if (!std::ranges::equal(device_id_properties.driverUUID, driver_uuid)) {
continue;
}
const auto it =
std::ranges::find_if(device_uuids, [&device_id_properties, driver_uuid](UUID uuid) {
return std::ranges::equal(device_id_properties.deviceUUID, uuid);
});
if (it != device_uuids.end()) {
return physical_device;
}
}
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
} // Anonymous namespace
struct VulkanObjects {
static std::unique_ptr<VulkanObjects> TryCreate() {
if (!GLAD_GL_EXT_memory_object) {
// Interop is not present
return nullptr;
}
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor == "ATI Technologies Inc.") {
// Avoid using GL_EXT_memory_object on AMD, as it makes the GL driver crash
return nullptr;
}
if (!Settings::values.use_assembly_shaders.GetValue()) {
// We only need interop when assembly shaders are enabled
return nullptr;
}
#ifdef __linux__
LOG_WARNING(Render_OpenGL, "Interop doesn't work on Linux at the moment");
return nullptr;
#endif
try {
return std::make_unique<VulkanObjects>();
} catch (const Vulkan::vk::Exception& exception) {
LOG_ERROR(Render_OpenGL, "Failed to initialize Vulkan objects with error: {}",
exception.what());
return nullptr;
}
}
Common::DynamicLibrary library{Vulkan::OpenLibrary()};
Vulkan::vk::InstanceDispatch dld;
Vulkan::vk::Instance instance{Vulkan::CreateInstance(library, dld, VK_API_VERSION_1_1)};
Vulkan::Device device{*instance, FindPhysicalDevice(instance), nullptr, dld};
Vulkan::MemoryAllocator memory_allocator{device, true};
};
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_},
vulkan_objects{VulkanObjects::TryCreate()}, device{vulkan_objects != nullptr},
state_tracker{gpu}, program_manager{device},
rasterizer(emu_window, gpu, cpu_memory, device,
vulkan_objects ? &vulkan_objects->device : nullptr,
vulkan_objects ? &vulkan_objects->memory_allocator : nullptr, screen_info,
program_manager, state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
AddTelemetryFields();
InitOpenGLObjects();
}
RendererOpenGL::~RendererOpenGL() = default;
@ -148,7 +237,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
++m_current_frame;
rasterizer->TickFrame();
rasterizer.TickFrame();
context->SwapBuffers();
render_window.OnFrameDisplayed();
@ -179,7 +268,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
return;
}
@ -267,6 +356,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@ -289,14 +379,6 @@ void RendererOpenGL::AddTelemetryFields() {
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
screen_info, program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
@ -407,6 +489,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.BindHostPipeline(pipeline.handle);
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
@ -425,7 +508,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
@ -497,25 +579,4 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_requested = false;
}
bool RendererOpenGL::Init() {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
AddTelemetryFields();
if (!GLAD_GL_VERSION_4_3) {
return false;
}
InitOpenGLObjects();
CreateRasterizer();
return true;
}
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL

View File

@ -10,6 +10,7 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@ -37,6 +38,8 @@ class GPU;
namespace OpenGL {
struct VulkanObjects;
/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
OGLTexture resource;
@ -63,18 +66,18 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
@ -98,8 +101,11 @@ private:
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
const Device device;
StateTracker state_tracker{gpu};
std::unique_ptr<VulkanObjects> vulkan_objects;
Device device;
StateTracker state_tracker;
ProgramManager program_manager;
RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
@ -115,9 +121,6 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
/// Global dummy shader pipeline
ProgramManager program_manager;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;

View File

@ -71,7 +71,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
@ -108,7 +108,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
@ -172,8 +172,8 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();

View File

@ -15,9 +15,10 @@
namespace OpenGL {
class Image;
class ImageBufferMap;
class ProgramManager;
struct ImageBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);

View File

@ -526,13 +526,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
return {};
}
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
if (!device.IsExtIndexTypeUint8Supported()) {
UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
return VK_INDEX_TYPE_UINT16;
}
return VK_INDEX_TYPE_UINT8_EXT;
case Maxwell::IndexFormat::UnsignedShort:
return VK_INDEX_TYPE_UINT16;

View File

@ -45,7 +45,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);

View File

@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
return separated_extensions;
}
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface) {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
const vk::PhysicalDevice physical_device(devices[device_index], dld);
return Device(*instance, physical_device, surface, dld);
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
cpu_memory{cpu_memory_}, gpu{gpu_} {}
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
: RendererBase(emu_window, std::move(context_)),
telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_),
gpu(gpu_),
library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug)),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)),
memory_allocator(device, false),
state_tracker(gpu),
scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
memory_allocator, state_tracker, scheduler) {
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
}
RendererVulkan::~RendererVulkan() {
ShutDown();
void(device.GetLogical().WaitIdle());
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
swapchain->Create(layout.width, layout.height, is_srgb);
blit_screen->Recreate();
if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
swapchain.Create(layout.width, layout.height, is_srgb);
blit_screen.Recreate();
}
scheduler->WaitWorker();
scheduler.WaitWorker();
swapchain->AcquireNextImage();
const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
swapchain.AcquireNextImage();
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
scheduler->Flush(render_semaphore);
scheduler.Flush(render_semaphore);
if (swapchain->Present(render_semaphore)) {
blit_screen->Recreate();
if (swapchain.Present(render_semaphore)) {
blit_screen.Recreate();
}
rasterizer->TickFrame();
rasterizer.TickFrame();
}
render_window.OnFrameDisplayed();
}
bool RendererVulkan::Init() try {
library = OpenLibrary();
instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug);
if (Settings::values.renderer_debug) {
debug_callback = CreateDebugCallback(instance);
}
surface = CreateSurface(instance, render_window);
InitializeDevice();
Report();
memory_allocator = std::make_unique<MemoryAllocator>(*device);
state_tracker = std::make_unique<StateTracker>(gpu);
scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
const auto& framebuffer = render_window.GetFramebufferLayout();
swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
swapchain->Create(framebuffer.width, framebuffer.height, false);
rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
cpu_memory, screen_info, *device,
*memory_allocator, *state_tracker, *scheduler);
blit_screen =
std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
*memory_allocator, *swapchain, *scheduler, screen_info);
return true;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
return false;
}
void RendererVulkan::ShutDown() {
if (!device) {
return;
}
if (const auto& dev = device->GetLogical()) {
dev.WaitIdle();
}
rasterizer.reset();
blit_screen.reset();
scheduler.reset();
swapchain.reset();
memory_allocator.reset();
device.reset();
}
void RendererVulkan::InitializeDevice() {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
}
void RendererVulkan::Report() const {
const std::string vendor_name{device->GetVendorName()};
const std::string model_name{device->GetModelName()};
const std::string driver_version = GetDriverVersion(*device);
const std::string vendor_name{device.GetVendorName()};
const std::string model_name{device.GetModelName()};
const std::string driver_version = GetDriverVersion(device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
const std::string api_version = GetReadableVersion(device->ApiVersion());
const std::string api_version = GetReadableVersion(device.ApiVersion());
const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
std::vector<std::string> RendererVulkan::EnumerateDevices() try {
vk::InstanceDispatch dld;
const Common::DynamicLibrary library = OpenLibrary();
const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
std::vector<std::string> names;
names.reserve(physical_devices.size());
for (const VkPhysicalDevice device : physical_devices) {
names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
}
return names;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
return {};
}
} // namespace Vulkan

View File

@ -9,8 +9,14 @@
#include <vector>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
@ -27,20 +33,6 @@ class GPU;
namespace Vulkan {
class Device;
class StateTracker;
class MemoryAllocator;
class VKBlitScreen;
class VKSwapchain;
class VKScheduler;
struct VKScreenInfo {
VkImageView image_view{};
u32 width{};
u32 height{};
bool is_srgb{};
};
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@ -49,15 +41,13 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
static std::vector<std::string> EnumerateDevices();
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
private:
void InitializeDevice();
void Report() const;
Core::TelemetrySession& telemetry_session;
@ -68,18 +58,18 @@ private:
vk::InstanceDispatch dld;
vk::Instance instance;
vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
vk::DebugUtilsMessenger debug_callback;
std::unique_ptr<Device> device;
std::unique_ptr<MemoryAllocator> memory_allocator;
std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler;
std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKBlitScreen> blit_screen;
Device device;
MemoryAllocator memory_allocator;
StateTracker state_tracker;
VKScheduler scheduler;
VKSwapchain swapchain;
VKBlitScreen blit_screen;
RasterizerVulkan rasterizer;
};
} // namespace Vulkan

View File

@ -18,7 +18,6 @@
#include "video_core/gpu.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
};
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
Core::Frontend::EmuWindow& render_window_,
VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Core::Frontend::EmuWindow& render_window_, const Device& device_,
MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
resource_ticks.resize(image_count);
CreateStaticResources();
@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
const std::span<u8> map = buffer_commit.Map();
std::memcpy(map.data(), &data, sizeof(data));
const std::span<u8> mapped_span = buffer_commit.Map();
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
Tegra::Texture::UnswizzleTexture(
map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
}

View File

@ -38,12 +38,18 @@ class RasterizerVulkan;
class VKScheduler;
class VKSwapchain;
class VKBlitScreen final {
struct VKScreenInfo {
VkImageView image_view{};
u32 width{};
u32 height{};
bool is_srgb{};
};
class VKBlitScreen {
public:
explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const Device& device,
MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
Core::Frontend::EmuWindow& render_window, const Device& device,
MemoryAllocator& memory_manager, VKSwapchain& swapchain,
VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
@ -84,7 +90,6 @@ private:
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const Device& device;
MemoryAllocator& memory_allocator;
VKSwapchain& swapchain;

View File

@ -3,188 +3,276 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <span>
#include <vector>
#include "core/core.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
.srcOffset = copy.src_offset,
.dstOffset = copy.dst_offset,
.size = copy.size,
};
}
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
return VK_INDEX_TYPE_UINT8_EXT;
}
if (num_elements <= 0xffff) {
return VK_INDEX_TYPE_UINT16;
}
return VK_INDEX_TYPE_UINT32;
}
constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
size_t BytesPerIndex(VkIndexType index_type) {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
return 1;
case VK_INDEX_TYPE_UINT16:
return 2;
case VK_INDEX_TYPE_UINT32:
return 4;
default:
UNREACHABLE_MSG("Invalid index type={}", index_type);
return 1;
}
}
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
std::ranges::transform(indices, indices.begin(),
[quad, first](u32 index) { return first + index + quad * 4; });
return indices;
}
} // Anonymous namespace
Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {
buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = static_cast<VkDeviceSize>(size_),
.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.size = SizeBytes(),
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
}
Buffer::~Buffer() = default;
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKDescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
std::memcpy(staging.mapped_span.data(), data, data_size);
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Upload);
}
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Download);
}
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
};
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, READ_BARRIER);
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, WRITE_BARRIER);
});
}
const VkBuffer handle = Handle();
scheduler.Record([staging = staging.buffer, handle, offset, data_size,
&device = device](vk::CommandBuffer cmdbuf) {
const VkBufferMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_WRITE_BIT |
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
u32 base_vertex, u32 num_indices, VkBuffer buffer,
u32 offset, [[maybe_unused]] u32 size) {
VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
if (topology == PrimitiveTopology::Quads) {
index_type = VK_INDEX_TYPE_UINT32;
std::tie(buffer, offset) =
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
} else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
index_type = VK_INDEX_TYPE_UINT16;
std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
}
scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
});
}
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
ReserveQuadArrayLUT(first + count, true);
// The LUT has the indices 0, 1, 2, and 3 copied as an array
// To apply these 'first' offsets we can apply an offset based on the modulus.
const VkIndexType index_type = quad_array_lut_index_type;
const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
});
}
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
u32 stride) {
if (device.IsExtExtendedDynamicStateSupported()) {
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = offset;
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
const VkDeviceSize vk_stride = stride;
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
});
} else {
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffer(index, buffer, offset);
});
}
}
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
u32 size) {
if (!device.IsExtTransformFeedbackSupported()) {
// Already logged in the rasterizer
return;
}
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = offset;
const VkDeviceSize vk_size = size;
cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
});
}
void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
update_descriptor_queue.AddBuffer(buffer, offset, size);
}
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
if (num_indices <= current_num_indices) {
return;
}
if (wait_for_idle) {
scheduler.Finish();
}
current_num_indices = num_indices;
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
const u32 num_quads = num_indices / 4;
const u32 num_triangle_indices = num_quads * 6;
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = size_bytes,
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
if (device.HasDebuggingToolAttached()) {
quad_array_lut.SetObjectNameEXT("Quad LUT");
}
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
u8* staging_data = staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
for (u32 quad = 0; quad < num_quads; ++quad) {
switch (quad_array_lut_index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
break;
case VK_INDEX_TYPE_UINT16:
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
break;
case VK_INDEX_TYPE_UINT32:
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
break;
default:
UNREACHABLE();
break;
}
staging_data += quad_size;
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
size_bytes](vk::CommandBuffer cmdbuf) {
const VkBufferCopy copy{
.srcOffset = 0,
.dstOffset = 0,
.size = size_bytes,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
.buffer = dst_buffer,
.offset = 0,
.size = size_bytes,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
write_barrier);
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0, write_barrier);
});
}
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer handle = Handle();
scheduler.Record(
[staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
const VkBufferMemoryBarrier barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
});
scheduler.Finish();
std::memcpy(data, staging.mapped_span.data(), data_size);
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size) {
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer dst_buffer = Handle();
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
copy_size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
std::array<VkBufferMemoryBarrier, 2> barriers;
barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barriers[0].pNext = nullptr;
barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[0].buffer = src_buffer;
barriers[0].offset = src_offset;
barriers[0].size = copy_size;
barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barriers[1].pNext = nullptr;
barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[1].buffer = dst_buffer;
barriers[1].offset = dst_offset;
barriers[1].size = copy_size;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
barriers, {});
});
}
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
StagingBufferPool& staging_pool_)
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
cpu_memory_, stream_buffer_},
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_} {}
VKBufferCache::~VKBufferCache() = default;
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
size);
}
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0);
});
return {empty.buffer, 0, 0};
}
} // namespace Vulkan

View File

@ -4,69 +4,112 @@
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class VKDescriptorPool;
class VKScheduler;
class VKUpdateDescriptorQueue;
class Buffer final : public VideoCommon::BufferBlock {
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
~Buffer();
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_);
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
void Download(std::size_t offset, std::size_t data_size, u8* data);
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size);
VkBuffer Handle() const {
[[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
u64 Address() const {
return 0;
operator VkBuffer() const noexcept {
return *buffer;
}
private:
const Device& device;
VKScheduler& scheduler;
StagingBufferPool& staging_pool;
vk::Buffer buffer;
MemoryCommit commit;
};
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
class BufferCacheRuntime {
friend Buffer;
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, MemoryAllocator& memory_allocator,
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
StagingBufferPool& staging_pool);
~VKBufferCache();
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKDescriptorPool& descriptor_pool);
BufferInfo GetEmptyBuffer(std::size_t size) override;
void Finish();
protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
void BindQuadArrayIndexBuffer(u32 first, u32 count);
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
BindBuffer(buffer, offset, size);
}
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
[[maybe_unused]] bool is_written) {
BindBuffer(buffer, offset, size);
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
const Device& device;
MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
StagingBufferPool& staging_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
vk::Buffer quad_array_lut;
MemoryCommit quad_array_lut_commit;
VkIndexType quad_array_lut_index_type{};
u32 current_num_indices = 0;
Uint8Pass uint8_pass;
QuadIndexedPass quad_index_pass;
};
struct BufferCacheParams {
using Runtime = Vulkan::BufferCacheRuntime;
using Buffer = Vulkan::Buffer;
static constexpr bool IS_OPENGL = false;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
} // namespace Vulkan

View File

@ -10,7 +10,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
@ -22,19 +21,7 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
return {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
};
}
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
return {
.dstBinding = 0,
@ -162,55 +149,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
return set;
}
QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
QuadArrayPass::~QuadArrayPass() = default;
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
ASSERT(num_vertices % 4 == 0);
const u32 num_quads = num_vertices / 4;
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
num_quads, first, set](vk::CommandBuffer cmdbuf) {
constexpr u32 dispatch_size = 1024;
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer;
barrier.offset = 0;
barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
});
return {staging_ref.buffer, 0};
}
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
@ -221,18 +159,18 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
Uint8Pass::~Uint8Pass() = default;
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u32 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_vertices](vk::CommandBuffer cmdbuf) {
constexpr u32 dispatch_size = 1024;
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
@ -252,7 +190,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {staging_ref.buffer, 0};
return {staging.buffer, 0};
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@ -267,9 +205,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
QuadIndexedPass::~QuadIndexedPass() = default;
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
VkBuffer src_buffer, u64 src_offset) {
VkBuffer src_buffer, u32 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@ -286,15 +224,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 dispatch_size = 1024;
const std::array push_constants = {base_vertex, index_shift};
@ -317,7 +255,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {staging_ref.buffer, 0};
return {staging.buffer, 0};
}
} // namespace Vulkan

View File

@ -41,22 +41,6 @@ private:
vk::ShaderModule module;
};
class QuadArrayPass final : public VKComputePass {
public:
explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadArrayPass();
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
private:
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@ -64,7 +48,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
/// Assemble uint8 indices into an uint16 index buffer
/// Returns a pair with the staging buffer, and the offset where the assembled data is
std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
private:
VKScheduler& scheduler;
@ -80,9 +66,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
u64 src_offset);
u32 src_offset);
private:
VKScheduler& scheduler;

View File

@ -45,8 +45,8 @@ void InnerFence::Wait() {
}
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
VKQueryCache& query_cache_, const Device& device_,
VKScheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}

View File

@ -22,7 +22,6 @@ class RasterizerInterface;
namespace Vulkan {
class Device;
class VKBufferCache;
class VKQueryCache;
class VKScheduler;
@ -45,14 +44,14 @@ private:
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
VKScheduler& scheduler_);
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
VKQueryCache& query_cache, const Device& device,
VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;

File diff suppressed because it is too large Load Diff

View File

@ -18,14 +18,12 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/async_shaders.h"
@ -49,7 +47,6 @@ namespace Vulkan {
struct VKScreenInfo;
class StateTracker;
class BufferBindings;
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
@ -65,8 +62,11 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
@ -107,24 +107,11 @@ private:
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
struct DrawParameters {
void Draw(vk::CommandBuffer cmdbuf) const;
u32 base_instance = 0;
u32 num_instances = 0;
u32 base_vertex = 0;
u32 num_vertices = 0;
bool is_indexed = 0;
};
void FlushWork();
/// Setups geometry buffers and state.
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
bool is_indexed);
void UpdateDynamicStates();
@ -132,16 +119,6 @@ private:
void EndTransformFeedback();
void SetupVertexArrays(BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
/// Setup constant buffers in the graphics pipeline.
void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
@ -154,12 +131,6 @@ private:
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
/// Setup constant buffers in the compute pipeline.
void SetupComputeConstBuffers(const ShaderEntries& entries);
/// Setup global buffers in the compute pipeline.
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup texel buffers in the compute pipeline.
void SetupComputeUniformTexels(const ShaderEntries& entries);
@ -172,11 +143,6 @@ private:
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
void SetupConstBuffer(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer);
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@ -193,19 +159,6 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
size_t CalculateComputeStreamBufferSize() const;
size_t CalculateVertexArraysSize() const;
size_t CalculateIndexBufferSize() const;
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) const;
VkBuffer DefaultBuffer();
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
@ -217,24 +170,19 @@ private:
StateTracker& state_tracker;
VKScheduler& scheduler;
VKStreamBuffer stream_buffer;
StagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
QuadArrayPass quad_array_pass;
QuadIndexedPass quad_indexed_pass;
Uint8Pass uint8_pass;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKQueryCache query_cache;
VKFenceManager fence_manager;
vk::Buffer default_buffer;
MemoryCommit default_buffer_commit;
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;

View File

@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
u64 VKScheduler::CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
bool VKScheduler::IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
void VKScheduler::Wait(u64 tick) {
master_semaphore->Wait(tick);
}
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
AllocateNewContext();
@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;

View File

@ -12,6 +12,7 @@
#include <utility>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@ -19,7 +20,6 @@ namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
class MasterSemaphore;
class StateTracker;
class VKQueryCache;
@ -30,15 +30,6 @@ public:
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
~VKScheduler();
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept;
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept;
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick);
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
@ -80,6 +71,21 @@ public:
(void)chunk->Record(command);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
master_semaphore->Wait(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;

View File

@ -3124,6 +3124,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
}
for (const auto& buffer : entries.const_buffers) {
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
entries.uses_warps = ir.UsesWarps();

View File

@ -39,24 +39,7 @@ private:
u32 index{};
};
class GlobalBufferEntry {
public:
constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
constexpr u32 GetCbufIndex() const {
return cbuf_index;
}
constexpr u32 GetCbufOffset() const {
return cbuf_offset;
}
constexpr bool IsWritten() const {
return is_written;
}
private:
struct GlobalBufferEntry {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
@ -78,6 +61,7 @@ struct ShaderEntries {
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
u32 enabled_uniform_buffers{};
bool uses_warps{};
};

View File

@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr std::array INVALIDATION_FLAGS{
static constexpr int INVALIDATION_FLAGS[]{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
flags[index] = true;
}
return flags;
}
@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables = gpu.Maxwell3D().dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);

View File

@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
: surface{surface_}, device{device_}, scheduler{scheduler_} {}
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
u32 width, u32 height, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
Create(width, height, srgb);
}
VKSwapchain::~VKSwapchain() = default;

View File

@ -20,7 +20,8 @@ class VKScheduler;
class VKSwapchain {
public:
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
u32 width, u32 height, bool srgb);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.

View File

@ -410,46 +410,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = ACCESS_FLAGS,
.srcAccessMask = WRITE_ACCESS_FLAGS,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange =
{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = ACCESS_FLAGS,
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange =
{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
read_barrier);
@ -553,20 +554,12 @@ void TextureCacheRuntime::Finish() {
scheduler.Finish();
}
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
return {
.handle = staging_ref.buffer,
.span = staging_ref.mapped_span,
};
StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
}
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
return {
.handle = staging_ref.buffer,
.span = staging_ref.mapped_span,
};
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Download);
}
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@ -738,7 +731,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -749,12 +742,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@ -812,12 +802,12 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
const VkBuffer src_buffer = map.handle;
const VkBuffer src_buffer = map.buffer;
const VkImage vk_image = *image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
@ -827,12 +817,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
const VkBuffer src_buffer = map.handle;
const VkBuffer src_buffer = map.buffer;
const VkBuffer dst_buffer = *buffer;
scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
@ -840,13 +830,58 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const BufferImageCopy> copies) {
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
}
@ -1106,7 +1141,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.pAttachments = attachments.data(),
.width = key.size.width,
.height = key.size.height,
.layers = static_cast<u32>(num_layers),
.layers = static_cast<u32>(std::max(num_layers, 1)),
});
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());

View File

@ -7,6 +7,7 @@
#include <compare>
#include <span>
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
namespace Vulkan {
struct ImageBufferMap {
[[nodiscard]] VkBuffer Handle() const noexcept {
return handle;
}
[[nodiscard]] std::span<u8> Span() const noexcept {
return span;
}
VkBuffer handle;
std::span<u8> span;
};
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
@ -76,9 +64,9 @@ struct TextureCacheRuntime {
void Finish();
[[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region,
@ -94,7 +82,7 @@ struct TextureCacheRuntime {
return false;
}
void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
std::span<const VideoCommon::SwizzleParameters>) {
UNREACHABLE();
}
@ -112,13 +100,13 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {

View File

@ -9,16 +9,7 @@
#include <shared_mutex>
#include <thread>
// This header includes both Vulkan and OpenGL headers, this has to be fixed
// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
// Forcefully include glad early and undefine macros
#include <glad/glad.h>
#ifdef CreateEvent
#undef CreateEvent
#endif
#ifdef CreateSemaphore
#undef CreateSemaphore
#endif
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_device.h"

View File

@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
uses_y_negate = true;
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");

View File

@ -139,6 +139,10 @@ public:
return uses_legacy_varyings;
}
bool UsesYNegate() const {
return uses_y_negate;
}
bool UsesWarps() const {
return uses_warps;
}
@ -465,6 +469,7 @@ private:
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_legacy_varyings{};
bool uses_y_negate{};
bool uses_warps{};
bool uses_indexed_samplers{};

Some files were not shown because too many files have changed in this diff Show More