early-access version 4109
This commit is contained in:
parent
71a56797a3
commit
2391029e47
@ -307,7 +307,7 @@ find_package(ZLIB 1.2 REQUIRED)
|
||||
find_package(zstd 1.5 REQUIRED)
|
||||
|
||||
if (NOT YUZU_USE_EXTERNAL_VULKAN_HEADERS)
|
||||
find_package(Vulkan 1.3.274 REQUIRED)
|
||||
find_package(VulkanHeaders 1.3.274 REQUIRED)
|
||||
endif()
|
||||
|
||||
if (NOT YUZU_USE_EXTERNAL_VULKAN_UTILITY_LIBRARIES)
|
||||
|
@ -1,7 +1,7 @@
|
||||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 4107.
|
||||
This is the source code for early-access 4109.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
@ -30,6 +30,7 @@ namespace Settings {
|
||||
#define SETTING(TYPE, RANGED) template class Setting<TYPE, RANGED>
|
||||
#define SWITCHABLE(TYPE, RANGED) template class SwitchableSetting<TYPE, RANGED>
|
||||
|
||||
SETTING(AppletMode, false);
|
||||
SETTING(AudioEngine, false);
|
||||
SETTING(bool, false);
|
||||
SETTING(int, false);
|
||||
@ -215,6 +216,8 @@ const char* TranslateCategory(Category category) {
|
||||
return "Debugging";
|
||||
case Category::GpuDriver:
|
||||
return "GpuDriver";
|
||||
case Category::LibraryApplet:
|
||||
return "LibraryApplet";
|
||||
case Category::Miscellaneous:
|
||||
return "Miscellaneous";
|
||||
case Category::Network:
|
||||
|
@ -133,6 +133,38 @@ struct TouchFromButtonMap {
|
||||
struct Values {
|
||||
Linkage linkage{};
|
||||
|
||||
// Applet
|
||||
Setting<AppletMode> cabinet_applet_mode{linkage, AppletMode::LLE, "cabinet_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> controller_applet_mode{linkage, AppletMode::HLE, "controller_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> data_erase_applet_mode{linkage, AppletMode::HLE, "data_erase_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> error_applet_mode{linkage, AppletMode::HLE, "error_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> net_connect_applet_mode{linkage, AppletMode::HLE, "net_connect_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> player_select_applet_mode{
|
||||
linkage, AppletMode::HLE, "player_select_applet_mode", Category::LibraryApplet};
|
||||
Setting<AppletMode> swkbd_applet_mode{linkage, AppletMode::LLE, "swkbd_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> mii_edit_applet_mode{linkage, AppletMode::LLE, "mii_edit_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> web_applet_mode{linkage, AppletMode::HLE, "web_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> shop_applet_mode{linkage, AppletMode::HLE, "shop_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> photo_viewer_applet_mode{
|
||||
linkage, AppletMode::LLE, "photo_viewer_applet_mode", Category::LibraryApplet};
|
||||
Setting<AppletMode> offline_web_applet_mode{linkage, AppletMode::LLE, "offline_web_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> login_share_applet_mode{linkage, AppletMode::HLE, "login_share_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
Setting<AppletMode> wifi_web_auth_applet_mode{
|
||||
linkage, AppletMode::HLE, "wifi_web_auth_applet_mode", Category::LibraryApplet};
|
||||
Setting<AppletMode> my_page_applet_mode{linkage, AppletMode::LLE, "my_page_applet_mode",
|
||||
Category::LibraryApplet};
|
||||
|
||||
// Audio
|
||||
SwitchableSetting<AudioEngine> sink_id{linkage, AudioEngine::Auto, "output_engine",
|
||||
Category::Audio, Specialization::RuntimeList};
|
||||
|
@ -44,6 +44,7 @@ enum class Category : u32 {
|
||||
Services,
|
||||
Paths,
|
||||
Linux,
|
||||
LibraryApplet,
|
||||
MaxEnum,
|
||||
};
|
||||
|
||||
|
@ -151,6 +151,8 @@ ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch);
|
||||
|
||||
ENUM(ConsoleMode, Handheld, Docked);
|
||||
|
||||
ENUM(AppletMode, HLE, LLE);
|
||||
|
||||
template <typename Type>
|
||||
inline std::string CanonicalizeEnum(Type id) {
|
||||
const auto group = EnumMetadata<Type>::Canonicalizations();
|
||||
|
@ -43,6 +43,8 @@ public:
|
||||
DeviceMemoryManager(const DeviceMemory& device_memory);
|
||||
~DeviceMemoryManager();
|
||||
|
||||
static constexpr bool HAS_FLUSH_INVALIDATION = true;
|
||||
|
||||
void BindInterface(DeviceInterface* device_inter);
|
||||
|
||||
DAddr Allocate(size_t size);
|
||||
|
@ -44,15 +44,32 @@ public:
|
||||
GuestMemory() = delete;
|
||||
explicit GuestMemory(M& memory, u64 addr, std::size_t size,
|
||||
Common::ScratchBuffer<T>* backup = nullptr)
|
||||
: m_memory{memory}, m_addr{addr}, m_size{size} {
|
||||
: m_memory{&memory}, m_addr{addr}, m_size{size} {
|
||||
static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Read) {
|
||||
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
|
||||
if (!this->TrySetSpan()) {
|
||||
if (backup) {
|
||||
backup->resize_destructive(this->size());
|
||||
m_data_span = *backup;
|
||||
m_span_valid = true;
|
||||
m_is_data_copy = true;
|
||||
} else {
|
||||
m_data_copy.resize(this->size());
|
||||
m_data_span = std::span(m_data_copy);
|
||||
m_span_valid = true;
|
||||
m_is_data_copy = true;
|
||||
}
|
||||
}
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Read) {
|
||||
Read(addr, size, backup);
|
||||
}
|
||||
}
|
||||
|
||||
~GuestMemory() = default;
|
||||
|
||||
GuestMemory(GuestMemory&& rhs) = default;
|
||||
GuestMemory& operator=(GuestMemory&& rhs) = default;
|
||||
|
||||
T* data() noexcept {
|
||||
return m_data_span.data();
|
||||
}
|
||||
@ -109,8 +126,8 @@ public:
|
||||
}
|
||||
|
||||
if (this->TrySetSpan()) {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
m_memory.FlushRegion(m_addr, this->size_bytes());
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe && M::HAS_FLUSH_INVALIDATION) {
|
||||
m_memory->FlushRegion(m_addr, this->size_bytes());
|
||||
}
|
||||
} else {
|
||||
if (backup) {
|
||||
@ -123,9 +140,9 @@ public:
|
||||
m_is_data_copy = true;
|
||||
m_span_valid = true;
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
|
||||
m_memory->ReadBlock(m_addr, this->data(), this->size_bytes());
|
||||
} else {
|
||||
m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
|
||||
m_memory->ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
|
||||
}
|
||||
}
|
||||
return m_data_span;
|
||||
@ -133,18 +150,19 @@ public:
|
||||
|
||||
void Write(std::span<T> write_data) noexcept {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
|
||||
m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
|
||||
m_memory->WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
|
||||
m_memory->WriteBlock(m_addr, write_data.data(), this->size_bytes());
|
||||
} else {
|
||||
m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
|
||||
m_memory->WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
bool TrySetSpan() noexcept {
|
||||
if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
|
||||
if (u8* ptr = m_memory->GetSpan(m_addr, this->size_bytes()); ptr) {
|
||||
m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
|
||||
m_span_valid = true;
|
||||
m_is_data_copy = false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -159,7 +177,7 @@ protected:
|
||||
return m_addr_changed;
|
||||
}
|
||||
|
||||
M& m_memory;
|
||||
M* m_memory;
|
||||
u64 m_addr{};
|
||||
size_t m_size{};
|
||||
std::span<T> m_data_span{};
|
||||
@ -175,17 +193,7 @@ public:
|
||||
GuestMemoryScoped() = delete;
|
||||
explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
|
||||
Common::ScratchBuffer<T>* backup = nullptr)
|
||||
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
|
||||
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
|
||||
if (!this->TrySetSpan()) {
|
||||
if (backup) {
|
||||
this->m_data_span = *backup;
|
||||
this->m_span_valid = true;
|
||||
this->m_is_data_copy = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {}
|
||||
|
||||
~GuestMemoryScoped() {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Write) {
|
||||
@ -196,15 +204,17 @@ public:
|
||||
if (this->AddressChanged() || this->IsDataCopy()) {
|
||||
ASSERT(this->m_span_valid);
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
|
||||
this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
|
||||
this->m_memory->WriteBlockCached(this->m_addr, this->data(),
|
||||
this->size_bytes());
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
|
||||
this->m_memory->WriteBlock(this->m_addr, this->data(), this->size_bytes());
|
||||
} else {
|
||||
this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
|
||||
this->m_memory->WriteBlockUnsafe(this->m_addr, this->data(),
|
||||
this->size_bytes());
|
||||
}
|
||||
} else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
|
||||
(FLAGS & GuestMemoryFlags::Cached)) {
|
||||
this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
|
||||
this->m_memory->InvalidateRegion(this->m_addr, this->size_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,8 +4,9 @@
|
||||
#include <random>
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic.h"
|
||||
#include "core/arm/dynarmic/dynarmic_exclusive_monitor.h"
|
||||
#include "core/core.h"
|
||||
#include "core/gpu_dirty_memory_manager.h"
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "core/hle/kernel/k_scoped_resource_reservation.h"
|
||||
#include "core/hle/kernel/k_shared_memory.h"
|
||||
@ -1258,6 +1259,10 @@ void KProcess::InitializeInterfaces() {
|
||||
|
||||
#ifdef HAS_NCE
|
||||
if (this->IsApplication() && Settings::IsNceEnabled()) {
|
||||
// Register the scoped JIT handler before creating any NCE instances
|
||||
// so that its signal handler will appear first in the signal chain.
|
||||
Core::ScopedJitExecution::RegisterHandler();
|
||||
|
||||
for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
|
||||
m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
|
||||
}
|
||||
|
@ -130,9 +130,9 @@ enum class AppletProgramId : u64 {
|
||||
|
||||
enum class LibraryAppletMode : u32 {
|
||||
AllForeground = 0,
|
||||
Background = 1,
|
||||
NoUI = 2,
|
||||
BackgroundIndirectDisplay = 3,
|
||||
PartialForeground = 1,
|
||||
NoUi = 2,
|
||||
PartialForegroundIndirectDisplay = 3,
|
||||
AllForegroundInitiallyHidden = 4,
|
||||
};
|
||||
|
||||
|
@ -68,9 +68,9 @@ void SoftwareKeyboard::Initialize() {
|
||||
case LibraryAppletMode::AllForeground:
|
||||
InitializeForeground();
|
||||
break;
|
||||
case LibraryAppletMode::Background:
|
||||
case LibraryAppletMode::BackgroundIndirectDisplay:
|
||||
InitializeBackground(applet_mode);
|
||||
case LibraryAppletMode::PartialForeground:
|
||||
case LibraryAppletMode::PartialForegroundIndirectDisplay:
|
||||
InitializePartialForeground(applet_mode);
|
||||
break;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid LibraryAppletMode={}", applet_mode);
|
||||
@ -243,7 +243,7 @@ void SoftwareKeyboard::InitializeForeground() {
|
||||
InitializeFrontendNormalKeyboard();
|
||||
}
|
||||
|
||||
void SoftwareKeyboard::InitializeBackground(LibraryAppletMode library_applet_mode) {
|
||||
void SoftwareKeyboard::InitializePartialForeground(LibraryAppletMode library_applet_mode) {
|
||||
LOG_INFO(Service_AM, "Initializing Inline Software Keyboard Applet.");
|
||||
|
||||
is_background = true;
|
||||
@ -258,9 +258,9 @@ void SoftwareKeyboard::InitializeBackground(LibraryAppletMode library_applet_mod
|
||||
swkbd_inline_initialize_arg.size());
|
||||
|
||||
if (swkbd_initialize_arg.library_applet_mode_flag) {
|
||||
ASSERT(library_applet_mode == LibraryAppletMode::Background);
|
||||
ASSERT(library_applet_mode == LibraryAppletMode::PartialForeground);
|
||||
} else {
|
||||
ASSERT(library_applet_mode == LibraryAppletMode::BackgroundIndirectDisplay);
|
||||
ASSERT(library_applet_mode == LibraryAppletMode::PartialForegroundIndirectDisplay);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,7 +62,7 @@ private:
|
||||
void InitializeForeground();
|
||||
|
||||
/// Initializes the inline software keyboard.
|
||||
void InitializeBackground(LibraryAppletMode library_applet_mode);
|
||||
void InitializePartialForeground(LibraryAppletMode library_applet_mode);
|
||||
|
||||
/// Processes the text check sent by the application.
|
||||
void ProcessTextCheck();
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/settings.h"
|
||||
#include "core/hle/kernel/k_transfer_memory.h"
|
||||
#include "core/hle/service/am/applet_data_broker.h"
|
||||
#include "core/hle/service/am/applet_manager.h"
|
||||
@ -16,6 +17,34 @@ namespace Service::AM {
|
||||
|
||||
namespace {
|
||||
|
||||
bool ShouldCreateGuestApplet(AppletId applet_id) {
|
||||
#define X(Name, name) \
|
||||
if (applet_id == AppletId::Name && \
|
||||
Settings::values.name##_applet_mode.GetValue() != Settings::AppletMode::LLE) { \
|
||||
return false; \
|
||||
}
|
||||
|
||||
X(Cabinet, cabinet)
|
||||
X(Controller, controller)
|
||||
X(DataErase, data_erase)
|
||||
X(Error, error)
|
||||
X(NetConnect, net_connect)
|
||||
X(ProfileSelect, player_select)
|
||||
X(SoftwareKeyboard, swkbd)
|
||||
X(MiiEdit, mii_edit)
|
||||
X(Web, web)
|
||||
X(Shop, shop)
|
||||
X(PhotoViewer, photo_viewer)
|
||||
X(OfflineWeb, offline_web)
|
||||
X(LoginShare, login_share)
|
||||
X(WebAuth, wifi_web_auth)
|
||||
X(MyPage, my_page)
|
||||
|
||||
#undef X
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
AppletProgramId AppletIdToProgramId(AppletId applet_id) {
|
||||
switch (applet_id) {
|
||||
case AppletId::OverlayDisplay:
|
||||
@ -63,9 +92,10 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
|
||||
}
|
||||
}
|
||||
|
||||
[[maybe_unused]] std::shared_ptr<ILibraryAppletAccessor> CreateGuestApplet(
|
||||
Core::System& system, std::shared_ptr<Applet> caller_applet, AppletId applet_id,
|
||||
LibraryAppletMode mode) {
|
||||
std::shared_ptr<ILibraryAppletAccessor> CreateGuestApplet(Core::System& system,
|
||||
std::shared_ptr<Applet> caller_applet,
|
||||
AppletId applet_id,
|
||||
LibraryAppletMode mode) {
|
||||
const auto program_id = static_cast<u64>(AppletIdToProgramId(applet_id));
|
||||
if (program_id == 0) {
|
||||
// Unknown applet
|
||||
@ -87,7 +117,7 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
|
||||
// Set focus state
|
||||
switch (mode) {
|
||||
case LibraryAppletMode::AllForeground:
|
||||
case LibraryAppletMode::NoUI:
|
||||
case LibraryAppletMode::NoUi:
|
||||
applet->focus_state = FocusState::InFocus;
|
||||
applet->hid_registration.EnableAppletToGetInput(true);
|
||||
applet->message_queue.PushMessage(AppletMessageQueue::AppletMessage::ChangeIntoForeground);
|
||||
@ -99,8 +129,8 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
|
||||
applet->hid_registration.EnableAppletToGetInput(false);
|
||||
applet->message_queue.PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged);
|
||||
break;
|
||||
case LibraryAppletMode::Background:
|
||||
case LibraryAppletMode::BackgroundIndirectDisplay:
|
||||
case LibraryAppletMode::PartialForeground:
|
||||
case LibraryAppletMode::PartialForegroundIndirectDisplay:
|
||||
default:
|
||||
applet->focus_state = FocusState::Background;
|
||||
applet->hid_registration.EnableAppletToGetInput(true);
|
||||
@ -117,9 +147,10 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
|
||||
return std::make_shared<ILibraryAppletAccessor>(system, broker, applet);
|
||||
}
|
||||
|
||||
[[maybe_unused]] std::shared_ptr<ILibraryAppletAccessor> CreateFrontendApplet(
|
||||
Core::System& system, std::shared_ptr<Applet> caller_applet, AppletId applet_id,
|
||||
LibraryAppletMode mode) {
|
||||
std::shared_ptr<ILibraryAppletAccessor> CreateFrontendApplet(Core::System& system,
|
||||
std::shared_ptr<Applet> caller_applet,
|
||||
AppletId applet_id,
|
||||
LibraryAppletMode mode) {
|
||||
const auto program_id = static_cast<u64>(AppletIdToProgramId(applet_id));
|
||||
|
||||
auto process = std::make_unique<Process>(system);
|
||||
@ -163,7 +194,13 @@ void ILibraryAppletCreator::CreateLibraryApplet(HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", applet_id,
|
||||
applet_mode);
|
||||
|
||||
auto library_applet = CreateFrontendApplet(system, applet, applet_id, applet_mode);
|
||||
std::shared_ptr<ILibraryAppletAccessor> library_applet;
|
||||
if (ShouldCreateGuestApplet(applet_id)) {
|
||||
library_applet = CreateGuestApplet(system, applet, applet_id, applet_mode);
|
||||
}
|
||||
if (!library_applet) {
|
||||
library_applet = CreateFrontendApplet(system, applet, applet_id, applet_mode);
|
||||
}
|
||||
if (!library_applet) {
|
||||
LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", applet_id);
|
||||
|
||||
|
@ -288,7 +288,8 @@ void ISelfController::GetSystemSharedBufferHandle(HLERequestContext& ctx) {
|
||||
}
|
||||
|
||||
Result ISelfController::EnsureBufferSharingEnabled(Kernel::KProcess* process) {
|
||||
if (applet->system_buffer_manager.Initialize(&nvnflinger, process, applet->applet_id)) {
|
||||
if (applet->system_buffer_manager.Initialize(&nvnflinger, process, applet->applet_id,
|
||||
applet->library_applet_mode)) {
|
||||
return ResultSuccess;
|
||||
}
|
||||
|
||||
|
@ -17,11 +17,12 @@ SystemBufferManager::~SystemBufferManager() {
|
||||
|
||||
// Clean up shared layers.
|
||||
if (m_buffer_sharing_enabled) {
|
||||
m_nvnflinger->GetSystemBufferManager().Finalize(m_process);
|
||||
}
|
||||
}
|
||||
|
||||
bool SystemBufferManager::Initialize(Nvnflinger::Nvnflinger* nvnflinger, Kernel::KProcess* process,
|
||||
AppletId applet_id) {
|
||||
AppletId applet_id, LibraryAppletMode mode) {
|
||||
if (m_nvnflinger) {
|
||||
return m_buffer_sharing_enabled;
|
||||
}
|
||||
@ -36,9 +37,14 @@ bool SystemBufferManager::Initialize(Nvnflinger::Nvnflinger* nvnflinger, Kernel:
|
||||
return false;
|
||||
}
|
||||
|
||||
Nvnflinger::LayerBlending blending = Nvnflinger::LayerBlending::None;
|
||||
if (mode == LibraryAppletMode::PartialForeground) {
|
||||
blending = Nvnflinger::LayerBlending::Coverage;
|
||||
}
|
||||
|
||||
const auto display_id = m_nvnflinger->OpenDisplay("Default").value();
|
||||
const auto res = m_nvnflinger->GetSystemBufferManager().Initialize(
|
||||
&m_system_shared_buffer_id, &m_system_shared_layer_id, display_id);
|
||||
m_process, &m_system_shared_buffer_id, &m_system_shared_layer_id, display_id, blending);
|
||||
|
||||
if (res.IsSuccess()) {
|
||||
m_buffer_sharing_enabled = true;
|
||||
@ -62,8 +68,12 @@ void SystemBufferManager::SetWindowVisibility(bool visible) {
|
||||
|
||||
Result SystemBufferManager::WriteAppletCaptureBuffer(bool* out_was_written,
|
||||
s32* out_fbshare_layer_index) {
|
||||
// TODO
|
||||
R_SUCCEED();
|
||||
if (!m_buffer_sharing_enabled) {
|
||||
return VI::ResultPermissionDenied;
|
||||
}
|
||||
|
||||
return m_nvnflinger->GetSystemBufferManager().WriteAppletCaptureBuffer(out_was_written,
|
||||
out_fbshare_layer_index);
|
||||
}
|
||||
|
||||
} // namespace Service::AM
|
||||
|
@ -27,7 +27,8 @@ public:
|
||||
SystemBufferManager();
|
||||
~SystemBufferManager();
|
||||
|
||||
bool Initialize(Nvnflinger::Nvnflinger* flinger, Kernel::KProcess* process, AppletId applet_id);
|
||||
bool Initialize(Nvnflinger::Nvnflinger* flinger, Kernel::KProcess* process, AppletId applet_id,
|
||||
LibraryAppletMode mode);
|
||||
|
||||
void GetSystemSharedLayerHandle(u64* out_system_shared_buffer_id,
|
||||
u64* out_system_shared_layer_id) {
|
||||
|
@ -49,6 +49,7 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
|
||||
continue;
|
||||
}
|
||||
if (session.process == process) {
|
||||
session.ref_count++;
|
||||
return session.id;
|
||||
}
|
||||
}
|
||||
@ -66,6 +67,7 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
|
||||
}
|
||||
auto& session = impl->sessions[new_id];
|
||||
session.is_active = true;
|
||||
session.ref_count = 1;
|
||||
// Optimization
|
||||
if (process->IsApplication()) {
|
||||
auto& page_table = process->GetPageTable().GetBasePageTable();
|
||||
@ -114,8 +116,11 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
|
||||
|
||||
void Container::CloseSession(SessionId session_id) {
|
||||
std::scoped_lock lk(impl->session_guard);
|
||||
impl->file.UnmapAllHandles(session_id);
|
||||
auto& session = impl->sessions[session_id.id];
|
||||
if (--session.ref_count > 0) {
|
||||
return;
|
||||
}
|
||||
impl->file.UnmapAllHandles(session_id);
|
||||
auto& smmu = impl->host1x.MemoryManager();
|
||||
if (session.has_preallocated_area) {
|
||||
const DAddr region_start = session.mapper->GetRegionStart();
|
||||
|
@ -46,6 +46,7 @@ struct Session {
|
||||
bool has_preallocated_area{};
|
||||
std::unique_ptr<HeapMapper> mapper{};
|
||||
bool is_active{};
|
||||
s32 ref_count{};
|
||||
};
|
||||
|
||||
class Container {
|
||||
@ -67,10 +68,7 @@ public:
|
||||
const SyncpointManager& GetSyncpointManager() const;
|
||||
|
||||
struct Host1xDeviceFileData {
|
||||
std::unordered_map<DeviceFD, u32> fd_to_id{};
|
||||
std::deque<u32> syncpts_accumulated{};
|
||||
u32 nvdec_next_id{};
|
||||
u32 vic_next_id{};
|
||||
};
|
||||
|
||||
Host1xDeviceFileData& Host1xDeviceFile();
|
||||
|
@ -333,9 +333,13 @@ void NvMap::UnmapAllHandles(NvCore::SessionId session_id) {
|
||||
}();
|
||||
|
||||
for (auto& [id, handle] : handles_copy) {
|
||||
if (handle->session_id.id == session_id.id) {
|
||||
FreeHandle(id, false);
|
||||
{
|
||||
std::scoped_lock lk{handle->mutex};
|
||||
if (handle->session_id.id != session_id.id || handle->dupes <= 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
FreeHandle(id, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,22 @@
|
||||
|
||||
namespace Service::Nvidia::Devices {
|
||||
|
||||
namespace {
|
||||
|
||||
Tegra::BlendMode ConvertBlending(Service::Nvnflinger::LayerBlending blending) {
|
||||
switch (blending) {
|
||||
case Service::Nvnflinger::LayerBlending::None:
|
||||
default:
|
||||
return Tegra::BlendMode::Opaque;
|
||||
case Service::Nvnflinger::LayerBlending::Premultiplied:
|
||||
return Tegra::BlendMode::Premultiplied;
|
||||
case Service::Nvnflinger::LayerBlending::Coverage:
|
||||
return Tegra::BlendMode::Coverage;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
|
||||
: nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
|
||||
nvdisp_disp0::~nvdisp_disp0() = default;
|
||||
@ -56,6 +72,7 @@ void nvdisp_disp0::Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers
|
||||
.pixel_format = layer.format,
|
||||
.transform_flags = layer.transform,
|
||||
.crop_rect = layer.crop_rect,
|
||||
.blending = ConvertBlending(layer.blending),
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < layer.acquire_fence.num_fences; i++) {
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "core/hle/service/nvdrv/core/container.h"
|
||||
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace Service::Nvidia::Devices {
|
||||
@ -21,13 +22,8 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
|
||||
switch (command.group) {
|
||||
case 0x0:
|
||||
switch (command.cmd) {
|
||||
case 0x1: {
|
||||
auto& host1x_file = core.Host1xDeviceFile();
|
||||
if (!host1x_file.fd_to_id.contains(fd)) {
|
||||
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
|
||||
}
|
||||
case 0x1:
|
||||
return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd);
|
||||
}
|
||||
case 0x2:
|
||||
return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output);
|
||||
case 0x3:
|
||||
@ -72,15 +68,12 @@ void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
|
||||
LOG_INFO(Service_NVDRV, "NVDEC video stream started");
|
||||
system.SetNVDECActive(true);
|
||||
sessions[fd] = session_id;
|
||||
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::NvDec, channel_syncpoint);
|
||||
}
|
||||
|
||||
void nvhost_nvdec::OnClose(DeviceFD fd) {
|
||||
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
|
||||
auto& host1x_file = core.Host1xDeviceFile();
|
||||
const auto iter = host1x_file.fd_to_id.find(fd);
|
||||
if (iter != host1x_file.fd_to_id.end()) {
|
||||
system.GPU().ClearCdmaInstance(iter->second);
|
||||
}
|
||||
host1x.StopDevice(fd, Tegra::Host1x::ChannelType::NvDec);
|
||||
system.SetNVDECActive(false);
|
||||
auto it = sessions.find(fd);
|
||||
if (it != sessions.end()) {
|
||||
|
@ -55,8 +55,9 @@ std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size
|
||||
|
||||
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
|
||||
NvCore::ChannelType channel_type_)
|
||||
: nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()},
|
||||
nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {
|
||||
: nvdevice{system_}, host1x{system_.Host1x()}, core{core_},
|
||||
syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
|
||||
channel_type{channel_type_} {
|
||||
auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated;
|
||||
if (syncpts_accumulated.empty()) {
|
||||
channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
|
||||
@ -95,24 +96,24 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
|
||||
offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset);
|
||||
offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
|
||||
|
||||
auto& gpu = system.GPU();
|
||||
auto* session = core.GetSession(sessions[fd]);
|
||||
|
||||
if (gpu.UseNvdec()) {
|
||||
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
|
||||
const SyncptIncr& syncpt_incr = syncpt_increments[i];
|
||||
fence_thresholds[i] =
|
||||
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
|
||||
}
|
||||
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
|
||||
const SyncptIncr& syncpt_incr = syncpt_increments[i];
|
||||
fence_thresholds[i] =
|
||||
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
|
||||
}
|
||||
|
||||
for (const auto& cmd_buffer : command_buffers) {
|
||||
const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
|
||||
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
||||
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
|
||||
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
|
||||
cmdlist.size() * sizeof(u32));
|
||||
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
|
||||
Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader,
|
||||
Core::Memory::GuestMemoryFlags::SafeRead>
|
||||
cmdlist(session->process->GetMemory(), object->address + cmd_buffer.offset,
|
||||
cmd_buffer.word_count);
|
||||
host1x.PushEntries(fd, std::move(cmdlist));
|
||||
}
|
||||
|
||||
// Some games expect command_buffers to be written back
|
||||
offset = 0;
|
||||
offset += WriteVectors(data, command_buffers, offset);
|
||||
|
@ -119,6 +119,7 @@ protected:
|
||||
|
||||
Kernel::KEvent* QueryEvent(u32 event_id) override;
|
||||
|
||||
Tegra::Host1x::Host1x& host1x;
|
||||
u32 channel_syncpoint;
|
||||
s32_le nvmap_fd{};
|
||||
u32_le submit_timeout{};
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "core/hle/service/nvdrv/core/container.h"
|
||||
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvhost_vic.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace Service::Nvidia::Devices {
|
||||
@ -21,13 +22,8 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
|
||||
switch (command.group) {
|
||||
case 0x0:
|
||||
switch (command.cmd) {
|
||||
case 0x1: {
|
||||
auto& host1x_file = core.Host1xDeviceFile();
|
||||
if (!host1x_file.fd_to_id.contains(fd)) {
|
||||
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
|
||||
}
|
||||
case 0x1:
|
||||
return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd);
|
||||
}
|
||||
case 0x2:
|
||||
return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output);
|
||||
case 0x3:
|
||||
@ -70,14 +66,11 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
|
||||
|
||||
void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
|
||||
sessions[fd] = session_id;
|
||||
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::VIC, channel_syncpoint);
|
||||
}
|
||||
|
||||
void nvhost_vic::OnClose(DeviceFD fd) {
|
||||
auto& host1x_file = core.Host1xDeviceFile();
|
||||
const auto iter = host1x_file.fd_to_id.find(fd);
|
||||
if (iter != host1x_file.fd_to_id.end()) {
|
||||
system.GPU().ClearCdmaInstance(iter->second);
|
||||
}
|
||||
host1x.StopDevice(fd, Tegra::Host1x::ChannelType::VIC);
|
||||
sessions.erase(fd);
|
||||
}
|
||||
|
||||
|
@ -14,24 +14,20 @@
|
||||
#include "core/hle/service/nvnflinger/ui/graphic_buffer.h"
|
||||
#include "core/hle/service/vi/layer/vi_layer.h"
|
||||
#include "core/hle/service/vi/vi_results.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
|
||||
namespace Service::Nvnflinger {
|
||||
|
||||
namespace {
|
||||
|
||||
Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address,
|
||||
std::unique_ptr<Kernel::KPageGroup>* out_page_group,
|
||||
Core::System& system, u32 size) {
|
||||
Result AllocateSharedBufferMemory(std::unique_ptr<Kernel::KPageGroup>* out_page_group,
|
||||
Core::System& system, u32 size) {
|
||||
using Core::Memory::YUZU_PAGESIZE;
|
||||
|
||||
// Allocate memory for the system shared buffer.
|
||||
// FIXME: Because the gmmu can only point to cpu addresses, we need
|
||||
// to map this in the application space to allow it to be used.
|
||||
// FIXME: Add proper smmu emulation.
|
||||
// FIXME: This memory belongs to vi's .data section.
|
||||
auto& kernel = system.Kernel();
|
||||
auto* process = system.ApplicationProcess();
|
||||
auto& page_table = process->GetPageTable();
|
||||
|
||||
// Hold a temporary page group reference while we try to map it.
|
||||
auto pg = std::make_unique<Kernel::KPageGroup>(
|
||||
@ -43,6 +39,30 @@ Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address,
|
||||
Kernel::KMemoryManager::EncodeOption(Kernel::KMemoryManager::Pool::Secure,
|
||||
Kernel::KMemoryManager::Direction::FromBack)));
|
||||
|
||||
// Fill the output data with red.
|
||||
for (auto& block : *pg) {
|
||||
u32* start = system.DeviceMemory().GetPointer<u32>(block.GetAddress());
|
||||
u32* end = system.DeviceMemory().GetPointer<u32>(block.GetAddress() + block.GetSize());
|
||||
|
||||
for (; start < end; start++) {
|
||||
*start = 0xFF0000FF;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the mapped page group.
|
||||
*out_page_group = std::move(pg);
|
||||
|
||||
// We succeeded.
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
Result MapSharedBufferIntoProcessAddressSpace(Common::ProcessAddress* out_map_address,
|
||||
std::unique_ptr<Kernel::KPageGroup>& pg,
|
||||
Kernel::KProcess* process, Core::System& system) {
|
||||
using Core::Memory::YUZU_PAGESIZE;
|
||||
|
||||
auto& page_table = process->GetPageTable();
|
||||
|
||||
// Get bounds of where mapping is possible.
|
||||
const VAddr alias_code_begin = GetInteger(page_table.GetAliasCodeRegionStart());
|
||||
const VAddr alias_code_size = page_table.GetAliasCodeRegionSize() / YUZU_PAGESIZE;
|
||||
@ -64,9 +84,6 @@ Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address,
|
||||
// Return failure, if necessary
|
||||
R_UNLESS(i < 64, res);
|
||||
|
||||
// Return the mapped page group.
|
||||
*out_page_group = std::move(pg);
|
||||
|
||||
// We succeeded.
|
||||
R_SUCCEED();
|
||||
}
|
||||
@ -135,6 +152,13 @@ Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, Nvidia::D
|
||||
R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size, nvmap_fd));
|
||||
}
|
||||
|
||||
void FreeHandle(u32 handle, Nvidia::Module& nvdrv, Nvidia::DeviceFD nvmap_fd) {
|
||||
auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd);
|
||||
ASSERT(nvmap != nullptr);
|
||||
|
||||
R_ASSERT(FreeNvMapHandle(*nvmap, handle, nvmap_fd));
|
||||
}
|
||||
|
||||
constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888;
|
||||
constexpr u32 SharedBufferBlockLinearBpp = 4;
|
||||
|
||||
@ -186,53 +210,97 @@ FbShareBufferManager::FbShareBufferManager(Core::System& system, Nvnflinger& fli
|
||||
|
||||
FbShareBufferManager::~FbShareBufferManager() = default;
|
||||
|
||||
Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u64 display_id) {
|
||||
Result FbShareBufferManager::Initialize(Kernel::KProcess* owner_process, u64* out_buffer_id,
|
||||
u64* out_layer_handle, u64 display_id,
|
||||
LayerBlending blending) {
|
||||
std::scoped_lock lk{m_guard};
|
||||
|
||||
// Ensure we have not already created a buffer.
|
||||
R_UNLESS(m_buffer_id == 0, VI::ResultOperationFailed);
|
||||
// Ensure we haven't already created.
|
||||
const u64 aruid = owner_process->GetProcessId();
|
||||
R_UNLESS(!m_sessions.contains(aruid), VI::ResultPermissionDenied);
|
||||
|
||||
// Allocate memory and space for the shared buffer.
|
||||
Common::ProcessAddress map_address;
|
||||
R_TRY(AllocateIoForProcessAddressSpace(std::addressof(map_address),
|
||||
std::addressof(m_buffer_page_group), m_system,
|
||||
SharedBufferSize));
|
||||
// Allocate memory for the shared buffer if needed.
|
||||
if (!m_buffer_page_group) {
|
||||
R_TRY(AllocateSharedBufferMemory(std::addressof(m_buffer_page_group), m_system,
|
||||
SharedBufferSize));
|
||||
|
||||
// Record buffer id.
|
||||
m_buffer_id = m_next_buffer_id++;
|
||||
|
||||
// Record display id.
|
||||
m_display_id = display_id;
|
||||
}
|
||||
|
||||
// Map into process.
|
||||
Common::ProcessAddress map_address{};
|
||||
R_TRY(MapSharedBufferIntoProcessAddressSpace(std::addressof(map_address), m_buffer_page_group,
|
||||
owner_process, m_system));
|
||||
|
||||
// Create new session.
|
||||
auto [it, was_emplaced] = m_sessions.emplace(aruid, FbShareSession{});
|
||||
auto& session = it->second;
|
||||
|
||||
auto& container = m_nvdrv->GetContainer();
|
||||
m_session_id = container.OpenSession(m_system.ApplicationProcess());
|
||||
m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id);
|
||||
session.session_id = container.OpenSession(owner_process);
|
||||
session.nvmap_fd = m_nvdrv->Open("/dev/nvmap", session.session_id);
|
||||
|
||||
// Create an nvmap handle for the buffer and assign the memory to it.
|
||||
R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, m_nvmap_fd,
|
||||
map_address, SharedBufferSize));
|
||||
|
||||
// Record the display id.
|
||||
m_display_id = display_id;
|
||||
R_TRY(AllocateHandleForBuffer(std::addressof(session.buffer_nvmap_handle), *m_nvdrv,
|
||||
session.nvmap_fd, map_address, SharedBufferSize));
|
||||
|
||||
// Create and open a layer for the display.
|
||||
m_layer_id = m_flinger.CreateLayer(m_display_id).value();
|
||||
m_flinger.OpenLayer(m_layer_id);
|
||||
|
||||
// Set up the buffer.
|
||||
m_buffer_id = m_next_buffer_id++;
|
||||
session.layer_id = m_flinger.CreateLayer(m_display_id, blending).value();
|
||||
m_flinger.OpenLayer(session.layer_id);
|
||||
|
||||
// Get the layer.
|
||||
VI::Layer* layer = m_flinger.FindLayer(m_display_id, m_layer_id);
|
||||
VI::Layer* layer = m_flinger.FindLayer(m_display_id, session.layer_id);
|
||||
ASSERT(layer != nullptr);
|
||||
|
||||
// Get the producer and set preallocated buffers.
|
||||
auto& producer = layer->GetBufferQueue();
|
||||
MakeGraphicBuffer(producer, 0, m_buffer_nvmap_handle);
|
||||
MakeGraphicBuffer(producer, 1, m_buffer_nvmap_handle);
|
||||
MakeGraphicBuffer(producer, 0, session.buffer_nvmap_handle);
|
||||
MakeGraphicBuffer(producer, 1, session.buffer_nvmap_handle);
|
||||
|
||||
// Assign outputs.
|
||||
*out_buffer_id = m_buffer_id;
|
||||
*out_layer_id = m_layer_id;
|
||||
*out_layer_handle = session.layer_id;
|
||||
|
||||
// We succeeded.
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
void FbShareBufferManager::Finalize(Kernel::KProcess* owner_process) {
|
||||
std::scoped_lock lk{m_guard};
|
||||
|
||||
if (m_buffer_id == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 aruid = owner_process->GetProcessId();
|
||||
const auto it = m_sessions.find(aruid);
|
||||
if (it == m_sessions.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto& session = it->second;
|
||||
|
||||
// Destroy the layer.
|
||||
m_flinger.DestroyLayer(session.layer_id);
|
||||
|
||||
// Close nvmap handle.
|
||||
FreeHandle(session.buffer_nvmap_handle, *m_nvdrv, session.nvmap_fd);
|
||||
|
||||
// Close nvmap device.
|
||||
m_nvdrv->Close(session.nvmap_fd);
|
||||
|
||||
// Close session.
|
||||
auto& container = m_nvdrv->GetContainer();
|
||||
container.CloseSession(session.session_id);
|
||||
|
||||
// Erase.
|
||||
m_sessions.erase(it);
|
||||
}
|
||||
|
||||
Result FbShareBufferManager::GetSharedBufferMemoryHandleId(u64* out_buffer_size,
|
||||
s32* out_nvmap_handle,
|
||||
SharedMemoryPoolLayout* out_pool_layout,
|
||||
@ -242,17 +310,18 @@ Result FbShareBufferManager::GetSharedBufferMemoryHandleId(u64* out_buffer_size,
|
||||
|
||||
R_UNLESS(m_buffer_id > 0, VI::ResultNotFound);
|
||||
R_UNLESS(buffer_id == m_buffer_id, VI::ResultNotFound);
|
||||
R_UNLESS(m_sessions.contains(applet_resource_user_id), VI::ResultNotFound);
|
||||
|
||||
*out_pool_layout = SharedBufferPoolLayout;
|
||||
*out_buffer_size = SharedBufferSize;
|
||||
*out_nvmap_handle = m_buffer_nvmap_handle;
|
||||
*out_nvmap_handle = m_sessions[applet_resource_user_id].buffer_nvmap_handle;
|
||||
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
Result FbShareBufferManager::GetLayerFromId(VI::Layer** out_layer, u64 layer_id) {
|
||||
// Ensure the layer id is valid.
|
||||
R_UNLESS(m_layer_id > 0 && layer_id == m_layer_id, VI::ResultNotFound);
|
||||
R_UNLESS(layer_id > 0, VI::ResultNotFound);
|
||||
|
||||
// Get the layer.
|
||||
VI::Layer* layer = m_flinger.FindLayer(m_display_id, layer_id);
|
||||
@ -309,6 +378,10 @@ Result FbShareBufferManager::PresentSharedFrameBuffer(android::Fence fence,
|
||||
android::Status::NoError,
|
||||
VI::ResultOperationFailed);
|
||||
|
||||
ON_RESULT_FAILURE {
|
||||
producer.CancelBuffer(static_cast<s32>(slot), fence);
|
||||
};
|
||||
|
||||
// Queue the buffer to the producer.
|
||||
android::QueueBufferInput input{};
|
||||
android::QueueBufferOutput output{};
|
||||
@ -342,4 +415,33 @@ Result FbShareBufferManager::GetSharedFrameBufferAcquirableEvent(Kernel::KReadab
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
Result FbShareBufferManager::WriteAppletCaptureBuffer(bool* out_was_written, s32* out_layer_index) {
|
||||
std::vector<u8> capture_buffer(m_system.GPU().GetAppletCaptureBuffer());
|
||||
Common::ScratchBuffer<u32> scratch;
|
||||
|
||||
// TODO: this could be optimized
|
||||
s64 e = -1280 * 768 * 4;
|
||||
for (auto& block : *m_buffer_page_group) {
|
||||
u8* start = m_system.DeviceMemory().GetPointer<u8>(block.GetAddress());
|
||||
u8* end = m_system.DeviceMemory().GetPointer<u8>(block.GetAddress() + block.GetSize());
|
||||
|
||||
for (; start < end; start++) {
|
||||
*start = 0;
|
||||
|
||||
if (e >= 0 && e < static_cast<s64>(capture_buffer.size())) {
|
||||
*start = capture_buffer[e];
|
||||
}
|
||||
e++;
|
||||
}
|
||||
|
||||
m_system.GPU().Host1x().MemoryManager().ApplyOpOnPointer(start, scratch, [&](DAddr addr) {
|
||||
m_system.GPU().InvalidateRegion(addr, end - start);
|
||||
});
|
||||
}
|
||||
|
||||
*out_was_written = true;
|
||||
*out_layer_index = 1;
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
} // namespace Service::Nvnflinger
|
||||
|
@ -3,9 +3,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "common/math_util.h"
|
||||
#include "core/hle/service/nvdrv/core/container.h"
|
||||
#include "core/hle/service/nvdrv/nvdata.h"
|
||||
#include "core/hle/service/nvnflinger/hwc_layer.h"
|
||||
#include "core/hle/service/nvnflinger/nvnflinger.h"
|
||||
#include "core/hle/service/nvnflinger/ui/fence.h"
|
||||
|
||||
@ -29,13 +32,18 @@ struct SharedMemoryPoolLayout {
|
||||
};
|
||||
static_assert(sizeof(SharedMemoryPoolLayout) == 0x188, "SharedMemoryPoolLayout has wrong size");
|
||||
|
||||
struct FbShareSession;
|
||||
|
||||
class FbShareBufferManager final {
|
||||
public:
|
||||
explicit FbShareBufferManager(Core::System& system, Nvnflinger& flinger,
|
||||
std::shared_ptr<Nvidia::Module> nvdrv);
|
||||
~FbShareBufferManager();
|
||||
|
||||
Result Initialize(u64* out_buffer_id, u64* out_layer_handle, u64 display_id);
|
||||
Result Initialize(Kernel::KProcess* owner_process, u64* out_buffer_id, u64* out_layer_handle,
|
||||
u64 display_id, LayerBlending blending);
|
||||
void Finalize(Kernel::KProcess* owner_process);
|
||||
|
||||
Result GetSharedBufferMemoryHandleId(u64* out_buffer_size, s32* out_nvmap_handle,
|
||||
SharedMemoryPoolLayout* out_pool_layout, u64 buffer_id,
|
||||
u64 applet_resource_user_id);
|
||||
@ -45,6 +53,8 @@ public:
|
||||
u32 transform, s32 swap_interval, u64 layer_id, s64 slot);
|
||||
Result GetSharedFrameBufferAcquirableEvent(Kernel::KReadableEvent** out_event, u64 layer_id);
|
||||
|
||||
Result WriteAppletCaptureBuffer(bool* out_was_written, s32* out_layer_index);
|
||||
|
||||
private:
|
||||
Result GetLayerFromId(VI::Layer** out_layer, u64 layer_id);
|
||||
|
||||
@ -52,11 +62,8 @@ private:
|
||||
u64 m_next_buffer_id = 1;
|
||||
u64 m_display_id = 0;
|
||||
u64 m_buffer_id = 0;
|
||||
u64 m_layer_id = 0;
|
||||
u32 m_buffer_nvmap_handle = 0;
|
||||
SharedMemoryPoolLayout m_pool_layout = {};
|
||||
Nvidia::DeviceFD m_nvmap_fd = {};
|
||||
Nvidia::NvCore::SessionId m_session_id = {};
|
||||
std::map<u64, FbShareSession> m_sessions;
|
||||
std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group;
|
||||
|
||||
std::mutex m_guard;
|
||||
@ -65,4 +72,11 @@ private:
|
||||
std::shared_ptr<Nvidia::Module> m_nvdrv;
|
||||
};
|
||||
|
||||
struct FbShareSession {
|
||||
Nvidia::DeviceFD nvmap_fd = {};
|
||||
Nvidia::NvCore::SessionId session_id = {};
|
||||
u64 layer_id = {};
|
||||
u32 buffer_nvmap_handle = 0;
|
||||
};
|
||||
|
||||
} // namespace Service::Nvnflinger
|
||||
|
@ -109,6 +109,7 @@ u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, VI::Display& display,
|
||||
.height = igbp_buffer.Height(),
|
||||
.stride = igbp_buffer.Stride(),
|
||||
.z_index = 0,
|
||||
.blending = layer.GetBlending(),
|
||||
.transform = static_cast<android::BufferTransformFlags>(item.transform),
|
||||
.crop_rect = item.crop,
|
||||
.acquire_fence = item.fence,
|
||||
|
@ -11,6 +11,18 @@
|
||||
|
||||
namespace Service::Nvnflinger {
|
||||
|
||||
// hwc_layer_t::blending values
|
||||
enum class LayerBlending : u32 {
|
||||
// No blending
|
||||
None = 0x100,
|
||||
|
||||
// ONE / ONE_MINUS_SRC_ALPHA
|
||||
Premultiplied = 0x105,
|
||||
|
||||
// SRC_ALPHA / ONE_MINUS_SRC_ALPHA
|
||||
Coverage = 0x405,
|
||||
};
|
||||
|
||||
struct HwcLayer {
|
||||
u32 buffer_handle;
|
||||
u32 offset;
|
||||
@ -19,6 +31,7 @@ struct HwcLayer {
|
||||
u32 height;
|
||||
u32 stride;
|
||||
s32 z_index;
|
||||
LayerBlending blending;
|
||||
android::BufferTransformFlags transform;
|
||||
Common::Rectangle<int> crop_rect;
|
||||
android::Fence acquire_fence;
|
||||
|
@ -157,7 +157,7 @@ bool Nvnflinger::CloseDisplay(u64 display_id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<u64> Nvnflinger::CreateLayer(u64 display_id) {
|
||||
std::optional<u64> Nvnflinger::CreateLayer(u64 display_id, LayerBlending blending) {
|
||||
const auto lock_guard = Lock();
|
||||
auto* const display = FindDisplay(display_id);
|
||||
|
||||
@ -166,13 +166,14 @@ std::optional<u64> Nvnflinger::CreateLayer(u64 display_id) {
|
||||
}
|
||||
|
||||
const u64 layer_id = next_layer_id++;
|
||||
CreateLayerAtId(*display, layer_id);
|
||||
CreateLayerAtId(*display, layer_id, blending);
|
||||
return layer_id;
|
||||
}
|
||||
|
||||
void Nvnflinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
|
||||
void Nvnflinger::CreateLayerAtId(VI::Display& display, u64 layer_id, LayerBlending blending) {
|
||||
const auto buffer_id = next_buffer_queue_id++;
|
||||
display.CreateLayer(layer_id, buffer_id, nvdrv->container);
|
||||
display.FindLayer(layer_id)->SetBlending(blending);
|
||||
}
|
||||
|
||||
bool Nvnflinger::OpenLayer(u64 layer_id) {
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "common/thread.h"
|
||||
#include "core/hle/result.h"
|
||||
#include "core/hle/service/kernel_helpers.h"
|
||||
#include "core/hle/service/nvnflinger/hwc_layer.h"
|
||||
|
||||
namespace Common {
|
||||
class Event;
|
||||
@ -72,7 +73,8 @@ public:
|
||||
/// Creates a layer on the specified display and returns the layer ID.
|
||||
///
|
||||
/// If an invalid display ID is specified, then an empty optional is returned.
|
||||
[[nodiscard]] std::optional<u64> CreateLayer(u64 display_id);
|
||||
[[nodiscard]] std::optional<u64> CreateLayer(u64 display_id,
|
||||
LayerBlending blending = LayerBlending::None);
|
||||
|
||||
/// Opens a layer on all displays for the given layer ID.
|
||||
bool OpenLayer(u64 layer_id);
|
||||
@ -128,7 +130,7 @@ private:
|
||||
[[nodiscard]] VI::Layer* FindLayer(u64 display_id, u64 layer_id);
|
||||
|
||||
/// Creates a layer with the specified layer ID in the desired display.
|
||||
void CreateLayerAtId(VI::Display& display, u64 layer_id);
|
||||
void CreateLayerAtId(VI::Display& display, u64 layer_id, LayerBlending blending);
|
||||
|
||||
void SplitVSync(std::stop_token stop_token);
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "core/hle/service/nvnflinger/hwc_layer.h"
|
||||
#include "core/hle/service/vi/layer/vi_layer.h"
|
||||
|
||||
namespace Service::VI {
|
||||
@ -8,8 +9,9 @@ namespace Service::VI {
|
||||
Layer::Layer(u64 layer_id_, u32 binder_id_, android::BufferQueueCore& core_,
|
||||
android::BufferQueueProducer& binder_,
|
||||
std::shared_ptr<android::BufferItemConsumer>&& consumer_)
|
||||
: layer_id{layer_id_}, binder_id{binder_id_}, core{core_}, binder{binder_},
|
||||
consumer{std::move(consumer_)}, open{false}, visible{true} {}
|
||||
: layer_id{layer_id_}, binder_id{binder_id_}, core{core_}, binder{binder_}, consumer{std::move(
|
||||
consumer_)},
|
||||
blending{Nvnflinger::LayerBlending::None}, open{false}, visible{true} {}
|
||||
|
||||
Layer::~Layer() = default;
|
||||
|
||||
|
@ -14,6 +14,10 @@ class BufferQueueCore;
|
||||
class BufferQueueProducer;
|
||||
} // namespace Service::android
|
||||
|
||||
namespace Service::Nvnflinger {
|
||||
enum class LayerBlending : u32;
|
||||
}
|
||||
|
||||
namespace Service::VI {
|
||||
|
||||
/// Represents a single display layer.
|
||||
@ -92,12 +96,21 @@ public:
|
||||
return !std::exchange(open, true);
|
||||
}
|
||||
|
||||
Nvnflinger::LayerBlending GetBlending() {
|
||||
return blending;
|
||||
}
|
||||
|
||||
void SetBlending(Nvnflinger::LayerBlending b) {
|
||||
blending = b;
|
||||
}
|
||||
|
||||
private:
|
||||
const u64 layer_id;
|
||||
const u32 binder_id;
|
||||
android::BufferQueueCore& core;
|
||||
android::BufferQueueProducer& binder;
|
||||
std::shared_ptr<android::BufferItemConsumer> consumer;
|
||||
Service::Nvnflinger::LayerBlending blending;
|
||||
bool open;
|
||||
bool visible;
|
||||
};
|
||||
|
@ -64,6 +64,8 @@ public:
|
||||
Memory(Memory&&) = default;
|
||||
Memory& operator=(Memory&&) = delete;
|
||||
|
||||
static constexpr bool HAS_FLUSH_INVALIDATION = false;
|
||||
|
||||
/**
|
||||
* Resets the state of the Memory system.
|
||||
*/
|
||||
|
@ -401,6 +401,14 @@ void Config::ReadNetworkValues() {
|
||||
EndGroup();
|
||||
}
|
||||
|
||||
void Config::ReadLibraryAppletValues() {
|
||||
BeginGroup(Settings::TranslateCategory(Settings::Category::LibraryApplet));
|
||||
|
||||
ReadCategory(Settings::Category::LibraryApplet);
|
||||
|
||||
EndGroup();
|
||||
}
|
||||
|
||||
void Config::ReadValues() {
|
||||
if (global) {
|
||||
ReadDataStorageValues();
|
||||
@ -410,6 +418,7 @@ void Config::ReadValues() {
|
||||
ReadServiceValues();
|
||||
ReadWebServiceValues();
|
||||
ReadMiscellaneousValues();
|
||||
ReadLibraryAppletValues();
|
||||
}
|
||||
ReadControlValues();
|
||||
ReadCoreValues();
|
||||
@ -511,6 +520,7 @@ void Config::SaveValues() {
|
||||
SaveNetworkValues();
|
||||
SaveWebServiceValues();
|
||||
SaveMiscellaneousValues();
|
||||
SaveLibraryAppletValues();
|
||||
} else {
|
||||
LOG_DEBUG(Config, "Saving only generic configuration values");
|
||||
}
|
||||
@ -691,6 +701,14 @@ void Config::SaveWebServiceValues() {
|
||||
EndGroup();
|
||||
}
|
||||
|
||||
void Config::SaveLibraryAppletValues() {
|
||||
BeginGroup(Settings::TranslateCategory(Settings::Category::LibraryApplet));
|
||||
|
||||
WriteCategory(Settings::Category::LibraryApplet);
|
||||
|
||||
EndGroup();
|
||||
}
|
||||
|
||||
bool Config::ReadBooleanSetting(const std::string& key, const std::optional<bool> default_value) {
|
||||
std::string full_key = GetFullKey(key, false);
|
||||
if (!default_value.has_value()) {
|
||||
|
@ -88,6 +88,7 @@ protected:
|
||||
void ReadSystemValues();
|
||||
void ReadWebServiceValues();
|
||||
void ReadNetworkValues();
|
||||
void ReadLibraryAppletValues();
|
||||
|
||||
// Read platform specific sections
|
||||
virtual void ReadHidbusValues() = 0;
|
||||
@ -121,6 +122,7 @@ protected:
|
||||
void SaveScreenshotValues();
|
||||
void SaveSystemValues();
|
||||
void SaveWebServiceValues();
|
||||
void SaveLibraryAppletValues();
|
||||
|
||||
// Save platform specific sections
|
||||
virtual void SaveHidbusValues() = 0;
|
||||
|
@ -18,6 +18,7 @@ add_library(video_core STATIC
|
||||
buffer_cache/usage_tracker.h
|
||||
buffer_cache/word_manager.h
|
||||
cache_types.h
|
||||
capture.h
|
||||
cdma_pusher.cpp
|
||||
cdma_pusher.h
|
||||
compatible_formats.cpp
|
||||
@ -59,8 +60,8 @@ add_library(video_core STATIC
|
||||
framebuffer_config.h
|
||||
fsr.cpp
|
||||
fsr.h
|
||||
host1x/codecs/codec.cpp
|
||||
host1x/codecs/codec.h
|
||||
host1x/codecs/decoder.cpp
|
||||
host1x/codecs/decoder.h
|
||||
host1x/codecs/h264.cpp
|
||||
host1x/codecs/h264.h
|
||||
host1x/codecs/vp8.cpp
|
||||
@ -79,8 +80,6 @@ add_library(video_core STATIC
|
||||
host1x/nvdec.cpp
|
||||
host1x/nvdec.h
|
||||
host1x/nvdec_common.h
|
||||
host1x/sync_manager.cpp
|
||||
host1x/sync_manager.h
|
||||
host1x/syncpoint_manager.cpp
|
||||
host1x/syncpoint_manager.h
|
||||
host1x/vic.cpp
|
||||
@ -101,6 +100,7 @@ add_library(video_core STATIC
|
||||
memory_manager.cpp
|
||||
memory_manager.h
|
||||
precompiled_headers.h
|
||||
present.h
|
||||
pte_kind.h
|
||||
query_cache/bank_base.h
|
||||
query_cache/query_base.h
|
||||
|
36
src/video_core/capture.h
Executable file
36
src/video_core/capture.h
Executable file
@ -0,0 +1,36 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/frontend/framebuffer_layout.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace VideoCore::Capture {
|
||||
|
||||
constexpr u32 BlockHeight = 4;
|
||||
constexpr u32 BlockDepth = 0;
|
||||
constexpr u32 BppLog2 = 2;
|
||||
|
||||
constexpr auto PixelFormat = Surface::PixelFormat::B8G8R8A8_UNORM;
|
||||
|
||||
constexpr auto LinearWidth = Layout::ScreenUndocked::Width;
|
||||
constexpr auto LinearHeight = Layout::ScreenUndocked::Height;
|
||||
constexpr auto LinearDepth = 1U;
|
||||
constexpr auto BytesPerPixel = 4U;
|
||||
|
||||
constexpr auto TiledWidth = LinearWidth;
|
||||
constexpr auto TiledHeight = Common::AlignUpLog2(LinearHeight, BlockHeight + BlockDepth + BppLog2);
|
||||
constexpr auto TiledSize = TiledWidth * TiledHeight * (1 << BppLog2);
|
||||
|
||||
constexpr Layout::FramebufferLayout Layout{
|
||||
.width = LinearWidth,
|
||||
.height = LinearHeight,
|
||||
.screen = {0, 0, LinearWidth, LinearHeight},
|
||||
.is_srgb = false,
|
||||
};
|
||||
|
||||
} // namespace VideoCore::Capture
|
@ -2,136 +2,130 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <bit>
|
||||
|
||||
#include "common/thread.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/cdma_pusher.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/host1x/control.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/host1x/nvdec.h"
|
||||
#include "video_core/host1x/nvdec_common.h"
|
||||
#include "video_core/host1x/sync_manager.h"
|
||||
#include "video_core/host1x/vic.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra {
|
||||
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_)
|
||||
: host1x{host1x_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(host1x)),
|
||||
vic_processor(std::make_unique<Host1x::Vic>(host1x, nvdec_processor)),
|
||||
host1x_processor(std::make_unique<Host1x::Control>(host1x)),
|
||||
sync_manager(std::make_unique<Host1x::SyncptIncrManager>(host1x)) {}
|
||||
|
||||
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
|
||||
: host1x{host1x_}, memory_manager{host1x.GMMU()},
|
||||
host_processor{std::make_unique<Host1x::Control>(host1x_)}, current_class{
|
||||
static_cast<ChClassId>(id)} {
|
||||
thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); });
|
||||
}
|
||||
|
||||
CDmaPusher::~CDmaPusher() = default;
|
||||
|
||||
void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) {
|
||||
for (const auto& value : entries) {
|
||||
if (mask != 0) {
|
||||
const auto lbs = static_cast<u32>(std::countr_zero(mask));
|
||||
mask &= ~(1U << lbs);
|
||||
ExecuteCommand(offset + lbs, value.raw);
|
||||
continue;
|
||||
} else if (count != 0) {
|
||||
--count;
|
||||
ExecuteCommand(offset, value.raw);
|
||||
if (incrementing) {
|
||||
++offset;
|
||||
void CDmaPusher::ProcessEntries(std::stop_token stop_token) {
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
||||
ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0};
|
||||
u32 count{};
|
||||
u32 method_offset{};
|
||||
u32 mask{};
|
||||
bool incrementing{};
|
||||
|
||||
while (!stop_token.stop_requested()) {
|
||||
{
|
||||
std::unique_lock l{command_mutex};
|
||||
Common::CondvarWait(command_cv, l, stop_token,
|
||||
[this]() { return command_lists.size() > 0; });
|
||||
if (stop_token.stop_requested()) {
|
||||
return;
|
||||
}
|
||||
continue;
|
||||
|
||||
command_list = std::move(command_lists.front());
|
||||
command_lists.pop_front();
|
||||
}
|
||||
const auto mode = value.submission_mode.Value();
|
||||
switch (mode) {
|
||||
case ChSubmissionMode::SetClass: {
|
||||
mask = value.value & 0x3f;
|
||||
offset = value.method_offset;
|
||||
current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
|
||||
break;
|
||||
}
|
||||
case ChSubmissionMode::Incrementing:
|
||||
case ChSubmissionMode::NonIncrementing:
|
||||
count = value.value;
|
||||
offset = value.method_offset;
|
||||
incrementing = mode == ChSubmissionMode::Incrementing;
|
||||
break;
|
||||
case ChSubmissionMode::Mask:
|
||||
mask = value.value;
|
||||
offset = value.method_offset;
|
||||
break;
|
||||
case ChSubmissionMode::Immediate: {
|
||||
const u32 data = value.value & 0xfff;
|
||||
offset = value.method_offset;
|
||||
ExecuteCommand(offset, data);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
|
||||
break;
|
||||
|
||||
size_t i = 0;
|
||||
for (const auto value : command_list) {
|
||||
i++;
|
||||
if (mask != 0) {
|
||||
const auto lbs = static_cast<u32>(std::countr_zero(mask));
|
||||
mask &= ~(1U << lbs);
|
||||
ExecuteCommand(method_offset + lbs, value.raw);
|
||||
continue;
|
||||
} else if (count != 0) {
|
||||
--count;
|
||||
ExecuteCommand(method_offset, value.raw);
|
||||
if (incrementing) {
|
||||
++method_offset;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const auto mode = value.submission_mode.Value();
|
||||
switch (mode) {
|
||||
case ChSubmissionMode::SetClass: {
|
||||
mask = value.value & 0x3f;
|
||||
method_offset = value.method_offset;
|
||||
current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
|
||||
break;
|
||||
}
|
||||
case ChSubmissionMode::Incrementing:
|
||||
case ChSubmissionMode::NonIncrementing:
|
||||
count = value.value;
|
||||
method_offset = value.method_offset;
|
||||
incrementing = mode == ChSubmissionMode::Incrementing;
|
||||
break;
|
||||
case ChSubmissionMode::Mask:
|
||||
mask = value.value;
|
||||
method_offset = value.method_offset;
|
||||
break;
|
||||
case ChSubmissionMode::Immediate: {
|
||||
const u32 data = value.value & 0xfff;
|
||||
method_offset = value.method_offset;
|
||||
ExecuteCommand(method_offset, data);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Bad command at index {} (bytes 0x{:X}), buffer size {}", i - 1,
|
||||
(i - 1) * sizeof(u32), command_list.size());
|
||||
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!",
|
||||
static_cast<u32>(mode));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
|
||||
void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
|
||||
switch (current_class) {
|
||||
case ChClassId::NvDec:
|
||||
ThiStateWrite(nvdec_thi_state, offset, data);
|
||||
switch (static_cast<ThiMethod>(offset)) {
|
||||
case ThiMethod::IncSyncpt: {
|
||||
LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
|
||||
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
|
||||
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
|
||||
if (cond == 0) {
|
||||
sync_manager->Increment(syncpoint_id);
|
||||
} else {
|
||||
sync_manager->SignalDone(
|
||||
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ThiMethod::SetMethod1:
|
||||
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
|
||||
static_cast<u32>(nvdec_thi_state.method_0));
|
||||
nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ChClassId::GraphicsVic:
|
||||
ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
|
||||
switch (static_cast<ThiMethod>(state_offset)) {
|
||||
case ThiMethod::IncSyncpt: {
|
||||
LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
|
||||
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
|
||||
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
|
||||
if (cond == 0) {
|
||||
sync_manager->Increment(syncpoint_id);
|
||||
} else {
|
||||
sync_manager->SignalDone(
|
||||
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ThiMethod::SetMethod1:
|
||||
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
|
||||
static_cast<u32>(vic_thi_state.method_0), data);
|
||||
vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
|
||||
data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ChClassId::Control:
|
||||
// This device is mainly for syncpoint synchronization
|
||||
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
|
||||
host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
|
||||
LOG_TRACE(Service_NVDRV, "Class {} method 0x{:X} arg 0x{:X}",
|
||||
static_cast<u32>(current_class), method, arg);
|
||||
host_processor->ProcessMethod(static_cast<Host1x::Control::Method>(method), arg);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
|
||||
break;
|
||||
thi_regs.reg_array[method] = arg;
|
||||
switch (static_cast<ThiMethod>(method)) {
|
||||
case ThiMethod::IncSyncpt: {
|
||||
const auto syncpoint_id = static_cast<u32>(arg & 0xFF);
|
||||
[[maybe_unused]] const auto cond = static_cast<u32>((arg >> 8) & 0xFF);
|
||||
LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}",
|
||||
static_cast<u32>(current_class), syncpoint_id, cond);
|
||||
auto& syncpoint_manager = host1x.GetSyncpointManager();
|
||||
syncpoint_manager.IncrementGuest(syncpoint_id);
|
||||
syncpoint_manager.IncrementHost(syncpoint_id);
|
||||
break;
|
||||
}
|
||||
case ThiMethod::SetMethod1:
|
||||
LOG_TRACE(Service_NVDRV, "Class {} method 0x{:X} arg 0x{:X}",
|
||||
static_cast<u32>(current_class), static_cast<u32>(thi_regs.method_0), arg);
|
||||
ProcessMethod(thi_regs.method_0, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, u32 argument) {
|
||||
u8* const offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
|
||||
std::memcpy(offset_ptr, &argument, sizeof(u32));
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
@ -3,12 +3,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
@ -62,23 +68,31 @@ struct ChCommand {
|
||||
std::vector<u32> arguments;
|
||||
};
|
||||
|
||||
using ChCommandHeaderList = std::vector<ChCommandHeader>;
|
||||
using ChCommandHeaderList =
|
||||
Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader, Core::Memory::GuestMemoryFlags::SafeRead>;
|
||||
|
||||
struct ThiRegisters {
|
||||
u32_le increment_syncpt{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32_le increment_syncpt_error{};
|
||||
u32_le ctx_switch_incremement_syncpt{};
|
||||
INSERT_PADDING_WORDS(4);
|
||||
u32_le ctx_switch{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32_le ctx_syncpt_eof{};
|
||||
INSERT_PADDING_WORDS(5);
|
||||
u32_le method_0{};
|
||||
u32_le method_1{};
|
||||
INSERT_PADDING_WORDS(12);
|
||||
u32_le int_status{};
|
||||
u32_le int_mask{};
|
||||
static constexpr std::size_t NUM_REGS = 0x20;
|
||||
|
||||
union {
|
||||
struct {
|
||||
u32_le increment_syncpt;
|
||||
INSERT_PADDING_WORDS_NOINIT(1);
|
||||
u32_le increment_syncpt_error;
|
||||
u32_le ctx_switch_incremement_syncpt;
|
||||
INSERT_PADDING_WORDS_NOINIT(4);
|
||||
u32_le ctx_switch;
|
||||
INSERT_PADDING_WORDS_NOINIT(1);
|
||||
u32_le ctx_syncpt_eof;
|
||||
INSERT_PADDING_WORDS_NOINIT(5);
|
||||
u32_le method_0;
|
||||
u32_le method_1;
|
||||
INSERT_PADDING_WORDS_NOINIT(12);
|
||||
u32_le int_status;
|
||||
u32_le int_mask;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
};
|
||||
|
||||
enum class ThiMethod : u32 {
|
||||
@ -89,32 +103,39 @@ enum class ThiMethod : u32 {
|
||||
|
||||
class CDmaPusher {
|
||||
public:
|
||||
explicit CDmaPusher(Host1x::Host1x& host1x);
|
||||
~CDmaPusher();
|
||||
CDmaPusher() = delete;
|
||||
virtual ~CDmaPusher();
|
||||
|
||||
/// Process the command entry
|
||||
void ProcessEntries(ChCommandHeaderList&& entries);
|
||||
void PushEntries(ChCommandHeaderList&& entries) {
|
||||
std::scoped_lock l{command_mutex};
|
||||
command_lists.push_back(std::move(entries));
|
||||
command_cv.notify_one();
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit CDmaPusher(Host1x::Host1x& host1x, s32 id);
|
||||
|
||||
virtual void ProcessMethod(u32 method, u32 arg) = 0;
|
||||
|
||||
Host1x::Host1x& host1x;
|
||||
Tegra::MemoryManager& memory_manager;
|
||||
|
||||
private:
|
||||
/// Process the command entry
|
||||
void ProcessEntries(std::stop_token stop_token);
|
||||
|
||||
/// Invoke command class devices to execute the command based on the current state
|
||||
void ExecuteCommand(u32 state_offset, u32 data);
|
||||
|
||||
/// Write arguments value to the ThiRegisters member at the specified offset
|
||||
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
|
||||
std::unique_ptr<Host1x::Control> host_processor;
|
||||
|
||||
Host1x::Host1x& host1x;
|
||||
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
|
||||
std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
|
||||
std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
|
||||
std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
|
||||
ChClassId current_class{};
|
||||
ThiRegisters vic_thi_state{};
|
||||
ThiRegisters nvdec_thi_state{};
|
||||
std::mutex command_mutex;
|
||||
std::condition_variable_any command_cv;
|
||||
std::deque<ChCommandHeaderList> command_lists;
|
||||
std::jthread thread;
|
||||
|
||||
u32 count{};
|
||||
u32 offset{};
|
||||
u32 mask{};
|
||||
bool incrementing{};
|
||||
ThiRegisters thi_regs{};
|
||||
ChClassId current_class;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
@ -11,6 +11,12 @@
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
enum class BlendMode {
|
||||
Opaque,
|
||||
Premultiplied,
|
||||
Coverage,
|
||||
};
|
||||
|
||||
/**
|
||||
* Struct describing framebuffer configuration
|
||||
*/
|
||||
@ -23,6 +29,7 @@ struct FramebufferConfig {
|
||||
Service::android::PixelFormat pixel_format{};
|
||||
Service::android::BufferTransformFlags transform_flags{};
|
||||
Common::Rectangle<int> crop_rect{};
|
||||
BlendMode blending{};
|
||||
};
|
||||
|
||||
Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width,
|
||||
|
@ -250,30 +250,6 @@ struct GPU::Impl {
|
||||
gpu_thread.SubmitList(channel, std::move(entries));
|
||||
}
|
||||
|
||||
/// Push GPU command buffer entries to be processed
|
||||
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
|
||||
if (!use_nvdec) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cdma_pushers.contains(id)) {
|
||||
cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(host1x));
|
||||
}
|
||||
|
||||
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
|
||||
// TODO(ameerj): RE proper async nvdec operation
|
||||
// gpu_thread.SubmitCommandBuffer(std::move(entries));
|
||||
cdma_pushers[id]->ProcessEntries(std::move(entries));
|
||||
}
|
||||
|
||||
/// Frees the CDMAPusher instance to free up resources
|
||||
void ClearCdmaInstance(u32 id) {
|
||||
const auto iter = cdma_pushers.find(id);
|
||||
if (iter != cdma_pushers.end()) {
|
||||
cdma_pushers.erase(iter);
|
||||
}
|
||||
}
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
void FlushRegion(DAddr addr, u64 size) {
|
||||
gpu_thread.FlushRegion(addr, size);
|
||||
@ -347,11 +323,21 @@ struct GPU::Impl {
|
||||
WaitForSyncOperation(wait_fence);
|
||||
}
|
||||
|
||||
std::vector<u8> GetAppletCaptureBuffer() {
|
||||
std::vector<u8> out;
|
||||
|
||||
const auto wait_fence =
|
||||
RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
|
||||
gpu_thread.TickGPU();
|
||||
WaitForSyncOperation(wait_fence);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
GPU& gpu;
|
||||
Core::System& system;
|
||||
Host1x::Host1x& host1x;
|
||||
|
||||
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
|
||||
std::unique_ptr<VideoCore::RendererBase> renderer;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
const bool use_nvdec;
|
||||
@ -505,6 +491,10 @@ void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
|
||||
impl->RequestComposite(std::move(layers), std::move(fences));
|
||||
}
|
||||
|
||||
std::vector<u8> GPU::GetAppletCaptureBuffer() {
|
||||
return impl->GetAppletCaptureBuffer();
|
||||
}
|
||||
|
||||
u64 GPU::GetTicks() const {
|
||||
return impl->GetTicks();
|
||||
}
|
||||
@ -541,14 +531,6 @@ void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
|
||||
impl->PushGPUEntries(channel, std::move(entries));
|
||||
}
|
||||
|
||||
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
|
||||
impl->PushCommandBuffer(id, entries);
|
||||
}
|
||||
|
||||
void GPU::ClearCdmaInstance(u32 id) {
|
||||
impl->ClearCdmaInstance(id);
|
||||
}
|
||||
|
||||
VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) {
|
||||
return impl->OnCPURead(addr, size);
|
||||
}
|
||||
|
@ -215,6 +215,8 @@ public:
|
||||
void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
|
||||
std::vector<Service::Nvidia::NvFence>&& fences);
|
||||
|
||||
std::vector<u8> GetAppletCaptureBuffer();
|
||||
|
||||
/// Performs any additional setup necessary in order to begin GPU emulation.
|
||||
/// This can be used to launch any necessary threads and register any necessary
|
||||
/// core timing events.
|
||||
@ -232,15 +234,6 @@ public:
|
||||
/// Push GPU command entries to be processed
|
||||
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
|
||||
|
||||
/// Push GPU command buffer entries to be processed
|
||||
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
|
||||
|
||||
/// Frees the CDMAPusher instance to free up resources
|
||||
void ClearCdmaInstance(u32 id);
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
[[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size);
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
@ -9,8 +9,10 @@
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_)
|
||||
: host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_} {}
|
||||
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_,
|
||||
Host1x::FrameQueue& frame_queue_)
|
||||
: host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{
|
||||
frame_queue_} {}
|
||||
|
||||
Decoder::~Decoder() = default;
|
||||
|
||||
@ -43,11 +45,11 @@ void Decoder::Decode() {
|
||||
}
|
||||
|
||||
if (UsingDecodeOrder()) {
|
||||
decode_order_frames.insert_or_assign(luma_top, std::move(frame));
|
||||
decode_order_frames.insert_or_assign(luma_bottom, std::move(frame_copy));
|
||||
frame_queue.PushDecodeOrder(id, luma_top, std::move(frame));
|
||||
frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy));
|
||||
} else {
|
||||
presentation_order_frames.push(std::move(frame));
|
||||
presentation_order_frames.push(std::move(frame_copy));
|
||||
frame_queue.PushPresentOrder(id, luma_top, std::move(frame));
|
||||
frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy));
|
||||
}
|
||||
} else {
|
||||
auto [luma_offset, chroma_offset] = GetProgressiveOffsets();
|
||||
@ -57,9 +59,9 @@ void Decoder::Decode() {
|
||||
}
|
||||
|
||||
if (UsingDecodeOrder()) {
|
||||
decode_order_frames.insert_or_assign(luma_offset, std::move(frame));
|
||||
frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame));
|
||||
} else {
|
||||
presentation_order_frames.push(std::move(frame));
|
||||
frame_queue.PushPresentOrder(id, luma_offset, std::move(frame));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
@ -17,6 +18,7 @@ namespace Tegra {
|
||||
|
||||
namespace Host1x {
|
||||
class Host1x;
|
||||
class FrameQueue;
|
||||
} // namespace Host1x
|
||||
|
||||
class Decoder {
|
||||
@ -30,23 +32,6 @@ public:
|
||||
return decode_api.UsingDecodeOrder();
|
||||
}
|
||||
|
||||
std::shared_ptr<FFmpeg::Frame> GetFrame(u64 luma_offset) {
|
||||
if (UsingDecodeOrder()) {
|
||||
auto it = decode_order_frames.find(luma_offset);
|
||||
if (it == decode_order_frames.end()) {
|
||||
return {};
|
||||
}
|
||||
return decode_order_frames.extract(it).mapped();
|
||||
}
|
||||
|
||||
if (presentation_order_frames.size() == 0) {
|
||||
return {};
|
||||
}
|
||||
auto frame = std::move(presentation_order_frames.front());
|
||||
presentation_order_frames.pop();
|
||||
return frame;
|
||||
}
|
||||
|
||||
/// Returns the value of current_codec
|
||||
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const {
|
||||
return codec;
|
||||
@ -57,7 +42,8 @@ public:
|
||||
|
||||
protected:
|
||||
explicit Decoder(Host1x::Host1x& host1x, s32 id,
|
||||
const Host1x::NvdecCommon::NvdecRegisters& regs);
|
||||
const Host1x::NvdecCommon::NvdecRegisters& regs,
|
||||
Host1x::FrameQueue& frame_queue);
|
||||
|
||||
virtual std::span<const u8> ComposeFrame() = 0;
|
||||
virtual std::tuple<u64, u64> GetProgressiveOffsets() = 0;
|
||||
@ -68,12 +54,10 @@ protected:
|
||||
Tegra::MemoryManager& memory_manager;
|
||||
const Host1x::NvdecCommon::NvdecRegisters& regs;
|
||||
s32 id;
|
||||
Host1x::FrameQueue& frame_queue;
|
||||
Host1x::NvdecCommon::VideoCodec codec;
|
||||
FFmpeg::DecodeApi decode_api;
|
||||
bool initialized{};
|
||||
std::queue<std::shared_ptr<FFmpeg::Frame>> presentation_order_frames;
|
||||
std::unordered_map<u64, std::shared_ptr<FFmpeg::Frame>> decode_order_frames;
|
||||
|
||||
bool vp9_hidden_frame{};
|
||||
};
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra::Decoder {
|
||||
namespace Tegra::Decoders {
|
||||
namespace {
|
||||
// ZigZag LUTs from libavcodec.
|
||||
constexpr std::array<u8, 64> zig_zag_direct{
|
||||
@ -25,23 +25,56 @@ constexpr std::array<u8, 16> zig_zag_scan{
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
|
||||
H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||
Host1x::FrameQueue& frame_queue_)
|
||||
: Decoder{host1x_, id_, regs_, frame_queue_} {
|
||||
codec = Host1x::NvdecCommon::VideoCodec::H264;
|
||||
initialized = decode_api.Initialize(codec);
|
||||
}
|
||||
|
||||
H264::~H264() = default;
|
||||
|
||||
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
|
||||
size_t* out_configuration_size, bool is_first_frame) {
|
||||
H264DecoderContext context;
|
||||
host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
|
||||
std::tuple<u64, u64> H264::GetProgressiveOffsets() {
|
||||
auto pic_idx{current_context.h264_parameter_set.curr_pic_idx};
|
||||
auto luma{regs.surface_luma_offsets[pic_idx].Address() +
|
||||
current_context.h264_parameter_set.luma_frame_offset.Address()};
|
||||
auto chroma{regs.surface_chroma_offsets[pic_idx].Address() +
|
||||
current_context.h264_parameter_set.chroma_frame_offset.Address()};
|
||||
return {luma, chroma};
|
||||
}
|
||||
|
||||
const s64 frame_number = context.h264_parameter_set.frame_number.Value();
|
||||
std::tuple<u64, u64, u64, u64> H264::GetInterlacedOffsets() {
|
||||
auto pic_idx{current_context.h264_parameter_set.curr_pic_idx};
|
||||
auto luma_top{regs.surface_luma_offsets[pic_idx].Address() +
|
||||
current_context.h264_parameter_set.luma_top_offset.Address()};
|
||||
auto luma_bottom{regs.surface_luma_offsets[pic_idx].Address() +
|
||||
current_context.h264_parameter_set.luma_bot_offset.Address()};
|
||||
auto chroma_top{regs.surface_chroma_offsets[pic_idx].Address() +
|
||||
current_context.h264_parameter_set.chroma_top_offset.Address()};
|
||||
auto chroma_bottom{regs.surface_chroma_offsets[pic_idx].Address() +
|
||||
current_context.h264_parameter_set.chroma_bot_offset.Address()};
|
||||
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
|
||||
}
|
||||
|
||||
bool H264::IsInterlaced() {
|
||||
return current_context.h264_parameter_set.luma_top_offset.Address() != 0 ||
|
||||
current_context.h264_parameter_set.luma_bot_offset.Address() != 0;
|
||||
}
|
||||
|
||||
std::span<const u8> H264::ComposeFrame() {
|
||||
memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context,
|
||||
sizeof(H264DecoderContext));
|
||||
|
||||
const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();
|
||||
if (!is_first_frame && frame_number != 0) {
|
||||
frame.resize_destructive(context.stream_len);
|
||||
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
|
||||
*out_configuration_size = 0;
|
||||
return frame;
|
||||
frame_scratch.resize_destructive(current_context.stream_len);
|
||||
memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(),
|
||||
frame_scratch.size());
|
||||
return frame_scratch;
|
||||
}
|
||||
|
||||
is_first_frame = false;
|
||||
|
||||
// Encode header
|
||||
H264BitWriter writer{};
|
||||
writer.WriteU(1, 24);
|
||||
@ -53,7 +86,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
|
||||
writer.WriteU(31, 8);
|
||||
writer.WriteUe(0);
|
||||
const u32 chroma_format_idc =
|
||||
static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
|
||||
static_cast<u32>(current_context.h264_parameter_set.chroma_format_idc.Value());
|
||||
writer.WriteUe(chroma_format_idc);
|
||||
if (chroma_format_idc == 3) {
|
||||
writer.WriteBit(false);
|
||||
@ -61,42 +94,44 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
|
||||
writer.WriteBit(current_context.qpprime_y_zero_transform_bypass_flag.Value() != 0);
|
||||
writer.WriteBit(false); // Scaling matrix present flag
|
||||
|
||||
writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
|
||||
writer.WriteUe(
|
||||
static_cast<u32>(current_context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
|
||||
|
||||
const auto order_cnt_type =
|
||||
static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
|
||||
static_cast<u32>(current_context.h264_parameter_set.pic_order_cnt_type.Value());
|
||||
writer.WriteUe(order_cnt_type);
|
||||
if (order_cnt_type == 0) {
|
||||
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
|
||||
writer.WriteUe(current_context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
|
||||
} else if (order_cnt_type == 1) {
|
||||
writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
|
||||
|
||||
writer.WriteSe(0);
|
||||
writer.WriteSe(0);
|
||||
writer.WriteUe(0);
|
||||
}
|
||||
|
||||
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
|
||||
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
|
||||
const s32 pic_height = current_context.h264_parameter_set.frame_height_in_mbs /
|
||||
(current_context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
|
||||
|
||||
// TODO (ameerj): Where do we get this number, it seems to be particular for each stream
|
||||
const auto nvdec_decoding = Settings::values.nvdec_emulation.GetValue();
|
||||
const bool uses_gpu_decoding = nvdec_decoding == Settings::NvdecEmulation::Gpu;
|
||||
const u32 max_num_ref_frames = uses_gpu_decoding ? 6u : 16u;
|
||||
u32 max_num_ref_frames =
|
||||
std::max(std::max(current_context.h264_parameter_set.num_refidx_l0_default_active,
|
||||
current_context.h264_parameter_set.num_refidx_l1_default_active) +
|
||||
1,
|
||||
4);
|
||||
writer.WriteUe(max_num_ref_frames);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
|
||||
writer.WriteUe(current_context.h264_parameter_set.pic_width_in_mbs - 1);
|
||||
writer.WriteUe(pic_height - 1);
|
||||
writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.frame_mbs_only_flag != 0);
|
||||
|
||||
if (!context.h264_parameter_set.frame_mbs_only_flag) {
|
||||
writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
|
||||
if (!current_context.h264_parameter_set.frame_mbs_only_flag) {
|
||||
writer.WriteBit(current_context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
|
||||
}
|
||||
|
||||
writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
|
||||
writer.WriteBit(false); // Frame cropping flag
|
||||
writer.WriteBit(false); // VUI parameter present flag
|
||||
|
||||
@ -111,57 +146,59 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
|
||||
writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.pic_order_present_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.entropy_coding_mode_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.pic_order_present_flag != 0);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
|
||||
writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
|
||||
writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
|
||||
s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
|
||||
writer.WriteUe(current_context.h264_parameter_set.num_refidx_l0_default_active);
|
||||
writer.WriteUe(current_context.h264_parameter_set.num_refidx_l1_default_active);
|
||||
writer.WriteBit(current_context.h264_parameter_set.flags.weighted_pred.Value() != 0);
|
||||
writer.WriteU(static_cast<s32>(current_context.h264_parameter_set.weighted_bipred_idc.Value()),
|
||||
2);
|
||||
s32 pic_init_qp =
|
||||
static_cast<s32>(current_context.h264_parameter_set.pic_init_qp_minus26.Value());
|
||||
writer.WriteSe(pic_init_qp);
|
||||
writer.WriteSe(0);
|
||||
s32 chroma_qp_index_offset =
|
||||
static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
|
||||
static_cast<s32>(current_context.h264_parameter_set.chroma_qp_index_offset.Value());
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset);
|
||||
writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
|
||||
writer.WriteBit(current_context.h264_parameter_set.transform_8x8_mode_flag != 0);
|
||||
|
||||
writer.WriteBit(true); // pic_scaling_matrix_present_flag
|
||||
|
||||
for (s32 index = 0; index < 6; index++) {
|
||||
writer.WriteBit(true);
|
||||
std::span<const u8> matrix{context.weight_scale};
|
||||
writer.WriteScalingList(scan, matrix, index * 16, 16);
|
||||
std::span<const u8> matrix{current_context.weight_scale_4x4};
|
||||
writer.WriteScalingList(scan_scratch, matrix, index * 16, 16);
|
||||
}
|
||||
|
||||
if (context.h264_parameter_set.transform_8x8_mode_flag) {
|
||||
if (current_context.h264_parameter_set.transform_8x8_mode_flag) {
|
||||
for (s32 index = 0; index < 2; index++) {
|
||||
writer.WriteBit(true);
|
||||
std::span<const u8> matrix{context.weight_scale_8x8};
|
||||
writer.WriteScalingList(scan, matrix, index * 64, 64);
|
||||
std::span<const u8> matrix{current_context.weight_scale_8x8};
|
||||
writer.WriteScalingList(scan_scratch, matrix, index * 64, 64);
|
||||
}
|
||||
}
|
||||
|
||||
s32 chroma_qp_index_offset2 =
|
||||
static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
|
||||
static_cast<s32>(current_context.h264_parameter_set.second_chroma_qp_index_offset.Value());
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset2);
|
||||
|
||||
writer.End();
|
||||
|
||||
const auto& encoded_header = writer.GetByteArray();
|
||||
frame.resize(encoded_header.size() + context.stream_len);
|
||||
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
|
||||
frame_scratch.resize(encoded_header.size() + current_context.stream_len);
|
||||
std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size());
|
||||
|
||||
*out_configuration_size = encoded_header.size();
|
||||
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(),
|
||||
context.stream_len);
|
||||
memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(),
|
||||
frame_scratch.data() + encoded_header.size(),
|
||||
current_context.stream_len);
|
||||
|
||||
return frame;
|
||||
return frame_scratch;
|
||||
}
|
||||
|
||||
H264BitWriter::H264BitWriter() = default;
|
||||
@ -278,4 +315,4 @@ void H264BitWriter::Flush() {
|
||||
buffer = 0;
|
||||
buffer_pos = 0;
|
||||
}
|
||||
} // namespace Tegra::Decoder
|
||||
} // namespace Tegra::Decoders
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "video_core/host1x/codecs/decoder.h"
|
||||
#include "video_core/host1x/nvdec_common.h"
|
||||
|
||||
namespace Tegra {
|
||||
@ -18,7 +19,7 @@ namespace Host1x {
|
||||
class Host1x;
|
||||
} // namespace Host1x
|
||||
|
||||
namespace Decoder {
|
||||
namespace Decoders {
|
||||
|
||||
class H264BitWriter {
|
||||
public:
|
||||
@ -60,123 +61,213 @@ private:
|
||||
std::vector<u8> byte_array;
|
||||
};
|
||||
|
||||
class H264 {
|
||||
public:
|
||||
explicit H264(Host1x::Host1x& host1x);
|
||||
~H264();
|
||||
|
||||
/// Compose the H264 frame for FFmpeg decoding
|
||||
[[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
|
||||
size_t* out_configuration_size,
|
||||
bool is_first_frame = false);
|
||||
struct Offset {
|
||||
constexpr u32 Address() const noexcept {
|
||||
return offset << 8;
|
||||
}
|
||||
|
||||
private:
|
||||
Common::ScratchBuffer<u8> frame;
|
||||
Common::ScratchBuffer<u8> scan;
|
||||
Host1x::Host1x& host1x;
|
||||
u32 offset;
|
||||
};
|
||||
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
|
||||
static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!");
|
||||
|
||||
struct H264ParameterSet {
|
||||
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
|
||||
s32 delta_pic_order_always_zero_flag; ///< 0x04
|
||||
s32 frame_mbs_only_flag; ///< 0x08
|
||||
u32 pic_width_in_mbs; ///< 0x0C
|
||||
u32 frame_height_in_map_units; ///< 0x10
|
||||
union { ///< 0x14
|
||||
BitField<0, 2, u32> tile_format;
|
||||
BitField<2, 3, u32> gob_height;
|
||||
};
|
||||
u32 entropy_coding_mode_flag; ///< 0x18
|
||||
s32 pic_order_present_flag; ///< 0x1C
|
||||
s32 num_refidx_l0_default_active; ///< 0x20
|
||||
s32 num_refidx_l1_default_active; ///< 0x24
|
||||
s32 deblocking_filter_control_present_flag; ///< 0x28
|
||||
s32 redundant_pic_cnt_present_flag; ///< 0x2C
|
||||
u32 transform_8x8_mode_flag; ///< 0x30
|
||||
u32 pitch_luma; ///< 0x34
|
||||
u32 pitch_chroma; ///< 0x38
|
||||
u32 luma_top_offset; ///< 0x3C
|
||||
u32 luma_bot_offset; ///< 0x40
|
||||
u32 luma_frame_offset; ///< 0x44
|
||||
u32 chroma_top_offset; ///< 0x48
|
||||
u32 chroma_bot_offset; ///< 0x4C
|
||||
u32 chroma_frame_offset; ///< 0x50
|
||||
u32 hist_buffer_size; ///< 0x54
|
||||
union { ///< 0x58
|
||||
union {
|
||||
BitField<0, 1, u64> mbaff_frame;
|
||||
BitField<1, 1, u64> direct_8x8_inference;
|
||||
BitField<2, 1, u64> weighted_pred;
|
||||
BitField<3, 1, u64> constrained_intra_pred;
|
||||
BitField<4, 1, u64> ref_pic;
|
||||
BitField<5, 1, u64> field_pic;
|
||||
BitField<6, 1, u64> bottom_field;
|
||||
BitField<7, 1, u64> second_field;
|
||||
} flags;
|
||||
BitField<8, 4, u64> log2_max_frame_num_minus4;
|
||||
BitField<12, 2, u64> chroma_format_idc;
|
||||
BitField<14, 2, u64> pic_order_cnt_type;
|
||||
BitField<16, 6, s64> pic_init_qp_minus26;
|
||||
BitField<22, 5, s64> chroma_qp_index_offset;
|
||||
BitField<27, 5, s64> second_chroma_qp_index_offset;
|
||||
BitField<32, 2, u64> weighted_bipred_idc;
|
||||
BitField<34, 7, u64> curr_pic_idx;
|
||||
BitField<41, 5, u64> curr_col_idx;
|
||||
BitField<46, 16, u64> frame_number;
|
||||
BitField<62, 1, u64> frame_surfaces;
|
||||
BitField<63, 1, u64> output_memory_layout;
|
||||
};
|
||||
struct H264ParameterSet {
|
||||
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
|
||||
s32 delta_pic_order_always_zero_flag; ///< 0x04
|
||||
s32 frame_mbs_only_flag; ///< 0x08
|
||||
u32 pic_width_in_mbs; ///< 0x0C
|
||||
u32 frame_height_in_mbs; ///< 0x10
|
||||
union { ///< 0x14
|
||||
BitField<0, 2, u32> tile_format;
|
||||
BitField<2, 3, u32> gob_height;
|
||||
BitField<5, 27, u32> reserved_surface_format;
|
||||
};
|
||||
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
|
||||
|
||||
struct H264DecoderContext {
|
||||
INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
|
||||
u32 stream_len; ///< 0x0048
|
||||
INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
|
||||
H264ParameterSet h264_parameter_set; ///< 0x0058
|
||||
INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
|
||||
std::array<u8, 0x60> weight_scale; ///< 0x01C0
|
||||
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
|
||||
u32 entropy_coding_mode_flag; ///< 0x18
|
||||
s32 pic_order_present_flag; ///< 0x1C
|
||||
s32 num_refidx_l0_default_active; ///< 0x20
|
||||
s32 num_refidx_l1_default_active; ///< 0x24
|
||||
s32 deblocking_filter_control_present_flag; ///< 0x28
|
||||
s32 redundant_pic_cnt_present_flag; ///< 0x2C
|
||||
u32 transform_8x8_mode_flag; ///< 0x30
|
||||
u32 pitch_luma; ///< 0x34
|
||||
u32 pitch_chroma; ///< 0x38
|
||||
Offset luma_top_offset; ///< 0x3C
|
||||
Offset luma_bot_offset; ///< 0x40
|
||||
Offset luma_frame_offset; ///< 0x44
|
||||
Offset chroma_top_offset; ///< 0x48
|
||||
Offset chroma_bot_offset; ///< 0x4C
|
||||
Offset chroma_frame_offset; ///< 0x50
|
||||
u32 hist_buffer_size; ///< 0x54
|
||||
union { ///< 0x58
|
||||
union {
|
||||
BitField<0, 1, u64> mbaff_frame;
|
||||
BitField<1, 1, u64> direct_8x8_inference;
|
||||
BitField<2, 1, u64> weighted_pred;
|
||||
BitField<3, 1, u64> constrained_intra_pred;
|
||||
BitField<4, 1, u64> ref_pic;
|
||||
BitField<5, 1, u64> field_pic;
|
||||
BitField<6, 1, u64> bottom_field;
|
||||
BitField<7, 1, u64> second_field;
|
||||
} flags;
|
||||
BitField<8, 4, u64> log2_max_frame_num_minus4;
|
||||
BitField<12, 2, u64> chroma_format_idc;
|
||||
BitField<14, 2, u64> pic_order_cnt_type;
|
||||
BitField<16, 6, s64> pic_init_qp_minus26;
|
||||
BitField<22, 5, s64> chroma_qp_index_offset;
|
||||
BitField<27, 5, s64> second_chroma_qp_index_offset;
|
||||
BitField<32, 2, u64> weighted_bipred_idc;
|
||||
BitField<34, 7, u64> curr_pic_idx;
|
||||
BitField<41, 5, u64> curr_col_idx;
|
||||
BitField<46, 16, u64> frame_number;
|
||||
BitField<62, 1, u64> frame_surfaces;
|
||||
BitField<63, 1, u64> output_memory_layout;
|
||||
};
|
||||
static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
|
||||
};
|
||||
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(H264ParameterSet, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
|
||||
ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
|
||||
ASSERT_POSITION(frame_mbs_only_flag, 0x08);
|
||||
ASSERT_POSITION(pic_width_in_mbs, 0x0C);
|
||||
ASSERT_POSITION(frame_height_in_map_units, 0x10);
|
||||
ASSERT_POSITION(tile_format, 0x14);
|
||||
ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
|
||||
ASSERT_POSITION(pic_order_present_flag, 0x1C);
|
||||
ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
|
||||
ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
|
||||
ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
|
||||
ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
|
||||
ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
|
||||
ASSERT_POSITION(pitch_luma, 0x34);
|
||||
ASSERT_POSITION(pitch_chroma, 0x38);
|
||||
ASSERT_POSITION(luma_top_offset, 0x3C);
|
||||
ASSERT_POSITION(luma_bot_offset, 0x40);
|
||||
ASSERT_POSITION(luma_frame_offset, 0x44);
|
||||
ASSERT_POSITION(chroma_top_offset, 0x48);
|
||||
ASSERT_POSITION(chroma_bot_offset, 0x4C);
|
||||
ASSERT_POSITION(chroma_frame_offset, 0x50);
|
||||
ASSERT_POSITION(hist_buffer_size, 0x54);
|
||||
ASSERT_POSITION(flags, 0x58);
|
||||
ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
|
||||
ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
|
||||
ASSERT_POSITION(frame_mbs_only_flag, 0x08);
|
||||
ASSERT_POSITION(pic_width_in_mbs, 0x0C);
|
||||
ASSERT_POSITION(frame_height_in_mbs, 0x10);
|
||||
ASSERT_POSITION(tile_format, 0x14);
|
||||
ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
|
||||
ASSERT_POSITION(pic_order_present_flag, 0x1C);
|
||||
ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
|
||||
ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
|
||||
ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
|
||||
ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
|
||||
ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
|
||||
ASSERT_POSITION(pitch_luma, 0x34);
|
||||
ASSERT_POSITION(pitch_chroma, 0x38);
|
||||
ASSERT_POSITION(luma_top_offset, 0x3C);
|
||||
ASSERT_POSITION(luma_bot_offset, 0x40);
|
||||
ASSERT_POSITION(luma_frame_offset, 0x44);
|
||||
ASSERT_POSITION(chroma_top_offset, 0x48);
|
||||
ASSERT_POSITION(chroma_bot_offset, 0x4C);
|
||||
ASSERT_POSITION(chroma_frame_offset, 0x50);
|
||||
ASSERT_POSITION(hist_buffer_size, 0x54);
|
||||
ASSERT_POSITION(flags, 0x58);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
struct DpbEntry {
|
||||
union {
|
||||
BitField<0, 7, u32> index;
|
||||
BitField<7, 5, u32> col_idx;
|
||||
BitField<12, 2, u32> state;
|
||||
BitField<14, 1, u32> is_long_term;
|
||||
BitField<15, 1, u32> non_existing;
|
||||
BitField<16, 1, u32> is_field;
|
||||
BitField<17, 4, u32> top_field_marking;
|
||||
BitField<21, 4, u32> bottom_field_marking;
|
||||
BitField<25, 1, u32> output_memory_layout;
|
||||
BitField<26, 6, u32> reserved;
|
||||
} flags;
|
||||
std::array<u32, 2> field_order_cnt;
|
||||
u32 frame_idx;
|
||||
};
|
||||
static_assert(sizeof(DpbEntry) == 0x10, "DpbEntry has the wrong size!");
|
||||
|
||||
struct DisplayParam {
|
||||
union {
|
||||
BitField<0, 1, u32> enable_tf_output;
|
||||
BitField<1, 1, u32> vc1_map_y_flag;
|
||||
BitField<2, 3, u32> map_y_value;
|
||||
BitField<5, 1, u32> vc1_map_uv_flag;
|
||||
BitField<6, 3, u32> map_uv_value;
|
||||
BitField<9, 8, u32> out_stride;
|
||||
BitField<17, 3, u32> tiling_format;
|
||||
BitField<20, 1, u32> output_structure; // 0=frame, 1=field
|
||||
BitField<21, 11, u32> reserved0;
|
||||
};
|
||||
std::array<s32, 2> output_top;
|
||||
std::array<s32, 2> output_bottom;
|
||||
union {
|
||||
BitField<0, 1, u32> enable_histogram;
|
||||
BitField<1, 12, u32> histogram_start_x;
|
||||
BitField<13, 12, u32> histogram_start_y;
|
||||
BitField<25, 7, u32> reserved1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 12, u32> histogram_end_x;
|
||||
BitField<12, 12, u32> histogram_end_y;
|
||||
BitField<24, 8, u32> reserved2;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(DisplayParam) == 0x1C, "DisplayParam has the wrong size!");
|
||||
|
||||
struct H264DecoderContext {
|
||||
INSERT_PADDING_WORDS_NOINIT(13); ///< 0x0000
|
||||
std::array<u8, 16> eos; ///< 0x0034
|
||||
u8 explicit_eos_present_flag; ///< 0x0044
|
||||
u8 hint_dump_en; ///< 0x0045
|
||||
INSERT_PADDING_BYTES_NOINIT(2); ///< 0x0046
|
||||
u32 stream_len; ///< 0x0048
|
||||
u32 slice_count; ///< 0x004C
|
||||
u32 mbhist_buffer_size; ///< 0x0050
|
||||
u32 gptimer_timeout_value; ///< 0x0054
|
||||
H264ParameterSet h264_parameter_set; ///< 0x0058
|
||||
std::array<s32, 2> curr_field_order_cnt; ///< 0x00B8
|
||||
std::array<DpbEntry, 16> dpb; ///< 0x00C0
|
||||
std::array<u8, 0x60> weight_scale_4x4; ///< 0x01C0
|
||||
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
|
||||
std::array<u8, 2> num_inter_view_refs_lX; ///< 0x02A0
|
||||
std::array<u8, 14> reserved2; ///< 0x02A2
|
||||
std::array<std::array<s8, 16>, 2> inter_view_refidx_lX; ///< 0x02B0
|
||||
union { ///< 0x02D0
|
||||
BitField<0, 1, u32> lossless_ipred8x8_filter_enable;
|
||||
BitField<1, 1, u32> qpprime_y_zero_transform_bypass_flag;
|
||||
BitField<2, 30, u32> reserved3;
|
||||
};
|
||||
DisplayParam display_param; ///< 0x02D4
|
||||
std::array<u32, 3> reserved4; ///< 0x02F0
|
||||
};
|
||||
static_assert(sizeof(H264DecoderContext) == 0x2FC, "H264DecoderContext is an invalid size");
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(H264DecoderContext, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_POSITION(stream_len, 0x48);
|
||||
ASSERT_POSITION(h264_parameter_set, 0x58);
|
||||
ASSERT_POSITION(weight_scale, 0x1C0);
|
||||
ASSERT_POSITION(stream_len, 0x48);
|
||||
ASSERT_POSITION(h264_parameter_set, 0x58);
|
||||
ASSERT_POSITION(dpb, 0xC0);
|
||||
ASSERT_POSITION(weight_scale_4x4, 0x1C0);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
class H264 final : public Decoder {
|
||||
public:
|
||||
explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
||||
Host1x::FrameQueue& frame_queue);
|
||||
~H264() override;
|
||||
|
||||
H264(const H264&) = delete;
|
||||
H264& operator=(const H264&) = delete;
|
||||
|
||||
H264(H264&&) = delete;
|
||||
H264& operator=(H264&&) = delete;
|
||||
|
||||
/// Compose the H264 frame for FFmpeg decoding
|
||||
[[nodiscard]] std::span<const u8> ComposeFrame() override;
|
||||
|
||||
std::tuple<u64, u64> GetProgressiveOffsets() override;
|
||||
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
|
||||
bool IsInterlaced() override;
|
||||
|
||||
std::string_view GetCurrentCodecName() const override {
|
||||
return "H264";
|
||||
}
|
||||
|
||||
private:
|
||||
bool is_first_frame{true};
|
||||
Common::ScratchBuffer<u8> frame_scratch;
|
||||
Common::ScratchBuffer<u8> scan_scratch;
|
||||
H264DecoderContext current_context{};
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
} // namespace Decoders
|
||||
} // namespace Tegra
|
||||
|
@ -7,47 +7,70 @@
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra::Decoder {
|
||||
VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
|
||||
namespace Tegra::Decoders {
|
||||
VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||
Host1x::FrameQueue& frame_queue_)
|
||||
: Decoder{host1x_, id_, regs_, frame_queue_} {
|
||||
codec = Host1x::NvdecCommon::VideoCodec::VP8;
|
||||
initialized = decode_api.Initialize(codec);
|
||||
}
|
||||
|
||||
VP8::~VP8() = default;
|
||||
|
||||
std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||
VP8PictureInfo info;
|
||||
host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
|
||||
std::tuple<u64, u64> VP8::GetProgressiveOffsets() {
|
||||
auto luma{regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
|
||||
auto chroma{regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
|
||||
return {luma, chroma};
|
||||
}
|
||||
|
||||
const bool is_key_frame = info.key_frame == 1u;
|
||||
const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
|
||||
std::tuple<u64, u64, u64, u64> VP8::GetInterlacedOffsets() {
|
||||
auto luma_top{regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
|
||||
auto luma_bottom{
|
||||
regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
|
||||
auto chroma_top{
|
||||
regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
|
||||
auto chroma_bottom{
|
||||
regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
|
||||
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
|
||||
}
|
||||
|
||||
std::span<const u8> VP8::ComposeFrame() {
|
||||
memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context,
|
||||
sizeof(VP8PictureInfo));
|
||||
|
||||
const bool is_key_frame = current_context.key_frame == 1u;
|
||||
const auto bitstream_size = static_cast<size_t>(current_context.vld_buffer_size);
|
||||
const size_t header_size = is_key_frame ? 10u : 3u;
|
||||
frame.resize(header_size + bitstream_size);
|
||||
frame_scratch.resize(header_size + bitstream_size);
|
||||
|
||||
// Based on page 30 of the VP8 specification.
|
||||
// https://datatracker.ietf.org/doc/rfc6386/
|
||||
frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
|
||||
frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number
|
||||
frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
|
||||
frame_scratch[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
|
||||
frame_scratch[0] |=
|
||||
static_cast<u8>((current_context.version & 7u) << 1u); // 3-bit version number
|
||||
frame_scratch[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
|
||||
|
||||
// The next 19-bits are the first partition size
|
||||
frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u);
|
||||
frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u);
|
||||
frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u);
|
||||
frame_scratch[0] |= static_cast<u8>((current_context.first_part_size & 7u) << 5u);
|
||||
frame_scratch[1] = static_cast<u8>((current_context.first_part_size & 0x7f8u) >> 3u);
|
||||
frame_scratch[2] = static_cast<u8>((current_context.first_part_size & 0x7f800u) >> 11u);
|
||||
|
||||
if (is_key_frame) {
|
||||
frame[3] = 0x9du;
|
||||
frame[4] = 0x01u;
|
||||
frame[5] = 0x2au;
|
||||
frame_scratch[3] = 0x9du;
|
||||
frame_scratch[4] = 0x01u;
|
||||
frame_scratch[5] = 0x2au;
|
||||
// TODO(ameerj): Horizontal/Vertical Scale
|
||||
// 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits)
|
||||
frame[6] = static_cast<u8>(info.frame_width & 0xff);
|
||||
frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f));
|
||||
frame_scratch[6] = static_cast<u8>(current_context.frame_width & 0xff);
|
||||
frame_scratch[7] = static_cast<u8>(((current_context.frame_width >> 8) & 0x3f));
|
||||
// 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits)
|
||||
frame[8] = static_cast<u8>(info.frame_height & 0xff);
|
||||
frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
|
||||
frame_scratch[8] = static_cast<u8>(current_context.frame_height & 0xff);
|
||||
frame_scratch[9] = static_cast<u8>(((current_context.frame_height >> 8) & 0x3f));
|
||||
}
|
||||
const u64 bitstream_offset = state.frame_bitstream_offset;
|
||||
host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
|
||||
const u64 bitstream_offset = regs.frame_bitstream_offset.Address();
|
||||
memory_manager.ReadBlock(bitstream_offset, frame_scratch.data() + header_size, bitstream_size);
|
||||
|
||||
return frame;
|
||||
return frame_scratch;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Decoder
|
||||
} // namespace Tegra::Decoders
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "video_core/host1x/codecs/decoder.h"
|
||||
#include "video_core/host1x/nvdec_common.h"
|
||||
|
||||
namespace Tegra {
|
||||
@ -17,20 +18,41 @@ namespace Host1x {
|
||||
class Host1x;
|
||||
} // namespace Host1x
|
||||
|
||||
namespace Decoder {
|
||||
namespace Decoders {
|
||||
enum class Vp8SurfaceIndex : u32 {
|
||||
Last = 0,
|
||||
Golden = 1,
|
||||
AltRef = 2,
|
||||
Current = 3,
|
||||
};
|
||||
|
||||
class VP8 {
|
||||
class VP8 final : public Decoder {
|
||||
public:
|
||||
explicit VP8(Host1x::Host1x& host1x);
|
||||
~VP8();
|
||||
explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
||||
Host1x::FrameQueue& frame_queue);
|
||||
~VP8() override;
|
||||
|
||||
/// Compose the VP8 frame for FFmpeg decoding
|
||||
[[nodiscard]] std::span<const u8> ComposeFrame(
|
||||
const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||
VP8(const VP8&) = delete;
|
||||
VP8& operator=(const VP8&) = delete;
|
||||
|
||||
VP8(VP8&&) = delete;
|
||||
VP8& operator=(VP8&&) = delete;
|
||||
|
||||
[[nodiscard]] std::span<const u8> ComposeFrame() override;
|
||||
|
||||
std::tuple<u64, u64> GetProgressiveOffsets() override;
|
||||
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
|
||||
|
||||
bool IsInterlaced() override {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string_view GetCurrentCodecName() const override {
|
||||
return "VP8";
|
||||
}
|
||||
|
||||
private:
|
||||
Common::ScratchBuffer<u8> frame;
|
||||
Host1x::Host1x& host1x;
|
||||
Common::ScratchBuffer<u8> frame_scratch;
|
||||
|
||||
struct VP8PictureInfo {
|
||||
INSERT_PADDING_WORDS_NOINIT(14);
|
||||
@ -73,7 +95,9 @@ private:
|
||||
INSERT_PADDING_WORDS_NOINIT(3);
|
||||
};
|
||||
static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
|
||||
|
||||
VP8PictureInfo current_context{};
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
} // namespace Decoders
|
||||
} // namespace Tegra
|
||||
|
@ -4,12 +4,13 @@
|
||||
#include <algorithm> // for std::copy
|
||||
#include <numeric>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/host1x/codecs/vp9.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra::Decoder {
|
||||
namespace Tegra::Decoders {
|
||||
namespace {
|
||||
constexpr u32 diff_update_probability = 252;
|
||||
constexpr u32 frame_sync_code = 0x498342;
|
||||
@ -237,7 +238,12 @@ constexpr std::array<u8, 254> map_lut{
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
VP9::VP9(Host1x::Host1x& host1x_) : host1x{host1x_} {}
|
||||
VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
|
||||
Host1x::FrameQueue& frame_queue_)
|
||||
: Decoder{host1x_, id_, regs_, frame_queue_} {
|
||||
codec = Host1x::NvdecCommon::VideoCodec::VP9;
|
||||
initialized = decode_api.Initialize(codec);
|
||||
}
|
||||
|
||||
VP9::~VP9() = default;
|
||||
|
||||
@ -356,35 +362,113 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
|
||||
}
|
||||
}
|
||||
|
||||
Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||
PictureInfo picture_info;
|
||||
host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
|
||||
Vp9PictureInfo vp9_info = picture_info.Convert();
|
||||
void VP9::WriteSegmentation(VpxBitStreamWriter& writer) {
|
||||
bool enabled = current_picture_info.segmentation.enabled != 0;
|
||||
writer.WriteBit(enabled);
|
||||
if (!enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
|
||||
auto update_map = current_picture_info.segmentation.update_map != 0;
|
||||
writer.WriteBit(update_map);
|
||||
|
||||
if (update_map) {
|
||||
EntropyProbs entropy_probs{};
|
||||
memory_manager.ReadBlock(regs.vp9_prob_tab_buffer_offset.Address(), &entropy_probs,
|
||||
sizeof(entropy_probs));
|
||||
|
||||
auto WriteProb = [&](u8 prob) {
|
||||
bool coded = prob != 255;
|
||||
writer.WriteBit(coded);
|
||||
if (coded) {
|
||||
writer.WriteU(prob, 8);
|
||||
}
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < entropy_probs.mb_segment_tree_probs.size(); i++) {
|
||||
WriteProb(entropy_probs.mb_segment_tree_probs[i]);
|
||||
}
|
||||
|
||||
auto temporal_update = current_picture_info.segmentation.temporal_update != 0;
|
||||
writer.WriteBit(temporal_update);
|
||||
|
||||
if (temporal_update) {
|
||||
for (s32 i = 0; i < 3; i++) {
|
||||
WriteProb(entropy_probs.segment_pred_probs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (last_segmentation == current_picture_info.segmentation) {
|
||||
writer.WriteBit(false);
|
||||
return;
|
||||
}
|
||||
|
||||
last_segmentation = current_picture_info.segmentation;
|
||||
writer.WriteBit(true);
|
||||
writer.WriteBit(current_picture_info.segmentation.abs_delta != 0);
|
||||
|
||||
constexpr s32 MAX_SEGMENTS = 8;
|
||||
constexpr std::array SegmentationFeatureBits = {8, 6, 2, 0};
|
||||
|
||||
for (s32 i = 0; i < MAX_SEGMENTS; i++) {
|
||||
auto q_enabled = current_picture_info.segmentation.feature_enabled[i][0] != 0;
|
||||
writer.WriteBit(q_enabled);
|
||||
if (q_enabled) {
|
||||
writer.WriteS(current_picture_info.segmentation.feature_data[i][0],
|
||||
SegmentationFeatureBits[0]);
|
||||
}
|
||||
|
||||
auto lf_enabled = current_picture_info.segmentation.feature_enabled[i][1] != 0;
|
||||
writer.WriteBit(lf_enabled);
|
||||
if (lf_enabled) {
|
||||
writer.WriteS(current_picture_info.segmentation.feature_data[i][1],
|
||||
SegmentationFeatureBits[1]);
|
||||
}
|
||||
|
||||
auto ref_enabled = current_picture_info.segmentation.feature_enabled[i][2] != 0;
|
||||
writer.WriteBit(ref_enabled);
|
||||
if (ref_enabled) {
|
||||
writer.WriteU(current_picture_info.segmentation.feature_data[i][2],
|
||||
SegmentationFeatureBits[2]);
|
||||
}
|
||||
|
||||
auto skip_enabled = current_picture_info.segmentation.feature_enabled[i][3] != 0;
|
||||
writer.WriteBit(skip_enabled);
|
||||
}
|
||||
}
|
||||
|
||||
Vp9PictureInfo VP9::GetVp9PictureInfo() {
|
||||
memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_picture_info,
|
||||
sizeof(PictureInfo));
|
||||
Vp9PictureInfo vp9_info = current_picture_info.Convert();
|
||||
|
||||
InsertEntropy(regs.vp9_prob_tab_buffer_offset.Address(), vp9_info.entropy);
|
||||
|
||||
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
|
||||
// order: last, golden, altref, current.
|
||||
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
|
||||
vp9_info.frame_offsets.begin());
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
vp9_info.frame_offsets[i] = regs.surface_luma_offsets[i].Address();
|
||||
}
|
||||
|
||||
return vp9_info;
|
||||
}
|
||||
|
||||
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
|
||||
EntropyProbs entropy;
|
||||
host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
|
||||
memory_manager.ReadBlock(offset, &entropy, sizeof(EntropyProbs));
|
||||
entropy.Convert(dst);
|
||||
}
|
||||
|
||||
Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||
Vp9FrameContainer VP9::GetCurrentFrame() {
|
||||
Vp9FrameContainer current_frame{};
|
||||
{
|
||||
// gpu.SyncGuestHost(); epic, why?
|
||||
current_frame.info = GetVp9PictureInfo(state);
|
||||
current_frame.info = GetVp9PictureInfo();
|
||||
current_frame.bit_stream.resize(current_frame.info.bitstream_size);
|
||||
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
|
||||
current_frame.info.bitstream_size);
|
||||
memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(),
|
||||
current_frame.bit_stream.data(),
|
||||
current_frame.info.bitstream_size);
|
||||
}
|
||||
if (!next_frame.bit_stream.empty()) {
|
||||
Vp9FrameContainer temp{
|
||||
@ -742,8 +826,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
|
||||
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
|
||||
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
|
||||
|
||||
ASSERT(!current_frame_info.segment_enabled);
|
||||
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
|
||||
WriteSegmentation(uncomp_writer);
|
||||
|
||||
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
|
||||
const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
|
||||
@ -770,10 +853,29 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
|
||||
return uncomp_writer;
|
||||
}
|
||||
|
||||
void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||
std::tuple<u64, u64> VP9::GetProgressiveOffsets() {
|
||||
auto luma{regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
|
||||
auto chroma{regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
|
||||
return {luma, chroma};
|
||||
}
|
||||
|
||||
std::tuple<u64, u64, u64, u64> VP9::GetInterlacedOffsets() {
|
||||
auto luma_top{regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
|
||||
auto luma_bottom{
|
||||
regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
|
||||
auto chroma_top{
|
||||
regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
|
||||
auto chroma_bottom{
|
||||
regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
|
||||
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
|
||||
}
|
||||
|
||||
std::span<const u8> VP9::ComposeFrame() {
|
||||
vp9_hidden_frame = false;
|
||||
|
||||
std::vector<u8> bitstream;
|
||||
{
|
||||
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
|
||||
Vp9FrameContainer curr_frame = GetCurrentFrame();
|
||||
current_frame_info = curr_frame.info;
|
||||
bitstream = std::move(curr_frame.bit_stream);
|
||||
}
|
||||
@ -786,12 +888,16 @@ void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||
std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
|
||||
|
||||
// Write headers and frame to buffer
|
||||
frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
|
||||
std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin());
|
||||
frame_scratch.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
|
||||
std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame_scratch.begin());
|
||||
std::copy(compressed_header.begin(), compressed_header.end(),
|
||||
frame.begin() + uncompressed_header.size());
|
||||
frame_scratch.begin() + uncompressed_header.size());
|
||||
std::copy(bitstream.begin(), bitstream.end(),
|
||||
frame.begin() + uncompressed_header.size() + compressed_header.size());
|
||||
frame_scratch.begin() + uncompressed_header.size() + compressed_header.size());
|
||||
|
||||
vp9_hidden_frame = WasFrameHidden();
|
||||
|
||||
return GetFrameBytes();
|
||||
}
|
||||
|
||||
VpxRangeEncoder::VpxRangeEncoder() {
|
||||
@ -944,4 +1050,4 @@ const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
|
||||
return byte_array;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Decoder
|
||||
} // namespace Tegra::Decoders
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/stream.h"
|
||||
#include "video_core/host1x/codecs/decoder.h"
|
||||
#include "video_core/host1x/codecs/vp9_types.h"
|
||||
#include "video_core/host1x/nvdec_common.h"
|
||||
|
||||
@ -19,7 +20,7 @@ namespace Host1x {
|
||||
class Host1x;
|
||||
} // namespace Host1x
|
||||
|
||||
namespace Decoder {
|
||||
namespace Decoders {
|
||||
|
||||
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
|
||||
/// VP9 header bitstreams.
|
||||
@ -110,21 +111,32 @@ private:
|
||||
std::vector<u8> byte_array;
|
||||
};
|
||||
|
||||
class VP9 {
|
||||
class VP9 final : public Decoder {
|
||||
public:
|
||||
explicit VP9(Host1x::Host1x& host1x);
|
||||
~VP9();
|
||||
explicit VP9(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
|
||||
Host1x::FrameQueue& frame_queue);
|
||||
~VP9() override;
|
||||
|
||||
VP9(const VP9&) = delete;
|
||||
VP9& operator=(const VP9&) = delete;
|
||||
|
||||
VP9(VP9&&) = default;
|
||||
VP9(VP9&&) = delete;
|
||||
VP9& operator=(VP9&&) = delete;
|
||||
|
||||
/// Composes the VP9 frame from the GPU state information.
|
||||
/// Based on the official VP9 spec documentation
|
||||
void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||
[[nodiscard]] std::span<const u8> ComposeFrame() override;
|
||||
|
||||
std::tuple<u64, u64> GetProgressiveOffsets() override;
|
||||
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
|
||||
|
||||
bool IsInterlaced() override {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string_view GetCurrentCodecName() const override {
|
||||
return "VP9";
|
||||
}
|
||||
|
||||
private:
|
||||
/// Returns true if the most recent frame was a hidden frame.
|
||||
[[nodiscard]] bool WasFrameHidden() const {
|
||||
return !current_frame_info.show_frame;
|
||||
@ -132,10 +144,9 @@ public:
|
||||
|
||||
/// Returns a const span to the composed frame data.
|
||||
[[nodiscard]] std::span<const u8> GetFrameBytes() const {
|
||||
return frame;
|
||||
return frame_scratch;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Generates compressed header probability updates in the bitstream writer
|
||||
template <typename T, std::size_t N>
|
||||
void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
@ -167,23 +178,22 @@ private:
|
||||
/// Write motion vector probability updates. 6.3.17 in the spec
|
||||
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
void WriteSegmentation(VpxBitStreamWriter& writer);
|
||||
|
||||
/// Returns VP9 information from NVDEC provided offset and size
|
||||
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
|
||||
const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo();
|
||||
|
||||
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
|
||||
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
|
||||
|
||||
/// Returns frame to be decoded after buffering
|
||||
[[nodiscard]] Vp9FrameContainer GetCurrentFrame(
|
||||
const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||
[[nodiscard]] Vp9FrameContainer GetCurrentFrame();
|
||||
|
||||
/// Use NVDEC providied information to compose the headers for the current frame
|
||||
[[nodiscard]] std::vector<u8> ComposeCompressedHeader();
|
||||
[[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
|
||||
|
||||
Host1x::Host1x& host1x;
|
||||
Common::ScratchBuffer<u8> frame;
|
||||
Common::ScratchBuffer<u8> frame_scratch;
|
||||
|
||||
std::array<s8, 4> loop_filter_ref_deltas{};
|
||||
std::array<s8, 2> loop_filter_mode_deltas{};
|
||||
@ -192,9 +202,11 @@ private:
|
||||
std::array<Vp9EntropyProbs, 4> frame_ctxs{};
|
||||
bool swap_ref_indices{};
|
||||
|
||||
Segmentation last_segmentation{};
|
||||
PictureInfo current_picture_info{};
|
||||
Vp9PictureInfo current_frame_info{};
|
||||
Vp9EntropyProbs prev_frame_probs{};
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
} // namespace Decoders
|
||||
} // namespace Tegra
|
||||
|
@ -11,7 +11,14 @@
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Decoder {
|
||||
namespace Decoders {
|
||||
enum class Vp9SurfaceIndex : u32 {
|
||||
Last = 0,
|
||||
Golden = 1,
|
||||
AltRef = 2,
|
||||
Current = 3,
|
||||
};
|
||||
|
||||
struct Vp9FrameDimensions {
|
||||
s16 width;
|
||||
s16 height;
|
||||
@ -48,11 +55,13 @@ enum class TxMode {
|
||||
};
|
||||
|
||||
struct Segmentation {
|
||||
constexpr bool operator==(const Segmentation& rhs) const = default;
|
||||
|
||||
u8 enabled;
|
||||
u8 update_map;
|
||||
u8 temporal_update;
|
||||
u8 abs_delta;
|
||||
std::array<u32, 8> feature_mask;
|
||||
std::array<std::array<u8, 4>, 8> feature_enabled;
|
||||
std::array<std::array<s16, 4>, 8> feature_data;
|
||||
};
|
||||
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
|
||||
@ -190,7 +199,17 @@ struct PictureInfo {
|
||||
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
|
||||
|
||||
struct EntropyProbs {
|
||||
INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
|
||||
std::array<u8, 10 * 10 * 8> kf_bmode_prob; ///< 0x0000
|
||||
std::array<u8, 10 * 10 * 1> kf_bmode_probB; ///< 0x0320
|
||||
std::array<u8, 3> ref_pred_probs; ///< 0x0384
|
||||
std::array<u8, 7> mb_segment_tree_probs; ///< 0x0387
|
||||
std::array<u8, 3> segment_pred_probs; ///< 0x038E
|
||||
std::array<u8, 4> ref_scores; ///< 0x0391
|
||||
std::array<u8, 2> prob_comppred; ///< 0x0395
|
||||
INSERT_PADDING_BYTES_NOINIT(9); ///< 0x0397
|
||||
std::array<u8, 10 * 8> kf_uv_mode_prob; ///< 0x03A0
|
||||
std::array<u8, 10 * 1> kf_uv_mode_probB; ///< 0x03F0
|
||||
INSERT_PADDING_BYTES_NOINIT(6); ///< 0x03FA
|
||||
std::array<u8, 28> inter_mode_prob; ///< 0x0400
|
||||
std::array<u8, 4> intra_inter_prob; ///< 0x041C
|
||||
INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
|
||||
@ -302,5 +321,5 @@ ASSERT_POSITION(class_0_fr, 0x560);
|
||||
ASSERT_POSITION(coef_probs, 0x5A0);
|
||||
#undef ASSERT_POSITION
|
||||
|
||||
}; // namespace Decoder
|
||||
}; // namespace Decoders
|
||||
}; // namespace Tegra
|
||||
|
@ -27,6 +27,7 @@ void Control::ProcessMethod(Method method, u32 argument) {
|
||||
}
|
||||
|
||||
void Control::Execute(u32 data) {
|
||||
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
|
||||
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
|
||||
}
|
||||
|
||||
|
@ -6,9 +6,7 @@
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Host1x {
|
||||
namespace Tegra::Host1x {
|
||||
|
||||
class Host1x;
|
||||
class Nvdec;
|
||||
@ -31,10 +29,8 @@ private:
|
||||
/// For Host1x, execute is waiting on a syncpoint previously written into the state
|
||||
void Execute(u32 data);
|
||||
|
||||
u32 syncpoint_value{};
|
||||
Host1x& host1x;
|
||||
u32 syncpoint_value{};
|
||||
};
|
||||
|
||||
} // namespace Host1x
|
||||
|
||||
} // namespace Tegra
|
||||
} // namespace Tegra::Host1x
|
||||
|
@ -5,7 +5,9 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/host1x/ffmpeg/ffmpeg.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
extern "C" {
|
||||
#ifdef LIBVA_FOUND
|
||||
@ -132,7 +134,7 @@ bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
|
||||
const Decoder& decoder) {
|
||||
const auto supported_types = GetSupportedDeviceTypes();
|
||||
for (const auto type : PreferredGpuDecoders) {
|
||||
AVPixelFormat hw_pix_fmt;
|
||||
// AVPixelFormat hw_pix_fmt;
|
||||
|
||||
if (std::ranges::find(supported_types, type) == supported_types.end()) {
|
||||
LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
|
||||
@ -143,12 +145,14 @@ bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
|
||||
decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
|
||||
return true;
|
||||
}
|
||||
// Disable GPU decoding as it cannot return decode frame ordering which breaks everything.
|
||||
// if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
|
||||
// decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
|
||||
// return true;
|
||||
//}
|
||||
}
|
||||
|
||||
LOG_INFO(HW_GPU, "Hardware decoding is disabled due to implementation issues, using CPU.");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -183,8 +187,8 @@ bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
|
||||
return true;
|
||||
}
|
||||
|
||||
DecoderContext::DecoderContext(const Decoder& decoder) {
|
||||
m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
|
||||
DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
|
||||
m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec());
|
||||
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
|
||||
m_codec_context->thread_count = 0;
|
||||
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
|
||||
@ -216,6 +220,25 @@ bool DecoderContext::OpenContext(const Decoder& decoder) {
|
||||
}
|
||||
|
||||
bool DecoderContext::SendPacket(const Packet& packet) {
|
||||
m_temp_frame = std::make_shared<Frame>();
|
||||
m_got_frame = 0;
|
||||
|
||||
// Android can randomly crash when calling decode directly, so skip.
|
||||
// TODO update ffmpeg and hope that fixes it.
|
||||
#ifndef ANDROID
|
||||
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
|
||||
m_decode_order = true;
|
||||
auto* codec{ffcodec(m_decoder.GetCodec())};
|
||||
if (const int ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(),
|
||||
&m_got_frame, packet.GetPacket());
|
||||
ret < 0) {
|
||||
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
|
||||
return false;
|
||||
@ -224,139 +247,73 @@ bool DecoderContext::SendPacket(const Packet& packet) {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
|
||||
auto dst_frame = std::make_unique<Frame>();
|
||||
std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
|
||||
// Android can randomly crash when calling decode directly, so skip.
|
||||
// TODO update ffmpeg and hope that fixes it.
|
||||
#ifndef ANDROID
|
||||
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
|
||||
m_decode_order = true;
|
||||
auto* codec{ffcodec(m_decoder.GetCodec())};
|
||||
int ret{0};
|
||||
|
||||
const auto ReceiveImpl = [&](AVFrame* frame) {
|
||||
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
|
||||
return false;
|
||||
if (m_got_frame == 0) {
|
||||
Packet packet{{}};
|
||||
auto* pkt = packet.GetPacket();
|
||||
pkt->data = nullptr;
|
||||
pkt->size = 0;
|
||||
ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, pkt);
|
||||
m_codec_context->has_b_frames = 0;
|
||||
}
|
||||
|
||||
*out_is_interlaced =
|
||||
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
|
||||
(frame->flags & AV_FRAME_FLAG_INTERLACED) != 0;
|
||||
#else
|
||||
frame->interlaced_frame != 0;
|
||||
if (m_got_frame == 0 || ret < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "Failed to receive a frame! error {}", ret);
|
||||
return {};
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
{
|
||||
|
||||
if (m_codec_context->hw_device_ctx) {
|
||||
// If we have a hardware context, make a separate frame here to receive the
|
||||
// hardware result before sending it to the output.
|
||||
Frame intermediate_frame;
|
||||
const auto ReceiveImpl = [&](AVFrame* frame) {
|
||||
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
|
||||
return {};
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
dst_frame->SetFormat(PreferredGpuFormat);
|
||||
if (const int ret =
|
||||
av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
|
||||
ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
} else {
|
||||
// Otherwise, decode the frame as normal.
|
||||
if (!ReceiveImpl(dst_frame->GetFrame())) {
|
||||
return {};
|
||||
if (m_codec_context->hw_device_ctx) {
|
||||
// If we have a hardware context, make a separate frame here to receive the
|
||||
// hardware result before sending it to the output.
|
||||
Frame intermediate_frame;
|
||||
|
||||
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
|
||||
return {};
|
||||
}
|
||||
|
||||
m_temp_frame->SetFormat(PreferredGpuFormat);
|
||||
if (const int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(),
|
||||
intermediate_frame.GetFrame(), 0);
|
||||
ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
} else {
|
||||
// Otherwise, decode the frame as normal.
|
||||
if (!ReceiveImpl(m_temp_frame->GetFrame())) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dst_frame;
|
||||
}
|
||||
|
||||
DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
|
||||
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
|
||||
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
|
||||
AVFilterInOut* inputs = avfilter_inout_alloc();
|
||||
AVFilterInOut* outputs = avfilter_inout_alloc();
|
||||
SCOPE_EXIT({
|
||||
avfilter_inout_free(&inputs);
|
||||
avfilter_inout_free(&outputs);
|
||||
});
|
||||
|
||||
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
|
||||
// so just use 1/1 to make buffer filter happy
|
||||
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
|
||||
frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
|
||||
|
||||
m_filter_graph = avfilter_graph_alloc();
|
||||
int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
|
||||
nullptr, m_filter_graph);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
|
||||
m_filter_graph);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
inputs->name = av_strdup("out");
|
||||
inputs->filter_ctx = m_sink_context;
|
||||
inputs->pad_idx = 0;
|
||||
inputs->next = nullptr;
|
||||
|
||||
outputs->name = av_strdup("in");
|
||||
outputs->filter_ctx = m_source_context;
|
||||
outputs->pad_idx = 0;
|
||||
outputs->next = nullptr;
|
||||
|
||||
const char* description = "yadif=1:-1:0";
|
||||
ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
ret = avfilter_graph_config(m_filter_graph, nullptr);
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
m_initialized = true;
|
||||
}
|
||||
|
||||
bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
|
||||
if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
|
||||
AV_BUFFERSRC_FLAG_KEEP_REF);
|
||||
ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
|
||||
auto dst_frame = std::make_unique<Frame>();
|
||||
const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
|
||||
|
||||
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
|
||||
return dst_frame;
|
||||
}
|
||||
|
||||
DeinterlaceFilter::~DeinterlaceFilter() {
|
||||
avfilter_graph_free(&m_filter_graph);
|
||||
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
|
||||
m_temp_frame->GetFrame()->interlaced_frame =
|
||||
(m_temp_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) != 0;
|
||||
#endif
|
||||
return std::move(m_temp_frame);
|
||||
}
|
||||
|
||||
void DecodeApi::Reset() {
|
||||
m_deinterlace_filter.reset();
|
||||
m_hardware_context.reset();
|
||||
m_decoder_context.reset();
|
||||
m_decoder.reset();
|
||||
@ -382,43 +339,14 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
|
||||
bool DecodeApi::SendPacket(std::span<const u8> packet_data) {
|
||||
FFmpeg::Packet packet(packet_data);
|
||||
return m_decoder_context->SendPacket(packet);
|
||||
}
|
||||
|
||||
void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
|
||||
std::shared_ptr<Frame> DecodeApi::ReceiveFrame() {
|
||||
// Receive raw frame from decoder.
|
||||
bool is_interlaced;
|
||||
auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
|
||||
if (!frame) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!is_interlaced) {
|
||||
// If the frame is not interlaced, we can pend it now.
|
||||
frame_queue.push(std::move(frame));
|
||||
} else {
|
||||
// Create the deinterlacer if needed.
|
||||
if (!m_deinterlace_filter) {
|
||||
m_deinterlace_filter.emplace(*frame);
|
||||
}
|
||||
|
||||
// Add the frame we just received.
|
||||
if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Pend output fields.
|
||||
while (true) {
|
||||
auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
|
||||
if (!filter_frame) {
|
||||
break;
|
||||
}
|
||||
|
||||
frame_queue.push(std::move(filter_frame));
|
||||
}
|
||||
}
|
||||
return m_decoder_context->ReceiveFrame();
|
||||
}
|
||||
|
||||
} // namespace FFmpeg
|
||||
|
@ -20,17 +20,20 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/avfilter.h>
|
||||
#include <libavfilter/buffersink.h>
|
||||
#include <libavfilter/buffersrc.h>
|
||||
#include <libavutil/avutil.h>
|
||||
#include <libavutil/opt.h>
|
||||
#ifndef ANDROID
|
||||
#include <libavcodec/codec_internal.h>
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace FFmpeg {
|
||||
|
||||
class Packet;
|
||||
@ -90,6 +93,10 @@ public:
|
||||
return m_frame->data[plane];
|
||||
}
|
||||
|
||||
const u8* GetPlane(int plane) const {
|
||||
return m_frame->data[plane];
|
||||
}
|
||||
|
||||
u8** GetPlanes() const {
|
||||
return m_frame->data;
|
||||
}
|
||||
@ -98,6 +105,14 @@ public:
|
||||
m_frame->format = format;
|
||||
}
|
||||
|
||||
bool IsInterlaced() const {
|
||||
return m_frame->interlaced_frame != 0;
|
||||
}
|
||||
|
||||
bool IsHardwareDecoded() const {
|
||||
return m_frame->hw_frames_ctx != nullptr;
|
||||
}
|
||||
|
||||
AVFrame* GetFrame() const {
|
||||
return m_frame;
|
||||
}
|
||||
@ -160,33 +175,22 @@ public:
|
||||
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
|
||||
bool OpenContext(const Decoder& decoder);
|
||||
bool SendPacket(const Packet& packet);
|
||||
std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
|
||||
std::shared_ptr<Frame> ReceiveFrame();
|
||||
|
||||
AVCodecContext* GetCodecContext() const {
|
||||
return m_codec_context;
|
||||
}
|
||||
|
||||
bool UsingDecodeOrder() const {
|
||||
return m_decode_order;
|
||||
}
|
||||
|
||||
private:
|
||||
const Decoder& m_decoder;
|
||||
AVCodecContext* m_codec_context{};
|
||||
};
|
||||
|
||||
// Wraps an AVFilterGraph.
|
||||
class DeinterlaceFilter {
|
||||
public:
|
||||
YUZU_NON_COPYABLE(DeinterlaceFilter);
|
||||
YUZU_NON_MOVEABLE(DeinterlaceFilter);
|
||||
|
||||
explicit DeinterlaceFilter(const Frame& frame);
|
||||
~DeinterlaceFilter();
|
||||
|
||||
bool AddSourceFrame(const Frame& frame);
|
||||
std::unique_ptr<Frame> DrainSinkFrame();
|
||||
|
||||
private:
|
||||
AVFilterGraph* m_filter_graph{};
|
||||
AVFilterContext* m_source_context{};
|
||||
AVFilterContext* m_sink_context{};
|
||||
bool m_initialized{};
|
||||
s32 m_got_frame{};
|
||||
std::shared_ptr<Frame> m_temp_frame{};
|
||||
bool m_decode_order{};
|
||||
};
|
||||
|
||||
class DecodeApi {
|
||||
@ -200,14 +204,17 @@ public:
|
||||
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
|
||||
void Reset();
|
||||
|
||||
bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
|
||||
void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
|
||||
bool UsingDecodeOrder() const {
|
||||
return m_decoder_context->UsingDecodeOrder();
|
||||
}
|
||||
|
||||
bool SendPacket(std::span<const u8> packet_data);
|
||||
std::shared_ptr<Frame> ReceiveFrame();
|
||||
|
||||
private:
|
||||
std::optional<FFmpeg::Decoder> m_decoder;
|
||||
std::optional<FFmpeg::DecoderContext> m_decoder_context;
|
||||
std::optional<FFmpeg::HardwareContext> m_hardware_context;
|
||||
std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
|
||||
};
|
||||
|
||||
} // namespace FFmpeg
|
||||
|
@ -3,10 +3,10 @@
|
||||
|
||||
#include "core/core.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/host1x/nvdec.h"
|
||||
#include "video_core/host1x/vic.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Host1x {
|
||||
namespace Tegra::Host1x {
|
||||
|
||||
Host1x::Host1x(Core::System& system_)
|
||||
: system{system_}, syncpoint_manager{},
|
||||
@ -15,6 +15,22 @@ Host1x::Host1x(Core::System& system_)
|
||||
|
||||
Host1x::~Host1x() = default;
|
||||
|
||||
} // namespace Host1x
|
||||
void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
|
||||
switch (type) {
|
||||
case ChannelType::NvDec:
|
||||
devices[fd] = std::make_unique<Tegra::Host1x::Nvdec>(*this, fd, syncpt, frame_queue);
|
||||
break;
|
||||
case ChannelType::VIC:
|
||||
devices[fd] = std::make_unique<Tegra::Host1x::Vic>(*this, fd, syncpt, frame_queue);
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast<u32>(type));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
void Host1x::StopDevice(s32 fd, ChannelType type) {
|
||||
devices.erase(fd);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Host1x
|
||||
|
@ -3,9 +3,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <queue>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "common/address_space.h"
|
||||
#include "video_core/cdma_pusher.h"
|
||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||
#include "video_core/host1x/syncpoint_manager.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
@ -14,15 +19,128 @@ namespace Core {
|
||||
class System;
|
||||
} // namespace Core
|
||||
|
||||
namespace Tegra {
|
||||
namespace FFmpeg {
|
||||
class Frame;
|
||||
} // namespace FFmpeg
|
||||
|
||||
namespace Host1x {
|
||||
namespace Tegra::Host1x {
|
||||
class Nvdec;
|
||||
|
||||
class FrameQueue {
|
||||
public:
|
||||
void Open(s32 fd) {
|
||||
std::scoped_lock l{m_mutex};
|
||||
m_presentation_order.insert({fd, {}});
|
||||
m_decode_order.insert({fd, {}});
|
||||
}
|
||||
|
||||
void Close(s32 fd) {
|
||||
std::scoped_lock l{m_mutex};
|
||||
m_presentation_order.erase(fd);
|
||||
m_decode_order.erase(fd);
|
||||
}
|
||||
|
||||
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
|
||||
std::scoped_lock l{m_mutex};
|
||||
// Vic does not know which nvdec is producing frames for it, so search all the fds here for
|
||||
// the given offset.
|
||||
for (auto& map : m_presentation_order) {
|
||||
for (auto& [offset, frame] : map.second) {
|
||||
if (offset == search_offset) {
|
||||
return map.first;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& map : m_decode_order) {
|
||||
for (auto& [offset, frame] : map.second) {
|
||||
if (offset == search_offset) {
|
||||
return map.first;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
|
||||
std::scoped_lock l{m_mutex};
|
||||
auto map = m_presentation_order.find(fd);
|
||||
map->second.emplace_back(offset, std::move(frame));
|
||||
}
|
||||
|
||||
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
|
||||
std::scoped_lock l{m_mutex};
|
||||
auto map = m_decode_order.find(fd);
|
||||
map->second.insert_or_assign(offset, std::move(frame));
|
||||
}
|
||||
|
||||
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
|
||||
if (fd == -1) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::scoped_lock l{m_mutex};
|
||||
auto present_map = m_presentation_order.find(fd);
|
||||
if (present_map->second.size() > 0) {
|
||||
return GetPresentOrderLocked(fd);
|
||||
}
|
||||
|
||||
auto decode_map = m_decode_order.find(fd);
|
||||
if (decode_map->second.size() > 0) {
|
||||
return GetDecodeOrderLocked(fd, offset);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
|
||||
auto map = m_presentation_order.find(fd);
|
||||
if (map->second.size() == 0) {
|
||||
return {};
|
||||
}
|
||||
auto frame = std::move(map->second.front().second);
|
||||
map->second.pop_front();
|
||||
return frame;
|
||||
}
|
||||
|
||||
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
|
||||
auto map = m_decode_order.find(fd);
|
||||
auto it = map->second.find(offset);
|
||||
if (it == map->second.end()) {
|
||||
return {};
|
||||
}
|
||||
return std::move(map->second.extract(it).mapped());
|
||||
}
|
||||
|
||||
using FramePtr = std::shared_ptr<FFmpeg::Frame>;
|
||||
|
||||
std::mutex m_mutex{};
|
||||
std::unordered_map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
|
||||
std::unordered_map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
|
||||
};
|
||||
|
||||
enum class ChannelType : u32 {
|
||||
MsEnc = 0,
|
||||
VIC = 1,
|
||||
GPU = 2,
|
||||
NvDec = 3,
|
||||
Display = 4,
|
||||
NvJpg = 5,
|
||||
TSec = 6,
|
||||
Max = 7,
|
||||
};
|
||||
|
||||
class Host1x {
|
||||
public:
|
||||
explicit Host1x(Core::System& system);
|
||||
~Host1x();
|
||||
|
||||
Core::System& System() {
|
||||
return system;
|
||||
}
|
||||
|
||||
SyncpointManager& GetSyncpointManager() {
|
||||
return syncpoint_manager;
|
||||
}
|
||||
@ -55,14 +173,25 @@ public:
|
||||
return *allocator;
|
||||
}
|
||||
|
||||
void StartDevice(s32 fd, ChannelType type, u32 syncpt);
|
||||
void StopDevice(s32 fd, ChannelType type);
|
||||
|
||||
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
|
||||
auto it = devices.find(fd);
|
||||
if (it == devices.end()) {
|
||||
return;
|
||||
}
|
||||
it->second->PushEntries(std::move(entries));
|
||||
}
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
SyncpointManager syncpoint_manager;
|
||||
Tegra::MaxwellDeviceMemoryManager memory_manager;
|
||||
Tegra::MemoryManager gmmu_manager;
|
||||
std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
|
||||
FrameQueue frame_queue;
|
||||
std::unordered_map<s32, std::unique_ptr<CDmaPusher>> devices;
|
||||
};
|
||||
|
||||
} // namespace Host1x
|
||||
|
||||
} // namespace Tegra
|
||||
} // namespace Tegra::Host1x
|
||||
|
@ -2,6 +2,12 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/host1x/codecs/h264.h"
|
||||
#include "video_core/host1x/codecs/vp8.h"
|
||||
#include "video_core/host1x/codecs/vp9.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
#include "video_core/host1x/nvdec.h"
|
||||
|
||||
@ -10,37 +16,70 @@ namespace Tegra::Host1x {
|
||||
#define NVDEC_REG_INDEX(field_name) \
|
||||
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
|
||||
|
||||
Nvdec::Nvdec(Host1x& host1x_)
|
||||
: host1x(host1x_), state{}, codec(std::make_unique<Codec>(host1x, state)) {}
|
||||
Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt, FrameQueue& frame_queue_)
|
||||
: CDmaPusher{host1x_, id_}, id{id_}, syncpoint{syncpt}, frame_queue{frame_queue_} {
|
||||
LOG_INFO(HW_GPU, "Created nvdec {}", id);
|
||||
frame_queue.Open(id);
|
||||
}
|
||||
|
||||
Nvdec::~Nvdec() = default;
|
||||
Nvdec::~Nvdec() {
|
||||
LOG_INFO(HW_GPU, "Destroying nvdec {}", id);
|
||||
frame_queue.Close(id);
|
||||
}
|
||||
|
||||
void Nvdec::ProcessMethod(u32 method, u32 argument) {
|
||||
state.reg_array[method] = static_cast<u64>(argument) << 8;
|
||||
regs.reg_array[method] = argument;
|
||||
|
||||
switch (method) {
|
||||
case NVDEC_REG_INDEX(set_codec_id):
|
||||
codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
|
||||
CreateDecoder(static_cast<NvdecCommon::VideoCodec>(argument));
|
||||
break;
|
||||
case NVDEC_REG_INDEX(execute):
|
||||
case NVDEC_REG_INDEX(execute): {
|
||||
if (wait_needed) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(32));
|
||||
wait_needed = false;
|
||||
}
|
||||
Execute();
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() {
|
||||
return codec->GetCurrentFrame();
|
||||
void Nvdec::CreateDecoder(NvdecCommon::VideoCodec codec) {
|
||||
if (decoder.get()) {
|
||||
return;
|
||||
}
|
||||
switch (codec) {
|
||||
case NvdecCommon::VideoCodec::H264:
|
||||
decoder = std::make_unique<Decoders::H264>(host1x, regs, id, frame_queue);
|
||||
break;
|
||||
case NvdecCommon::VideoCodec::VP8:
|
||||
decoder = std::make_unique<Decoders::VP8>(host1x, regs, id, frame_queue);
|
||||
break;
|
||||
case NvdecCommon::VideoCodec::VP9:
|
||||
decoder = std::make_unique<Decoders::VP9>(host1x, regs, id, frame_queue);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
|
||||
break;
|
||||
}
|
||||
LOG_INFO(HW_GPU, "Created decoder {} for id {}", decoder->GetCurrentCodecName(), id);
|
||||
}
|
||||
|
||||
void Nvdec::Execute() {
|
||||
switch (codec->GetCurrentCodec()) {
|
||||
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Off) [[unlikely]] {
|
||||
// Signalling syncpts too fast can cause games to get stuck as they don't expect a <1ms
|
||||
// execution time. Sleep for half of a 60 fps frame just in case.
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(8));
|
||||
return;
|
||||
}
|
||||
switch (decoder->GetCurrentCodec()) {
|
||||
case NvdecCommon::VideoCodec::H264:
|
||||
case NvdecCommon::VideoCodec::VP8:
|
||||
case NvdecCommon::VideoCodec::VP9:
|
||||
codec->Decode();
|
||||
decoder->Decode();
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName());
|
||||
UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -5,33 +5,47 @@
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/host1x/codecs/codec.h"
|
||||
#include "video_core/cdma_pusher.h"
|
||||
#include "video_core/host1x/codecs/decoder.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Host1x {
|
||||
|
||||
class Host1x;
|
||||
class FrameQueue;
|
||||
|
||||
class Nvdec {
|
||||
class Nvdec final : public CDmaPusher {
|
||||
public:
|
||||
explicit Nvdec(Host1x& host1x);
|
||||
explicit Nvdec(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue_);
|
||||
~Nvdec();
|
||||
|
||||
/// Writes the method into the state, Invoke Execute() if encountered
|
||||
void ProcessMethod(u32 method, u32 argument);
|
||||
void ProcessMethod(u32 method, u32 arg) override;
|
||||
|
||||
/// Return most recently decoded frame
|
||||
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame();
|
||||
u32 GetSyncpoint() const {
|
||||
return syncpoint;
|
||||
}
|
||||
|
||||
void SetWait() {
|
||||
wait_needed = true;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Create the decoder when the codec id is set
|
||||
void CreateDecoder(NvdecCommon::VideoCodec codec);
|
||||
|
||||
/// Invoke codec to decode a frame
|
||||
void Execute();
|
||||
|
||||
Host1x& host1x;
|
||||
NvdecCommon::NvdecRegisters state;
|
||||
std::unique_ptr<Codec> codec;
|
||||
s32 id;
|
||||
u32 syncpoint;
|
||||
FrameQueue& frame_queue;
|
||||
|
||||
NvdecCommon::NvdecRegisters regs{};
|
||||
std::unique_ptr<Decoder> decoder;
|
||||
bool wait_needed{false};
|
||||
};
|
||||
|
||||
} // namespace Host1x
|
||||
|
@ -17,6 +17,17 @@ enum class VideoCodec : u64 {
|
||||
VP9 = 0x9,
|
||||
};
|
||||
|
||||
struct Offset {
|
||||
constexpr u64 Address() const noexcept {
|
||||
return offset << 8;
|
||||
}
|
||||
|
||||
private:
|
||||
u64 offset;
|
||||
};
|
||||
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
|
||||
static_assert(sizeof(Offset) == 0x8, "Offset has the wrong size!");
|
||||
|
||||
// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
|
||||
// doubling the sizes here is compensating for that.
|
||||
struct NvdecRegisters {
|
||||
@ -38,29 +49,40 @@ struct NvdecRegisters {
|
||||
BitField<17, 1, u64> all_intra_frame;
|
||||
};
|
||||
} control_params;
|
||||
u64 picture_info_offset; ///< 0x0808
|
||||
u64 frame_bitstream_offset; ///< 0x0810
|
||||
u64 frame_number; ///< 0x0818
|
||||
u64 h264_slice_data_offsets; ///< 0x0820
|
||||
u64 h264_mv_dump_offset; ///< 0x0828
|
||||
INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
|
||||
u64 frame_stats_offset; ///< 0x0848
|
||||
u64 h264_last_surface_luma_offset; ///< 0x0850
|
||||
u64 h264_last_surface_chroma_offset; ///< 0x0858
|
||||
std::array<u64, 17> surface_luma_offset; ///< 0x0860
|
||||
std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
|
||||
INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970
|
||||
u64 vp8_prob_data_offset; ///< 0x0A80
|
||||
u64 vp8_header_partition_buf_offset; ///< 0x0A88
|
||||
INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90
|
||||
u64 vp9_entropy_probs_offset; ///< 0x0B80
|
||||
u64 vp9_backward_updates_offset; ///< 0x0B88
|
||||
u64 vp9_last_frame_segmap_offset; ///< 0x0B90
|
||||
u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
|
||||
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
|
||||
u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
|
||||
u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
|
||||
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
|
||||
Offset picture_info_offset; ///< 0x0808
|
||||
Offset frame_bitstream_offset; ///< 0x0810
|
||||
u64 frame_number; ///< 0x0818
|
||||
Offset h264_slice_data_offsets; ///< 0x0820
|
||||
Offset h264_mv_dump_offset; ///< 0x0828
|
||||
INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
|
||||
Offset frame_stats_offset; ///< 0x0848
|
||||
Offset h264_last_surface_luma_offset; ///< 0x0850
|
||||
Offset h264_last_surface_chroma_offset; ///< 0x0858
|
||||
std::array<Offset, 17> surface_luma_offsets; ///< 0x0860
|
||||
std::array<Offset, 17> surface_chroma_offsets; ///< 0x08E8
|
||||
Offset pic_scratch_buf_offset; ///< 0x0970
|
||||
Offset external_mvbuffer_offset; ///< 0x0978
|
||||
INSERT_PADDING_WORDS_NOINIT(32); ///< 0x0980
|
||||
Offset h264_mbhist_buffer_offset; ///< 0x0A00
|
||||
INSERT_PADDING_WORDS_NOINIT(30); ///< 0x0A08
|
||||
Offset vp8_prob_data_offset; ///< 0x0A80
|
||||
Offset vp8_header_partition_buf_offset; ///< 0x0A88
|
||||
INSERT_PADDING_WORDS_NOINIT(28); ///< 0x0A90
|
||||
Offset hvec_scalist_list_offset; ///< 0x0B00
|
||||
Offset hvec_tile_sizes_offset; ///< 0x0B08
|
||||
Offset hvec_filter_buffer_offset; ///< 0x0B10
|
||||
Offset hvec_sao_buffer_offset; ///< 0x0B18
|
||||
Offset hvec_slice_info_buffer_offset; ///< 0x0B20
|
||||
Offset hvec_slice_group_index_offset; ///< 0x0B28
|
||||
INSERT_PADDING_WORDS_NOINIT(20); ///< 0x0B30
|
||||
Offset vp9_prob_tab_buffer_offset; ///< 0x0B80
|
||||
Offset vp9_ctx_counter_buffer_offset; ///< 0x0B88
|
||||
Offset vp9_segment_read_buffer_offset; ///< 0x0B90
|
||||
Offset vp9_segment_write_buffer_offset; ///< 0x0B98
|
||||
Offset vp9_tile_size_buffer_offset; ///< 0x0BA0
|
||||
Offset vp9_col_mvwrite_buffer_offset; ///< 0x0BA8
|
||||
Offset vp9_col_mvread_buffer_offset; ///< 0x0BB0
|
||||
Offset vp9_filter_buffer_offset; ///< 0x0BB8
|
||||
};
|
||||
std::array<u64, NUM_REGS> reg_array;
|
||||
};
|
||||
@ -81,16 +103,16 @@ ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
|
||||
ASSERT_REG_POSITION(frame_stats_offset, 0x109);
|
||||
ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
|
||||
ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
|
||||
ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
|
||||
ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
|
||||
ASSERT_REG_POSITION(surface_luma_offsets, 0x10C);
|
||||
ASSERT_REG_POSITION(surface_chroma_offsets, 0x11D);
|
||||
ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150);
|
||||
ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151);
|
||||
ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
|
||||
ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
|
||||
ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
|
||||
ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
|
||||
ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
|
||||
ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
|
||||
ASSERT_REG_POSITION(vp9_prob_tab_buffer_offset, 0x170);
|
||||
ASSERT_REG_POSITION(vp9_ctx_counter_buffer_offset, 0x171);
|
||||
ASSERT_REG_POSITION(vp9_segment_read_buffer_offset, 0x172);
|
||||
ASSERT_REG_POSITION(vp9_segment_write_buffer_offset, 0x173);
|
||||
ASSERT_REG_POSITION(vp9_col_mvwrite_buffer_offset, 0x175);
|
||||
ASSERT_REG_POSITION(vp9_col_mvread_buffer_offset, 0x176);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
|
@ -18,7 +18,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
|
||||
return {};
|
||||
}
|
||||
|
||||
std::unique_lock lk(guard);
|
||||
std::scoped_lock lk(guard);
|
||||
if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
|
||||
action();
|
||||
return {};
|
||||
@ -35,7 +35,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
|
||||
|
||||
void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
|
||||
const ActionHandle& handle) {
|
||||
std::unique_lock lk(guard);
|
||||
std::scoped_lock lk(guard);
|
||||
|
||||
// We want to ensure the iterator still exists prior to erasing it
|
||||
// Otherwise, if an invalid iterator was passed in then it could lead to UB
|
||||
@ -78,7 +78,7 @@ void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_var
|
||||
std::list<RegisteredAction>& action_storage) {
|
||||
auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
|
||||
|
||||
std::unique_lock lk(guard);
|
||||
std::scoped_lock lk(guard);
|
||||
auto it = action_storage.begin();
|
||||
while (it != action_storage.end()) {
|
||||
if (it->expected_value > new_value) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3,65 +3,646 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "video_core/cdma_pusher.h"
|
||||
|
||||
struct SwsContext;
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Host1x {
|
||||
|
||||
namespace Tegra::Host1x {
|
||||
class Host1x;
|
||||
class Nvdec;
|
||||
union VicConfig;
|
||||
|
||||
class Vic {
|
||||
struct Pixel {
|
||||
u16 r;
|
||||
u16 g;
|
||||
u16 b;
|
||||
u16 a;
|
||||
};
|
||||
|
||||
// One underscore represents separate pixels.
|
||||
// Double underscore represents separate planes.
|
||||
// _N represents chroma subsampling, not a separate pixel.
|
||||
enum class VideoPixelFormat : u32 {
|
||||
A8 = 0,
|
||||
L8 = 1,
|
||||
A4L4 = 2,
|
||||
L4A4 = 3,
|
||||
R8 = 4,
|
||||
A8L8 = 5,
|
||||
L8A8 = 6,
|
||||
R8G8 = 7,
|
||||
G8R8 = 8,
|
||||
B5G6R5 = 9,
|
||||
R5G6B5 = 10,
|
||||
B6G5R5 = 11,
|
||||
R5G5B6 = 12,
|
||||
A1B5G5R5 = 13,
|
||||
A1R5G5B5 = 14,
|
||||
B5G5R5A1 = 15,
|
||||
R5G5B5A1 = 16,
|
||||
A5B5G5R1 = 17,
|
||||
A5R1G5B5 = 18,
|
||||
B5G5R1A5 = 19,
|
||||
R1G5B5A5 = 20,
|
||||
X1B5G5R5 = 21,
|
||||
X1R5G5B5 = 22,
|
||||
B5G5R5X1 = 23,
|
||||
R5G5B5X1 = 24,
|
||||
A4B4G5R4 = 25,
|
||||
A4R4G4B4 = 26,
|
||||
B4G4R4A4 = 27,
|
||||
R4G4B4A4 = 28,
|
||||
B8G8R8 = 29,
|
||||
R8G8B8 = 30,
|
||||
A8B8G8R8 = 31,
|
||||
A8R8G8B8 = 32,
|
||||
B8G8R8A8 = 33,
|
||||
R8G8B8A8 = 34,
|
||||
X8B8G8R8 = 35,
|
||||
X8R8G8B8 = 36,
|
||||
B8G8R8X8 = 37,
|
||||
R8G8B8X8 = 38,
|
||||
A8B10G10R10 = 39,
|
||||
A2R10G10B10 = 40,
|
||||
B10G10R10A2 = 41,
|
||||
R10G10B10A2 = 42,
|
||||
A4P4 = 43,
|
||||
P4A4 = 44,
|
||||
P8A8 = 45,
|
||||
A8P8 = 46,
|
||||
P8 = 47,
|
||||
P1 = 48,
|
||||
U8V8 = 49,
|
||||
V8U8 = 50,
|
||||
A8Y8U8V8 = 51,
|
||||
V8U8Y8A8 = 52,
|
||||
Y8U8V8 = 53,
|
||||
Y8V8U8 = 54,
|
||||
U8V8Y8 = 55,
|
||||
V8U8Y8 = 56,
|
||||
Y8U8_Y8V8 = 57,
|
||||
Y8V8_Y8U8 = 58,
|
||||
U8Y8_V8Y8 = 59,
|
||||
V8Y8_U8Y8 = 60,
|
||||
Y8__U8V8_N444 = 61,
|
||||
Y8__V8U8_N444 = 62,
|
||||
Y8__U8V8_N422 = 63,
|
||||
Y8__V8U8_N422 = 64,
|
||||
Y8__U8V8_N422R = 65,
|
||||
Y8__V8U8_N422R = 66,
|
||||
Y8__U8V8_N420 = 67,
|
||||
Y8__V8U8_N420 = 68,
|
||||
Y8__U8__V8_N444 = 69,
|
||||
Y8__U8__V8_N422 = 70,
|
||||
Y8__U8__V8_N422R = 71,
|
||||
Y8__U8__V8_N420 = 72,
|
||||
U8 = 73,
|
||||
V8 = 74,
|
||||
};
|
||||
|
||||
struct Offset {
|
||||
constexpr u32 Address() const noexcept {
|
||||
return offset << 8;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 offset;
|
||||
};
|
||||
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
|
||||
static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!");
|
||||
|
||||
struct PlaneOffsets {
|
||||
Offset luma;
|
||||
Offset chroma_u;
|
||||
Offset chroma_v;
|
||||
};
|
||||
static_assert(sizeof(PlaneOffsets) == 0xC, "PlaneOffsets has the wrong size!");
|
||||
|
||||
enum SurfaceIndex : u32 {
|
||||
Current = 0,
|
||||
Previous = 1,
|
||||
Next = 2,
|
||||
NextNoiseReduced = 3,
|
||||
CurrentMotion = 4,
|
||||
PreviousMotion = 5,
|
||||
PreviousPreviousMotion = 6,
|
||||
CombinedMotion = 7,
|
||||
};
|
||||
|
||||
enum class DXVAHD_ALPHA_FILL_MODE : u32 {
|
||||
OPAQUE = 0,
|
||||
BACKGROUND = 1,
|
||||
DESTINATION = 2,
|
||||
SOURCE_STREAM = 3,
|
||||
COMPOSITED = 4,
|
||||
SOURCE_ALPHA = 5,
|
||||
};
|
||||
|
||||
enum class DXVAHD_FRAME_FORMAT : u64 {
|
||||
PROGRESSIVE = 0,
|
||||
INTERLACED_TOP_FIELD_FIRST = 1,
|
||||
INTERLACED_BOTTOM_FIELD_FIRST = 2,
|
||||
TOP_FIELD = 3,
|
||||
BOTTOM_FIELD = 4,
|
||||
SUBPIC_PROGRESSIVE = 5,
|
||||
SUBPIC_INTERLACED_TOP_FIELD_FIRST = 6,
|
||||
SUBPIC_INTERLACED_BOTTOM_FIELD_FIRST = 7,
|
||||
SUBPIC_TOP_FIELD = 8,
|
||||
SUBPIC_BOTTOM_FIELD = 9,
|
||||
TOP_FIELD_CHROMA_BOTTOM = 10,
|
||||
BOTTOM_FIELD_CHROMA_TOP = 11,
|
||||
SUBPIC_TOP_FIELD_CHROMA_BOTTOM = 12,
|
||||
SUBPIC_BOTTOM_FIELD_CHROMA_TOP = 13,
|
||||
};
|
||||
|
||||
enum class DXVAHD_DEINTERLACE_MODE_PRIVATE : u64 {
|
||||
WEAVE = 0,
|
||||
BOB_FIELD = 1,
|
||||
BOB = 2,
|
||||
NEWBOB = 3,
|
||||
DISI1 = 4,
|
||||
WEAVE_LUMA_BOB_FIELD_CHROMA = 5,
|
||||
MAX = 0xF,
|
||||
};
|
||||
|
||||
enum class BLK_KIND {
|
||||
PITCH = 0,
|
||||
GENERIC_16Bx2 = 1,
|
||||
// These are unsupported in the vic
|
||||
BL_NAIVE = 2,
|
||||
BL_KEPLER_XBAR_RAW = 3,
|
||||
VP2_TILED = 15,
|
||||
};
|
||||
|
||||
enum class BLEND_SRCFACTC : u32 {
|
||||
K1 = 0,
|
||||
K1_TIMES_DST = 1,
|
||||
NEG_K1_TIMES_DST = 2,
|
||||
K1_TIMES_SRC = 3,
|
||||
ZERO = 4,
|
||||
};
|
||||
|
||||
enum class BLEND_DSTFACTC : u32 {
|
||||
K1 = 0,
|
||||
K2 = 1,
|
||||
K1_TIMES_DST = 2,
|
||||
NEG_K1_TIMES_DST = 3,
|
||||
NEG_K1_TIMES_SRC = 4,
|
||||
ZERO = 5,
|
||||
ONE = 6,
|
||||
};
|
||||
|
||||
enum class BLEND_SRCFACTA : u32 {
|
||||
K1 = 0,
|
||||
K2 = 1,
|
||||
NEG_K1_TIMES_DST = 2,
|
||||
ZERO = 3,
|
||||
MAX = 7,
|
||||
};
|
||||
|
||||
enum class BLEND_DSTFACTA : u32 {
|
||||
K2 = 0,
|
||||
NEG_K1_TIMES_SRC = 1,
|
||||
ZERO = 2,
|
||||
ONE = 3,
|
||||
MAX = 7,
|
||||
};
|
||||
|
||||
struct PipeConfig {
|
||||
union {
|
||||
BitField<0, 11, u32> downsample_horiz;
|
||||
BitField<11, 5, u32> reserved0;
|
||||
BitField<16, 11, u32> downsample_vert;
|
||||
BitField<27, 5, u32> reserved1;
|
||||
};
|
||||
u32 reserved2;
|
||||
u32 reserved3;
|
||||
u32 reserved4;
|
||||
};
|
||||
static_assert(sizeof(PipeConfig) == 0x10, "PipeConfig has the wrong size!");
|
||||
|
||||
struct OutputConfig {
|
||||
union {
|
||||
BitField<0, 3, DXVAHD_ALPHA_FILL_MODE> alpha_fill_mode;
|
||||
BitField<3, 3, u64> alpha_fill_slot;
|
||||
BitField<6, 10, u64> background_a;
|
||||
BitField<16, 10, u64> background_r;
|
||||
BitField<26, 10, u64> background_g;
|
||||
BitField<36, 10, u64> background_b;
|
||||
BitField<46, 2, u64> regamma_mode;
|
||||
BitField<48, 1, u64> output_flip_x;
|
||||
BitField<49, 1, u64> output_flip_y;
|
||||
BitField<50, 1, u64> output_transpose;
|
||||
BitField<51, 1, u64> reserved1;
|
||||
BitField<52, 12, u64> reserved2;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> target_rect_left;
|
||||
BitField<14, 2, u32> reserved3;
|
||||
BitField<16, 14, u32> target_rect_right;
|
||||
BitField<30, 2, u32> reserved4;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> target_rect_top;
|
||||
BitField<14, 2, u32> reserved5;
|
||||
BitField<16, 14, u32> target_rect_bottom;
|
||||
BitField<30, 2, u32> reserved6;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(OutputConfig) == 0x10, "OutputConfig has the wrong size!");
|
||||
|
||||
struct OutputSurfaceConfig {
|
||||
union {
|
||||
BitField<0, 7, VideoPixelFormat> out_pixel_format;
|
||||
BitField<7, 2, u32> out_chroma_loc_horiz;
|
||||
BitField<9, 2, u32> out_chroma_loc_vert;
|
||||
BitField<11, 4, BLK_KIND> out_block_kind;
|
||||
BitField<15, 4, u32> out_block_height; // in gobs, log2
|
||||
BitField<19, 3, u32> reserved0;
|
||||
BitField<22, 10, u32> reserved1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> out_surface_width; // - 1
|
||||
BitField<14, 14, u32> out_surface_height; // - 1
|
||||
BitField<28, 4, u32> reserved2;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> out_luma_width; // - 1
|
||||
BitField<14, 14, u32> out_luma_height; // - 1
|
||||
BitField<28, 4, u32> reserved3;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> out_chroma_width; // - 1
|
||||
BitField<14, 14, u32> out_chroma_height; // - 1
|
||||
BitField<28, 4, u32> reserved4;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(OutputSurfaceConfig) == 0x10, "OutputSurfaceConfig has the wrong size!");
|
||||
|
||||
struct MatrixStruct {
|
||||
union {
|
||||
BitField<0, 20, s64> matrix_coeff00; // (0,0) of 4x3 conversion matrix
|
||||
BitField<20, 20, s64> matrix_coeff10; // (1,0) of 4x3 conversion matrix
|
||||
BitField<40, 20, s64> matrix_coeff20; // (2,0) of 4x3 conversion matrix
|
||||
BitField<60, 4, u64> matrix_r_shift;
|
||||
};
|
||||
union {
|
||||
BitField<0, 20, s64> matrix_coeff01; // (0,1) of 4x3 conversion matrix
|
||||
BitField<20, 20, s64> matrix_coeff11; // (1,1) of 4x3 conversion matrix
|
||||
BitField<40, 20, s64> matrix_coeff21; // (2,1) of 4x3 conversion matrix
|
||||
BitField<60, 3, u64> reserved0;
|
||||
BitField<63, 1, u64> matrix_enable;
|
||||
};
|
||||
union {
|
||||
BitField<0, 20, s64> matrix_coeff02; // (0,2) of 4x3 conversion matrix
|
||||
BitField<20, 20, s64> matrix_coeff12; // (1,2) of 4x3 conversion matrix
|
||||
BitField<40, 20, s64> matrix_coeff22; // (2,2) of 4x3 conversion matrix
|
||||
BitField<60, 4, u64> reserved1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 20, s64> matrix_coeff03; // (0,3) of 4x3 conversion matrix
|
||||
BitField<20, 20, s64> matrix_coeff13; // (1,3) of 4x3 conversion matrix
|
||||
BitField<40, 20, s64> matrix_coeff23; // (2,3) of 4x3 conversion matrix
|
||||
BitField<60, 4, u64> reserved2;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(MatrixStruct) == 0x20, "MatrixStruct has the wrong size!");
|
||||
|
||||
struct ClearRectStruct {
|
||||
union {
|
||||
BitField<0, 14, u32> clear_rect0_left;
|
||||
BitField<14, 2, u32> reserved0;
|
||||
BitField<16, 14, u32> clear_rect0_right;
|
||||
BitField<30, 2, u32> reserved1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> clear_rect0_top;
|
||||
BitField<14, 2, u32> reserved2;
|
||||
BitField<16, 14, u32> clear_rect0_bottom;
|
||||
BitField<30, 2, u32> reserved3;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> clear_rect1_left;
|
||||
BitField<14, 2, u32> reserved4;
|
||||
BitField<16, 14, u32> clear_rect1_right;
|
||||
BitField<30, 2, u32> reserved5;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> clear_rect1_top;
|
||||
BitField<14, 2, u32> reserved6;
|
||||
BitField<16, 14, u32> clear_rect1_bottom;
|
||||
BitField<30, 2, u32> reserved7;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(ClearRectStruct) == 0x10, "ClearRectStruct has the wrong size!");
|
||||
|
||||
struct SlotConfig {
|
||||
union {
|
||||
BitField<0, 1, u64> slot_enable;
|
||||
BitField<1, 1, u64> denoise;
|
||||
BitField<2, 1, u64> advanced_denoise;
|
||||
BitField<3, 1, u64> cadence_detect;
|
||||
BitField<4, 1, u64> motion_map;
|
||||
BitField<5, 1, u64> motion_map_capture;
|
||||
BitField<6, 1, u64> is_even;
|
||||
BitField<7, 1, u64> chroma_even;
|
||||
// fetch control struct
|
||||
BitField<8, 1, u64> current_field_enable;
|
||||
BitField<9, 1, u64> prev_field_enable;
|
||||
BitField<10, 1, u64> next_field_enable;
|
||||
BitField<11, 1, u64> next_nr_field_enable; // noise reduction
|
||||
BitField<12, 1, u64> current_motion_field_enable;
|
||||
BitField<13, 1, u64> prev_motion_field_enable;
|
||||
BitField<14, 1, u64> prev_prev_motion_field_enable;
|
||||
BitField<15, 1, u64> combined_motion_field_enable;
|
||||
|
||||
BitField<16, 4, DXVAHD_FRAME_FORMAT> frame_format;
|
||||
BitField<20, 2, u64> filter_length_y; // 0: 1-tap, 1: 2-tap, 2: 5-tap, 3: 10-tap
|
||||
BitField<22, 2, u64> filter_length_x;
|
||||
BitField<24, 12, u64> panoramic;
|
||||
BitField<36, 22, u64> reserved1;
|
||||
BitField<58, 6, u64> detail_filter_clamp;
|
||||
};
|
||||
union {
|
||||
BitField<0, 10, u64> filter_noise;
|
||||
BitField<10, 10, u64> filter_detail;
|
||||
BitField<20, 10, u64> chroma_noise;
|
||||
BitField<30, 10, u64> chroma_detail;
|
||||
BitField<40, 4, DXVAHD_DEINTERLACE_MODE_PRIVATE> deinterlace_mode;
|
||||
BitField<44, 3, u64> motion_accumulation_weight;
|
||||
BitField<47, 11, u64> noise_iir;
|
||||
BitField<58, 4, u64> light_level;
|
||||
BitField<62, 2, u64> reserved4;
|
||||
};
|
||||
union {
|
||||
BitField<0, 10, u64> soft_clamp_low;
|
||||
BitField<10, 10, u64> soft_clamp_high;
|
||||
BitField<20, 3, u64> reserved5;
|
||||
BitField<23, 9, u64> reserved6;
|
||||
BitField<32, 10, u64> planar_alpha;
|
||||
BitField<42, 1, u64> constant_alpha;
|
||||
BitField<43, 3, u64> stereo_interleave;
|
||||
BitField<46, 1, u64> clip_enabled;
|
||||
BitField<47, 8, u64> clear_rect_mask;
|
||||
BitField<55, 2, u64> degamma_mode;
|
||||
BitField<57, 1, u64> reserved7;
|
||||
BitField<58, 1, u64> decompress_enable;
|
||||
BitField<59, 5, u64> reserved9;
|
||||
};
|
||||
union {
|
||||
BitField<0, 8, u64> decompress_ctb_count;
|
||||
BitField<8, 32, u64> decompress_zbc_count;
|
||||
BitField<40, 24, u64> reserved12;
|
||||
};
|
||||
union {
|
||||
BitField<0, 30, u64> source_rect_left;
|
||||
BitField<30, 2, u64> reserved14;
|
||||
BitField<32, 30, u64> source_rect_right;
|
||||
BitField<62, 2, u64> reserved15;
|
||||
};
|
||||
union {
|
||||
BitField<0, 30, u64> source_rect_top;
|
||||
BitField<30, 2, u64> reserved16;
|
||||
BitField<32, 30, u64> source_rect_bottom;
|
||||
BitField<62, 2, u64> reserved17;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u64> dest_rect_left;
|
||||
BitField<14, 2, u64> reserved18;
|
||||
BitField<16, 14, u64> dest_rect_right;
|
||||
BitField<30, 2, u64> reserved19;
|
||||
BitField<32, 14, u64> dest_rect_top;
|
||||
BitField<46, 2, u64> reserved20;
|
||||
BitField<48, 14, u64> dest_rect_bottom;
|
||||
BitField<62, 2, u64> reserved21;
|
||||
};
|
||||
u32 reserved22;
|
||||
u32 reserved23;
|
||||
};
|
||||
static_assert(sizeof(SlotConfig) == 0x40, "SlotConfig has the wrong size!");
|
||||
|
||||
struct SlotSurfaceConfig {
|
||||
union {
|
||||
BitField<0, 7, VideoPixelFormat> slot_pixel_format;
|
||||
BitField<7, 2, u32> slot_chroma_loc_horiz;
|
||||
BitField<9, 2, u32> slot_chroma_loc_vert;
|
||||
BitField<11, 4, u32> slot_block_kind;
|
||||
BitField<15, 4, u32> slot_block_height;
|
||||
BitField<19, 3, u32> slot_cache_width;
|
||||
BitField<22, 10, u32> reserved0;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> slot_surface_width; // - 1
|
||||
BitField<14, 14, u32> slot_surface_height; // - 1
|
||||
BitField<28, 4, u32> reserved1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> slot_luma_width; // padded, - 1
|
||||
BitField<14, 14, u32> slot_luma_height; // padded, - 1
|
||||
BitField<28, 4, u32> reserved2;
|
||||
};
|
||||
union {
|
||||
BitField<0, 14, u32> slot_chroma_width; // padded, - 1
|
||||
BitField<14, 14, u32> slot_chroma_height; // padded, - 1
|
||||
BitField<28, 4, u32> reserved3;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(SlotSurfaceConfig) == 0x10, "SlotSurfaceConfig has the wrong size!");
|
||||
|
||||
struct LumaKeyStruct {
|
||||
union {
|
||||
BitField<0, 20, u64> luma_coeff0; // (0) of 4x1 conversion matrix, S12.8 format
|
||||
BitField<20, 20, u64> luma_coeff1; // (1) of 4x1 conversion matrix, S12.8 format
|
||||
BitField<40, 20, u64> luma_coeff2; // (2) of 4x1 conversion matrix, S12.8 format
|
||||
BitField<60, 4, u64> luma_r_shift;
|
||||
};
|
||||
union {
|
||||
BitField<0, 20, u64> luma_coeff3; // (3) of 4x1 conversion matrix, S12.8 format
|
||||
BitField<20, 10, u64> luma_key_lower;
|
||||
BitField<30, 10, u64> luma_key_upper;
|
||||
BitField<40, 1, u64> luma_key_enabled;
|
||||
BitField<41, 2, u64> reserved0;
|
||||
BitField<43, 21, u64> reserved1;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(LumaKeyStruct) == 0x10, "LumaKeyStruct has the wrong size!");
|
||||
|
||||
struct BlendingSlotStruct {
|
||||
union {
|
||||
BitField<0, 10, u32> alpha_k1;
|
||||
BitField<10, 6, u32> reserved0;
|
||||
BitField<16, 10, u32> alpha_k2;
|
||||
BitField<26, 6, u32> reserved1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 3, BLEND_SRCFACTC> src_factor_color_match_select;
|
||||
BitField<3, 1, u32> reserved2;
|
||||
BitField<4, 3, BLEND_DSTFACTC> dst_factor_color_match_select;
|
||||
BitField<7, 1, u32> reserved3;
|
||||
BitField<8, 3, BLEND_SRCFACTA> src_factor_a_match_select;
|
||||
BitField<11, 1, u32> reserved4;
|
||||
BitField<12, 3, BLEND_DSTFACTA> dst_factor_a_match_select;
|
||||
BitField<15, 1, u32> reserved5;
|
||||
BitField<16, 4, u32> reserved6;
|
||||
BitField<20, 4, u32> reserved7;
|
||||
BitField<24, 4, u32> reserved8;
|
||||
BitField<28, 4, u32> reserved9;
|
||||
};
|
||||
union {
|
||||
BitField<0, 2, u32> reserved10;
|
||||
BitField<2, 10, u32> override_r;
|
||||
BitField<12, 10, u32> override_g;
|
||||
BitField<22, 10, u32> override_b;
|
||||
};
|
||||
union {
|
||||
BitField<0, 10, u32> override_a;
|
||||
BitField<10, 2, u32> reserved11;
|
||||
BitField<12, 1, u32> use_override_r;
|
||||
BitField<13, 1, u32> use_override_g;
|
||||
BitField<14, 1, u32> use_override_b;
|
||||
BitField<15, 1, u32> use_override_a;
|
||||
BitField<16, 1, u32> mask_r;
|
||||
BitField<17, 1, u32> mask_g;
|
||||
BitField<18, 1, u32> mask_b;
|
||||
BitField<19, 1, u32> mask_a;
|
||||
BitField<20, 12, u32> reserved12;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(BlendingSlotStruct) == 0x10, "BlendingSlotStruct has the wrong size!");
|
||||
|
||||
struct SlotStruct {
|
||||
SlotConfig config;
|
||||
SlotSurfaceConfig surface_config;
|
||||
LumaKeyStruct luma_key;
|
||||
MatrixStruct color_matrix;
|
||||
MatrixStruct gamut_matrix;
|
||||
BlendingSlotStruct blending;
|
||||
};
|
||||
static_assert(sizeof(SlotStruct) == 0xB0, "SlotStruct has the wrong size!");
|
||||
|
||||
struct ConfigStruct {
|
||||
PipeConfig pipe_config;
|
||||
OutputConfig output_config;
|
||||
OutputSurfaceConfig output_surface_config;
|
||||
MatrixStruct out_color_matrix;
|
||||
std::array<ClearRectStruct, 4> clear_rects;
|
||||
std::array<SlotStruct, 8> slot_structs;
|
||||
};
|
||||
static_assert(offsetof(ConfigStruct, pipe_config) == 0x0, "pipe_config is in the wrong place!");
|
||||
static_assert(offsetof(ConfigStruct, output_config) == 0x10,
|
||||
"output_config is in the wrong place!");
|
||||
static_assert(offsetof(ConfigStruct, output_surface_config) == 0x20,
|
||||
"output_surface_config is in the wrong place!");
|
||||
static_assert(offsetof(ConfigStruct, out_color_matrix) == 0x30,
|
||||
"out_color_matrix is in the wrong place!");
|
||||
static_assert(offsetof(ConfigStruct, clear_rects) == 0x50, "clear_rects is in the wrong place!");
|
||||
static_assert(offsetof(ConfigStruct, slot_structs) == 0x90, "slot_structs is in the wrong place!");
|
||||
static_assert(sizeof(ConfigStruct) == 0x610, "ConfigStruct has the wrong size!");
|
||||
|
||||
struct VicRegisters {
|
||||
static constexpr std::size_t NUM_REGS = 0x446;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC0);
|
||||
u32 execute;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x3F);
|
||||
std::array<std::array<PlaneOffsets, 8>, 8> surfaces;
|
||||
u32 picture_index;
|
||||
u32 control_params;
|
||||
Offset config_struct_offset;
|
||||
Offset filter_struct_offset;
|
||||
Offset palette_offset;
|
||||
Offset hist_offset;
|
||||
u32 context_id;
|
||||
u32 fce_ucode_size;
|
||||
PlaneOffsets output_surface;
|
||||
Offset fce_ucode_offset;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
std::array<u32, 8> slot_context_ids;
|
||||
std::array<Offset, 8> comp_tag_buffer_offsets;
|
||||
std::array<Offset, 8> history_buffer_offset;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x25D);
|
||||
u32 pm_trigger_end;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
};
|
||||
static_assert(offsetof(VicRegisters, execute) == 0x300, "execute is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, surfaces) == 0x400, "surfaces is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, picture_index) == 0x700,
|
||||
"picture_index is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, control_params) == 0x704,
|
||||
"control_params is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, config_struct_offset) == 0x708,
|
||||
"config_struct_offset is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, output_surface) == 0x720,
|
||||
"output_surface is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, slot_context_ids) == 0x740,
|
||||
"slot_context_ids is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, history_buffer_offset) == 0x780,
|
||||
"history_buffer_offset is in the wrong place!");
|
||||
static_assert(offsetof(VicRegisters, pm_trigger_end) == 0x1114,
|
||||
"pm_trigger_end is in the wrong place!");
|
||||
static_assert(sizeof(VicRegisters) == 0x1118, "VicRegisters has the wrong size!");
|
||||
|
||||
class Vic final : public CDmaPusher {
|
||||
public:
|
||||
enum class Method : u32 {
|
||||
Execute = 0xc0,
|
||||
SetControlParams = 0x1c1,
|
||||
SetConfigStructOffset = 0x1c2,
|
||||
SetOutputSurfaceLumaOffset = 0x1c8,
|
||||
SetOutputSurfaceChromaOffset = 0x1c9,
|
||||
SetOutputSurfaceChromaUnusedOffset = 0x1ca
|
||||
Execute = offsetof(VicRegisters, execute),
|
||||
SetControlParams = offsetof(VicRegisters, control_params),
|
||||
SetConfigStructOffset = offsetof(VicRegisters, config_struct_offset),
|
||||
SetOutputSurfaceLumaOffset = offsetof(VicRegisters, output_surface.luma),
|
||||
SetOutputSurfaceChromaOffset = offsetof(VicRegisters, output_surface.chroma_u),
|
||||
SetOutputSurfaceChromaUnusedOffset = offsetof(VicRegisters, output_surface.chroma_v)
|
||||
};
|
||||
|
||||
explicit Vic(Host1x& host1x, std::shared_ptr<Nvdec> nvdec_processor);
|
||||
|
||||
explicit Vic(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue);
|
||||
~Vic();
|
||||
|
||||
/// Write to the device state.
|
||||
void ProcessMethod(Method method, u32 argument);
|
||||
void ProcessMethod(u32 method, u32 arg) override;
|
||||
|
||||
private:
|
||||
void Execute();
|
||||
|
||||
void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
|
||||
void Blend(const ConfigStruct& config, const SlotStruct& slot);
|
||||
|
||||
void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
|
||||
template <bool Planar, bool Interlaced = false>
|
||||
void ReadProgressiveY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
|
||||
std::shared_ptr<const FFmpeg::Frame> frame);
|
||||
template <bool Planar, bool TopField>
|
||||
void ReadInterlacedY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
|
||||
std::shared_ptr<const FFmpeg::Frame> frame);
|
||||
|
||||
Host1x& host1x;
|
||||
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
|
||||
template <bool Planar>
|
||||
void ReadY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
|
||||
std::shared_ptr<const FFmpeg::Frame> frame);
|
||||
|
||||
/// Avoid reallocation of the following buffers every frame, as their
|
||||
/// size does not change during a stream
|
||||
using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
|
||||
AVMallocPtr converted_frame_buffer;
|
||||
Common::ScratchBuffer<u8> luma_buffer;
|
||||
Common::ScratchBuffer<u8> chroma_buffer;
|
||||
void WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config);
|
||||
|
||||
GPUVAddr config_struct_address{};
|
||||
GPUVAddr output_surface_luma_address{};
|
||||
GPUVAddr output_surface_chroma_address{};
|
||||
template <VideoPixelFormat Format>
|
||||
void WriteABGR(const OutputSurfaceConfig& output_surface_config);
|
||||
|
||||
SwsContext* scaler_ctx{};
|
||||
s32 scaler_width{};
|
||||
s32 scaler_height{};
|
||||
s32 id;
|
||||
s32 nvdec_id{-1};
|
||||
u32 syncpoint;
|
||||
|
||||
VicRegisters regs{};
|
||||
FrameQueue& frame_queue;
|
||||
|
||||
const bool has_sse41{false};
|
||||
|
||||
Common::ScratchBuffer<Pixel> output_surface;
|
||||
Common::ScratchBuffer<Pixel> slot_surface;
|
||||
Common::ScratchBuffer<u8> luma_scratch;
|
||||
Common::ScratchBuffer<u8> chroma_scratch;
|
||||
Common::ScratchBuffer<u8> swizzle_scratch;
|
||||
};
|
||||
|
||||
} // namespace Host1x
|
||||
|
||||
} // namespace Tegra
|
||||
} // namespace Tegra::Host1x
|
||||
|
@ -37,6 +37,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
|
||||
|
||||
#define A_GPU 1
|
||||
#define A_GLSL 1
|
||||
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
|
||||
|
||||
#ifndef YUZU_USE_FP16
|
||||
#include "ffx_a.h"
|
||||
@ -71,9 +72,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
|
||||
|
||||
#include "ffx_fsr1.h"
|
||||
|
||||
#if USE_RCAS
|
||||
layout(location = 0) in vec2 frag_texcoord;
|
||||
#endif
|
||||
layout (location = 0) in vec2 frag_texcoord;
|
||||
layout (location = 0) out vec4 frag_color;
|
||||
|
||||
void CurrFilter(AU2 pos) {
|
||||
@ -81,22 +80,22 @@ void CurrFilter(AU2 pos) {
|
||||
#ifndef YUZU_USE_FP16
|
||||
AF3 c;
|
||||
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
|
||||
frag_color = AF4(c, 1.0);
|
||||
frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
|
||||
#else
|
||||
AH3 c;
|
||||
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
|
||||
frag_color = AH4(c, 1.0);
|
||||
frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
|
||||
#endif
|
||||
#endif
|
||||
#if USE_RCAS
|
||||
#ifndef YUZU_USE_FP16
|
||||
AF3 c;
|
||||
FsrRcasF(c.r, c.g, c.b, pos, Const0);
|
||||
frag_color = AF4(c, 1.0);
|
||||
AF4 c;
|
||||
FsrRcasF(c.r, c.g, c.b, c.a, pos, Const0);
|
||||
frag_color = c;
|
||||
#else
|
||||
AH3 c;
|
||||
FsrRcasH(c.r, c.g, c.b, pos, Const0);
|
||||
frag_color = AH4(c, 1.0);
|
||||
AH4 c;
|
||||
FsrRcasH(c.r, c.g, c.b, c.a, pos, Const0);
|
||||
frag_color = c;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@ -71,5 +71,5 @@ vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
|
||||
}
|
||||
|
||||
void main() {
|
||||
frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0);
|
||||
frag_color = vec4(FxaaPixelShader(posPos, input_texture), texture(input_texture, posPos.xy).a);
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ layout (location = 0) uniform uvec4 constants[4];
|
||||
|
||||
#define A_GPU 1
|
||||
#define A_GLSL 1
|
||||
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
|
||||
|
||||
#ifdef YUZU_USE_FP16
|
||||
#define A_HALF
|
||||
@ -67,9 +68,7 @@ layout (location = 0) uniform uvec4 constants[4];
|
||||
|
||||
#include "ffx_fsr1.h"
|
||||
|
||||
#if USE_RCAS
|
||||
layout(location = 0) in vec2 frag_texcoord;
|
||||
#endif
|
||||
layout (location = 0) in vec2 frag_texcoord;
|
||||
layout (location = 0) out vec4 frag_color;
|
||||
|
||||
void CurrFilter(AU2 pos)
|
||||
@ -78,22 +77,22 @@ void CurrFilter(AU2 pos)
|
||||
#ifndef YUZU_USE_FP16
|
||||
AF3 c;
|
||||
FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]);
|
||||
frag_color = AF4(c, 1.0);
|
||||
frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
|
||||
#else
|
||||
AH3 c;
|
||||
FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]);
|
||||
frag_color = AH4(c, 1.0);
|
||||
frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
|
||||
#endif
|
||||
#endif
|
||||
#if USE_RCAS
|
||||
#ifndef YUZU_USE_FP16
|
||||
AF3 c;
|
||||
FsrRcasF(c.r, c.g, c.b, pos, constants[0]);
|
||||
frag_color = AF4(c, 1.0);
|
||||
AF4 c;
|
||||
FsrRcasF(c.r, c.g, c.b, c.a, pos, constants[0]);
|
||||
frag_color = c;
|
||||
#else
|
||||
AH3 c;
|
||||
FsrRcasH(c.r, c.g, c.b, pos, constants[0]);
|
||||
frag_color = AH4(c, 1.0);
|
||||
FsrRcasH(c.r, c.g, c.b, c.a, pos, constants[0]);
|
||||
frag_color = c;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@ -9,5 +9,5 @@ layout (location = 0) out vec4 color;
|
||||
layout (binding = 0) uniform sampler2D color_texture;
|
||||
|
||||
void main() {
|
||||
color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
|
||||
color = vec4(texture(color_texture, frag_tex_coord));
|
||||
}
|
||||
|
@ -52,5 +52,5 @@ vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
|
||||
}
|
||||
|
||||
void main() {
|
||||
color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f);
|
||||
color = textureBicubic(color_texture, frag_tex_coord);
|
||||
}
|
||||
|
@ -46,14 +46,14 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
|
||||
}
|
||||
|
||||
void main() {
|
||||
vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
|
||||
vec4 base = texture(color_texture, vec2(frag_tex_coord)) * weight[0];
|
||||
vec2 tex_offset = 1.0f / textureSize(color_texture, 0);
|
||||
|
||||
// TODO(Blinkhawk): This code can be optimized through shader group instructions.
|
||||
vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
|
||||
vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
|
||||
vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
|
||||
vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
|
||||
vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
|
||||
color = vec4(combination + base, 1.0f);
|
||||
vec4 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset);
|
||||
vec4 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset);
|
||||
vec4 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset);
|
||||
vec4 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0));
|
||||
vec4 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
|
||||
color = combination + base;
|
||||
}
|
||||
|
@ -6,5 +6,6 @@
|
||||
|
||||
#define YUZU_USE_FP16
|
||||
#define USE_EASU 1
|
||||
#define VERSION 1
|
||||
|
||||
#include "fidelityfx_fsr.frag"
|
||||
|
@ -5,5 +5,6 @@
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define USE_EASU 1
|
||||
#define VERSION 1
|
||||
|
||||
#include "fidelityfx_fsr.frag"
|
||||
|
@ -6,5 +6,6 @@
|
||||
|
||||
#define YUZU_USE_FP16
|
||||
#define USE_RCAS 1
|
||||
#define VERSION 1
|
||||
|
||||
#include "fidelityfx_fsr.frag"
|
||||
|
@ -5,5 +5,6 @@
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define USE_RCAS 1
|
||||
#define VERSION 1
|
||||
|
||||
#include "fidelityfx_fsr.frag"
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define VERSION 1
|
||||
#define VERSION 2
|
||||
#define YUZU_USE_FP16
|
||||
|
||||
#include "opengl_present_scaleforce.frag"
|
||||
|
@ -5,6 +5,6 @@
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define VERSION 1
|
||||
#define VERSION 2
|
||||
|
||||
#include "opengl_present_scaleforce.frag"
|
||||
|
@ -42,6 +42,8 @@ public:
|
||||
u64 page_bits_ = 12);
|
||||
~MemoryManager();
|
||||
|
||||
static constexpr bool HAS_FLUSH_INVALIDATION = true;
|
||||
|
||||
size_t GetID() const {
|
||||
return unique_identifier;
|
||||
}
|
||||
|
37
src/video_core/present.h
Executable file
37
src/video_core/present.h
Executable file
@ -0,0 +1,37 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/settings.h"
|
||||
|
||||
static inline Settings::ScalingFilter GetScalingFilter() {
|
||||
return Settings::values.scaling_filter.GetValue();
|
||||
}
|
||||
|
||||
static inline Settings::AntiAliasing GetAntiAliasing() {
|
||||
return Settings::values.anti_aliasing.GetValue();
|
||||
}
|
||||
|
||||
static inline Settings::ScalingFilter GetScalingFilterForAppletCapture() {
|
||||
return Settings::ScalingFilter::Bilinear;
|
||||
}
|
||||
|
||||
static inline Settings::AntiAliasing GetAntiAliasingForAppletCapture() {
|
||||
return Settings::AntiAliasing::None;
|
||||
}
|
||||
|
||||
struct PresentFilters {
|
||||
Settings::ScalingFilter (*get_scaling_filter)();
|
||||
Settings::AntiAliasing (*get_anti_aliasing)();
|
||||
};
|
||||
|
||||
constexpr PresentFilters PresentFiltersForDisplay{
|
||||
.get_scaling_filter = &GetScalingFilter,
|
||||
.get_anti_aliasing = &GetAntiAliasing,
|
||||
};
|
||||
|
||||
constexpr PresentFilters PresentFiltersForAppletCapture{
|
||||
.get_scaling_filter = &GetScalingFilterForAppletCapture,
|
||||
.get_anti_aliasing = &GetAntiAliasingForAppletCapture,
|
||||
};
|
@ -40,6 +40,9 @@ public:
|
||||
/// Finalize rendering the guest frame and draw into the presentation texture
|
||||
virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0;
|
||||
|
||||
/// Get the tiled applet layer capture buffer
|
||||
virtual std::vector<u8> GetAppletCaptureBuffer() = 0;
|
||||
|
||||
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
|
||||
|
||||
[[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/frontend/graphics_context.h"
|
||||
#include "video_core/capture.h"
|
||||
#include "video_core/renderer_null/renderer_null.h"
|
||||
|
||||
namespace Null {
|
||||
@ -22,4 +23,8 @@ void RendererNull::Composite(std::span<const Tegra::FramebufferConfig> framebuff
|
||||
render_window.OnFrameDisplayed();
|
||||
}
|
||||
|
||||
std::vector<u8> RendererNull::GetAppletCaptureBuffer() {
|
||||
return std::vector<u8>(VideoCore::Capture::TiledSize);
|
||||
}
|
||||
|
||||
} // namespace Null
|
||||
|
@ -19,6 +19,8 @@ public:
|
||||
|
||||
void Composite(std::span<const Tegra::FramebufferConfig> framebuffer) override;
|
||||
|
||||
std::vector<u8> GetAppletCaptureBuffer() override;
|
||||
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &m_rasterizer;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/settings.h"
|
||||
#include "video_core/present.h"
|
||||
#include "video_core/renderer_opengl/gl_blit_screen.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/present/filters.h"
|
||||
@ -13,14 +14,14 @@ namespace OpenGL {
|
||||
BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_,
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
StateTracker& state_tracker_, ProgramManager& program_manager_,
|
||||
Device& device_)
|
||||
Device& device_, const PresentFilters& filters_)
|
||||
: rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_),
|
||||
program_manager(program_manager_), device(device_) {}
|
||||
program_manager(program_manager_), device(device_), filters(filters_) {}
|
||||
|
||||
BlitScreen::~BlitScreen() = default;
|
||||
|
||||
void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout) {
|
||||
const Layout::FramebufferLayout& layout, bool invert_y) {
|
||||
// TODO: Signal state tracker about these changes
|
||||
state_tracker.NotifyScreenDrawVertexArray();
|
||||
state_tracker.NotifyPolygonModes();
|
||||
@ -56,22 +57,22 @@ void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffe
|
||||
glDepthRangeIndexed(0, 0.0, 0.0);
|
||||
|
||||
while (layers.size() < framebuffers.size()) {
|
||||
layers.emplace_back(rasterizer, device_memory);
|
||||
layers.emplace_back(rasterizer, device_memory, filters);
|
||||
}
|
||||
|
||||
CreateWindowAdapt();
|
||||
window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout);
|
||||
window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout, invert_y);
|
||||
|
||||
// TODO
|
||||
// program_manager.RestoreGuestPipeline();
|
||||
}
|
||||
|
||||
void BlitScreen::CreateWindowAdapt() {
|
||||
if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) {
|
||||
if (window_adapt && filters.get_scaling_filter() == current_window_adapt) {
|
||||
return;
|
||||
}
|
||||
|
||||
current_window_adapt = Settings::values.scaling_filter.GetValue();
|
||||
current_window_adapt = filters.get_scaling_filter();
|
||||
switch (current_window_adapt) {
|
||||
case Settings::ScalingFilter::NearestNeighbor:
|
||||
window_adapt = MakeNearestNeighbor(device);
|
||||
|
@ -15,6 +15,8 @@ namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
struct PresentFilters;
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
}
|
||||
@ -46,12 +48,12 @@ public:
|
||||
explicit BlitScreen(RasterizerOpenGL& rasterizer,
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory,
|
||||
StateTracker& state_tracker, ProgramManager& program_manager,
|
||||
Device& device);
|
||||
Device& device, const PresentFilters& filters);
|
||||
~BlitScreen();
|
||||
|
||||
/// Draws the emulated screens to the emulator window.
|
||||
void DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout);
|
||||
const Layout::FramebufferLayout& layout, bool invert_y);
|
||||
|
||||
private:
|
||||
void CreateWindowAdapt();
|
||||
@ -61,6 +63,7 @@ private:
|
||||
StateTracker& state_tracker;
|
||||
ProgramManager& program_manager;
|
||||
Device& device;
|
||||
const PresentFilters& filters;
|
||||
|
||||
Settings::ScalingFilter current_window_adapt{};
|
||||
std::unique_ptr<WindowAdaptPass> window_adapt;
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "video_core/framebuffer_config.h"
|
||||
#include "video_core/present.h"
|
||||
#include "video_core/renderer_opengl/gl_blit_screen.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/present/fsr.h"
|
||||
@ -14,8 +15,9 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
|
||||
: rasterizer(rasterizer_), device_memory(device_memory_) {
|
||||
Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
const PresentFilters& filters_)
|
||||
: rasterizer(rasterizer_), device_memory(device_memory_), filters(filters_) {
|
||||
// Allocate textures for the screen
|
||||
framebuffer_texture.resource.Create(GL_TEXTURE_2D);
|
||||
|
||||
@ -34,12 +36,12 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
|
||||
std::array<ScreenRectVertex, 4>& out_vertices,
|
||||
ProgramManager& program_manager,
|
||||
const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout& layout) {
|
||||
const Layout::FramebufferLayout& layout, bool invert_y) {
|
||||
FramebufferTextureInfo info = PrepareRenderTarget(framebuffer);
|
||||
auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height);
|
||||
GLuint texture = info.display_texture;
|
||||
|
||||
auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
|
||||
auto anti_aliasing = filters.get_anti_aliasing();
|
||||
if (anti_aliasing != Settings::AntiAliasing::None) {
|
||||
glEnablei(GL_SCISSOR_TEST, 0);
|
||||
auto viewport_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width);
|
||||
@ -64,7 +66,7 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
|
||||
|
||||
glDisablei(GL_SCISSOR_TEST, 0);
|
||||
|
||||
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
|
||||
if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) {
|
||||
if (!fsr || fsr->NeedsRecreation(layout.screen)) {
|
||||
fsr = std::make_unique<FSR>(layout.screen.GetWidth(), layout.screen.GetHeight());
|
||||
}
|
||||
@ -83,10 +85,15 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
|
||||
const auto w = screen.GetWidth();
|
||||
const auto h = screen.GetHeight();
|
||||
|
||||
out_vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top);
|
||||
out_vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top);
|
||||
out_vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom);
|
||||
out_vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom);
|
||||
const auto left = crop.left;
|
||||
const auto right = crop.right;
|
||||
const auto top = invert_y ? crop.bottom : crop.top;
|
||||
const auto bottom = invert_y ? crop.top : crop.bottom;
|
||||
|
||||
out_vertices[0] = ScreenRectVertex(x, y, left, top);
|
||||
out_vertices[1] = ScreenRectVertex(x + w, y, right, top);
|
||||
out_vertices[2] = ScreenRectVertex(x, y + h, left, bottom);
|
||||
out_vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom);
|
||||
|
||||
return texture;
|
||||
}
|
||||
@ -131,10 +138,12 @@ FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig&
|
||||
const u64 size_in_bytes{Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
|
||||
const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)};
|
||||
const std::span<const u8> input_data(host_ptr, size_in_bytes);
|
||||
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
|
||||
framebuffer.width, framebuffer.height, 1, block_height_log2,
|
||||
0);
|
||||
if (host_ptr) {
|
||||
const std::span<const u8> input_data(host_ptr, size_in_bytes);
|
||||
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
|
||||
framebuffer.width, framebuffer.height, 1,
|
||||
block_height_log2, 0);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
|
||||
|
@ -13,6 +13,8 @@ namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
struct PresentFilters;
|
||||
|
||||
namespace Service::android {
|
||||
enum class PixelFormat : u32;
|
||||
};
|
||||
@ -44,14 +46,15 @@ struct ScreenRectVertex;
|
||||
|
||||
class Layer {
|
||||
public:
|
||||
explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory);
|
||||
explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory,
|
||||
const PresentFilters& filters);
|
||||
~Layer();
|
||||
|
||||
GLuint ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
|
||||
std::array<ScreenRectVertex, 4>& out_vertices,
|
||||
ProgramManager& program_manager,
|
||||
const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout& layout);
|
||||
const Layout::FramebufferLayout& layout, bool invert_y);
|
||||
|
||||
private:
|
||||
/// Loads framebuffer from emulated memory into the active OpenGL texture.
|
||||
@ -65,6 +68,7 @@ private:
|
||||
private:
|
||||
RasterizerOpenGL& rasterizer;
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
const PresentFilters& filters;
|
||||
|
||||
/// OpenGL framebuffer data
|
||||
std::vector<u8> gl_framebuffer_data;
|
||||
|
@ -37,7 +37,7 @@ WindowAdaptPass::~WindowAdaptPass() = default;
|
||||
|
||||
void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers,
|
||||
std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout) {
|
||||
const Layout::FramebufferLayout& layout, bool invert_y) {
|
||||
GLint old_read_fb;
|
||||
GLint old_draw_fb;
|
||||
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
|
||||
@ -51,7 +51,7 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li
|
||||
auto layer_it = layers.begin();
|
||||
for (size_t i = 0; i < layer_count; i++) {
|
||||
textures[i] = layer_it->ConfigureDraw(matrices[i], vertices[i], program_manager,
|
||||
framebuffers[i], layout);
|
||||
framebuffers[i], layout, invert_y);
|
||||
layer_it++;
|
||||
}
|
||||
|
||||
@ -92,6 +92,21 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
for (size_t i = 0; i < layer_count; i++) {
|
||||
switch (framebuffers[i].blending) {
|
||||
case Tegra::BlendMode::Opaque:
|
||||
default:
|
||||
glDisablei(GL_BLEND, 0);
|
||||
break;
|
||||
case Tegra::BlendMode::Premultiplied:
|
||||
glEnablei(GL_BLEND, 0);
|
||||
glBlendFuncSeparatei(0, GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
|
||||
break;
|
||||
case Tegra::BlendMode::Coverage:
|
||||
glEnablei(GL_BLEND, 0);
|
||||
glBlendFuncSeparatei(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
|
||||
break;
|
||||
}
|
||||
|
||||
glBindTextureUnit(0, textures[i]);
|
||||
glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE,
|
||||
matrices[i].data());
|
||||
|
@ -31,7 +31,7 @@ public:
|
||||
|
||||
void DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers,
|
||||
std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout);
|
||||
const Layout::FramebufferLayout& layout, bool invert_y);
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include "core/core_timing.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/telemetry_session.h"
|
||||
#include "video_core/capture.h"
|
||||
#include "video_core/present.h"
|
||||
#include "video_core/renderer_opengl/gl_blit_screen.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
@ -120,7 +122,15 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
}
|
||||
blit_screen = std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker,
|
||||
program_manager, device);
|
||||
program_manager, device, PresentFiltersForDisplay);
|
||||
blit_applet =
|
||||
std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker, program_manager,
|
||||
device, PresentFiltersForAppletCapture);
|
||||
capture_framebuffer.Create();
|
||||
capture_renderbuffer.Create();
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, capture_renderbuffer.handle);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, VideoCore::Capture::LinearWidth,
|
||||
VideoCore::Capture::LinearHeight);
|
||||
}
|
||||
|
||||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
@ -130,10 +140,11 @@ void RendererOpenGL::Composite(std::span<const Tegra::FramebufferConfig> framebu
|
||||
return;
|
||||
}
|
||||
|
||||
RenderAppletCaptureLayer(framebuffers);
|
||||
RenderScreenshot(framebuffers);
|
||||
|
||||
state_tracker.BindFramebuffer(0);
|
||||
blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout());
|
||||
blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout(), false);
|
||||
|
||||
++m_current_frame;
|
||||
|
||||
@ -159,11 +170,8 @@ void RendererOpenGL::AddTelemetryFields() {
|
||||
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
|
||||
}
|
||||
|
||||
void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
|
||||
void RendererOpenGL::RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout, void* dst) {
|
||||
GLint old_read_fb;
|
||||
GLint old_draw_fb;
|
||||
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
|
||||
@ -173,29 +181,86 @@ void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig>
|
||||
screenshot_framebuffer.Create();
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
|
||||
|
||||
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
|
||||
GLuint renderbuffer;
|
||||
glGenRenderbuffers(1, &renderbuffer);
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height);
|
||||
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
|
||||
|
||||
blit_screen->DrawScreen(framebuffers, layout);
|
||||
blit_screen->DrawScreen(framebuffers, layout, false);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
renderer_settings.screenshot_bits);
|
||||
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, dst);
|
||||
|
||||
screenshot_framebuffer.Release();
|
||||
glDeleteRenderbuffers(1, &renderbuffer);
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||
}
|
||||
|
||||
void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
|
||||
RenderToBuffer(framebuffers, renderer_settings.screenshot_framebuffer_layout,
|
||||
renderer_settings.screenshot_bits);
|
||||
|
||||
renderer_settings.screenshot_complete_callback(true);
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
void RendererOpenGL::RenderAppletCaptureLayer(
|
||||
std::span<const Tegra::FramebufferConfig> framebuffers) {
|
||||
GLint old_read_fb;
|
||||
GLint old_draw_fb;
|
||||
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
|
||||
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, capture_framebuffer.handle);
|
||||
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
|
||||
capture_renderbuffer.handle);
|
||||
|
||||
blit_applet->DrawScreen(framebuffers, VideoCore::Capture::Layout, true);
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||
}
|
||||
|
||||
std::vector<u8> RendererOpenGL::GetAppletCaptureBuffer() {
|
||||
using namespace VideoCore::Capture;
|
||||
|
||||
std::vector<u8> linear(TiledSize);
|
||||
std::vector<u8> out(TiledSize);
|
||||
|
||||
GLint old_read_fb;
|
||||
GLint old_draw_fb;
|
||||
GLint old_pixel_pack_buffer;
|
||||
GLint old_pack_row_length;
|
||||
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
|
||||
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
|
||||
glGetIntegerv(GL_PIXEL_PACK_BUFFER_BINDING, &old_pixel_pack_buffer);
|
||||
glGetIntegerv(GL_PACK_ROW_LENGTH, &old_pack_row_length);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, capture_framebuffer.handle);
|
||||
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
|
||||
capture_renderbuffer.handle);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glReadPixels(0, 0, LinearWidth, LinearHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
linear.data());
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, old_pixel_pack_buffer);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, old_pack_row_length);
|
||||
|
||||
Tegra::Texture::SwizzleTexture(out, linear, BytesPerPixel, LinearWidth, LinearHeight,
|
||||
LinearDepth, BlockHeight, BlockDepth);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -42,6 +42,8 @@ public:
|
||||
|
||||
void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override;
|
||||
|
||||
std::vector<u8> GetAppletCaptureBuffer() override;
|
||||
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
@ -52,7 +54,11 @@ public:
|
||||
|
||||
private:
|
||||
void AddTelemetryFields();
|
||||
|
||||
void RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout, void* dst);
|
||||
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
|
||||
void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers);
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Core::Frontend::EmuWindow& emu_window;
|
||||
@ -64,8 +70,11 @@ private:
|
||||
ProgramManager program_manager;
|
||||
RasterizerOpenGL rasterizer;
|
||||
OGLFramebuffer screenshot_framebuffer;
|
||||
OGLFramebuffer capture_framebuffer;
|
||||
OGLRenderbuffer capture_renderbuffer;
|
||||
|
||||
std::unique_ptr<BlitScreen> blit_screen;
|
||||
std::unique_ptr<BlitScreen> blit_applet;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "video_core/present.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
|
||||
#include "common/settings.h"
|
||||
@ -48,12 +49,12 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
|
||||
|
||||
Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory_, size_t image_count_,
|
||||
VkExtent2D output_size, VkDescriptorSetLayout layout)
|
||||
VkExtent2D output_size, VkDescriptorSetLayout layout, const PresentFilters& filters_)
|
||||
: device(device_), memory_allocator(memory_allocator_), scheduler(scheduler_),
|
||||
device_memory(device_memory_), image_count(image_count_) {
|
||||
device_memory(device_memory_), filters(filters_), image_count(image_count_) {
|
||||
CreateDescriptorPool();
|
||||
CreateDescriptorSets(layout);
|
||||
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
|
||||
if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) {
|
||||
CreateFSR(output_size);
|
||||
}
|
||||
}
|
||||
@ -171,11 +172,11 @@ void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
|
||||
}
|
||||
|
||||
void Layer::SetAntiAliasPass() {
|
||||
if (anti_alias && anti_alias_setting == Settings::values.anti_aliasing.GetValue()) {
|
||||
if (anti_alias && anti_alias_setting == filters.get_anti_aliasing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
anti_alias_setting = Settings::values.anti_aliasing.GetValue();
|
||||
anti_alias_setting = filters.get_anti_aliasing();
|
||||
|
||||
const VkExtent2D render_area{
|
||||
.width = Settings::values.resolution_info.ScaleUp(raw_width),
|
||||
@ -270,9 +271,11 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
|
||||
const u64 linear_size{GetSizeInBytes(framebuffer)};
|
||||
const u64 tiled_size{Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
if (host_ptr) {
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
}
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = image_offset,
|
||||
|
@ -11,6 +11,8 @@ namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
struct PresentFilters;
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
}
|
||||
@ -37,7 +39,8 @@ class Layer final {
|
||||
public:
|
||||
explicit Layer(const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler,
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory, size_t image_count,
|
||||
VkExtent2D output_size, VkDescriptorSetLayout layout);
|
||||
VkExtent2D output_size, VkDescriptorSetLayout layout,
|
||||
const PresentFilters& filters);
|
||||
~Layer();
|
||||
|
||||
void ConfigureDraw(PresentPushConstants* out_push_constants,
|
||||
@ -71,6 +74,7 @@ private:
|
||||
MemoryAllocator& memory_allocator;
|
||||
Scheduler& scheduler;
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
const PresentFilters& filters;
|
||||
const size_t image_count{};
|
||||
vk::DescriptorPool descriptor_pool{};
|
||||
vk::DescriptorSets descriptor_sets{};
|
||||
|
@ -362,10 +362,10 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
|
||||
});
|
||||
}
|
||||
|
||||
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
|
||||
vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
|
||||
bool enable_blending) {
|
||||
static vk::Pipeline CreateWrappedPipelineImpl(
|
||||
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
|
||||
VkPipelineColorBlendAttachmentState blending) {
|
||||
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
@ -443,30 +443,6 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
|
||||
.alphaToOneEnable = VK_FALSE,
|
||||
};
|
||||
|
||||
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
|
||||
.blendEnable = VK_FALSE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
|
||||
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{
|
||||
.blendEnable = VK_TRUE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
|
||||
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
@ -474,8 +450,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_COPY,
|
||||
.attachmentCount = 1,
|
||||
.pAttachments =
|
||||
enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled,
|
||||
.pAttachments = &blending,
|
||||
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
|
||||
};
|
||||
|
||||
@ -515,6 +490,63 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
|
||||
});
|
||||
}
|
||||
|
||||
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
|
||||
vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
|
||||
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
|
||||
.blendEnable = VK_FALSE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
|
||||
return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
|
||||
color_blend_attachment_disabled);
|
||||
}
|
||||
|
||||
vk::Pipeline CreateWrappedPremultipliedBlendingPipeline(
|
||||
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
|
||||
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_premultiplied{
|
||||
.blendEnable = VK_TRUE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_ONE,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
|
||||
return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
|
||||
color_blend_attachment_premultiplied);
|
||||
}
|
||||
|
||||
vk::Pipeline CreateWrappedCoverageBlendingPipeline(
|
||||
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
|
||||
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_coverage{
|
||||
.blendEnable = VK_TRUE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
|
||||
return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
|
||||
color_blend_attachment_coverage);
|
||||
}
|
||||
|
||||
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
|
||||
VkSampler sampler, VkImageView view,
|
||||
VkDescriptorSet set, u32 binding) {
|
||||
|
@ -42,8 +42,13 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
|
||||
vk::DescriptorSetLayout& layout);
|
||||
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
|
||||
vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
|
||||
bool enable_blending = false);
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
|
||||
vk::Pipeline CreateWrappedPremultipliedBlendingPipeline(
|
||||
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
|
||||
vk::Pipeline CreateWrappedCoverageBlendingPipeline(
|
||||
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
|
||||
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
|
||||
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
|
||||
VkSampler sampler, VkImageView view,
|
||||
VkDescriptorSet set, u32 binding);
|
||||
|
@ -22,7 +22,7 @@ WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format,
|
||||
CreatePipelineLayout();
|
||||
CreateVertexShader();
|
||||
CreateRenderPass(frame_format);
|
||||
CreatePipeline();
|
||||
CreatePipelines();
|
||||
}
|
||||
|
||||
WindowAdaptPass::~WindowAdaptPass() = default;
|
||||
@ -34,7 +34,6 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
|
||||
|
||||
const VkFramebuffer host_framebuffer{*dst->framebuffer};
|
||||
const VkRenderPass renderpass{*render_pass};
|
||||
const VkPipeline graphics_pipeline{*pipeline};
|
||||
const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout};
|
||||
const VkExtent2D render_area{
|
||||
.width = dst->width,
|
||||
@ -44,9 +43,23 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
|
||||
const size_t layer_count = configs.size();
|
||||
std::vector<PresentPushConstants> push_constants(layer_count);
|
||||
std::vector<VkDescriptorSet> descriptor_sets(layer_count);
|
||||
std::vector<VkPipeline> graphics_pipelines(layer_count);
|
||||
|
||||
auto layer_it = layers.begin();
|
||||
for (size_t i = 0; i < layer_count; i++) {
|
||||
switch (configs[i].blending) {
|
||||
case Tegra::BlendMode::Opaque:
|
||||
default:
|
||||
graphics_pipelines[i] = *opaque_pipeline;
|
||||
break;
|
||||
case Tegra::BlendMode::Premultiplied:
|
||||
graphics_pipelines[i] = *premultiplied_pipeline;
|
||||
break;
|
||||
case Tegra::BlendMode::Coverage:
|
||||
graphics_pipelines[i] = *coverage_pipeline;
|
||||
break;
|
||||
}
|
||||
|
||||
layer_it->ConfigureDraw(&push_constants[i], &descriptor_sets[i], rasterizer, *sampler,
|
||||
image_index, configs[i], layout);
|
||||
layer_it++;
|
||||
@ -77,8 +90,8 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
|
||||
BeginRenderPass(cmdbuf, renderpass, host_framebuffer, render_area);
|
||||
cmdbuf.ClearAttachments({clear_attachment}, {clear_rect});
|
||||
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
|
||||
for (size_t i = 0; i < layer_count; i++) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipelines[i]);
|
||||
cmdbuf.PushConstants(graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT,
|
||||
push_constants[i]);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0,
|
||||
@ -129,9 +142,13 @@ void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) {
|
||||
render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED);
|
||||
}
|
||||
|
||||
void WindowAdaptPass::CreatePipeline() {
|
||||
pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
|
||||
std::tie(vertex_shader, fragment_shader), false);
|
||||
void WindowAdaptPass::CreatePipelines() {
|
||||
opaque_pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
|
||||
std::tie(vertex_shader, fragment_shader));
|
||||
premultiplied_pipeline = CreateWrappedPremultipliedBlendingPipeline(
|
||||
device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader));
|
||||
coverage_pipeline = CreateWrappedCoverageBlendingPipeline(
|
||||
device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader));
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -42,7 +42,7 @@ private:
|
||||
void CreatePipelineLayout();
|
||||
void CreateVertexShader();
|
||||
void CreateRenderPass(VkFormat frame_format);
|
||||
void CreatePipeline();
|
||||
void CreatePipelines();
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
@ -52,7 +52,9 @@ private:
|
||||
vk::ShaderModule vertex_shader;
|
||||
vk::ShaderModule fragment_shader;
|
||||
vk::RenderPass render_pass;
|
||||
vk::Pipeline pipeline;
|
||||
vk::Pipeline opaque_pipeline;
|
||||
vk::Pipeline premultiplied_pipeline;
|
||||
vk::Pipeline coverage_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -19,7 +19,9 @@
|
||||
#include "core/core_timing.h"
|
||||
#include "core/frontend/graphics_context.h"
|
||||
#include "core/telemetry_session.h"
|
||||
#include "video_core/capture.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/present.h"
|
||||
#include "video_core/renderer_vulkan/present/util.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
@ -38,6 +40,20 @@
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
|
||||
constexpr VkExtent2D CaptureImageSize{
|
||||
.width = VideoCore::Capture::LinearWidth,
|
||||
.height = VideoCore::Capture::LinearHeight,
|
||||
};
|
||||
|
||||
constexpr VkExtent3D CaptureImageExtent{
|
||||
.width = VideoCore::Capture::LinearWidth,
|
||||
.height = VideoCore::Capture::LinearHeight,
|
||||
.depth = VideoCore::Capture::LinearDepth,
|
||||
};
|
||||
|
||||
constexpr VkFormat CaptureFormat = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
|
||||
std::string GetReadableVersion(u32 version) {
|
||||
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
|
||||
VK_VERSION_PATCH(version));
|
||||
@ -99,10 +115,15 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
render_window.GetFramebufferLayout().height),
|
||||
present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
|
||||
surface),
|
||||
blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler),
|
||||
blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler),
|
||||
blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler,
|
||||
PresentFiltersForDisplay),
|
||||
blit_capture(device_memory, device, memory_allocator, present_manager, scheduler,
|
||||
PresentFiltersForDisplay),
|
||||
blit_applet(device_memory, device, memory_allocator, present_manager, scheduler,
|
||||
PresentFiltersForAppletCapture),
|
||||
rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
|
||||
scheduler) {
|
||||
scheduler),
|
||||
applet_frame() {
|
||||
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
|
||||
turbo_mode.emplace(instance, dld);
|
||||
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
|
||||
@ -125,6 +146,8 @@ void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebu
|
||||
|
||||
SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
|
||||
|
||||
RenderAppletCaptureLayer(framebuffers);
|
||||
|
||||
if (!render_window.IsShown()) {
|
||||
return;
|
||||
}
|
||||
@ -167,30 +190,20 @@ void RendererVulkan::Report() const {
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||
}
|
||||
|
||||
void Vulkan::RendererVulkan::RenderScreenshot(
|
||||
std::span<const Tegra::FramebufferConfig> framebuffers) {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM};
|
||||
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
|
||||
vk::Buffer RendererVulkan::RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout, VkFormat format,
|
||||
VkDeviceSize buffer_size) {
|
||||
auto frame = [&]() {
|
||||
Frame f{};
|
||||
f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height},
|
||||
ScreenshotFormat);
|
||||
f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat);
|
||||
f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat);
|
||||
f.image =
|
||||
CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, format);
|
||||
f.image_view = CreateWrappedImageView(device, f.image, format);
|
||||
f.framebuffer = blit_capture.CreateFramebuffer(layout, *f.image_view, format);
|
||||
return f;
|
||||
}();
|
||||
|
||||
blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1,
|
||||
VK_FORMAT_B8G8R8A8_UNORM);
|
||||
|
||||
const auto dst_buffer = CreateWrappedBuffer(
|
||||
memory_allocator, static_cast<VkDeviceSize>(layout.width * layout.height * 4),
|
||||
MemoryUsage::Download);
|
||||
auto dst_buffer = CreateWrappedBuffer(memory_allocator, buffer_size, MemoryUsage::Download);
|
||||
blit_capture.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, format);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
@ -198,15 +211,68 @@ void Vulkan::RendererVulkan::RenderScreenshot(
|
||||
VkExtent3D{layout.width, layout.height, 1});
|
||||
});
|
||||
|
||||
// Ensure the copy is fully completed before saving the screenshot
|
||||
// Ensure the copy is fully completed before saving the capture
|
||||
scheduler.Finish();
|
||||
|
||||
// Copy backing image data to the QImage screenshot buffer
|
||||
// Copy backing image data to the capture buffer
|
||||
dst_buffer.Invalidate();
|
||||
return dst_buffer;
|
||||
}
|
||||
|
||||
void RendererVulkan::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM,
|
||||
layout.width * layout.height * 4);
|
||||
|
||||
std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
|
||||
dst_buffer.Mapped().size());
|
||||
renderer_settings.screenshot_complete_callback(false);
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
std::vector<u8> RendererVulkan::GetAppletCaptureBuffer() {
|
||||
using namespace VideoCore::Capture;
|
||||
|
||||
std::vector<u8> out(VideoCore::Capture::TiledSize);
|
||||
|
||||
if (!applet_frame.image) {
|
||||
return out;
|
||||
}
|
||||
|
||||
const auto dst_buffer =
|
||||
CreateWrappedBuffer(memory_allocator, VideoCore::Capture::TiledSize, MemoryUsage::Download);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
DownloadColorImage(cmdbuf, *applet_frame.image, *dst_buffer, CaptureImageExtent);
|
||||
});
|
||||
|
||||
// Ensure the copy is fully completed before writing the capture
|
||||
scheduler.Finish();
|
||||
|
||||
// Swizzle image data to the capture buffer
|
||||
dst_buffer.Invalidate();
|
||||
Tegra::Texture::SwizzleTexture(out, dst_buffer.Mapped(), BytesPerPixel, LinearWidth,
|
||||
LinearHeight, LinearDepth, BlockHeight, BlockDepth);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void RendererVulkan::RenderAppletCaptureLayer(
|
||||
std::span<const Tegra::FramebufferConfig> framebuffers) {
|
||||
if (!applet_frame.image) {
|
||||
applet_frame.image = CreateWrappedImage(memory_allocator, CaptureImageSize, CaptureFormat);
|
||||
applet_frame.image_view = CreateWrappedImageView(device, applet_frame.image, CaptureFormat);
|
||||
applet_frame.framebuffer = blit_applet.CreateFramebuffer(
|
||||
VideoCore::Capture::Layout, *applet_frame.image_view, CaptureFormat);
|
||||
}
|
||||
|
||||
blit_applet.DrawToFrame(rasterizer, &applet_frame, framebuffers, VideoCore::Capture::Layout, 1,
|
||||
CaptureFormat);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -48,6 +48,8 @@ public:
|
||||
|
||||
void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override;
|
||||
|
||||
std::vector<u8> GetAppletCaptureBuffer() override;
|
||||
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
@ -59,7 +61,11 @@ public:
|
||||
private:
|
||||
void Report() const;
|
||||
|
||||
vk::Buffer RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
|
||||
const Layout::FramebufferLayout& layout, VkFormat format,
|
||||
VkDeviceSize buffer_size);
|
||||
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
|
||||
void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers);
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
@ -79,9 +85,12 @@ private:
|
||||
Swapchain swapchain;
|
||||
PresentManager present_manager;
|
||||
BlitScreen blit_swapchain;
|
||||
BlitScreen blit_screenshot;
|
||||
BlitScreen blit_capture;
|
||||
BlitScreen blit_applet;
|
||||
RasterizerVulkan rasterizer;
|
||||
std::optional<TurboMode> turbo_mode;
|
||||
|
||||
Frame applet_frame;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "video_core/framebuffer_config.h"
|
||||
#include "video_core/present.h"
|
||||
#include "video_core/renderer_vulkan/present/filters.h"
|
||||
#include "video_core/renderer_vulkan/present/layer.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
@ -12,9 +13,9 @@ namespace Vulkan {
|
||||
|
||||
BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_,
|
||||
MemoryAllocator& memory_allocator_, PresentManager& present_manager_,
|
||||
Scheduler& scheduler_)
|
||||
Scheduler& scheduler_, const PresentFilters& filters_)
|
||||
: device_memory{device_memory_}, device{device_}, memory_allocator{memory_allocator_},
|
||||
present_manager{present_manager_}, scheduler{scheduler_}, image_count{1},
|
||||
present_manager{present_manager_}, scheduler{scheduler_}, filters{filters_}, image_count{1},
|
||||
swapchain_view_format{VK_FORMAT_B8G8R8A8_UNORM} {}
|
||||
|
||||
BlitScreen::~BlitScreen() = default;
|
||||
@ -27,7 +28,7 @@ void BlitScreen::WaitIdle() {
|
||||
|
||||
void BlitScreen::SetWindowAdaptPass() {
|
||||
layers.clear();
|
||||
scaling_filter = Settings::values.scaling_filter.GetValue();
|
||||
scaling_filter = filters.get_scaling_filter();
|
||||
|
||||
switch (scaling_filter) {
|
||||
case Settings::ScalingFilter::NearestNeighbor:
|
||||
@ -59,7 +60,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
|
||||
bool presentation_recreate_required = false;
|
||||
|
||||
// Recreate dynamic resources if the adapting filter changed
|
||||
if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue()) {
|
||||
if (!window_adapt || scaling_filter != filters.get_scaling_filter()) {
|
||||
resource_update_required = true;
|
||||
}
|
||||
|
||||
@ -102,7 +103,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
|
||||
|
||||
while (layers.size() < framebuffers.size()) {
|
||||
layers.emplace_back(device, memory_allocator, scheduler, device_memory, image_count,
|
||||
window_size, window_adapt->GetDescriptorSetLayout());
|
||||
window_size, window_adapt->GetDescriptorSetLayout(), filters);
|
||||
}
|
||||
|
||||
// Perform the draw
|
||||
@ -119,8 +120,7 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& l
|
||||
VkFormat current_view_format) {
|
||||
const bool format_updated =
|
||||
std::exchange(swapchain_view_format, current_view_format) != current_view_format;
|
||||
if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue() ||
|
||||
format_updated) {
|
||||
if (!window_adapt || scaling_filter != filters.get_scaling_filter() || format_updated) {
|
||||
WaitIdle();
|
||||
SetWindowAdaptPass();
|
||||
}
|
||||
|
@ -16,6 +16,8 @@ namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
struct PresentFilters;
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
}
|
||||
@ -47,7 +49,7 @@ class BlitScreen {
|
||||
public:
|
||||
explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, const Device& device,
|
||||
MemoryAllocator& memory_allocator, PresentManager& present_manager,
|
||||
Scheduler& scheduler);
|
||||
Scheduler& scheduler, const PresentFilters& filters);
|
||||
~BlitScreen();
|
||||
|
||||
void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
|
||||
@ -70,6 +72,7 @@ private:
|
||||
MemoryAllocator& memory_allocator;
|
||||
PresentManager& present_manager;
|
||||
Scheduler& scheduler;
|
||||
const PresentFilters& filters;
|
||||
std::size_t image_count{};
|
||||
std::size_t image_index{};
|
||||
VkFormat swapchain_view_format{};
|
||||
|
@ -381,8 +381,9 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||
.support_float64 = device.IsFloat64Supported(),
|
||||
.support_float16 = device.IsFloat16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
.needs_demote_reorder =
|
||||
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
|
||||
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
|
||||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
|
||||
.support_snorm_render_buffer = true,
|
||||
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user