early-access version 2263

2021-12-03 04:35:20 +01:00
parent ecc126a6a8
commit a28afca771
18 changed files with 101 additions and 62 deletions
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============

-This is the source code for early-access 2261.
+This is the source code for early-access 2263.

 ## Legal Notice

--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -19,16 +19,16 @@ u64 EstimateRDTSCFrequency() {
    // get current time
    _mm_mfence();
    const u64 tscStart = __rdtsc();
-    const auto startTime = std::chrono::high_resolution_clock::now();
+    const auto startTime = std::chrono::steady_clock::now();
    // wait roughly 3 seconds
    while (true) {
        auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
-            std::chrono::high_resolution_clock::now() - startTime);
+            std::chrono::steady_clock::now() - startTime);
        if (milli.count() >= 3000)
            break;
        std::this_thread::sleep_for(milli_10);
    }
-    const auto endTime = std::chrono::high_resolution_clock::now();
+    const auto endTime = std::chrono::steady_clock::now();
    _mm_mfence();
    const u64 tscEnd = __rdtsc();
    // calculate difference
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -30,6 +30,7 @@
 #include "core/hle/service/apm/apm_controller.h"
 #include "core/hle/service/apm/apm_interface.h"
 #include "core/hle/service/bcat/backend/backend.h"
+#include "core/hle/service/caps/caps.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/hle/service/ns/ns.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
@@ -298,7 +299,7 @@ ISelfController::ISelfController(Core::System& system_, NVFlinger::NVFlinger& nv
        {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"},
        {100, &ISelfController::SetAlbumImageTakenNotificationEnabled, "SetAlbumImageTakenNotificationEnabled"},
        {110, nullptr, "SetApplicationAlbumUserData"},
-        {120, nullptr, "SaveCurrentScreenshot"},
+        {120, &ISelfController::SaveCurrentScreenshot, "SaveCurrentScreenshot"},
        {130, nullptr, "SetRecordVolumeMuted"},
        {1000, nullptr, "GetDebugStorageChannel"},
    };
@@ -579,6 +580,17 @@ void ISelfController::SetAlbumImageTakenNotificationEnabled(Kernel::HLERequestCo
    rb.Push(ResultSuccess);
 }

+void ISelfController::SaveCurrentScreenshot(Kernel::HLERequestContext& ctx) {
+    // Stub: reads and logs the requested album report option, then replies with success.
+    IPC::RequestParser rp{ctx};
+    const auto album_report_option = rp.PopEnum<Capture::AlbumReportOption>();
+
+    LOG_WARNING(Service_AM, "(STUBBED) called. album_report_option={}", album_report_option);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(ResultSuccess);
+}
+
 AppletMessageQueue::AppletMessageQueue(Core::System& system)
    : service_context{system, "AppletMessageQueue"} {
    on_new_message = service_context.CreateEvent("AMMessageQueue:OnMessageReceived");
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -151,6 +151,7 @@ private:
    void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx);
    void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx);
    void SetAlbumImageTakenNotificationEnabled(Kernel::HLERequestContext& ctx);
+    void SaveCurrentScreenshot(Kernel::HLERequestContext& ctx);

    enum class ScreenshotPermission : u32 {
        Inherit = 0,
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -96,7 +96,7 @@ private:

    bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
                        std::vector<opus_int16>& output, u64* out_performance_time) const {
-        const auto start_time = std::chrono::high_resolution_clock::now();
+        const auto start_time = std::chrono::steady_clock::now();
        const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
        if (sizeof(OpusPacketHeader) > input.size()) {
            LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
@@ -135,7 +135,7 @@ private:
            return false;
        }

-        const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
+        const auto end_time = std::chrono::steady_clock::now() - start_time;
        sample_count = out_sample_count;
        consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
        if (out_performance_time != nullptr) {
--- a/src/core/hle/service/caps/caps.h
+++ b/src/core/hle/service/caps/caps.h
@@ -24,7 +24,7 @@ enum class AlbumImageOrientation {
    Orientation3 = 3,
 };

-enum class AlbumReportOption {
+enum class AlbumReportOption : s32 {
    Disable = 0,
    Enable = 1,
 };
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -21,7 +21,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
    case 0x0:
        switch (command.cmd) {
        case 0x1:
-            return Submit(fd, input, output);
+            return Submit(input, output);
        case 0x2:
            return GetSyncpoint(input, output);
        case 0x3:
@@ -62,16 +62,11 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
    return NvResult::NotImplemented;
 }

-void nvhost_nvdec::OnOpen(DeviceFD fd) {
-    static u32 next_id{};
-    fd_to_id[fd] = next_id++;
-}
+void nvhost_nvdec::OnOpen(DeviceFD fd) {}

 void nvhost_nvdec::OnClose(DeviceFD fd) {
    LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
-    if (fd_to_id.find(fd) != fd_to_id.end()) {
-        system.GPU().ClearCdmaInstance(fd_to_id[fd]);
-    }
+    system.GPU().ClearCdmaInstance();
 }

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -59,8 +59,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
    return NvResult::Success;
 }

-NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
-                                     std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlSubmit params{};
    std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
    LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@@ -94,7 +93,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
        Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
        system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
                                  cmdlist.size() * sizeof(u32));
-        gpu.PushCommandBuffer(fd_to_id[fd], cmdlist);
+        gpu.PushCommandBuffer(cmdlist);
    }
    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
    // Some games expect command_buffers to be written back
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -104,14 +104,13 @@ protected:

    /// Ioctl command implementations
    NvResult SetNVMAPfd(const std::vector<u8>& input);
-    NvResult Submit(DeviceFD fd, const std::vector<u8>& input, std::vector<u8>& output);
+    NvResult Submit(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);

-    std::unordered_map<DeviceFD, u32> fd_to_id{};
    s32_le nvmap_fd{};
    u32_le submit_timeout{};
    std::shared_ptr<nvmap> nvmap_dev;
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -21,7 +21,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
    case 0x0:
        switch (command.cmd) {
        case 0x1:
-            return Submit(fd, input, output);
+            return Submit(input, output);
        case 0x2:
            return GetSyncpoint(input, output);
        case 0x3:
@@ -62,15 +62,10 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
    return NvResult::NotImplemented;
 }

-void nvhost_vic::OnOpen(DeviceFD fd) {
-    static u32 next_id{};
-    fd_to_id[fd] = next_id++;
-}
+void nvhost_vic::OnOpen(DeviceFD fd) {}

 void nvhost_vic::OnClose(DeviceFD fd) {
-    if (fd_to_id.find(fd) != fd_to_id.end()) {
-        system.GPU().ClearCdmaInstance(fd_to_id[fd]);
-    }
+    system.GPU().ClearCdmaInstance();
 }

 } // namespace Service::Nvidia::Devices
--- a/src/core/perf_stats.h
+++ b/src/core/perf_stats.h
@@ -33,7 +33,7 @@ public:
    explicit PerfStats(u64 title_id_);
    ~PerfStats();

-    using Clock = std::chrono::high_resolution_clock;
+    using Clock = std::chrono::steady_clock;

    void BeginSystemFrame();
    void EndSystemFrame();
@@ -87,7 +87,7 @@ private:

 class SpeedLimiter {
 public:
-    using Clock = std::chrono::high_resolution_clock;
+    using Clock = std::chrono::steady_clock;

    void DoSpeedLimiting(std::chrono::microseconds current_system_time_us);

--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,6 +23,17 @@ namespace Tegra {
 namespace {
 constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
 constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
+constexpr std::array PREFERRED_GPU_DECODERS = {
+    AV_HWDEVICE_TYPE_CUDA,
+#ifdef _WIN32
+    AV_HWDEVICE_TYPE_D3D11VA,
+    AV_HWDEVICE_TYPE_DXVA2,
+#elif defined(__linux__)
+    AV_HWDEVICE_TYPE_VDPAU,
+#endif
+    // iterated in declaration order; Vulkan is the last resort for Linux Flatpak (w/ NVIDIA)
+    AV_HWDEVICE_TYPE_VULKAN,
+};

 void AVPacketDeleter(AVPacket* ptr) {
    av_packet_free(&ptr);
@@ -61,6 +72,20 @@ Codec::~Codec() {
    av_buffer_unref(&av_gpu_decoder);
 }

+// List the hwcontext types available in ffmpeg (the NONE end marker is not included)
+static std::vector<AVHWDeviceType> ListSupportedContexts() {
+    std::vector<AVHWDeviceType> contexts{};
+    AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
+    while ((current_device_type = av_hwdevice_iterate_types(current_device_type)) !=
+           AV_HWDEVICE_TYPE_NONE) {
+        // filter out VA-API since we will try that first if supported
+        if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) {
+            contexts.push_back(current_device_type);
+        }
+    }
+    return contexts;
+}
+
 #ifdef LIBVA_FOUND
 // List all the currently loaded Linux modules
 static std::vector<std::string> ListLinuxKernelModules() {
@@ -122,16 +147,12 @@ bool Codec::CreateGpuAvDevice() {
    av_dict_free(&hwdevice_options);
 #endif
    static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
-    static constexpr std::array GPU_DECODER_TYPES{
-#ifdef linux
-        AV_HWDEVICE_TYPE_VDPAU,
-#endif
-        AV_HWDEVICE_TYPE_CUDA,
-#ifdef _WIN32
-        AV_HWDEVICE_TYPE_D3D11VA,
-#endif
-    };
-    for (const auto& type : GPU_DECODER_TYPES) {
+    static const auto supported_contexts = ListSupportedContexts();
+    for (const auto& type : PREFERRED_GPU_DECODERS) {
+        if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
+                         [&type](const auto& context) { return context == type; })) {
+            continue;
+        }
        const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
        if (hwdevice_res < 0) {
            LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -185,6 +185,16 @@ struct GPU::Impl {
        return *dma_pusher;
    }

+    /// Returns a reference to the GPU CDMA pusher. NOTE(review): no null check on cdma_pusher.
+    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
+        return *cdma_pusher;
+    }
+
+    /// Returns a const reference to the GPU CDMA pusher. NOTE(review): no null check either.
+    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
+        return *cdma_pusher;
+    }
+
    /// Returns a reference to the underlying renderer.
    [[nodiscard]] VideoCore::RendererBase& Renderer() {
        return *renderer;
@@ -328,26 +338,25 @@ struct GPU::Impl {
    }

    /// Push GPU command buffer entries to be processed
-    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
        if (!use_nvdec) {
            return;
        }

-        if (cdma_pushers.find(id) == cdma_pushers.end()) {
-            cdma_pushers[id] = std::make_unique<Tegra::CDmaPusher>(gpu);
+        if (!cdma_pusher) {
+            cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
        }

        // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
        // TODO(ameerj): RE proper async nvdec operation
        // gpu_thread.SubmitCommandBuffer(std::move(entries));
-        cdma_pushers[id]->ProcessEntries(std::move(entries));
+
+        cdma_pusher->ProcessEntries(std::move(entries));
    }

    /// Frees the CDMAPusher instance to free up resources
-    void ClearCdmaInstance(u32 id) {
-        if (cdma_pushers.find(id) != cdma_pushers.end()) {
-            cdma_pushers.erase(id);
-        }
+    void ClearCdmaInstance() {
+        cdma_pusher.reset();
    }

    /// Swap buffers (render frame)
@@ -650,7 +659,7 @@ struct GPU::Impl {
    Core::System& system;
    std::unique_ptr<Tegra::MemoryManager> memory_manager;
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
-    std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
+    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
    std::unique_ptr<VideoCore::RendererBase> renderer;
    VideoCore::RasterizerInterface* rasterizer = nullptr;
    const bool use_nvdec;
@@ -802,6 +811,14 @@ const Tegra::DmaPusher& GPU::DmaPusher() const {
    return impl->DmaPusher();
 }

+Tegra::CDmaPusher& GPU::CDmaPusher() {
+    return impl->CDmaPusher();
+}
+
+const Tegra::CDmaPusher& GPU::CDmaPusher() const {
+    return impl->CDmaPusher();
+}
+
 VideoCore::RendererBase& GPU::Renderer() {
    return impl->Renderer();
 }
@@ -870,12 +887,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
    impl->PushGPUEntries(std::move(entries));
 }

-void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
-    impl->PushCommandBuffer(id, entries);
+void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+    impl->PushCommandBuffer(entries);
 }

-void GPU::ClearCdmaInstance(u32 id) {
-    impl->ClearCdmaInstance(id);
+void GPU::ClearCdmaInstance() {
+    impl->ClearCdmaInstance();
 }

 void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -242,10 +242,10 @@ public:
    void PushGPUEntries(Tegra::CommandList&& entries);

    /// Push GPU command buffer entries to be processed
-    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);

    /// Frees the CDMAPusher instance to free up resources
-    void ClearCdmaInstance(u32 id);
+    void ClearCdmaInstance();

    /// Swap buffers (render frame)
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept {
    const int now_complete = num_complete.load(std::memory_order::relaxed);
    const int now_building = num_building.load(std::memory_order::relaxed);
    if (now_complete == now_building) {
-        const auto now = std::chrono::high_resolution_clock::now();
+        const auto now = std::chrono::steady_clock::now();
        if (completed && num_complete == num_when_completed) {
            if (now - complete_time > TIME_TO_STOP_REPORTING) {
                report_base = now_complete;
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -28,6 +28,6 @@ private:

    bool completed{};
    int num_when_completed{};
-    std::chrono::high_resolution_clock::time_point complete_time;
+    std::chrono::steady_clock::time_point complete_time;
 };
 } // namespace VideoCore
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -136,7 +136,7 @@ void LoadingScreen::OnLoadComplete() {
 void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value,
                                   std::size_t total) {
    using namespace std::chrono;
-    const auto now = high_resolution_clock::now();
+    const auto now = steady_clock::now();
    // reset the timer if the stage changes
    if (stage != previous_stage) {
        ui->progress_bar->setStyleSheet(QString::fromUtf8(progressbar_style[stage]));
@@ -160,7 +160,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
    // If theres a drastic slowdown in the rate, then display an estimate
    if (now - previous_time > milliseconds{50} || slow_shader_compile_start) {
        if (!slow_shader_compile_start) {
-            slow_shader_start = high_resolution_clock::now();
+            slow_shader_start = steady_clock::now();
            slow_shader_compile_start = true;
            slow_shader_first_value = value;
        }
--- a/src/yuzu/loading_screen.h
+++ b/src/yuzu/loading_screen.h
@@ -84,8 +84,8 @@ private:
    // shaders, it will start quickly but end slow if new shaders were added since previous launch.
    // These variables are used to detect the change in speed so we can generate an ETA
    bool slow_shader_compile_start = false;
-    std::chrono::high_resolution_clock::time_point slow_shader_start;
-    std::chrono::high_resolution_clock::time_point previous_time;
+    std::chrono::steady_clock::time_point slow_shader_start;
+    std::chrono::steady_clock::time_point previous_time;
    std::size_t slow_shader_first_value = 0;
 };