early-access version 1255
362 externals/dynarmic/src/CMakeLists.txt (vendored, executable file)
@@ -0,0 +1,362 @@
add_library(dynarmic
    ../include/dynarmic/A32/a32.h
    ../include/dynarmic/A32/config.h
    ../include/dynarmic/A32/coprocessor.h
    ../include/dynarmic/A32/coprocessor_util.h
    ../include/dynarmic/A32/disassembler.h
    ../include/dynarmic/A64/a64.h
    ../include/dynarmic/A64/config.h
    ../include/dynarmic/exclusive_monitor.h
    ../include/dynarmic/optimization_flags.h
    common/assert.cpp
    common/assert.h
    common/bit_util.h
    common/cast_util.h
    common/common_types.h
    common/crypto/aes.cpp
    common/crypto/aes.h
    common/crypto/crc32.cpp
    common/crypto/crc32.h
    common/crypto/sm4.cpp
    common/crypto/sm4.h
    common/fp/fpcr.h
    common/fp/fpsr.h
    common/fp/fused.cpp
    common/fp/fused.h
    common/fp/info.h
    common/fp/mantissa_util.h
    common/fp/op.h
    common/fp/op/FPCompare.cpp
    common/fp/op/FPCompare.h
    common/fp/op/FPConvert.cpp
    common/fp/op/FPConvert.h
    common/fp/op/FPMulAdd.cpp
    common/fp/op/FPMulAdd.h
    common/fp/op/FPNeg.h
    common/fp/op/FPRecipEstimate.cpp
    common/fp/op/FPRecipEstimate.h
    common/fp/op/FPRecipExponent.cpp
    common/fp/op/FPRecipExponent.h
    common/fp/op/FPRecipStepFused.cpp
    common/fp/op/FPRecipStepFused.h
    common/fp/op/FPRoundInt.cpp
    common/fp/op/FPRoundInt.h
    common/fp/op/FPRSqrtEstimate.cpp
    common/fp/op/FPRSqrtEstimate.h
    common/fp/op/FPRSqrtStepFused.cpp
    common/fp/op/FPRSqrtStepFused.h
    common/fp/op/FPToFixed.cpp
    common/fp/op/FPToFixed.h
    common/fp/process_exception.cpp
    common/fp/process_exception.h
    common/fp/process_nan.cpp
    common/fp/process_nan.h
    common/fp/rounding_mode.h
    common/fp/unpacked.cpp
    common/fp/unpacked.h
    common/fp/util.h
    common/intrusive_list.h
    common/iterator_util.h
    common/llvm_disassemble.cpp
    common/llvm_disassemble.h
    common/lut_from_list.h
    common/macro_util.h
    common/math_util.cpp
    common/math_util.h
    common/memory_pool.cpp
    common/memory_pool.h
    common/safe_ops.h
    common/scope_exit.h
    common/string_util.h
    common/u128.cpp
    common/u128.h
    common/variant_util.h
    frontend/A32/types.cpp
    frontend/A32/types.h
    frontend/A64/types.cpp
    frontend/A64/types.h
    frontend/decoder/decoder_detail.h
    frontend/decoder/matcher.h
    frontend/imm.cpp
    frontend/imm.h
    frontend/ir/basic_block.cpp
    frontend/ir/basic_block.h
    frontend/ir/cond.h
    frontend/ir/ir_emitter.cpp
    frontend/ir/ir_emitter.h
    frontend/ir/location_descriptor.cpp
    frontend/ir/location_descriptor.h
    frontend/ir/microinstruction.cpp
    frontend/ir/microinstruction.h
    frontend/ir/opcodes.cpp
    frontend/ir/opcodes.h
    frontend/ir/opcodes.inc
    frontend/ir/terminal.h
    frontend/ir/type.cpp
    frontend/ir/type.h
    frontend/ir/value.cpp
    frontend/ir/value.h
    ir_opt/constant_propagation_pass.cpp
    ir_opt/dead_code_elimination_pass.cpp
    ir_opt/identity_removal_pass.cpp
    ir_opt/ir_matcher.h
    ir_opt/passes.h
    ir_opt/verification_pass.cpp
)

if ("A32" IN_LIST DYNARMIC_FRONTENDS)
    target_sources(dynarmic PRIVATE
        frontend/A32/decoder/arm.h
        frontend/A32/decoder/arm.inc
        frontend/A32/decoder/asimd.h
        frontend/A32/decoder/asimd.inc
        frontend/A32/decoder/thumb16.h
        frontend/A32/decoder/thumb32.h
        frontend/A32/decoder/vfp.h
        frontend/A32/decoder/vfp.inc
        frontend/A32/disassembler/disassembler.h
        frontend/A32/disassembler/disassembler_arm.cpp
        frontend/A32/disassembler/disassembler_thumb.cpp
        frontend/A32/FPSCR.h
        frontend/A32/ir_emitter.cpp
        frontend/A32/ir_emitter.h
        frontend/A32/ITState.h
        frontend/A32/location_descriptor.cpp
        frontend/A32/location_descriptor.h
        frontend/A32/PSR.h
        frontend/A32/translate/impl/asimd_load_store_structures.cpp
        frontend/A32/translate/impl/asimd_misc.cpp
        frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp
        frontend/A32/translate/impl/asimd_three_regs.cpp
        frontend/A32/translate/impl/asimd_two_regs_misc.cpp
        frontend/A32/translate/impl/asimd_two_regs_scalar.cpp
        frontend/A32/translate/impl/asimd_two_regs_shift.cpp
        frontend/A32/translate/impl/barrier.cpp
        frontend/A32/translate/impl/branch.cpp
        frontend/A32/translate/impl/coprocessor.cpp
        frontend/A32/translate/impl/crc32.cpp
        frontend/A32/translate/impl/data_processing.cpp
        frontend/A32/translate/impl/divide.cpp
        frontend/A32/translate/impl/exception_generating.cpp
        frontend/A32/translate/impl/extension.cpp
        frontend/A32/translate/impl/hint.cpp
        frontend/A32/translate/impl/load_store.cpp
        frontend/A32/translate/impl/misc.cpp
        frontend/A32/translate/impl/multiply.cpp
        frontend/A32/translate/impl/packing.cpp
        frontend/A32/translate/impl/parallel.cpp
        frontend/A32/translate/impl/reversal.cpp
        frontend/A32/translate/impl/saturated.cpp
        frontend/A32/translate/impl/status_register_access.cpp
        frontend/A32/translate/impl/synchronization.cpp
        frontend/A32/translate/impl/thumb16.cpp
        frontend/A32/translate/impl/thumb32.cpp
        frontend/A32/translate/impl/translate_arm.h
        frontend/A32/translate/impl/translate_thumb.h
        frontend/A32/translate/impl/vfp.cpp
        frontend/A32/translate/translate.cpp
        frontend/A32/translate/translate.h
        frontend/A32/translate/translate_arm.cpp
        frontend/A32/translate/translate_thumb.cpp
        ir_opt/a32_constant_memory_reads_pass.cpp
        ir_opt/a32_get_set_elimination_pass.cpp
    )
endif()

if ("A64" IN_LIST DYNARMIC_FRONTENDS)
    target_sources(dynarmic PRIVATE
        frontend/A64/decoder/a64.h
        frontend/A64/decoder/a64.inc
        frontend/A64/ir_emitter.cpp
        frontend/A64/ir_emitter.h
        frontend/A64/location_descriptor.cpp
        frontend/A64/location_descriptor.h
        frontend/A64/translate/impl/branch.cpp
        frontend/A64/translate/impl/data_processing_addsub.cpp
        frontend/A64/translate/impl/data_processing_bitfield.cpp
        frontend/A64/translate/impl/data_processing_conditional_compare.cpp
        frontend/A64/translate/impl/data_processing_conditional_select.cpp
        frontend/A64/translate/impl/data_processing_crc32.cpp
        frontend/A64/translate/impl/data_processing_logical.cpp
        frontend/A64/translate/impl/data_processing_multiply.cpp
        frontend/A64/translate/impl/data_processing_pcrel.cpp
        frontend/A64/translate/impl/data_processing_register.cpp
        frontend/A64/translate/impl/data_processing_shift.cpp
        frontend/A64/translate/impl/exception_generating.cpp
        frontend/A64/translate/impl/floating_point_compare.cpp
        frontend/A64/translate/impl/floating_point_conditional_compare.cpp
        frontend/A64/translate/impl/floating_point_conditional_select.cpp
        frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
        frontend/A64/translate/impl/floating_point_conversion_integer.cpp
        frontend/A64/translate/impl/floating_point_data_processing_one_register.cpp
        frontend/A64/translate/impl/floating_point_data_processing_three_register.cpp
        frontend/A64/translate/impl/floating_point_data_processing_two_register.cpp
        frontend/A64/translate/impl/impl.cpp
        frontend/A64/translate/impl/impl.h
        frontend/A64/translate/impl/load_store_exclusive.cpp
        frontend/A64/translate/impl/load_store_load_literal.cpp
        frontend/A64/translate/impl/load_store_multiple_structures.cpp
        frontend/A64/translate/impl/load_store_no_allocate_pair.cpp
        frontend/A64/translate/impl/load_store_register_immediate.cpp
        frontend/A64/translate/impl/load_store_register_pair.cpp
        frontend/A64/translate/impl/load_store_register_register_offset.cpp
        frontend/A64/translate/impl/load_store_register_unprivileged.cpp
        frontend/A64/translate/impl/load_store_single_structure.cpp
        frontend/A64/translate/impl/move_wide.cpp
        frontend/A64/translate/impl/simd_across_lanes.cpp
        frontend/A64/translate/impl/simd_aes.cpp
        frontend/A64/translate/impl/simd_copy.cpp
        frontend/A64/translate/impl/simd_crypto_four_register.cpp
        frontend/A64/translate/impl/simd_crypto_three_register.cpp
        frontend/A64/translate/impl/simd_extract.cpp
        frontend/A64/translate/impl/simd_modified_immediate.cpp
        frontend/A64/translate/impl/simd_permute.cpp
        frontend/A64/translate/impl/simd_scalar_pairwise.cpp
        frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
        frontend/A64/translate/impl/simd_scalar_three_same.cpp
        frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp
        frontend/A64/translate/impl/simd_scalar_x_indexed_element.cpp
        frontend/A64/translate/impl/simd_sha.cpp
        frontend/A64/translate/impl/simd_sha512.cpp
        frontend/A64/translate/impl/simd_shift_by_immediate.cpp
        frontend/A64/translate/impl/simd_table_lookup.cpp
        frontend/A64/translate/impl/simd_three_different.cpp
        frontend/A64/translate/impl/simd_three_same.cpp
        frontend/A64/translate/impl/simd_three_same_extra.cpp
        frontend/A64/translate/impl/simd_two_register_misc.cpp
        frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp
        frontend/A64/translate/impl/sys_dc.cpp
        frontend/A64/translate/impl/system.cpp
        frontend/A64/translate/impl/system_flag_format.cpp
        frontend/A64/translate/impl/system_flag_manipulation.cpp
        frontend/A64/translate/translate.cpp
        frontend/A64/translate/translate.h
        ir_opt/a64_callback_config_pass.cpp
        ir_opt/a64_get_set_elimination_pass.cpp
        ir_opt/a64_merge_interpret_blocks.cpp
    )
endif()

if (ARCHITECTURE_x86_64)
    target_sources(dynarmic PRIVATE
        backend/x64/abi.cpp
        backend/x64/abi.h
        backend/x64/block_of_code.cpp
        backend/x64/block_of_code.h
        backend/x64/block_range_information.cpp
        backend/x64/block_range_information.h
        backend/x64/callback.cpp
        backend/x64/callback.h
        backend/x64/constant_pool.cpp
        backend/x64/constant_pool.h
        backend/x64/devirtualize.h
        backend/x64/emit_x64.cpp
        backend/x64/emit_x64.h
        backend/x64/emit_x64_aes.cpp
        backend/x64/emit_x64_crc32.cpp
        backend/x64/emit_x64_data_processing.cpp
        backend/x64/emit_x64_floating_point.cpp
        backend/x64/emit_x64_packed.cpp
        backend/x64/emit_x64_saturation.cpp
        backend/x64/emit_x64_sm4.cpp
        backend/x64/emit_x64_vector.cpp
        backend/x64/emit_x64_vector_floating_point.cpp
        backend/x64/emit_x64_vector_saturation.cpp
        backend/x64/exception_handler.h
        backend/x64/exclusive_monitor.cpp
        backend/x64/hostloc.cpp
        backend/x64/hostloc.h
        backend/x64/jitstate_info.h
        backend/x64/oparg.h
        backend/x64/perf_map.cpp
        backend/x64/perf_map.h
        backend/x64/reg_alloc.cpp
        backend/x64/reg_alloc.h
    )

    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
        target_sources(dynarmic PRIVATE
            backend/x64/a32_emit_x64.cpp
            backend/x64/a32_emit_x64.h
            backend/x64/a32_interface.cpp
            backend/x64/a32_jitstate.cpp
            backend/x64/a32_jitstate.h
        )
    endif()

    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
        target_sources(dynarmic PRIVATE
            backend/x64/a64_emit_x64.cpp
            backend/x64/a64_emit_x64.h
            backend/x64/a64_interface.cpp
            backend/x64/a64_jitstate.cpp
            backend/x64/a64_jitstate.h
        )
    endif()

    if (WIN32)
        target_sources(dynarmic PRIVATE backend/x64/exception_handler_windows.cpp)
    elseif (APPLE)
        find_path(MACH_EXC_DEFS_DIR "mach/mach_exc.defs")
        if (NOT MACH_EXC_DEFS_DIR)
            message(WARNING "macOS fastmem disabled: unable to find mach/mach_exc.defs")
            target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
        else()
            message(STATUS "mach/mach_exc.defs location: ${MACH_EXC_DEFS_DIR}")
            execute_process(
                COMMAND
                    mkdir -p "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig"
                COMMAND
                    mig
                    -arch x86_64
                    -user "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_user.c"
                    -header "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_user.h"
                    -server "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_server.c"
                    -sheader "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_server.h"
                    "${MACH_EXC_DEFS_DIR}/mach/mach_exc.defs"
            )
            target_sources(dynarmic PRIVATE
                backend/x64/exception_handler_macos.cpp
                backend/x64/mig/mach_exc_server.c
                backend/x64/mig/mach_exc_server.h
            )
        endif()
    elseif (UNIX)
        if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
            target_link_libraries(dynarmic PUBLIC rt)
        endif()
        target_sources(dynarmic PRIVATE backend/x64/exception_handler_posix.cpp)
    else()
        target_sources(dynarmic PRIVATE backend/x64/exception_handler_generic.cpp)
    endif()
else()
    message(FATAL_ERROR "Unsupported architecture")
endif()

include(CreateDirectoryGroups)
create_target_directory_groups(dynarmic)

target_include_directories(dynarmic
    PUBLIC ../include
    PRIVATE .)
target_compile_options(dynarmic PRIVATE ${DYNARMIC_CXX_FLAGS})
target_link_libraries(dynarmic
    PRIVATE
        boost
        fmt::fmt
        mp
        tsl::robin_map
        xbyak
        $<$<BOOL:DYNARMIC_USE_LLVM>:${llvm_libs}>
)
if (DYNARMIC_ENABLE_CPU_FEATURE_DETECTION)
    target_compile_definitions(dynarmic PRIVATE DYNARMIC_ENABLE_CPU_FEATURE_DETECTION=1)
endif()
if (DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT)
    target_compile_definitions(dynarmic PRIVATE DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT=1)
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
    target_compile_definitions(dynarmic PRIVATE FMT_USE_WINDOWS_H=0)
endif()
# Disable this as it relies on a non-standard feature
target_compile_definitions(dynarmic PRIVATE FMT_USE_USER_DEFINED_LITERALS=0)
1633 externals/dynarmic/src/backend/x64/a32_emit_x64.cpp (vendored, executable file)
File diff suppressed because it is too large.
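Although the diff for a32_emit_x64.cpp is suppressed, its bulk is the EmitA32* member functions whose declarations are generated by the X-macro block over frontend/ir/opcodes.inc in the header that follows. A minimal, self-contained illustration of that X-macro technique, using a stand-in opcode list since opcodes.inc itself is not shown here:

#include <cstdio>

// Stand-in for frontend/ir/opcodes.inc (hypothetical contents).
#define OPCODE_LIST(A32OPC) \
    A32OPC(GetRegister)     \
    A32OPC(SetRegister)

// Declaration pass: one member-function declaration per A32 opcode.
struct EmitterSketch {
#define A32OPC(name) void EmitA32##name();
    OPCODE_LIST(A32OPC)
#undef A32OPC
};

// Definition pass: expanding the same list again elsewhere yields the bodies.
#define A32OPC(name) void EmitterSketch::EmitA32##name() { std::puts("emit " #name); }
OPCODE_LIST(A32OPC)
#undef A32OPC

int main() {
    EmitterSketch{}.EmitA32GetRegister();
}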
140 externals/dynarmic/src/backend/x64/a32_emit_x64.h (vendored, executable file)
@@ -0,0 +1,140 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>
#include <optional>
#include <set>
#include <tuple>

#include <tsl/robin_map.h>

#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/config.h>

#include "backend/x64/a32_jitstate.h"
#include "backend/x64/block_range_information.h"
#include "backend/x64/emit_x64.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/terminal.h"

namespace Dynarmic::Backend::X64 {

class RegAlloc;

struct A32EmitContext final : public EmitContext {
    A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);

    A32::LocationDescriptor Location() const;
    bool IsSingleStep() const;
    FP::FPCR FPCR(bool fpcr_controlled = true) const override;

    bool HasOptimization(OptimizationFlag flag) const override {
        return conf.HasOptimization(flag);
    }

    const A32::UserConfig& conf;
};

class A32EmitX64 final : public EmitX64 {
public:
    A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_interface);
    ~A32EmitX64() override;

    /**
     * Emit host machine code for a basic block with intermediate representation `block`.
     * @note block is modified.
     */
    BlockDescriptor Emit(IR::Block& block);

    void ClearCache() override;

    void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);

    void ChangeProcessorID(size_t value) {
        conf.processor_id = value;
    }

protected:
    A32::UserConfig conf;
    A32::Jit* jit_interface;
    BlockRangeInformation<u32> block_ranges;

    void EmitCondPrelude(const A32EmitContext& ctx);

    struct FastDispatchEntry {
        u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
        const void* code_ptr = nullptr;
    };
    static_assert(sizeof(FastDispatchEntry) == 0x10);
    static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
    static constexpr size_t fast_dispatch_table_size = 0x10000;
    std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
    void ClearFastDispatchTable();

    std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
    std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
    void GenFastmemFallbacks();

    const void* terminal_handler_pop_rsb_hint;
    const void* terminal_handler_fast_dispatch_hint = nullptr;
    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
    void GenTerminalHandlers();

    // Microinstruction emitters
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;

    // Fastmem information
    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
    struct FastmemPatchInfo {
        u64 resume_rip;
        u64 callback;
        DoNotFastmemMarker marker;
    };
    tsl::robin_map<u64, FastmemPatchInfo> fastmem_patch_info;
    std::set<DoNotFastmemMarker> do_not_fastmem;
    std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
    FakeCall FastmemCallback(u64 rip);

    // Memory access helpers
    template<std::size_t bitsize, auto callback>
    void ReadMemory(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void WriteMemory(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst);

    // Terminal instruction emitters
    void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
    void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;

    // Patching
    void Unpatch(const IR::LocationDescriptor& target_desc) override;
    void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
};

} // namespace Dynarmic::Backend::X64
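The fast-dispatch constants above describe a fixed-size, direct-mapped cache: each entry is 16 bytes, so the mask 0xFFFF0 extracts a 16-byte-aligned byte offset covering all 0x10000 entries. The real lookup is emitted as machine code by GenTerminalHandlers(); what follows is only a minimal C++ restatement of the indexing arithmetic, where the Hash() placeholder stands in for whatever mixing the backend applies to the location descriptor (an assumption, not the actual implementation):

#include <array>
#include <cstdint>

struct FastDispatchEntry {
    std::uint64_t location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;  // all-ones sentinel
    const void* code_ptr = nullptr;
};

constexpr std::uint64_t fast_dispatch_table_mask = 0xFFFF0;  // 16-byte-aligned byte offsets
std::array<FastDispatchEntry, 0x10000> fast_dispatch_table;

// Placeholder for the backend's hash of a location descriptor (hypothetical).
std::uint64_t Hash(std::uint64_t location_descriptor) { return location_descriptor; }

// Direct-mapped lookup: the mask yields a byte offset into the table, and
// dividing by sizeof(FastDispatchEntry) turns it into an index. On a miss the
// sentinel descriptor does not match and the slow path would be taken.
const void* LookupFastDispatch(std::uint64_t descriptor) {
    const std::uint64_t offset = Hash(descriptor) & fast_dispatch_table_mask;
    const FastDispatchEntry& entry = fast_dispatch_table[offset / sizeof(FastDispatchEntry)];
    return entry.location_descriptor == descriptor ? entry.code_ptr : nullptr;
}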
349 externals/dynarmic/src/backend/x64/a32_interface.cpp (vendored, executable file)
@@ -0,0 +1,349 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <functional>
#include <memory>

#include <boost/icl/interval_set.hpp>
#include <fmt/format.h>

#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>

#include "backend/x64/a32_emit_x64.h"
#include "backend/x64/a32_jitstate.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/callback.h"
#include "backend/x64/devirtualize.h"
#include "backend/x64/jitstate_info.h"
#include "common/assert.h"
#include "common/cast_util.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A32/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/location_descriptor.h"
#include "ir_opt/passes.h"

namespace Dynarmic::A32 {

using namespace Backend::X64;

static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
    return RunCodeCallbacks{
        std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
        std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
    };
}

static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& conf) {
    return [conf](BlockOfCode& code) {
        if (conf.page_table) {
            code.mov(code.r14, Common::BitCast<u64>(conf.page_table));
        }
        if (conf.fastmem_pointer) {
            code.mov(code.r13, Common::BitCast<u64>(conf.fastmem_pointer));
        }
    };
}

struct Jit::Impl {
    Impl(Jit* jit, A32::UserConfig conf)
            : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, GenRCP(conf))
            , emitter(block_of_code, conf, jit)
            , conf(std::move(conf))
            , jit_interface(jit)
    {}

    A32JitState jit_state;
    BlockOfCode block_of_code;
    A32EmitX64 emitter;

    A32::UserConfig conf;

    // Requests made during execution to invalidate the cache are queued up here.
    size_t invalid_cache_generation = 0;
    boost::icl::interval_set<u32> invalid_cache_ranges;
    bool invalidate_entire_cache = false;

    void Execute() {
        const CodePtr current_codeptr = [this]{
            // RSB optimization
            const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
            if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
                jit_state.rsb_ptr = new_rsb_ptr;
                return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
            }

            return GetCurrentBlock();
        }();

        block_of_code.RunCode(&jit_state, current_codeptr);
    }

    void Step() {
        block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
    }

    void ExceptionalExit() {
        if (!conf.wall_clock_cntpct) {
            const s64 ticks = jit_state.cycles_to_run - jit_state.cycles_remaining;
            conf.callbacks->AddTicks(ticks);
        }
        PerformCacheInvalidation();
    }

    void ChangeProcessorID(size_t value) {
        conf.processor_id = value;
        emitter.ChangeProcessorID(value);
    }

    void ClearExclusiveState() {
        jit_state.exclusive_state = 0;
    }

    std::string Disassemble(const IR::LocationDescriptor& descriptor) {
        auto block = GetBasicBlock(descriptor);
        std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
        result += Common::DisassembleX64(block.entrypoint, reinterpret_cast<const char*>(block.entrypoint) + block.size);
        return result;
    }

    void PerformCacheInvalidation() {
        if (invalidate_entire_cache) {
            jit_state.ResetRSB();
            block_of_code.ClearCache();
            emitter.ClearCache();

            invalid_cache_ranges.clear();
            invalidate_entire_cache = false;
            invalid_cache_generation++;
            return;
        }

        if (invalid_cache_ranges.empty()) {
            return;
        }

        jit_state.ResetRSB();
        emitter.InvalidateCacheRanges(invalid_cache_ranges);
        invalid_cache_ranges.clear();
        invalid_cache_generation++;
    }

    void RequestCacheInvalidation() {
        if (jit_interface->is_executing) {
            jit_state.halt_requested = true;
            return;
        }

        PerformCacheInvalidation();
    }

private:
    Jit* jit_interface;

    static CodePtr GetCurrentBlockThunk(void* this_voidptr) {
        Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
        return this_.GetCurrentBlock();
    }

    IR::LocationDescriptor GetCurrentLocation() const {
        return IR::LocationDescriptor{jit_state.GetUniqueHash()};
    }

    CodePtr GetCurrentBlock() {
        return GetBasicBlock(GetCurrentLocation()).entrypoint;
    }

    CodePtr GetCurrentSingleStep() {
        return GetBasicBlock(A32::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true)).entrypoint;
    }

    A32EmitX64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
        auto block = emitter.GetBasicBlock(descriptor);
        if (block)
            return *block;

        constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
        if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
            invalidate_entire_cache = true;
            PerformCacheInvalidation();
        }

        IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }, {conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
        if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
            Optimization::A32GetSetElimination(ir_block);
            Optimization::DeadCodeElimination(ir_block);
        }
        if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
            Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
            Optimization::ConstantPropagation(ir_block);
            Optimization::DeadCodeElimination(ir_block);
        }
        Optimization::VerificationPass(ir_block);
        return emitter.Emit(ir_block);
    }
};

Jit::Jit(UserConfig conf) : impl(std::make_unique<Impl>(this, std::move(conf))) {}

Jit::~Jit() = default;

void Jit::Run() {
    ASSERT(!is_executing);
    is_executing = true;
    SCOPE_EXIT { this->is_executing = false; };

    impl->jit_state.halt_requested = false;

    impl->Execute();

    impl->PerformCacheInvalidation();
}

void Jit::Step() {
    ASSERT(!is_executing);
    is_executing = true;
    SCOPE_EXIT { this->is_executing = false; };

    impl->jit_state.halt_requested = true;

    impl->Step();

    impl->PerformCacheInvalidation();
}

void Jit::ClearCache() {
    impl->invalidate_entire_cache = true;
    impl->RequestCacheInvalidation();
}

void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
    impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
    impl->RequestCacheInvalidation();
}

void Jit::Reset() {
    ASSERT(!is_executing);
    impl->jit_state = {};
}

void Jit::HaltExecution() {
    impl->jit_state.halt_requested = true;
}

void Jit::ExceptionalExit() {
    impl->ExceptionalExit();
    is_executing = false;
}

void Jit::ClearExclusiveState() {
    impl->ClearExclusiveState();
}

void Jit::ChangeProcessorID(size_t new_processor) {
    impl->ChangeProcessorID(new_processor);
}

std::array<u32, 16>& Jit::Regs() {
    return impl->jit_state.Reg;
}
const std::array<u32, 16>& Jit::Regs() const {
    return impl->jit_state.Reg;
}

std::array<u32, 64>& Jit::ExtRegs() {
    return impl->jit_state.ExtReg;
}

const std::array<u32, 64>& Jit::ExtRegs() const {
    return impl->jit_state.ExtReg;
}

u32 Jit::Cpsr() const {
    return impl->jit_state.Cpsr();
}

void Jit::SetCpsr(u32 value) {
    return impl->jit_state.SetCpsr(value);
}

u32 Jit::Fpscr() const {
    return impl->jit_state.Fpscr();
}

void Jit::SetFpscr(u32 value) {
    return impl->jit_state.SetFpscr(value);
}

Context Jit::SaveContext() const {
    Context ctx;
    SaveContext(ctx);
    return ctx;
}

struct Context::Impl {
    A32JitState jit_state;
    size_t invalid_cache_generation;
};

Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
Context::~Context() = default;
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
Context::Context(Context&& ctx) noexcept : impl(std::move(ctx.impl)) {}
Context& Context::operator=(const Context& ctx) {
    *impl = *ctx.impl;
    return *this;
}
Context& Context::operator=(Context&& ctx) noexcept {
    impl = std::move(ctx.impl);
    return *this;
}

std::array<std::uint32_t, 16>& Context::Regs() {
    return impl->jit_state.Reg;
}
const std::array<std::uint32_t, 16>& Context::Regs() const {
    return impl->jit_state.Reg;
}
std::array<std::uint32_t, 64>& Context::ExtRegs() {
    return impl->jit_state.ExtReg;
}
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
    return impl->jit_state.ExtReg;
}

std::uint32_t Context::Cpsr() const {
    return impl->jit_state.Cpsr();
}
void Context::SetCpsr(std::uint32_t value) {
    impl->jit_state.SetCpsr(value);
}

std::uint32_t Context::Fpscr() const {
    return impl->jit_state.Fpscr();
}
void Context::SetFpscr(std::uint32_t value) {
    return impl->jit_state.SetFpscr(value);
}

void Jit::SaveContext(Context& ctx) const {
    ctx.impl->jit_state.TransferJitState(impl->jit_state, false);
    ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
}

void Jit::LoadContext(const Context& ctx) {
    bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
    impl->jit_state.TransferJitState(ctx.impl->jit_state, reset_rsb);
}

std::string Jit::Disassemble() const {
    return Common::DisassembleX64(impl->block_of_code.GetCodeBegin(), impl->block_of_code.getCurr());
}

} // namespace Dynarmic::A32
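For readers unfamiliar with this embedder-facing interface, here is a minimal sketch of how it is typically driven. Only the Jit member functions visible in the file above are assumed; the callbacks object (env), the start address, and the CPSR value are hypothetical and depend entirely on the guest being emulated:

#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/config.h>

// `env` is assumed to be a concrete subclass of A32::UserCallbacks that
// implements the pure-virtual memory, timing and exception callbacks
// (omitted here for brevity).
void RunGuest(Dynarmic::A32::UserCallbacks& env) {
    Dynarmic::A32::UserConfig config;
    config.callbacks = &env;

    Dynarmic::A32::Jit jit{config};
    jit.Regs()[15] = 0x0000'0000;  // hypothetical guest entry point (R15 = PC)
    jit.SetCpsr(0x000001d3);       // hypothetical mode/interrupt bits

    // Runs until GetTicksRemaining() is exhausted or HaltExecution() is called.
    jit.Run();

    // After the guest modifies its own code, the affected range must be invalidated.
    jit.InvalidateCacheRange(/*start_address=*/0x1000, /*length=*/0x100);
}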
204 externals/dynarmic/src/backend/x64/a32_jitstate.cpp (vendored, executable file)
@@ -0,0 +1,204 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "backend/x64/a32_jitstate.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/nzcv_util.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"

namespace Dynarmic::Backend::X64 {

/**
 * CPSR Bits
 * =========
 *
 * ARM CPSR flags
 * --------------
 * N        bit 31       Negative flag
 * Z        bit 30       Zero flag
 * C        bit 29       Carry flag
 * V        bit 28       oVerflow flag
 * Q        bit 27       Saturation flag
 * IT[1:0]  bits 25-26   If-Then execution state (lower 2 bits)
 * J        bit 24       Jazelle instruction set flag
 * GE       bits 16-19   Greater than or Equal flags
 * IT[7:2]  bits 10-15   If-Then execution state (upper 6 bits)
 * E        bit 9        Data Endianness flag
 * A        bit 8        Disable imprecise Aborts
 * I        bit 7        Disable IRQ interrupts
 * F        bit 6        Disable FIQ interrupts
 * T        bit 5        Thumb instruction set flag
 * M        bits 0-4     Processor Mode bits
 *
 * x64 LAHF+SETO flags
 * -------------------
 * SF   bit 15   Sign flag
 * ZF   bit 14   Zero flag
 * AF   bit 12   Auxiliary flag
 * PF   bit 10   Parity flag
 * CF   bit 8    Carry flag
 * OF   bit 0    Overflow flag
 */

u32 A32JitState::Cpsr() const {
    DEBUG_ASSERT((cpsr_q & ~1) == 0);
    DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);

    u32 cpsr = 0;

    // NZCV flags
    cpsr |= NZCV::FromX64(cpsr_nzcv);
    // Q flag
    cpsr |= cpsr_q ? 1 << 27 : 0;
    // GE flags
    cpsr |= Common::Bit<31>(cpsr_ge) ? 1 << 19 : 0;
    cpsr |= Common::Bit<23>(cpsr_ge) ? 1 << 18 : 0;
    cpsr |= Common::Bit<15>(cpsr_ge) ? 1 << 17 : 0;
    cpsr |= Common::Bit<7>(cpsr_ge) ? 1 << 16 : 0;
    // E flag, T flag
    cpsr |= Common::Bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
    cpsr |= Common::Bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
    // IT state
    cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
    cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
    // Other flags
    cpsr |= cpsr_jaifm;

    return cpsr;
}

void A32JitState::SetCpsr(u32 cpsr) {
    // NZCV flags
    cpsr_nzcv = NZCV::ToX64(cpsr);
    // Q flag
    cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
    // GE flags
    cpsr_ge = 0;
    cpsr_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
    cpsr_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
    cpsr_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
    cpsr_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;

    upper_location_descriptor &= 0xFFFF0000;
    // E flag, T flag
    upper_location_descriptor |= Common::Bit<9>(cpsr) ? 2 : 0;
    upper_location_descriptor |= Common::Bit<5>(cpsr) ? 1 : 0;
    // IT state
    upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
    upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;

    // Other flags
    cpsr_jaifm = cpsr & 0x010001DF;
}

void A32JitState::ResetRSB() {
    rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
    rsb_codeptrs.fill(0);
}

/**
 * Comparing MXCSR and FPSCR
 * =========================
 *
 * SSE MXCSR exception flags
 * -------------------------
 * PE   bit 5   Precision Flag
 * UE   bit 4   Underflow Flag
 * OE   bit 3   Overflow Flag
 * ZE   bit 2   Divide By Zero Flag
 * DE   bit 1   Denormal Flag             // Appears to only be set when MXCSR.DAZ = 0
 * IE   bit 0   Invalid Operation Flag
 *
 * VFP FPSCR cumulative exception bits
 * -----------------------------------
 * IDC  bit 7   Input Denormal cumulative exception bit   // Only ever set when FPSCR.FTZ = 1
 * IXC  bit 4   Inexact cumulative exception bit
 * UFC  bit 3   Underflow cumulative exception bit
 * OFC  bit 2   Overflow cumulative exception bit
 * DZC  bit 1   Division by Zero cumulative exception bit
 * IOC  bit 0   Invalid Operation cumulative exception bit
 *
 * SSE MXCSR exception masks
 * -------------------------
 * PM   bit 12  Precision Mask
 * UM   bit 11  Underflow Mask
 * OM   bit 10  Overflow Mask
 * ZM   bit 9   Divide By Zero Mask
 * DM   bit 8   Denormal Mask
 * IM   bit 7   Invalid Operation Mask
 *
 * VFP FPSCR exception trap enables
 * --------------------------------
 * IDE  bit 15  Input Denormal exception trap enable
 * IXE  bit 12  Inexact exception trap enable
 * UFE  bit 11  Underflow exception trap enable
 * OFE  bit 10  Overflow exception trap enable
 * DZE  bit 9   Division by Zero exception trap enable
 * IOE  bit 8   Invalid Operation exception trap enable
 *
 * SSE MXCSR mode bits
 * -------------------
 * FZ   bit 15      Flush To Zero
 * DAZ  bit 6       Denormals Are Zero
 * RN   bits 13-14  Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero}
 *
 * VFP FPSCR mode bits
 * -------------------
 * AHP      bit 26      Alternate half-precision
 * DN       bit 25      Default NaN
 * FZ       bit 24      Flush to Zero
 * RMode    bits 22-23  Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
 * Stride   bits 20-21  Vector stride
 * Len      bits 16-18  Vector length
 */

// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;

u32 A32JitState::Fpscr() const {
    DEBUG_ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0);

    const u32 fpcr_mode = static_cast<u32>(upper_location_descriptor) & FPSCR_MODE_MASK;
    const u32 mxcsr = guest_MXCSR | asimd_MXCSR;

    u32 FPSCR = fpcr_mode | fpsr_nzcv;
    FPSCR |= (mxcsr & 0b0000000000001);       // IOC = IE
    FPSCR |= (mxcsr & 0b0000000111100) >> 1;  // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
    FPSCR |= fpsr_exc;

    return FPSCR;
}

void A32JitState::SetFpscr(u32 FPSCR) {
    // Ensure that only the upper half of upper_location_descriptor is used for FPSCR bits.
    static_assert((FPSCR_MODE_MASK & 0xFFFF0000) == FPSCR_MODE_MASK);

    upper_location_descriptor &= 0x0000FFFF;
    upper_location_descriptor |= FPSCR & FPSCR_MODE_MASK;

    fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;

    guest_MXCSR = 0x00001f80;
    asimd_MXCSR = 0x00009fc0;

    // RMode
    const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
    guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];

    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
    fpsr_exc = FPSCR & 0x9F;

    if (Common::Bit<24>(FPSCR)) {
        // VFP Flush to Zero
        guest_MXCSR |= (1 << 15);  // SSE Flush to Zero
        guest_MXCSR |= (1 << 6);   // SSE Denormals are Zero
    }
}

} // namespace Dynarmic::Backend::X64
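The GE-flag packing above deserves a worked example: SetCpsr splats each of the four guest GE bits across an entire byte of cpsr_ge so that emitted SSE code can consume them as per-byte sign bits, and Cpsr recovers each flag from the top bit of its byte. A self-contained round trip that mirrors only the GE logic from this file:

#include <cassert>
#include <cstdint>

// Mirrors the GE-flag portion of A32JitState::SetCpsr.
std::uint32_t PackGE(std::uint32_t cpsr) {
    std::uint32_t ge = 0;
    ge |= (cpsr & (1u << 19)) ? 0xFF000000 : 0;  // GE3 -> byte 3
    ge |= (cpsr & (1u << 18)) ? 0x00FF0000 : 0;  // GE2 -> byte 2
    ge |= (cpsr & (1u << 17)) ? 0x0000FF00 : 0;  // GE1 -> byte 1
    ge |= (cpsr & (1u << 16)) ? 0x000000FF : 0;  // GE0 -> byte 0
    return ge;
}

// Mirrors the GE-flag portion of A32JitState::Cpsr.
std::uint32_t UnpackGE(std::uint32_t ge) {
    std::uint32_t cpsr = 0;
    cpsr |= (ge & (1u << 31)) ? 1u << 19 : 0;  // sign bit of byte 3
    cpsr |= (ge & (1u << 23)) ? 1u << 18 : 0;  // sign bit of byte 2
    cpsr |= (ge & (1u << 15)) ? 1u << 17 : 0;  // sign bit of byte 1
    cpsr |= (ge & (1u << 7))  ? 1u << 16 : 0;  // sign bit of byte 0
    return cpsr;
}

int main() {
    const std::uint32_t ge_bits = (1u << 19) | (1u << 16);  // GE3 and GE0 set
    assert(UnpackGE(PackGE(ge_bits)) == ge_bits);           // lossless round trip
}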
110 externals/dynarmic/src/backend/x64/a32_jitstate.h (vendored, executable file)
@@ -0,0 +1,110 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>

#include <xbyak.h>

#include "common/common_types.h"

namespace Dynarmic::Backend::X64 {

class BlockOfCode;

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif

struct A32JitState {
    using ProgramCounterType = u32;

    A32JitState() { ResetRSB(); }

    std::array<u32, 16> Reg{}; // Current register file.
    // TODO: Mode-specific register sets unimplemented.

    u32 upper_location_descriptor = 0;

    u32 cpsr_ge = 0;
    u32 cpsr_q = 0;
    u32 cpsr_nzcv = 0;
    u32 cpsr_jaifm = 0;
    u32 Cpsr() const;
    void SetCpsr(u32 cpsr);

    alignas(16) std::array<u32, 64> ExtReg{}; // Extension registers.

    static constexpr size_t SpillCount = 64;
    alignas(16) std::array<std::array<u64, 2>, SpillCount> spill{}; // Spill.
    static Xbyak::Address GetSpillLocationFromIndex(size_t i) {
        using namespace Xbyak::util;
        return xword[r15 + offsetof(A32JitState, spill) + i * sizeof(u64) * 2];
    }

    // For internal use (See: BlockOfCode::RunCode)
    u32 guest_MXCSR = 0x00001f80;
    u32 asimd_MXCSR = 0x00009fc0;
    u32 save_host_MXCSR = 0;
    s64 cycles_to_run = 0;
    s64 cycles_remaining = 0;
    bool halt_requested = false;
    bool check_bit = false;

    // Exclusive state
    u32 exclusive_state = 0;

    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
    static constexpr size_t RSBPtrMask = RSBSize - 1;
    u32 rsb_ptr = 0;
    std::array<u64, RSBSize> rsb_location_descriptors;
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB();

    u32 fpsr_exc = 0;
    u32 fpsr_qc = 0; // Dummy value
    u32 fpsr_nzcv = 0;
    u32 Fpscr() const;
    void SetFpscr(u32 FPSCR);

    u64 GetUniqueHash() const noexcept {
        return (static_cast<u64>(upper_location_descriptor) << 32) | (static_cast<u64>(Reg[15]));
    }

    void TransferJitState(const A32JitState& src, bool reset_rsb) {
        Reg = src.Reg;
        upper_location_descriptor = src.upper_location_descriptor;
        cpsr_ge = src.cpsr_ge;
        cpsr_q = src.cpsr_q;
        cpsr_nzcv = src.cpsr_nzcv;
        cpsr_jaifm = src.cpsr_jaifm;
        ExtReg = src.ExtReg;
        guest_MXCSR = src.guest_MXCSR;
        asimd_MXCSR = src.asimd_MXCSR;
        fpsr_exc = src.fpsr_exc;
        fpsr_qc = src.fpsr_qc;
        fpsr_nzcv = src.fpsr_nzcv;

        exclusive_state = 0;

        if (reset_rsb) {
            ResetRSB();
        } else {
            rsb_ptr = src.rsb_ptr;
            rsb_location_descriptors = src.rsb_location_descriptors;
            rsb_codeptrs = src.rsb_codeptrs;
        }
    }
};

#ifdef _MSC_VER
#pragma warning(pop)
#endif

using CodePtr = const void*;

} // namespace Dynarmic::Backend::X64
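GetUniqueHash() packs the upper location descriptor and R15 into a single u64; together with rsb_ptr and the two RSB arrays it drives the return-stack-buffer fast path seen in Jit::Impl::Execute() in a32_interface.cpp above. A minimal restatement of that probe outside the JIT, where FallbackLookup is a hypothetical stand-in for the slow-path block lookup:

#include <array>
#include <cstdint>

struct RsbState {
    static constexpr std::size_t RSBSize = 8;  // must be a power of 2
    static constexpr std::size_t RSBPtrMask = RSBSize - 1;
    std::uint32_t rsb_ptr = 0;
    std::array<std::uint64_t, RSBSize> rsb_location_descriptors{};
    std::array<std::uint64_t, RSBSize> rsb_codeptrs{};
};

// Hypothetical slow-path lookup (stub for illustration only).
const void* FallbackLookup(std::uint64_t) { return nullptr; }

// Mirrors the "RSB optimization" in Jit::Impl::Execute(): peek at the most
// recently pushed entry; on a hit, pop it and jump straight to its code
// pointer instead of taking the generic block lookup.
const void* Dispatch(RsbState& rsb, std::uint64_t unique_hash) {
    const std::uint32_t new_rsb_ptr = (rsb.rsb_ptr - 1) & RsbState::RSBPtrMask;
    if (rsb.rsb_location_descriptors[new_rsb_ptr] == unique_hash) {
        rsb.rsb_ptr = new_rsb_ptr;  // pop the entry
        return reinterpret_cast<const void*>(rsb.rsb_codeptrs[new_rsb_ptr]);
    }
    return FallbackLookup(unique_hash);
}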
1339 externals/dynarmic/src/backend/x64/a64_emit_x64.cpp (vendored, executable file)
File diff suppressed because it is too large.
126 externals/dynarmic/src/backend/x64/a64_emit_x64.h (vendored, executable file)
@@ -0,0 +1,126 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>
#include <map>
#include <tuple>

#include <dynarmic/A64/a64.h>
#include <dynarmic/A64/config.h>

#include "backend/x64/a64_jitstate.h"
#include "backend/x64/block_range_information.h"
#include "backend/x64/emit_x64.h"
#include "frontend/A64/location_descriptor.h"
#include "frontend/ir/terminal.h"

namespace Dynarmic::Backend::X64 {

class RegAlloc;

struct A64EmitContext final : public EmitContext {
    A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);

    A64::LocationDescriptor Location() const;
    bool IsSingleStep() const;
    FP::FPCR FPCR(bool fpcr_controlled = true) const override;

    bool HasOptimization(OptimizationFlag flag) const override {
        return conf.HasOptimization(flag);
    }

    const A64::UserConfig& conf;
};

class A64EmitX64 final : public EmitX64 {
public:
    A64EmitX64(BlockOfCode& code, A64::UserConfig conf, A64::Jit* jit_interface);
    ~A64EmitX64() override;

    /**
     * Emit host machine code for a basic block with intermediate representation `block`.
     * @note block is modified.
     */
    BlockDescriptor Emit(IR::Block& block);

    void ClearCache() override;

    void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges);

    void ChangeProcessorID(size_t value) {
        conf.processor_id = value;
    }

protected:
    A64::UserConfig conf;
    A64::Jit* jit_interface;
    BlockRangeInformation<u64> block_ranges;

    struct FastDispatchEntry {
        u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
        const void* code_ptr = nullptr;
    };
    static_assert(sizeof(FastDispatchEntry) == 0x10);
    static constexpr u64 fast_dispatch_table_mask = 0xFFFFF0;
    static constexpr size_t fast_dispatch_table_size = 0x100000;
    std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
    void ClearFastDispatchTable();

    void (*memory_read_128)();
    void (*memory_write_128)();
    void GenMemory128Accessors();

    std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
    std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
    void GenFastmemFallbacks();

    const void* terminal_handler_pop_rsb_hint;
    const void* terminal_handler_fast_dispatch_hint = nullptr;
    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
    void GenTerminalHandlers();

    template<std::size_t bitsize>
    void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize>
    void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst);

    // Microinstruction emitters
    void EmitPushRSB(EmitContext& ctx, IR::Inst* inst);
#define OPCODE(...)
#define A32OPC(...)
#define A64OPC(name, type, ...) void EmitA64##name(A64EmitContext& ctx, IR::Inst* inst);
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;

    // Terminal instruction emitters
    void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;

    // Patching
    void Unpatch(const IR::LocationDescriptor& target_desc) override;
    void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
};

} // namespace Dynarmic::Backend::X64
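EmitDirectPageTableMemoryRead/Write above emit inline x64 for a single-level page-table walk; GenRCP in the interface files pins the table's base pointer in r14 for that purpose. A C++ restatement of the address computation follows, assuming 4 KiB pages and a page table laid out as an array of host pointers indexed by virtual page number; the exact layout is defined by dynarmic's config headers, which are not reproduced in this diff:

#include <cstdint>

// Assumed layout: page_table[vaddr >> 12] is either a host pointer to a
// 4 KiB page or nullptr; the number of significant address bits is set via
// UserConfig::page_table_address_space_bits.
template <typename T>
bool DirectPageTableRead(void* const* page_table, std::uint64_t vaddr, T& out) {
    const std::uint64_t page_index = vaddr >> 12;     // virtual page number
    const std::uint64_t page_offset = vaddr & 0xFFF;  // offset within the page
    const char* page = static_cast<const char*>(page_table[page_index]);
    if (!page) {
        return false;  // unmapped: the JIT falls back to the memory callbacks
    }
    out = *reinterpret_cast<const T*>(page + page_offset);
    return true;
}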
424 externals/dynarmic/src/backend/x64/a64_interface.cpp (vendored, executable file)
@@ -0,0 +1,424 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <cstring>
#include <memory>

#include <boost/icl/interval_set.hpp>
#include <dynarmic/A64/a64.h>

#include "backend/x64/a64_emit_x64.h"
#include "backend/x64/a64_jitstate.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/devirtualize.h"
#include "backend/x64/jitstate_info.h"
#include "common/assert.h"
#include "common/llvm_disassemble.h"
#include "common/scope_exit.h"
#include "frontend/A64/translate/translate.h"
#include "frontend/ir/basic_block.h"
#include "ir_opt/passes.h"

namespace Dynarmic::A64 {

using namespace Backend::X64;

static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
    return RunCodeCallbacks{
        std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
        std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)),
        std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)),
    };
}

static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
    return [conf](BlockOfCode& code) {
        if (conf.page_table) {
            code.mov(code.r14, Common::BitCast<u64>(conf.page_table));
        }
    };
}

struct Jit::Impl final {
public:
    Impl(Jit* jit, UserConfig conf)
            : conf(conf)
            , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, GenRCP(conf))
            , emitter(block_of_code, conf, jit)
    {
        ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
    }

    ~Impl() = default;

    void Run() {
        ASSERT(!is_executing);
        is_executing = true;
        SCOPE_EXIT { this->is_executing = false; };
        jit_state.halt_requested = false;

        // TODO: Check code alignment

        const CodePtr current_code_ptr = [this]{
            // RSB optimization
            const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
            if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
                jit_state.rsb_ptr = new_rsb_ptr;
                return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
            }

            return GetCurrentBlock();
        }();
        block_of_code.RunCode(&jit_state, current_code_ptr);

        PerformRequestedCacheInvalidation();
    }

    void Step() {
        ASSERT(!is_executing);
        is_executing = true;
        SCOPE_EXIT { this->is_executing = false; };
        jit_state.halt_requested = true;

        block_of_code.StepCode(&jit_state, GetCurrentSingleStep());

        PerformRequestedCacheInvalidation();
    }

    void ExceptionalExit() {
        if (!conf.wall_clock_cntpct) {
            const s64 ticks = jit_state.cycles_to_run - jit_state.cycles_remaining;
            conf.callbacks->AddTicks(ticks);
        }
        PerformRequestedCacheInvalidation();
        is_executing = false;
    }

    void ChangeProcessorID(size_t value) {
        conf.processor_id = value;
        emitter.ChangeProcessorID(value);
    }

    void ClearCache() {
        invalidate_entire_cache = true;
        RequestCacheInvalidation();
    }

    void InvalidateCacheRange(u64 start_address, size_t length) {
        const auto end_address = static_cast<u64>(start_address + length - 1);
        const auto range = boost::icl::discrete_interval<u64>::closed(start_address, end_address);
        invalid_cache_ranges.add(range);
        RequestCacheInvalidation();
    }

    void Reset() {
        ASSERT(!is_executing);
        jit_state = {};
    }

    void HaltExecution() {
        jit_state.halt_requested = true;
    }

    u64 GetSP() const {
        return jit_state.sp;
    }

    void SetSP(u64 value) {
        jit_state.sp = value;
    }

    u64 GetPC() const {
        return jit_state.pc;
    }

    void SetPC(u64 value) {
        jit_state.pc = value;
    }

    u64 GetRegister(size_t index) const {
        if (index == 31)
            return GetSP();
        return jit_state.reg.at(index);
    }

    void SetRegister(size_t index, u64 value) {
        if (index == 31)
            return SetSP(value);
        jit_state.reg.at(index) = value;
    }

    std::array<u64, 31> GetRegisters() const {
        return jit_state.reg;
    }

    void SetRegisters(const std::array<u64, 31>& value) {
        jit_state.reg = value;
    }

    Vector GetVector(size_t index) const {
        return {jit_state.vec.at(index * 2), jit_state.vec.at(index * 2 + 1)};
    }

    void SetVector(size_t index, Vector value) {
        jit_state.vec.at(index * 2) = value[0];
        jit_state.vec.at(index * 2 + 1) = value[1];
    }

    std::array<Vector, 32> GetVectors() const {
        std::array<Vector, 32> ret;
        static_assert(sizeof(ret) == sizeof(jit_state.vec));
        std::memcpy(ret.data(), jit_state.vec.data(), sizeof(jit_state.vec));
        return ret;
    }

    void SetVectors(const std::array<Vector, 32>& value) {
        static_assert(sizeof(value) == sizeof(jit_state.vec));
        std::memcpy(jit_state.vec.data(), value.data(), sizeof(jit_state.vec));
    }

    u32 GetFpcr() const {
        return jit_state.GetFpcr();
    }

    void SetFpcr(u32 value) {
        jit_state.SetFpcr(value);
    }

    u32 GetFpsr() const {
        return jit_state.GetFpsr();
    }

    void SetFpsr(u32 value) {
        jit_state.SetFpsr(value);
    }

    u32 GetPstate() const {
        return jit_state.GetPstate();
    }

    void SetPstate(u32 value) {
        jit_state.SetPstate(value);
    }

    void ClearExclusiveState() {
        jit_state.exclusive_state = 0;
    }

    bool IsExecuting() const {
        return is_executing;
    }

    std::string Disassemble() const {
        return Common::DisassembleX64(block_of_code.GetCodeBegin(), block_of_code.getCurr());
    }

private:
    static CodePtr GetCurrentBlockThunk(void* thisptr) {
        Jit::Impl* this_ = static_cast<Jit::Impl*>(thisptr);
        return this_->GetCurrentBlock();
    }

    IR::LocationDescriptor GetCurrentLocation() const {
        return IR::LocationDescriptor{jit_state.GetUniqueHash()};
    }

    CodePtr GetCurrentBlock() {
        return GetBlock(GetCurrentLocation());
    }

    CodePtr GetCurrentSingleStep() {
        return GetBlock(A64::LocationDescriptor{GetCurrentLocation()}.SetSingleStepping(true));
    }

    CodePtr GetBlock(IR::LocationDescriptor current_location) {
        if (auto block = emitter.GetBasicBlock(current_location))
            return block->entrypoint;

        constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
        if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
            // Immediately evacuate cache
            invalidate_entire_cache = true;
            PerformRequestedCacheInvalidation();
        }

        // JIT Compile
        const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
        IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
                                            {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
        Optimization::A64CallbackConfigPass(ir_block, conf);
        if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
            Optimization::A64GetSetElimination(ir_block);
            Optimization::DeadCodeElimination(ir_block);
        }
        if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
            Optimization::ConstantPropagation(ir_block);
            Optimization::DeadCodeElimination(ir_block);
        }
        if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
            Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
        }
        Optimization::VerificationPass(ir_block);
        return emitter.Emit(ir_block).entrypoint;
    }

    void RequestCacheInvalidation() {
        if (is_executing) {
            jit_state.halt_requested = true;
            return;
        }

        PerformRequestedCacheInvalidation();
    }

    void PerformRequestedCacheInvalidation() {
        if (!invalidate_entire_cache && invalid_cache_ranges.empty()) {
            return;
        }

        jit_state.ResetRSB();
        if (invalidate_entire_cache) {
            block_of_code.ClearCache();
            emitter.ClearCache();
        } else {
            emitter.InvalidateCacheRanges(invalid_cache_ranges);
        }
        invalid_cache_ranges.clear();
        invalidate_entire_cache = false;
    }

    bool is_executing = false;

    UserConfig conf;
    A64JitState jit_state;
    BlockOfCode block_of_code;
    A64EmitX64 emitter;

    bool invalidate_entire_cache = false;
    boost::icl::interval_set<u64> invalid_cache_ranges;
};

Jit::Jit(UserConfig conf)
        : impl(std::make_unique<Jit::Impl>(this, conf)) {}

Jit::~Jit() = default;

void Jit::Run() {
    impl->Run();
}

void Jit::Step() {
    impl->Step();
}

void Jit::ClearCache() {
    impl->ClearCache();
}

void Jit::InvalidateCacheRange(u64 start_address, size_t length) {
    impl->InvalidateCacheRange(start_address, length);
}

void Jit::Reset() {
    impl->Reset();
}

void Jit::HaltExecution() {
    impl->HaltExecution();
}

void Jit::ExceptionalExit() {
    impl->ExceptionalExit();
|
||||
}
|
||||
|
||||
void Jit::ChangeProcessorID(size_t new_processor) {
|
||||
impl->ChangeProcessorID(new_processor);
|
||||
}
|
||||
|
||||
u64 Jit::GetSP() const {
|
||||
return impl->GetSP();
|
||||
}
|
||||
|
||||
void Jit::SetSP(u64 value) {
|
||||
impl->SetSP(value);
|
||||
}
|
||||
|
||||
u64 Jit::GetPC() const {
|
||||
return impl->GetPC();
|
||||
}
|
||||
|
||||
void Jit::SetPC(u64 value) {
|
||||
impl->SetPC(value);
|
||||
}
|
||||
|
||||
u64 Jit::GetRegister(size_t index) const {
|
||||
return impl->GetRegister(index);
|
||||
}
|
||||
|
||||
void Jit::SetRegister(size_t index, u64 value) {
|
||||
impl->SetRegister(index, value);
|
||||
}
|
||||
|
||||
std::array<u64, 31> Jit::GetRegisters() const {
|
||||
return impl->GetRegisters();
|
||||
}
|
||||
|
||||
void Jit::SetRegisters(const std::array<u64, 31>& value) {
|
||||
impl->SetRegisters(value);
|
||||
}
|
||||
|
||||
Vector Jit::GetVector(size_t index) const {
|
||||
return impl->GetVector(index);
|
||||
}
|
||||
|
||||
void Jit::SetVector(size_t index, Vector value) {
|
||||
impl->SetVector(index, value);
|
||||
}
|
||||
|
||||
std::array<Vector, 32> Jit::GetVectors() const {
|
||||
return impl->GetVectors();
|
||||
}
|
||||
|
||||
void Jit::SetVectors(const std::array<Vector, 32>& value) {
|
||||
impl->SetVectors(value);
|
||||
}
|
||||
|
||||
u32 Jit::GetFpcr() const {
|
||||
return impl->GetFpcr();
|
||||
}
|
||||
|
||||
void Jit::SetFpcr(u32 value) {
|
||||
impl->SetFpcr(value);
|
||||
}
|
||||
|
||||
u32 Jit::GetFpsr() const {
|
||||
return impl->GetFpsr();
|
||||
}
|
||||
|
||||
void Jit::SetFpsr(u32 value) {
|
||||
impl->SetFpsr(value);
|
||||
}
|
||||
|
||||
u32 Jit::GetPstate() const {
|
||||
return impl->GetPstate();
|
||||
}
|
||||
|
||||
void Jit::SetPstate(u32 value) {
|
||||
impl->SetPstate(value);
|
||||
}
|
||||
|
||||
void Jit::ClearExclusiveState() {
|
||||
impl->ClearExclusiveState();
|
||||
}
|
||||
|
||||
bool Jit::IsExecuting() const {
|
||||
return impl->IsExecuting();
|
||||
}
|
||||
|
||||
std::string Jit::Disassemble() const {
|
||||
return impl->Disassemble();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A64
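
Editor's note: for orientation before the next file, a minimal sketch of driving the public interface implemented above. The callbacks object and all addresses are assumptions; dynarmic requires the embedder to supply a callbacks implementation, which this diff does not show.

// my_callbacks: hypothetical embedder-provided A64 callbacks instance.
Dynarmic::A64::UserConfig conf;
conf.callbacks = &my_callbacks;
Dynarmic::A64::Jit jit{conf};
jit.SetPC(0x10000);                 // illustrative guest entry point
jit.SetSP(0x7fff0000);              // illustrative guest stack pointer
jit.SetRegister(0, 42);             // X0 = 42
jit.Run();                          // executes until halted or out of ticks
const u64 x0 = jit.GetRegister(0);  // read back the result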
114
externals/dynarmic/src/backend/x64/a64_jitstate.cpp
vendored
Executable file
@@ -0,0 +1,114 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "backend/x64/a64_jitstate.h"
#include "common/bit_util.h"
#include "frontend/A64/location_descriptor.h"

namespace Dynarmic::Backend::X64 {

/**
 * Comparing MXCSR and FPCR
 * ========================
 *
 * SSE MXCSR exception masks
 * -------------------------
 * PM   bit 12      Precision Mask
 * UM   bit 11      Underflow Mask
 * OM   bit 10      Overflow Mask
 * ZM   bit 9       Divide By Zero Mask
 * DM   bit 8       Denormal Mask
 * IM   bit 7       Invalid Operation Mask
 *
 * A64 FPCR exception trap enables
 * -------------------------------
 * IDE  bit 15      Input Denormal exception trap enable
 * IXE  bit 12      Inexact exception trap enable
 * UFE  bit 11      Underflow exception trap enable
 * OFE  bit 10      Overflow exception trap enable
 * DZE  bit 9       Division by Zero exception trap enable
 * IOE  bit 8       Invalid Operation exception trap enable
 *
 * SSE MXCSR mode bits
 * -------------------
 * FZ   bit 15      Flush To Zero
 * DAZ  bit 6       Denormals Are Zero
 * RN   bits 13-14  Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero}
 *
 * A64 FPCR mode bits
 * ------------------
 * AHP   bit 26     Alternative half-precision
 * DN    bit 25     Default NaN
 * FZ    bit 24     Flush to Zero
 * RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
 * FZ16  bit 19     Flush to Zero for half-precision
 */

constexpr u32 FPCR_MASK = 0x07C89F00;

u32 A64JitState::GetFpcr() const {
    return fpcr;
}

void A64JitState::SetFpcr(u32 value) {
    fpcr = value & FPCR_MASK;

    asimd_MXCSR &= 0x0000003D;
    guest_MXCSR &= 0x0000003D;
    asimd_MXCSR |= 0x00001f80;
    guest_MXCSR |= 0x00001f80; // Mask all exceptions

    // RMode
    const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
    guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3];

    if (Common::Bit<24>(value)) {
        guest_MXCSR |= (1 << 15); // SSE Flush to Zero
        guest_MXCSR |= (1 << 6);  // SSE Denormals are Zero
    }
}

/**
 * Comparing MXCSR and FPSR
 * ========================
 *
 * SSE MXCSR exception flags
 * -------------------------
 * PE   bit 5   Precision Flag
 * UE   bit 4   Underflow Flag
 * OE   bit 3   Overflow Flag
 * ZE   bit 2   Divide By Zero Flag
 * DE   bit 1   Denormal Flag             // Appears to only be set when MXCSR.DAZ = 0
 * IE   bit 0   Invalid Operation Flag
 *
 * A64 FPSR cumulative exception bits
 * ----------------------------------
 * QC   bit 27  Cumulative saturation bit
 * IDC  bit 7   Input Denormal cumulative exception bit   // Only ever set when FPCR.FTZ = 1
 * IXC  bit 4   Inexact cumulative exception bit
 * UFC  bit 3   Underflow cumulative exception bit
 * OFC  bit 2   Overflow cumulative exception bit
 * DZC  bit 1   Division by Zero cumulative exception bit
 * IOC  bit 0   Invalid Operation cumulative exception bit
 */

u32 A64JitState::GetFpsr() const {
    const u32 mxcsr = guest_MXCSR | asimd_MXCSR;
    u32 fpsr = 0;
    fpsr |= (mxcsr & 0b0000000000001);      // IOC = IE
    fpsr |= (mxcsr & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
    fpsr |= fpsr_exc;
    fpsr |= (fpsr_qc == 0 ? 0 : 1) << 27;
    return fpsr;
}

void A64JitState::SetFpsr(u32 value) {
    guest_MXCSR &= ~0x0000003D;
    asimd_MXCSR &= ~0x0000003D;
    fpsr_qc = (value >> 27) & 1;
    fpsr_exc = value & 0x9F;
}

} // namespace Dynarmic::Backend::X64
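
Editor's note: the RMode table in SetFpcr above can be checked at compile time. FPCR encodes Positive = 1 and Negative = 2, while MXCSR's RN field (bits 13-14) encodes Negative = 1 and Positive = 2, so the table swaps the middle entries rather than shifting bits. A sketch, assuming <array> and the u32 alias from common_types.h are in scope:

constexpr std::array<u32, 4> MXCSR_RMode{0x0, 0x4000, 0x2000, 0x6000};
constexpr u32 fpcr_rp = 0x1u << 22;                           // FPCR.RMode = 1: towards +inf
static_assert(MXCSR_RMode[(fpcr_rp >> 22) & 0x3] == 0x4000);  // MXCSR.RN = 2: also +inf
constexpr u32 fpcr_rz = 0x3u << 22;                           // FPCR.RMode = 3: towards zero
static_assert(MXCSR_RMode[(fpcr_rz >> 22) & 0x3] == 0x6000);  // MXCSR.RN = 3: also zero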
96
externals/dynarmic/src/backend/x64/a64_jitstate.h
vendored
Executable file
@@ -0,0 +1,96 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>

#include <xbyak.h>

#include "backend/x64/nzcv_util.h"
#include "common/common_types.h"
#include "frontend/A64/location_descriptor.h"

namespace Dynarmic::Backend::X64 {

class BlockOfCode;

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
#endif

struct A64JitState {
    using ProgramCounterType = u64;

    A64JitState() { ResetRSB(); }

    std::array<u64, 31> reg{};
    u64 sp = 0;
    u64 pc = 0;

    u32 cpsr_nzcv = 0;

    u32 GetPstate() const {
        return NZCV::FromX64(cpsr_nzcv);
    }
    void SetPstate(u32 new_pstate) {
        cpsr_nzcv = NZCV::ToX64(new_pstate);
    }

    alignas(16) std::array<u64, 64> vec{}; // Extension registers.

    static constexpr size_t SpillCount = 64;
    alignas(16) std::array<std::array<u64, 2>, SpillCount> spill{}; // Spill.
    static Xbyak::Address GetSpillLocationFromIndex(size_t i) {
        using namespace Xbyak::util;
        return xword[r15 + offsetof(A64JitState, spill) + i * sizeof(u64) * 2];
    }

    // For internal use (See: BlockOfCode::RunCode)
    u32 guest_MXCSR = 0x00001f80;
    u32 asimd_MXCSR = 0x00009fc0;
    u32 save_host_MXCSR = 0;
    s64 cycles_to_run = 0;
    s64 cycles_remaining = 0;
    bool halt_requested = false;
    bool check_bit = false;

    // Exclusive state
    static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
    u8 exclusive_state = 0;

    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
    static constexpr size_t RSBPtrMask = RSBSize - 1;
    u32 rsb_ptr = 0;
    std::array<u64, RSBSize> rsb_location_descriptors;
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB() {
        rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
        rsb_codeptrs.fill(0);
    }

    u32 fpsr_exc = 0;
    u32 fpsr_qc = 0;
    u32 fpcr = 0;
    u32 GetFpcr() const;
    u32 GetFpsr() const;
    void SetFpcr(u32 value);
    void SetFpsr(u32 value);

    u64 GetUniqueHash() const noexcept {
        const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
        const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
        return pc_u64 | fpcr_u64;
    }
};

#ifdef _MSC_VER
#pragma warning(pop)
#endif

using CodePtr = const void*;

} // namespace Dynarmic::Backend::X64
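
Editor's note: GetUniqueHash above keys the block cache on (PC, FPCR), so code compiled under one floating-point mode is never reused under another. A sketch of the packing with illustrative mask and shift values; the real constants live in A64::LocationDescriptor, which this diff does not show:

constexpr u64 pc_mask = (u64(1) << 56) - 1;  // assumption: low 56 bits hold the PC
constexpr u64 fpcr_shift = 56;               // assumption: FPCR bits sit above them
constexpr u64 UniqueHash(u64 pc, u64 masked_fpcr) {
    return (pc & pc_mask) | (masked_fpcr << fpcr_shift);
}
static_assert(UniqueHash(0x10000, 1) != UniqueHash(0x10000, 0));  // same PC, different FP mode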
134
externals/dynarmic/src/backend/x64/abi.cpp
vendored
Executable file
@@ -0,0 +1,134 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2020 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <algorithm>
#include <vector>

#include <xbyak.h>

#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "common/common_types.h"
#include "common/iterator_util.h"

namespace Dynarmic::Backend::X64 {

constexpr size_t XMM_SIZE = 16;

struct FrameInfo {
    size_t stack_subtraction;
    size_t xmm_offset;
    size_t frame_offset;
};

static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t frame_size) {
    // We are initially 8 byte aligned because the return address is pushed onto an aligned stack after a call.
    const size_t rsp_alignment = (num_gprs % 2 == 0) ? 8 : 0;
    const size_t total_xmm_size = num_xmms * XMM_SIZE;

    if (frame_size & 0xF) {
        frame_size += 0x10 - (frame_size & 0xF);
    }

    return {
        rsp_alignment + total_xmm_size + frame_size + ABI_SHADOW_SPACE,
        frame_size + ABI_SHADOW_SPACE,
        ABI_SHADOW_SPACE,
    };
}

template<typename RegisterArrayT>
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size, const RegisterArrayT& regs) {
    using namespace Xbyak::util;

    const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
    const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);

    FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

    for (HostLoc gpr : regs) {
        if (HostLocIsGPR(gpr)) {
            code.push(HostLocToReg64(gpr));
        }
    }

    if (frame_info.stack_subtraction != 0) {
        code.sub(rsp, u32(frame_info.stack_subtraction));
    }

    size_t xmm_offset = frame_info.xmm_offset;
    for (HostLoc xmm : regs) {
        if (HostLocIsXMM(xmm)) {
            if (code.HasAVX()) {
                code.vmovaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
            } else {
                code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
            }
            xmm_offset += XMM_SIZE;
        }
    }
}

template<typename RegisterArrayT>
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size, const RegisterArrayT& regs) {
    using namespace Xbyak::util;

    const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
    const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);

    FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

    size_t xmm_offset = frame_info.xmm_offset;
    for (HostLoc xmm : regs) {
        if (HostLocIsXMM(xmm)) {
            if (code.HasAVX()) {
                code.vmovaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
            } else {
                code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
            }
            xmm_offset += XMM_SIZE;
        }
    }

    if (frame_info.stack_subtraction != 0) {
        code.add(rsp, u32(frame_info.stack_subtraction));
    }

    for (HostLoc gpr : Common::Reverse(regs)) {
        if (HostLocIsGPR(gpr)) {
            code.pop(HostLocToReg64(gpr));
        }
    }
}

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
    ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
}

void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
    ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
}

void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
    ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
}

void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size) {
    ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
}

void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PushRegistersAndAdjustStack(code, 0, regs);
}

void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception) {
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PopRegistersAndAdjustStack(code, 0, regs);
}

} // namespace Dynarmic::Backend::X64
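
Editor's note: to make CalculateFrameInfo's arithmetic concrete, a compile-time replay of one Win64 case (shadow space is 32 there; the 7/6/20 inputs are illustrative). Pushing an odd number of GPRs lands rsp back on a 16-byte boundary, so no alignment padding is added, and the 20-byte frame rounds up to 32:

constexpr size_t shadow = 32;                           // Win64 ABI_SHADOW_SPACE
constexpr size_t rsp_alignment = (7 % 2 == 0) ? 8 : 0;  // 7 GPRs pushed -> 0
constexpr size_t xmm_bytes = 6 * 16;                    // 6 XMMs -> 96 bytes
constexpr size_t frame = (20 + 0xF) & ~size_t{0xF};     // 20 rounds up to 32
static_assert(rsp_alignment + xmm_bytes + frame + shadow == 160);  // stack_subtraction
static_assert(frame + shadow == 64);                    // xmm_offset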
123
externals/dynarmic/src/backend/x64/abi.h
vendored
Executable file
@@ -0,0 +1,123 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */
#pragma once

#include <array>

#include "backend/x64/hostloc.h"

namespace Dynarmic::Backend::X64 {

class BlockOfCode;

#ifdef _WIN32

constexpr HostLoc ABI_RETURN = HostLoc::RAX;

constexpr HostLoc ABI_PARAM1 = HostLoc::RCX;
constexpr HostLoc ABI_PARAM2 = HostLoc::RDX;
constexpr HostLoc ABI_PARAM3 = HostLoc::R8;
constexpr HostLoc ABI_PARAM4 = HostLoc::R9;

constexpr std::array<HostLoc, 13> ABI_ALL_CALLER_SAVE = {
    HostLoc::RAX,
    HostLoc::RCX,
    HostLoc::RDX,
    HostLoc::R8,
    HostLoc::R9,
    HostLoc::R10,
    HostLoc::R11,
    HostLoc::XMM0,
    HostLoc::XMM1,
    HostLoc::XMM2,
    HostLoc::XMM3,
    HostLoc::XMM4,
    HostLoc::XMM5,
};

constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
    HostLoc::RBX,
    HostLoc::RSI,
    HostLoc::RDI,
    HostLoc::RBP,
    HostLoc::R12,
    HostLoc::R13,
    HostLoc::R14,
    HostLoc::R15,
    HostLoc::XMM6,
    HostLoc::XMM7,
    HostLoc::XMM8,
    HostLoc::XMM9,
    HostLoc::XMM10,
    HostLoc::XMM11,
    HostLoc::XMM12,
    HostLoc::XMM13,
    HostLoc::XMM14,
    HostLoc::XMM15,
};

constexpr size_t ABI_SHADOW_SPACE = 32; // bytes

#else

constexpr HostLoc ABI_RETURN = HostLoc::RAX;

constexpr HostLoc ABI_PARAM1 = HostLoc::RDI;
constexpr HostLoc ABI_PARAM2 = HostLoc::RSI;
constexpr HostLoc ABI_PARAM3 = HostLoc::RDX;
constexpr HostLoc ABI_PARAM4 = HostLoc::RCX;

constexpr std::array<HostLoc, 25> ABI_ALL_CALLER_SAVE = {
    HostLoc::RAX,
    HostLoc::RCX,
    HostLoc::RDX,
    HostLoc::RDI,
    HostLoc::RSI,
    HostLoc::R8,
    HostLoc::R9,
    HostLoc::R10,
    HostLoc::R11,
    HostLoc::XMM0,
    HostLoc::XMM1,
    HostLoc::XMM2,
    HostLoc::XMM3,
    HostLoc::XMM4,
    HostLoc::XMM5,
    HostLoc::XMM6,
    HostLoc::XMM7,
    HostLoc::XMM8,
    HostLoc::XMM9,
    HostLoc::XMM10,
    HostLoc::XMM11,
    HostLoc::XMM12,
    HostLoc::XMM13,
    HostLoc::XMM14,
    HostLoc::XMM15,
};

constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
    HostLoc::RBX,
    HostLoc::RBP,
    HostLoc::R12,
    HostLoc::R13,
    HostLoc::R14,
    HostLoc::R15,
};

constexpr size_t ABI_SHADOW_SPACE = 0; // bytes

#endif

static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
void ABI_PushCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);

void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, HostLoc exception);

} // namespace Dynarmic::Backend::X64
399
externals/dynarmic/src/backend/x64/block_of_code.cpp
vendored
Executable file
@@ -0,0 +1,399 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <array>
#include <cstring>

#include <xbyak.h>

#include "backend/x64/a32_jitstate.h"
#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/perf_map.h"
#include "common/assert.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h> // for sysconf
#endif

namespace Dynarmic::Backend::X64 {

#ifdef _WIN32
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = Xbyak::util::rax;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = Xbyak::util::rcx;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = Xbyak::util::rdx;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = Xbyak::util::r8;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = Xbyak::util::r9;
const std::array<Xbyak::Reg64, 4> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4};
#else
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = Xbyak::util::rax;
const Xbyak::Reg64 BlockOfCode::ABI_RETURN2 = Xbyak::util::rdx;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = Xbyak::util::rdi;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = Xbyak::util::rsi;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = Xbyak::util::rdx;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = Xbyak::util::rcx;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM5 = Xbyak::util::r8;
const Xbyak::Reg64 BlockOfCode::ABI_PARAM6 = Xbyak::util::r9;
const std::array<Xbyak::Reg64, 6> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6};
#endif

namespace {

constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;

class CustomXbyakAllocator : public Xbyak::Allocator {
public:
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    bool useProtect() const override { return false; }
#endif
};

// This is threadsafe as Xbyak::Allocator does not contain any state; it is a pure interface.
CustomXbyakAllocator s_allocator;

#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
void ProtectMemory(const void* base, size_t size, bool is_executable) {
#ifdef _WIN32
    DWORD oldProtect = 0;
    VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
#else
    static const size_t pageSize = sysconf(_SC_PAGESIZE);
    const size_t iaddr = reinterpret_cast<size_t>(base);
    const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
    const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
    mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
#endif
}
#endif

} // anonymous namespace

BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, std::function<void(BlockOfCode&)> rcp)
    : Xbyak::CodeGenerator(TOTAL_CODE_SIZE, nullptr, &s_allocator)
    , cb(std::move(cb))
    , jsi(jsi)
    , constant_pool(*this, CONSTANT_POOL_SIZE)
{
    EnableWriting();
    GenRunCode(rcp);
}

void BlockOfCode::PreludeComplete() {
    prelude_complete = true;
    near_code_begin = getCurr();
    far_code_begin = getCurr() + FAR_CODE_OFFSET;
    ClearCache();
    DisableWriting();
}

void BlockOfCode::EnableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(getCode(), maxSize_, false);
#endif
}

void BlockOfCode::DisableWriting() {
#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    ProtectMemory(getCode(), maxSize_, true);
#endif
}

void BlockOfCode::ClearCache() {
    ASSERT(prelude_complete);
    in_far_code = false;
    near_code_ptr = near_code_begin;
    far_code_ptr = far_code_begin;
    SetCodePtr(near_code_begin);
}

size_t BlockOfCode::SpaceRemaining() const {
    ASSERT(prelude_complete);
    // This function provides an underestimate of near-code-size but that's okay.
    // (Why? The maximum size of near code should be measured from near_code_begin, not top_.)
    // These are offsets from Xbyak::CodeArray::top_.
    std::size_t far_code_offset, near_code_offset;
    if (in_far_code) {
        near_code_offset = static_cast<const u8*>(near_code_ptr) - getCode();
        far_code_offset = getCurr() - getCode();
    } else {
        near_code_offset = getCurr() - getCode();
        far_code_offset = static_cast<const u8*>(far_code_ptr) - getCode();
    }
    if (far_code_offset > TOTAL_CODE_SIZE)
        return 0;
    if (near_code_offset > FAR_CODE_OFFSET)
        return 0;
    return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
}

void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
    run_code(jit_state, code_ptr);
}

void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
    step_code(jit_state, code_ptr);
}

void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
    size_t index = 0;
    if (mxcsr_already_exited)
        index |= MXCSR_ALREADY_EXITED;
    jmp(return_from_run_code[index]);
}

void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
    size_t index = FORCE_RETURN;
    if (mxcsr_already_exited)
        index |= MXCSR_ALREADY_EXITED;
    jmp(return_from_run_code[index]);
}

void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
    align();
    run_code = getCurr<RunCodeFuncType>();

    // This serves two purposes:
    // 1. It saves all the registers we as a callee need to save.
    // 2. It aligns the stack so that the code the JIT emits can assume
    //    that the stack is appropriately aligned for CALLs.
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    mov(r15, ABI_PARAM1);
    mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register

    cb.GetTicksRemaining->EmitCall(*this);
    mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
    mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);

    rcp(*this);

    SwitchMxcsrOnEntry();
    jmp(rbx);

    align();
    step_code = getCurr<RunCodeFuncType>();

    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);

    mov(r15, ABI_PARAM1);

    mov(qword[r15 + jsi.offsetof_cycles_to_run], 1);
    mov(qword[r15 + jsi.offsetof_cycles_remaining], 1);

    rcp(*this);

    SwitchMxcsrOnEntry();
    jmp(ABI_PARAM2);

    align();

    // Dispatcher loop

    Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;

    align();
    return_from_run_code[0] = getCurr<const void*>();

    cmp(qword[r15 + jsi.offsetof_cycles_remaining], 0);
    jng(return_to_caller);
    cb.LookupBlock->EmitCall(*this);
    jmp(ABI_RETURN);

    align();
    return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();

    cmp(qword[r15 + jsi.offsetof_cycles_remaining], 0);
    jng(return_to_caller_mxcsr_already_exited);
    SwitchMxcsrOnEntry();
    cb.LookupBlock->EmitCall(*this);
    jmp(ABI_RETURN);

    align();
    return_from_run_code[FORCE_RETURN] = getCurr<const void*>();
    L(return_to_caller);

    SwitchMxcsrOnExit();
    // fallthrough

    return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
    L(return_to_caller_mxcsr_already_exited);

    cb.AddTicks->EmitCall(*this, [this](RegList param) {
        mov(param[0], qword[r15 + jsi.offsetof_cycles_to_run]);
        sub(param[0], qword[r15 + jsi.offsetof_cycles_remaining]);
    });

    ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
    ret();

    PerfMapRegister(run_code, getCurr(), "dynarmic_dispatcher");
}

void BlockOfCode::SwitchMxcsrOnEntry() {
    stmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]);
    ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
}

void BlockOfCode::SwitchMxcsrOnExit() {
    stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
    ldmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]);
}

void BlockOfCode::EnterStandardASIMD() {
    stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
    ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
}

void BlockOfCode::LeaveStandardASIMD() {
    stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
    ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
}

void BlockOfCode::UpdateTicks() {
    cb.AddTicks->EmitCall(*this, [this](RegList param) {
        mov(param[0], qword[r15 + jsi.offsetof_cycles_to_run]);
        sub(param[0], qword[r15 + jsi.offsetof_cycles_remaining]);
    });

    cb.GetTicksRemaining->EmitCall(*this);
    mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
    mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
}

void BlockOfCode::LookupBlock() {
    cb.LookupBlock->EmitCall(*this);
}

Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
    return constant_pool.GetConstant(frame, lower, upper);
}

void BlockOfCode::SwitchToFarCode() {
    ASSERT(prelude_complete);
    ASSERT(!in_far_code);
    in_far_code = true;
    near_code_ptr = getCurr();
    SetCodePtr(far_code_ptr);

    ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}

void BlockOfCode::SwitchToNearCode() {
    ASSERT(prelude_complete);
    ASSERT(in_far_code);
    in_far_code = false;
    far_code_ptr = getCurr();
    SetCodePtr(near_code_ptr);
}

CodePtr BlockOfCode::GetCodeBegin() const {
    return near_code_begin;
}

size_t BlockOfCode::GetTotalCodeSize() const {
    return maxSize_;
}

void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
    if (size_ + alloc_size >= maxSize_) {
        throw Xbyak::Error(Xbyak::ERR_CODE_IS_TOO_BIG);
    }

    void* ret = getCurr<void*>();
    size_ += alloc_size;
    memset(ret, 0, alloc_size);
    return ret;
}

void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
    // The "size" defines where top_, the insertion point, is.
    size_t required_size = reinterpret_cast<const u8*>(code_ptr) - getCode();
    setSize(required_size);
}

void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
    size_t current_size = getCurr<const u8*>() - reinterpret_cast<const u8*>(begin);
    ASSERT(current_size <= size);
    nop(size - current_size);
}

bool BlockOfCode::HasSSSE3() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tSSSE3);
}

bool BlockOfCode::HasSSE41() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tSSE41);
}

bool BlockOfCode::HasSSE42() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tSSE42);
}

bool BlockOfCode::HasPCLMULQDQ() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tPCLMULQDQ);
}

bool BlockOfCode::HasAVX() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tAVX);
}

bool BlockOfCode::HasF16C() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tF16C);
}

bool BlockOfCode::HasAESNI() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tAESNI);
}

bool BlockOfCode::HasLZCNT() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tLZCNT);
}

bool BlockOfCode::HasBMI1() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tBMI1);
}

bool BlockOfCode::HasBMI2() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tBMI2);
}

bool BlockOfCode::HasFastBMI2() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD);
}

bool BlockOfCode::HasFMA() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tFMA);
}

bool BlockOfCode::HasAVX2() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tAVX2);
}

bool BlockOfCode::HasAVX512_Skylake() const {
    // The feature set formerly known as AVX3.2. (Introduced with Skylake.)
    return DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)
        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512CD)
        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512BW)
        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ)
        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL);
}

bool BlockOfCode::HasAVX512_BITALG() const {
    return DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG);
}

bool BlockOfCode::DoesCpuSupport([[maybe_unused]] Xbyak::util::Cpu::Type type) const {
#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
    return cpu_info.has(type);
#else
    return false;
#endif
}

} // namespace Dynarmic::Backend::X64
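
Editor's note: the page rounding in ProtectMemory above can be spot-checked at compile time. The base address is rounded down to a page boundary and the protected length is grown by the same delta, so the original range stays covered (4096-byte page assumed for illustration):

constexpr size_t page = 4096;                         // illustrative page size
constexpr size_t iaddr = 0x12345;                     // unaligned base address
constexpr size_t round = iaddr & ~(page - size_t{1});
static_assert(round == 0x12000);                      // rounded down to the page
static_assert(round + (0x100 + (iaddr - round)) >= iaddr + 0x100);  // still covers [iaddr, iaddr + 0x100)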
189
externals/dynarmic/src/backend/x64/block_of_code.h
vendored
Executable file
@@ -0,0 +1,189 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>
#include <functional>
#include <memory>
#include <type_traits>

#include <xbyak.h>
#include <xbyak_util.h>

#include "backend/x64/callback.h"
#include "backend/x64/constant_pool.h"
#include "backend/x64/jitstate_info.h"
#include "common/cast_util.h"
#include "common/common_types.h"

namespace Dynarmic::Backend::X64 {

using CodePtr = const void*;

struct RunCodeCallbacks {
    std::unique_ptr<Callback> LookupBlock;
    std::unique_ptr<Callback> AddTicks;
    std::unique_ptr<Callback> GetTicksRemaining;
};

class BlockOfCode final : public Xbyak::CodeGenerator {
public:
    BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, std::function<void(BlockOfCode&)> rcp);
    BlockOfCode(const BlockOfCode&) = delete;

    /// Call when external emitters have finished emitting their preludes.
    void PreludeComplete();

    /// Change permissions to RW. This is required to support systems with W^X enforced.
    void EnableWriting();
    /// Change permissions to RX. This is required to support systems with W^X enforced.
    void DisableWriting();

    /// Clears this block of code and resets code pointer to beginning.
    void ClearCache();
    /// Calculates how much space is remaining to use. This is the minimum of near code and far code.
    size_t SpaceRemaining() const;

    /// Runs emulated code from code_ptr.
    void RunCode(void* jit_state, CodePtr code_ptr) const;
    /// Runs emulated code from code_ptr for a single cycle.
    void StepCode(void* jit_state, CodePtr code_ptr) const;
    /// Code emitter: Returns to dispatcher
    void ReturnFromRunCode(bool mxcsr_already_exited = false);
    /// Code emitter: Returns to dispatcher, forces return to host
    void ForceReturnFromRunCode(bool mxcsr_already_exited = false);
    /// Code emitter: Makes guest MXCSR the current MXCSR
    void SwitchMxcsrOnEntry();
    /// Code emitter: Makes saved host MXCSR the current MXCSR
    void SwitchMxcsrOnExit();
    /// Code emitter: Enter standard ASIMD MXCSR region
    void EnterStandardASIMD();
    /// Code emitter: Leave standard ASIMD MXCSR region
    void LeaveStandardASIMD();
    /// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
    /// @note this clobbers ABI caller-save registers
    void UpdateTicks();
    /// Code emitter: Performs a block lookup based on current state
    /// @note this clobbers ABI caller-save registers
    void LookupBlock();

    /// Code emitter: Calls the function
    template <typename FunctionPointer>
    void CallFunction(FunctionPointer fn) {
        static_assert(std::is_pointer_v<FunctionPointer> && std::is_function_v<std::remove_pointer_t<FunctionPointer>>,
                      "Supplied type must be a pointer to a function");

        const u64 address = reinterpret_cast<u64>(fn);
        const u64 distance = address - (getCurr<u64>() + 5);

        if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
            // Far call
            mov(rax, address);
            call(rax);
        } else {
            call(fn);
        }
    }

    /// Code emitter: Calls the lambda. Lambda must not have any captures.
    template <typename Lambda>
    void CallLambda(Lambda l) {
        CallFunction(Common::FptrCast(l));
    }

    Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);

    /// Far code sits far away from the near code. Execution remains primarily in near code.
    /// "Cold" / rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
    void SwitchToFarCode();
    void SwitchToNearCode();

    CodePtr GetCodeBegin() const;
    size_t GetTotalCodeSize() const;

    const void* GetReturnFromRunCodeAddress() const {
        return return_from_run_code[0];
    }

    const void* GetForceReturnFromRunCodeAddress() const {
        return return_from_run_code[FORCE_RETURN];
    }

    void int3() { db(0xCC); }

    /// Allocate memory of `size` bytes from the same block of memory the code is in.
    /// This is useful for objects that need to be placed close to or within code.
    /// The lifetime of this memory is the same as the code around it.
    void* AllocateFromCodeSpace(size_t size);

    void SetCodePtr(CodePtr code_ptr);
    void EnsurePatchLocationSize(CodePtr begin, size_t size);

    // ABI registers
#ifdef _WIN32
    static const Xbyak::Reg64 ABI_RETURN;
    static const Xbyak::Reg64 ABI_PARAM1;
    static const Xbyak::Reg64 ABI_PARAM2;
    static const Xbyak::Reg64 ABI_PARAM3;
    static const Xbyak::Reg64 ABI_PARAM4;
    static const std::array<Xbyak::Reg64, 4> ABI_PARAMS;
#else
    static const Xbyak::Reg64 ABI_RETURN;
    static const Xbyak::Reg64 ABI_RETURN2;
    static const Xbyak::Reg64 ABI_PARAM1;
    static const Xbyak::Reg64 ABI_PARAM2;
    static const Xbyak::Reg64 ABI_PARAM3;
    static const Xbyak::Reg64 ABI_PARAM4;
    static const Xbyak::Reg64 ABI_PARAM5;
    static const Xbyak::Reg64 ABI_PARAM6;
    static const std::array<Xbyak::Reg64, 6> ABI_PARAMS;
#endif

    JitStateInfo GetJitStateInfo() const { return jsi; }

    bool HasSSSE3() const;
    bool HasSSE41() const;
    bool HasSSE42() const;
    bool HasPCLMULQDQ() const;
    bool HasAVX() const;
    bool HasF16C() const;
    bool HasAESNI() const;
    bool HasLZCNT() const;
    bool HasBMI1() const;
    bool HasBMI2() const;
    bool HasFastBMI2() const;
    bool HasFMA() const;
    bool HasAVX2() const;
    bool HasAVX512_Skylake() const;
    bool HasAVX512_BITALG() const;

private:
    RunCodeCallbacks cb;
    JitStateInfo jsi;

    bool prelude_complete = false;
    CodePtr near_code_begin;
    CodePtr far_code_begin;

    ConstantPool constant_pool;

    bool in_far_code = false;
    CodePtr near_code_ptr;
    CodePtr far_code_ptr;

    using RunCodeFuncType = void(*)(void*, CodePtr);
    RunCodeFuncType run_code = nullptr;
    RunCodeFuncType step_code = nullptr;
    static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
    static constexpr size_t FORCE_RETURN = 1 << 1;
    std::array<const void*, 4> return_from_run_code;
    void GenRunCode(std::function<void(BlockOfCode&)> rcp);

    Xbyak::util::Cpu cpu_info;
    bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
};

} // namespace Dynarmic::Backend::X64
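
Editor's note: the displacement test in CallFunction above can be restated as a compile-time check. A rel32 call encodes target minus the address of the next instruction in a signed 32-bit field, so any unsigned difference inside the middle range is unencodable and takes the mov rax / call rax path (sketch; assumes the u64 alias from common_types.h):

constexpr bool NeedsFarCall(u64 target, u64 call_site) {
    const u64 distance = target - (call_site + 5);  // 5 = length of a call rel32
    return distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL;
}
static_assert(NeedsFarCall(0x200000000, 0x1000));   // 8 GiB apart: needs mov/call
static_assert(!NeedsFarCall(0x2000, 0x1000));       // nearby: plain rel32 call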
43
externals/dynarmic/src/backend/x64/block_range_information.cpp
vendored
Executable file
@@ -0,0 +1,43 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include <tsl/robin_set.h>

#include "backend/x64/block_range_information.h"
#include "common/common_types.h"

namespace Dynarmic::Backend::X64 {

template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
    block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
}

template <typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
    block_ranges.clear();
}

template <typename ProgramCounterType>
tsl::robin_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
    tsl::robin_set<IR::LocationDescriptor> erase_locations;
    for (auto invalidate_interval : ranges) {
        auto pair = block_ranges.equal_range(invalidate_interval);
        for (auto it = pair.first; it != pair.second; ++it) {
            for (const auto& descriptor : it->second) {
                erase_locations.insert(descriptor);
            }
        }
    }
    // TODO: EFFICIENCY: Remove ranges that are to be erased.
    return erase_locations;
}

template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;

} // namespace Dynarmic::Backend::X64
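
Editor's note: a sketch of the intended call pattern for BlockRangeInformation above; the addresses and the descriptor value are illustrative. A block registers the guest range it was translated from at emit time, and a later guest write to any overlapping range reports that block as stale:

BlockRangeInformation<u64> info;
IR::LocationDescriptor loc{0x10000};  // illustrative descriptor value
info.AddRange(boost::icl::discrete_interval<u64>::closed(0x1000, 0x10ff), loc);

boost::icl::interval_set<u64> dirty;
dirty.add(boost::icl::discrete_interval<u64>::closed(0x1080, 0x1083));  // guest write
const auto stale = info.InvalidateRanges(dirty);  // contains loc: the ranges overlap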
29
externals/dynarmic/src/backend/x64/block_range_information.h
vendored
Executable file
@@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <set>

#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include <tsl/robin_set.h>

#include "frontend/ir/location_descriptor.h"

namespace Dynarmic::Backend::X64 {

template <typename ProgramCounterType>
class BlockRangeInformation {
public:
    void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
    void ClearCache();
    tsl::robin_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);

private:
    boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};

} // namespace Dynarmic::Backend::X64
40
externals/dynarmic/src/backend/x64/callback.cpp
vendored
Executable file
@@ -0,0 +1,40 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "backend/x64/callback.h"
#include "backend/x64/block_of_code.h"

namespace Dynarmic::Backend::X64 {

Callback::~Callback() = default;

void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.CallFunction(fn);
}

void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) const {
    l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.CallFunction(fn);
}

void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
    code.mov(code.ABI_PARAM1, arg);
    code.CallFunction(fn);
}

void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
    l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
    code.mov(code.ABI_PARAM1, arg);
#else
    l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
    code.mov(code.ABI_PARAM2, arg);
#endif
    code.CallFunction(fn);
}

} // namespace Dynarmic::Backend::X64
54
externals/dynarmic/src/backend/x64/callback.h
vendored
Executable file
@@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <functional>
#include <vector>

#include <xbyak.h>

#include "common/common_types.h"

namespace Dynarmic::Backend::X64 {

using RegList = std::vector<Xbyak::Reg64>;

class BlockOfCode;

class Callback {
public:
    virtual ~Callback();

    virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const = 0;
    virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0;
};

class SimpleCallback final : public Callback {
public:
    template <typename Function>
    SimpleCallback(Function fn) : fn(reinterpret_cast<void(*)()>(fn)) {}

    void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
    void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;

private:
    void (*fn)();
};

class ArgCallback final : public Callback {
public:
    template <typename Function>
    ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void(*)()>(fn)), arg(arg) {}

    void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
    void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;

private:
    void (*fn)();
    u64 arg;
};

} // namespace Dynarmic::Backend::X64
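
Editor's note: a hypothetical use of SimpleCallback above from emitter code, mirroring how GenRunCode drives AddTicks: the lambda receives the ABI parameter registers and loads the arguments, then the wrapped function pointer is called. AddTicksImpl is an assumption, not part of this diff:

static void AddTicksImpl(u64 ticks);  // assumption: host-side helper function

void EmitAddOneTick(BlockOfCode& code) {
    SimpleCallback cb{&AddTicksImpl};
    cb.EmitCall(code, [&](RegList param) {
        code.mov(param[0], 1);  // load first ABI argument register: ticks = 1
    });
}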
34
externals/dynarmic/src/backend/x64/constant_pool.cpp
vendored
Executable file
@@ -0,0 +1,34 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <cstring>

#include "backend/x64/block_of_code.h"
#include "backend/x64/constant_pool.h"
#include "common/assert.h"

namespace Dynarmic::Backend::X64 {

ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) {
    code.int3();
    code.align(align_size);
    pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
    current_pool_ptr = pool_begin;
}

Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
    const auto constant = std::make_tuple(lower, upper);
    auto iter = constant_info.find(constant);
    if (iter == constant_info.end()) {
        ASSERT(static_cast<size_t>(current_pool_ptr - pool_begin) < pool_size);
        std::memcpy(current_pool_ptr, &lower, sizeof(u64));
        std::memcpy(current_pool_ptr + sizeof(u64), &upper, sizeof(u64));
        iter = constant_info.emplace(constant, current_pool_ptr).first;
        current_pool_ptr += align_size;
    }
    return frame[code.rip + iter->second];
}

} // namespace Dynarmic::Backend::X64
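
Editor's note: a sketch of the deduplication GetConstant implements, driven through BlockOfCode::MConst; the constant value and the BlockOfCode& code in scope are assumptions:

// Two requests for the same (lower, upper) pair return the same pool slot.
const Xbyak::Address a = code.MConst(code.xword, 0x8000000080000000, 0x8000000080000000);
const Xbyak::Address b = code.MConst(code.xword, 0x8000000080000000, 0x8000000080000000);
// a and b are rip-relative addresses of one 16-byte, 16-aligned entry;
// the second call allocated nothing further from the pool.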
40
externals/dynarmic/src/backend/x64/constant_pool.h
vendored
Executable file
@@ -0,0 +1,40 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <map>
#include <tuple>

#include <xbyak.h>

#include "common/common_types.h"

namespace Dynarmic::Backend::X64 {

class BlockOfCode;

/// ConstantPool allocates a block of memory from BlockOfCode.
/// It places constants into this block of memory, returning the address
/// of the memory location where the constant is placed. If the constant
/// already exists, its memory location is reused.
class ConstantPool final {
public:
    ConstantPool(BlockOfCode& code, size_t size);

    Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);

private:
    static constexpr size_t align_size = 16; // bytes

    std::map<std::tuple<u64, u64>, void*> constant_info;

    BlockOfCode& code;
    size_t pool_size;
    u8* pool_begin;
    u8* current_pool_ptr;
};

} // namespace Dynarmic::Backend::X64
81
externals/dynarmic/src/backend/x64/devirtualize.h
vendored
Executable file
@@ -0,0 +1,81 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <cstring>
#include <utility>

#include <mp/traits/function_info.h>

#include "backend/x64/callback.h"
#include "common/cast_util.h"
#include "common/common_types.h"

namespace Dynarmic {
namespace Backend::X64 {

namespace impl {

template <typename FunctionType, FunctionType mfp>
struct ThunkBuilder;

template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
struct ThunkBuilder<R(C::*)(Args...), mfp> {
    static R Thunk(C* this_, Args... args) {
        return (this_->*mfp)(std::forward<Args>(args)...);
    }
};

} // namespace impl

template<auto mfp>
ArgCallback DevirtualizeGeneric(mp::class_type<decltype(mfp)>* this_) {
    return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
}

template<auto mfp>
ArgCallback DevirtualizeWindows(mp::class_type<decltype(mfp)>* this_) {
    static_assert(sizeof(mfp) == 8);
    return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
}

template<auto mfp>
ArgCallback DevirtualizeItanium(mp::class_type<decltype(mfp)>* this_) {
    struct MemberFunctionPointer {
        /// For a non-virtual function, this is a simple function pointer.
        /// For a virtual function, it is (1 + virtual table offset in bytes).
        u64 ptr;
        /// The required adjustment to `this`, prior to the call.
        u64 adj;
    } mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);

    static_assert(sizeof(MemberFunctionPointer) == 16);
    static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));

    u64 fn_ptr = mfp_struct.ptr;
    u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj;
    if (mfp_struct.ptr & 1) {
        u64 vtable = Common::BitCastPointee<u64>(this_ptr);
        fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr - 1);
    }
    return ArgCallback{fn_ptr, this_ptr};
}

template<auto mfp>
ArgCallback Devirtualize(mp::class_type<decltype(mfp)>* this_) {
#if defined(__APPLE__) || defined(linux) || defined(__linux) || defined(__linux__)
    return DevirtualizeItanium<mfp>(this_);
#elif defined(__MINGW64__)
    return DevirtualizeItanium<mfp>(this_);
#elif defined(_WIN32)
    return DevirtualizeWindows<mfp>(this_);
#else
    return DevirtualizeGeneric<mfp>(this_);
#endif
}

} // namespace Backend::X64
} // namespace Dynarmic
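
Editor's note: for concreteness, a hypothetical use of Devirtualize above; Memory and Read64 are assumptions, not part of this diff. Under the Itanium ABI the member-function pointer's ptr field is odd for a virtual function, so DevirtualizeItanium resolves the vtable slot once up front:

struct Memory {
    virtual u64 Read64(u64 vaddr) = 0;  // hypothetical embedder interface
};

ArgCallback MakeReadCallback(Memory* mem) {
    // fn = resolved entry point of mem's Read64; bound arg = adjusted `mem`.
    // Emitted calls through this callback skip virtual dispatch at runtime.
    return Devirtualize<&Memory::Read64>(mem);
}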
337
externals/dynarmic/src/backend/x64/emit_x64.cpp
vendored
Executable file
@@ -0,0 +1,337 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <iterator>

#include <tsl/robin_set.h>

#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "backend/x64/nzcv_util.h"
#include "backend/x64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "common/variant_util.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"

// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.

namespace Dynarmic::Backend::X64 {

using namespace Xbyak::util;

EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
    : reg_alloc(reg_alloc), block(block) {}

size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
    return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
}

void EmitContext::EraseInstruction(IR::Inst* inst) {
    block.Instructions().erase(inst);
    inst->ClearArgs();
}

EmitX64::EmitX64(BlockOfCode& code) : code(code) {
    exception_handler.Register(code);
}

EmitX64::~EmitX64() = default;

std::optional<EmitX64::BlockDescriptor> EmitX64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
    const auto iter = block_descriptors.find(descriptor);
    if (iter == block_descriptors.end()) {
        return std::nullopt;
    }
    return iter->second;
}

void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
}

void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {
    code.int3();
}

void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    if (!args[0].IsImmediate()) {
        ctx.reg_alloc.DefineValue(inst, args[0]);
    }
}

void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target) {
    using namespace Xbyak::util;

    const auto iter = block_descriptors.find(target);
    CodePtr target_code_ptr = iter != block_descriptors.end()
                                  ? iter->second.entrypoint
                                  : code.GetReturnFromRunCodeAddress();

    code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);

    code.mov(loc_desc_reg, target.Value());
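    // Record the site of this mov so it can be re-patched to point directly at the
    // target block once that block is (re)compiled.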
    patch_information[target].mov_rcx.emplace_back(code.getCurr());
    EmitPatchMovRcx(target_code_ptr);

    code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
    code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);

    code.add(index_reg.cvt32(), 1);
    code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask));
    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
}

void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ASSERT(args[0].IsImmediate());
    const u64 unique_hash_of_target = args[0].GetImmediateU64();

    ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
    const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
    const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();

    PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}

void EmitX64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitX64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitX64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitX64::EmitGetUpperFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitX64::EmitGetLowerFromOp(EmitContext&, IR::Inst*) {
    ASSERT_MSG(false, "should never happen");
}

void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const int bitsize = [&]{
        switch (args[0].GetType()) {
        case IR::Type::U8:
            return 8;
        case IR::Type::U16:
            return 16;
        case IR::Type::U32:
            return 32;
        case IR::Type::U64:
            return 64;
        default:
            UNREACHABLE();
        }
    }();

    const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
    const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
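    // lahf packs SF:ZF:0:AF:0:PF:1:CF into AH and seto writes OF into AL, so AX ends
    // up holding the host-format NZCV produced by the cmp.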
    code.cmp(value, 0);
    code.lahf();
    code.seto(code.al);
    ctx.reg_alloc.DefineValue(inst, nzcv);
}

void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (args[0].IsImmediate()) {
        const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
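        // Repack ARM NZCV (bits 31-28) into the lahf/seto layout used above:
        // N -> SF (bit 15), Z -> ZF (bit 14), C -> CF (bit 8), V -> OF (bit 0).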
        u32 value = 0;
        value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
        value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
        value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
        value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
        code.mov(nzcv, value);
        ctx.reg_alloc.DefineValue(inst, nzcv);
    } else if (code.HasFastBMI2()) {
        const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

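        // pdep scatters the four NZCV bits into the positions selected by the mask in
        // a single instruction.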
        code.shr(nzcv, 28);
        code.mov(tmp, NZCV::x64_mask);
        code.pdep(nzcv, nzcv, tmp);

        ctx.reg_alloc.DefineValue(inst, nzcv);
    } else {
        const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

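        // Without BMI2: multiplying by a sum of shifts lands each of the four NZCV
        // bits on its target position; the mask then clears the cross terms.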
        code.shr(nzcv, 28);
        code.imul(nzcv, nzcv, NZCV::to_x64_multiplier);
        code.and_(nzcv, NZCV::x64_mask);

        ctx.reg_alloc.DefineValue(inst, nzcv);
    }
}

void EmitX64::EmitAddCycles(size_t cycles) {
    ASSERT(cycles < std::numeric_limits<u32>::max());
    code.sub(qword[r15 + code.GetJitStateInfo().offsetof_cycles_remaining], static_cast<u32>(cycles));
}

Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
    Xbyak::Label pass;

    code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);

    // sahf restores SF, ZF, CF
    // add al, 0x7F restores OF

    switch (cond) {
    case IR::Cond::EQ: //z
        code.sahf();
        code.jz(pass);
        break;
    case IR::Cond::NE: //!z
        code.sahf();
        code.jnz(pass);
        break;
    case IR::Cond::CS: //c
        code.sahf();
        code.jc(pass);
        break;
    case IR::Cond::CC: //!c
        code.sahf();
        code.jnc(pass);
        break;
    case IR::Cond::MI: //n
        code.sahf();
        code.js(pass);
        break;
    case IR::Cond::PL: //!n
        code.sahf();
        code.jns(pass);
        break;
    case IR::Cond::VS: //v
        code.add(al, 0x7F);
        code.jo(pass);
        break;
    case IR::Cond::VC: //!v
        code.add(al, 0x7F);
        code.jno(pass);
        break;
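    // ja/jna test CF = 0, but ARM HI/LS want C = 1 / C = 0 respectively,
    // so invert the carry with cmc before branching.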
    case IR::Cond::HI: //c & !z
        code.sahf();
        code.cmc();
        code.ja(pass);
        break;
    case IR::Cond::LS: //!c | z
        code.sahf();
        code.cmc();
        code.jna(pass);
        break;
    case IR::Cond::GE: // n == v
        code.add(al, 0x7F);
        code.sahf();
        code.jge(pass);
        break;
    case IR::Cond::LT: // n != v
        code.add(al, 0x7F);
        code.sahf();
        code.jl(pass);
        break;
    case IR::Cond::GT: // !z & (n == v)
        code.add(al, 0x7F);
        code.sahf();
        code.jg(pass);
        break;
    case IR::Cond::LE: // z | (n != v)
        code.add(al, 0x7F);
        code.sahf();
        code.jle(pass);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
        break;
    }

    return pass;
}

EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
    PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor));
    Patch(descriptor, entrypoint);

    BlockDescriptor block_desc{entrypoint, size};
    block_descriptors.emplace(descriptor.Value(), block_desc);
    return block_desc;
}

void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
        using T = std::decay_t<decltype(x)>;
        if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
            this->EmitTerminalImpl(x, initial_location, is_single_step);
        } else {
            ASSERT_MSG(false, "Invalid terminal");
        }
    });
}

void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
    const CodePtr save_code_ptr = code.getCurr();
    const PatchInformation& patch_info = patch_information[target_desc];

    for (CodePtr location : patch_info.jg) {
        code.SetCodePtr(location);
        EmitPatchJg(target_desc, target_code_ptr);
    }

    for (CodePtr location : patch_info.jmp) {
        code.SetCodePtr(location);
        EmitPatchJmp(target_desc, target_code_ptr);
    }

    for (CodePtr location : patch_info.mov_rcx) {
        code.SetCodePtr(location);
        EmitPatchMovRcx(target_code_ptr);
    }

    code.SetCodePtr(save_code_ptr);
}

void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) {
    Patch(target_desc, nullptr);
}

void EmitX64::ClearCache() {
    block_descriptors.clear();
    patch_information.clear();

    PerfMapClear();
}

void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>& locations) {
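    // The JIT region may be mapped W^X on some hosts; make it writable for the
    // duration of the unpatching and restore it on scope exit.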
    code.EnableWriting();
    SCOPE_EXIT { code.DisableWriting(); };

    for (const auto& descriptor : locations) {
        const auto it = block_descriptors.find(descriptor);
        if (it == block_descriptors.end()) {
            continue;
        }

        if (patch_information.count(descriptor)) {
            Unpatch(descriptor);
        }
        block_descriptors.erase(it);
    }
}

} // namespace Dynarmic::Backend::X64
131
externals/dynarmic/src/backend/x64/emit_x64.h
vendored
Executable file
@@ -0,0 +1,131 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>
#include <optional>
#include <string>
#include <type_traits>
#include <vector>

#include <tsl/robin_map.h>
#include <tsl/robin_set.h>

#include <xbyak_util.h>

#include "backend/x64/exception_handler.h"
#include "backend/x64/reg_alloc.h"
#include "common/bit_util.h"
#include "common/fp/fpcr.h"
#include "frontend/ir/location_descriptor.h"
#include "frontend/ir/terminal.h"

namespace Dynarmic::IR {
class Block;
class Inst;
} // namespace Dynarmic::IR

namespace Dynarmic {
enum class OptimizationFlag : u32;
} // namespace Dynarmic

namespace Dynarmic::Backend::X64 {

class BlockOfCode;

using A64FullVectorWidth = std::integral_constant<size_t, 128>;

// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template <typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;

template <typename T>
using HalfVectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>() / 2>;

struct EmitContext {
    EmitContext(RegAlloc& reg_alloc, IR::Block& block);

    size_t GetInstOffset(IR::Inst* inst) const;
    void EraseInstruction(IR::Inst* inst);

    virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;

    virtual bool HasOptimization(OptimizationFlag flag) const = 0;

    RegAlloc& reg_alloc;
    IR::Block& block;
};

class EmitX64 {
public:
    struct BlockDescriptor {
        CodePtr entrypoint;  // Entrypoint of emitted code
        size_t size;         // Length in bytes of emitted code
    };

    explicit EmitX64(BlockOfCode& code);
    virtual ~EmitX64();

    /// Looks up an emitted host block in the cache.
    std::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;

    /// Empties the entire cache.
    virtual void ClearCache();

    /// Invalidates a selection of basic blocks.
    void InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>& locations);

protected:
    // Microinstruction emitters
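    // Expands opcodes.inc into one emitter declaration per shared opcode; the
    // A32OPC/A64OPC entries are declared in the architecture-specific subclasses.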
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...)
#define A64OPC(...)
#include "frontend/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
    void EmitAddCycles(size_t cycles);
    Xbyak::Label EmitCond(IR::Cond cond);
    BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
    void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);

    // Terminal instruction emitters
    void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
    virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
    virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;

    // Patching
    struct PatchInformation {
        std::vector<CodePtr> jg;
        std::vector<CodePtr> jmp;
        std::vector<CodePtr> mov_rcx;
    };
    void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
    virtual void Unpatch(const IR::LocationDescriptor& target_desc);
    virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
    virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
    virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;

    // State
    BlockOfCode& code;
    ExceptionHandler exception_handler;
    tsl::robin_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
    tsl::robin_map<IR::LocationDescriptor, PatchInformation> patch_information;
};

} // namespace Dynarmic::Backend::X64
107
externals/dynarmic/src/backend/x64/emit_x64_aes.cpp
vendored
Executable file
@@ -0,0 +1,107 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "common/common_types.h"
#include "common/crypto/aes.h"
#include "frontend/ir/microinstruction.h"

namespace Dynarmic::Backend::X64 {

using namespace Xbyak::util;
namespace AES = Common::Crypto::AES;

using AESFn = void(AES::State&, const AES::State&);

static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, BlockOfCode& code, IR::Inst* inst, AESFn fn) {
    constexpr u32 stack_space = static_cast<u32>(sizeof(AES::State)) * 2;
    const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    ctx.reg_alloc.EndOfAllocScope();

    ctx.reg_alloc.HostCall(nullptr);
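    // Reserve two AES::State buffers on the stack (plus Win64 shadow space):
    // ABI_PARAM1 points at the output state, ABI_PARAM2 at the input state.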
    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);

    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
    code.movaps(xword[code.ABI_PARAM2], input);
    code.CallFunction(fn);
    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);

    code.add(rsp, stack_space + ABI_SHADOW_SPACE);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasAESNI()) {
        const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();

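        // With an all-zero round key, aesdeclast reduces to InvShiftRows + InvSubBytes,
        // which is exactly the single-round behaviour the IR asks for here.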
        code.pxor(zero, zero);
        code.aesdeclast(data, zero);

        ctx.reg_alloc.DefineValue(inst, data);
        return;
    }

    EmitAESFunction(args, ctx, code, inst, AES::DecryptSingleRound);
}

void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasAESNI()) {
        const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();

        code.pxor(zero, zero);
        code.aesenclast(data, zero);

        ctx.reg_alloc.DefineValue(inst, data);
        return;
    }

    EmitAESFunction(args, ctx, code, inst, AES::EncryptSingleRound);
}

void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasAESNI()) {
        const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);

        code.aesimc(data, data);

        ctx.reg_alloc.DefineValue(inst, data);
        return;
    }

    EmitAESFunction(args, ctx, code, inst, AES::InverseMixColumns);
}

void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasAESNI()) {
        const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();

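        // There is no forward MixColumns instruction. aesdeclast undoes exactly the
        // SubBytes/ShiftRows work that aesenc performs, so the pair composes to
        // MixColumns alone (the zero round key makes both final xors no-ops).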
        code.pxor(zero, zero);
        code.aesdeclast(data, zero);
        code.aesenc(data, zero);

        ctx.reg_alloc.DefineValue(inst, data);
        return;
    }

    EmitAESFunction(args, ctx, code, inst, AES::MixColumns);
}

} // namespace Dynarmic::Backend::X64
148
externals/dynarmic/src/backend/x64/emit_x64_crc32.cpp
vendored
Executable file
@@ -0,0 +1,148 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <array>
#include <climits>

#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "common/crypto/crc32.h"
#include "frontend/ir/microinstruction.h"

namespace Dynarmic::Backend::X64 {

using namespace Xbyak::util;
namespace CRC32 = Common::Crypto::CRC32;

static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasSSE42()) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
        code.crc32(crc, value);
        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    }

    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
    code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}

static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasPCLMULQDQ() && data_size < 32) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
        const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();

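        // Folding/reduction constants for the bit-reflected ISO polynomial
        // (0x1DB710641 in the high qword), in the style of Intel's "Fast CRC
        // Computation Using PCLMULQDQ" white paper.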
        code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));

        code.movzx(value.cvt32(), value.changeBit(data_size));
        code.xor_(value.cvt32(), crc);
        code.movd(xmm_tmp, value.cvt32());
        code.pslldq(xmm_tmp, (64 - data_size) / 8);

        if (code.HasAVX()) {
            code.vpclmulqdq(xmm_value, xmm_tmp, xmm_const, 0x00);
            code.pclmulqdq(xmm_value, xmm_const, 0x10);
            code.pxor(xmm_value, xmm_tmp);
        } else {
            code.movdqa(xmm_value, xmm_tmp);
            code.pclmulqdq(xmm_value, xmm_const, 0x00);
            code.pclmulqdq(xmm_value, xmm_const, 0x10);
            code.pxor(xmm_value, xmm_tmp);
        }

        code.pextrd(crc, xmm_value, 2);

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    }

    if (code.HasPCLMULQDQ() && data_size == 32) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();

        code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));

        code.xor_(crc, value);
        code.shl(crc.cvt64(), 32);
        code.movq(xmm_value, crc.cvt64());

        code.pclmulqdq(xmm_value, xmm_const, 0x00);
        code.pclmulqdq(xmm_value, xmm_const, 0x10);

        code.pextrd(crc, xmm_value, 2);

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    }

    if (code.HasPCLMULQDQ() && data_size == 64) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
        const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();

        code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));

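        // A 32-bit mov of a register to itself zero-extends it, clearing bits 63:32
        // before the full-width xor below.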
        code.mov(crc, crc);
        code.xor_(crc.cvt64(), value);
        code.movq(xmm_value, crc.cvt64());

        code.pclmulqdq(xmm_value, xmm_const, 0x00);
        code.pclmulqdq(xmm_value, xmm_const, 0x10);

        code.pextrd(crc, xmm_value, 2);

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    }

    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
    code.CallFunction(&CRC32::ComputeCRC32ISO);
}

void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32Castagnoli(code, ctx, inst, 8);
}

void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32Castagnoli(code, ctx, inst, 16);
}

void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32Castagnoli(code, ctx, inst, 32);
}

void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32Castagnoli(code, ctx, inst, 64);
}

void EmitX64::EmitCRC32ISO8(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32ISO(code, ctx, inst, 8);
}

void EmitX64::EmitCRC32ISO16(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32ISO(code, ctx, inst, 16);
}

void EmitX64::EmitCRC32ISO32(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32ISO(code, ctx, inst, 32);
}

void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) {
    EmitCRC32ISO(code, ctx, inst, 64);
}

} // namespace Dynarmic::Backend::X64
1584
externals/dynarmic/src/backend/x64/emit_x64_data_processing.cpp
vendored
Executable file
File diff suppressed because it is too large
1778
externals/dynarmic/src/backend/x64/emit_x64_floating_point.cpp
vendored
Executable file
File diff suppressed because it is too large
712
externals/dynarmic/src/backend/x64/emit_x64_packed.cpp
vendored
Executable file
@@ -0,0 +1,712 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"

namespace Dynarmic::Backend::X64 {

using namespace Xbyak::util;

void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    code.paddb(xmm_a, xmm_b);

    if (ge_inst) {
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

        code.pcmpeqb(ones, ones);

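        // GE = per-byte carry-out: after the add, min(a+b, b) == b exactly when the
        // byte did not wrap around, so compare-then-invert yields the carry mask.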
        code.movdqa(xmm_ge, xmm_a);
        code.pminub(xmm_ge, xmm_b);
        code.pcmpeqb(xmm_ge, xmm_b);
        code.pxor(xmm_ge, ones);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
    }

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    if (ge_inst) {
        const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

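        // Signed GE = (saturating sum >= 0): compute (0 > sat_sum) bytewise, then
        // invert the mask.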
        code.pxor(xmm_ge, xmm_ge);
        code.movdqa(saturated_sum, xmm_a);
        code.paddsb(saturated_sum, xmm_b);
        code.pcmpgtb(xmm_ge, saturated_sum);
        code.pcmpeqb(saturated_sum, saturated_sum);
        code.pxor(xmm_ge, saturated_sum);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.paddb(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    code.paddw(xmm_a, xmm_b);

    if (ge_inst) {
        if (code.HasSSE41()) {
            const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
            const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

            code.pcmpeqb(ones, ones);

            code.movdqa(xmm_ge, xmm_a);
            code.pminuw(xmm_ge, xmm_b);
            code.pcmpeqw(xmm_ge, xmm_b);
            code.pxor(xmm_ge, ones);

            ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
            ctx.EraseInstruction(ge_inst);
        } else {
            const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
            const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();

            // !(b <= a+b) == b > a+b
            code.movdqa(tmp_a, xmm_a);
            code.movdqa(tmp_b, xmm_b);
            code.paddw(tmp_a, code.MConst(xword, 0x80008000));
            code.paddw(tmp_b, code.MConst(xword, 0x80008000));
            code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!

            ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
            ctx.EraseInstruction(ge_inst);
        }
    }

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    if (ge_inst) {
        const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

        code.pxor(xmm_ge, xmm_ge);
        code.movdqa(saturated_sum, xmm_a);
        code.paddsw(saturated_sum, xmm_b);
        code.pcmpgtw(xmm_ge, saturated_sum);
        code.pcmpeqw(saturated_sum, saturated_sum);
        code.pxor(xmm_ge, saturated_sum);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.paddw(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    if (ge_inst) {
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

        code.movdqa(xmm_ge, xmm_a);
        code.pmaxub(xmm_ge, xmm_b);
        code.pcmpeqb(xmm_ge, xmm_a);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.psubb(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    if (ge_inst) {
        const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

        code.pxor(xmm_ge, xmm_ge);
        code.movdqa(saturated_sum, xmm_a);
        code.psubsb(saturated_sum, xmm_b);
        code.pcmpgtb(xmm_ge, saturated_sum);
        code.pcmpeqb(saturated_sum, saturated_sum);
        code.pxor(xmm_ge, saturated_sum);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.psubb(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    if (!ge_inst) {
        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

        code.psubw(xmm_a, xmm_b);

        ctx.reg_alloc.DefineValue(inst, xmm_a);
        return;
    }

    if (code.HasSSE41()) {
        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

        code.movdqa(xmm_ge, xmm_a);
        code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
        code.pcmpeqw(xmm_ge, xmm_a);

        code.psubw(xmm_a, xmm_b);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
        ctx.reg_alloc.DefineValue(inst, xmm_a);
        return;
    }

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
    const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
    const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

    // (a >= b) == !(b > a)
    code.pcmpeqb(ones, ones);
    code.paddw(xmm_a, code.MConst(xword, 0x80008000));
    code.paddw(xmm_b, code.MConst(xword, 0x80008000));
    code.movdqa(xmm_ge, xmm_b);
    code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
    code.pxor(xmm_ge, ones);

    code.psubw(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    ctx.EraseInstruction(ge_inst);
    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    if (ge_inst) {
        const Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

        code.pxor(xmm_ge, xmm_ge);
        code.movdqa(saturated_diff, xmm_a);
        code.psubsw(saturated_diff, xmm_b);
        code.pcmpgtw(xmm_ge, saturated_diff);
        code.pcmpeqw(saturated_diff, saturated_diff);
        code.pxor(xmm_ge, saturated_diff);

        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        ctx.EraseInstruction(ge_inst);
    }

    code.psubw(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (args[0].IsInXmm() || args[1].IsInXmm()) {
        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
        const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

        // Since,
        //     pavg(a, b) == (a + b + 1) >> 1
        // Therefore,
        //     ~pavg(~a, ~b) == (a + b) >> 1

        code.pcmpeqb(ones, ones);
        code.pxor(xmm_a, ones);
        code.pxor(xmm_b, ones);
        code.pavgb(xmm_a, xmm_b);
        code.pxor(xmm_a, ones);

        ctx.reg_alloc.DefineValue(inst, xmm_a);
    } else {
        const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Reg32 and_a_b = reg_a;
        const Xbyak::Reg32 result = reg_a;

        // This relies on the equality x+y == ((x&y) << 1) + (x^y).
        // Note that x^y always contains the LSB of the result.
        // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
        // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.

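        // e.g. per byte, x = 0xFF, y = 0x01: x&y = 0x01, ((x^y)>>1)&0x7F = 0x7F,
        // so the result is 0x80 == (0xFF + 0x01)/2, with no carry into the next byte.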
        code.mov(xor_a_b, reg_a);
        code.and_(and_a_b, reg_b);
        code.xor_(xor_a_b, reg_b);
        code.shr(xor_a_b, 1);
        code.and_(xor_a_b, 0x7F7F7F7F);
        code.add(result, xor_a_b);

        ctx.reg_alloc.DefineValue(inst, result);
    }
}

void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (args[0].IsInXmm() || args[1].IsInXmm()) {
        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

        code.movdqa(tmp, xmm_a);
        code.pand(xmm_a, xmm_b);
        code.pxor(tmp, xmm_b);
        code.psrlw(tmp, 1);
        code.paddw(xmm_a, tmp);

        ctx.reg_alloc.DefineValue(inst, xmm_a);
    } else {
        const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Reg32 and_a_b = reg_a;
        const Xbyak::Reg32 result = reg_a;

        // This relies on the equality x+y == ((x&y) << 1) + (x^y).
        // Note that x^y always contains the LSB of the result.
        // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
        // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.

        code.mov(xor_a_b, reg_a);
        code.and_(and_a_b, reg_b);
        code.xor_(xor_a_b, reg_b);
        code.shr(xor_a_b, 1);
        code.and_(xor_a_b, 0x7FFF7FFF);
        code.add(result, xor_a_b);

        ctx.reg_alloc.DefineValue(inst, result);
    }
}

void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
    const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 and_a_b = reg_a;
    const Xbyak::Reg32 result = reg_a;
    const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
    // carry propagates the sign bit from (x^y)>>1 upwards by one.

    code.mov(xor_a_b, reg_a);
    code.and_(and_a_b, reg_b);
    code.xor_(xor_a_b, reg_b);
    code.mov(carry, xor_a_b);
    code.and_(carry, 0x80808080);
    code.shr(xor_a_b, 1);
    code.and_(xor_a_b, 0x7F7F7F7F);
    code.add(result, xor_a_b);
    code.xor_(result, carry);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>>1).
    // The arithmetic shift right makes this signed.

    code.movdqa(tmp, xmm_a);
    code.pand(xmm_a, xmm_b);
    code.pxor(tmp, xmm_b);
    code.psraw(tmp, 1);
    code.paddw(xmm_a, tmp);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    code.xor_(minuend, subtrahend);
    code.and_(subtrahend, minuend);
    code.shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 7 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // We invert this bit at the end as this tells us if that bit was borrowed from.
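    // e.g. per byte, a = 0x01, b = 0x02: minuend field = 0x01, subtrahend = 0x02;
    // (0x80 | 0x01) - 0x02 = 0x7F, and flipping the guard bit gives 0xFF, i.e. -1,
    // which is (0x01 - 0x02) >> 1 in two's complement.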
    code.or_(minuend, 0x80808080);
    code.sub(minuend, subtrahend);
    code.xor_(minuend, 0x80808080);

    // minuend now contains the desired result.
    ctx.reg_alloc.DefineValue(inst, minuend);
}

void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();

    const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    code.xor_(minuend, subtrahend);
    code.and_(subtrahend, minuend);
    code.mov(carry, minuend);
    code.and_(carry, 0x80808080);
    code.shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b
    // carry := (a^b) & 0x80808080

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 7 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    // We then sign extend the result into this bit.
    code.or_(minuend, 0x80808080);
    code.sub(minuend, subtrahend);
    code.xor_(minuend, 0x80808080);
    code.xor_(minuend, carry);

    ctx.reg_alloc.DefineValue(inst, minuend);
}

void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    code.pxor(minuend, subtrahend);
    code.pand(subtrahend, minuend);
    code.psrlw(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b

    code.psubw(minuend, subtrahend);

    ctx.reg_alloc.DefineValue(inst, minuend);
}

void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>>1) - ((x^y)&y).

    code.pxor(minuend, subtrahend);
    code.pand(subtrahend, minuend);
    code.psraw(minuend, 1);

    // At this point,
    // minuend := (a^b) >>> 1
    // subtrahend := (a^b) & b

    code.psubw(minuend, subtrahend);

    ctx.reg_alloc.DefineValue(inst, minuend);
}

static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
    const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 reg_sum, reg_diff;

    if (is_signed) {
        code.movsx(reg_a_lo, reg_a_hi.cvt16());
        code.movsx(reg_b_lo, reg_b_hi.cvt16());
        code.sar(reg_a_hi, 16);
        code.sar(reg_b_hi, 16);
    } else {
        code.movzx(reg_a_lo, reg_a_hi.cvt16());
        code.movzx(reg_b_lo, reg_b_hi.cvt16());
        code.shr(reg_a_hi, 16);
        code.shr(reg_b_hi, 16);
    }

    if (hi_is_sum) {
        code.sub(reg_a_lo, reg_b_hi);
        code.add(reg_a_hi, reg_b_lo);
        reg_diff = reg_a_lo;
        reg_sum = reg_a_hi;
    } else {
        code.add(reg_a_lo, reg_b_hi);
        code.sub(reg_a_hi, reg_b_lo);
        reg_diff = reg_a_hi;
        reg_sum = reg_a_lo;
    }

    if (ge_inst) {
        // The reg_b registers are no longer required.
        const Xbyak::Reg32 ge_sum = reg_b_hi;
        const Xbyak::Reg32 ge_diff = reg_b_lo;

        code.mov(ge_sum, reg_sum);
        code.mov(ge_diff, reg_diff);

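        // Broadcast the GE condition to a full 32-bit mask: for unsigned halves the
        // carry into bit 16 decides, for signed halves the inverted sign bit does.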
        if (!is_signed) {
            code.shl(ge_sum, 15);
            code.sar(ge_sum, 31);
        } else {
            code.not_(ge_sum);
            code.sar(ge_sum, 31);
        }
        code.not_(ge_diff);
        code.sar(ge_diff, 31);
        code.and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
        code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
        code.or_(ge_sum, ge_diff);

        ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
        ctx.EraseInstruction(ge_inst);
    }

    if (is_halving) {
        code.shl(reg_a_lo, 15);
        code.shr(reg_a_hi, 1);
    } else {
        code.shl(reg_a_lo, 16);
    }

    // reg_a_lo now contains the low word and reg_a_hi now contains the high word.
    // Merge them.
    code.shld(reg_a_hi, reg_a_lo, 16);

    ctx.reg_alloc.DefineValue(inst, reg_a_hi);
}

void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, true, false, false);
}

void EmitX64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, true, true, false);
}

void EmitX64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, false, false, false);
}

void EmitX64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, false, true, false);
}

void EmitX64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, true, false, true);
}

void EmitX64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, true, true, true);
}

void EmitX64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, false, false, true);
}

void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, false, true, true);
}

static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

    (code.*fn)(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusb);
}

void EmitX64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsb);
}

void EmitX64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusb);
}

void EmitX64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsb);
}

void EmitX64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusw);
}

void EmitX64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsw);
}

void EmitX64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusw);
}

void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsw);
}

void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

    // TODO: Optimize with zero-extension detection
    code.movaps(tmp, code.MConst(xword, 0xFFFFFFFF));
    code.pand(xmm_a, tmp);
    code.pand(xmm_b, tmp);
    code.psadbw(xmm_a, xmm_b);

    ctx.reg_alloc.DefineValue(inst, xmm_a);
}

void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();

    if (num_args_in_xmm >= 2) {
        const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
        const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
        const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);

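        // Bitwise select: (from & ge) | (to & ~ge) keeps 'from' where the GE mask is
        // set and 'to' elsewhere.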
        code.pand(from, ge);
        code.pandn(ge, to);
        code.por(from, ge);

        ctx.reg_alloc.DefineValue(inst, from);
    } else if (code.HasBMI1()) {
        const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
        const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

        code.and_(from, ge);
        code.andn(to, ge, to);
        code.or_(from, to);

        ctx.reg_alloc.DefineValue(inst, from);
    } else {
        const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

        code.and_(from, ge);
        code.not_(ge);
        code.and_(ge, to);
        code.or_(from, ge);

        ctx.reg_alloc.DefineValue(inst, from);
    }
}

} // namespace Dynarmic::Backend::X64
315
externals/dynarmic/src/backend/x64/emit_x64_saturation.cpp
vendored
Executable file
@@ -0,0 +1,315 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <limits>

#include <mp/traits/integer_of_size.h>

#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"

namespace Dynarmic::Backend::X64 {

using namespace Xbyak::util;

namespace {

enum class Op {
    Add,
    Sub,
};

template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
    Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size);
    Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size);

    constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max());
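    // bt copies the sign bit of the operand into CF; adc then produces INT_MAX for a
    // non-negative operand and INT_MIN (INT_MAX + 1) for a negative one, which is
    // exactly the value to saturate to should the upcoming add/sub overflow.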
    if constexpr (size < 64) {
        code.xor_(overflow.cvt32(), overflow.cvt32());
        code.bt(result.cvt32(), size - 1);
        code.adc(overflow.cvt32(), int_max);
    } else {
        code.mov(overflow, int_max);
        code.bt(result, 63);
        code.adc(overflow, 0);
    }

    // overflow now contains 0x7F... if a was positive, or 0x80... if a was negative

    if constexpr (op == Op::Add) {
        code.add(result, addend);
    } else {
        code.sub(result, addend);
    }

    if constexpr (size == 8) {
        code.cmovo(result.cvt32(), overflow.cvt32());
    } else {
        code.cmovo(result, overflow);
    }

    if (overflow_inst) {
        code.seto(overflow.cvt8());

        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}

template<Op op, size_t size>
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
    Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size);

    constexpr u64 boundary = op == Op::Add ? std::numeric_limits<mp::unsigned_integer_of_size<size>>::max() : 0;

    if constexpr (op == Op::Add) {
        code.add(op_result, addend);
    } else {
        code.sub(op_result, addend);
    }
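    // CF records unsigned wraparound: cmovae (taken when CF = 0) keeps the computed
    // value, otherwise the boundary (max for add, 0 for sub) is left in place.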
    code.mov(addend, boundary);
    if constexpr (size == 8) {
        code.cmovae(addend.cvt32(), op_result.cvt32());
    } else {
        code.cmovae(addend, op_result);
    }

    if (overflow_inst) {
        const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
        code.setb(overflow.cvt8());

        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, addend);
}

} // anonymous namespace

void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}

void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}

void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}

void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}

void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

    code.movsx(x, x.cvt16());
    code.movsx(y, y.cvt16());

    code.imul(x, y);
    code.lea(y, ptr[x.cvt64() + x.cvt64()]);
    code.mov(tmp, x);
    code.shr(tmp, 15);
    code.xor_(y, x);
code.mov(y, 0x7FFF);
|
||||
code.cmovns(y, tmp);
|
||||
|
||||
if (overflow_inst) {
|
||||
code.sets(tmp.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, tmp);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, y);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
|
||||
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
|
||||
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
|
||||
|
||||
code.movsxd(x, x.cvt32());
|
||||
code.movsxd(y, y.cvt32());
|
||||
|
||||
code.imul(x, y);
|
||||
code.lea(y, ptr[x + x]);
|
||||
code.mov(tmp, x);
|
||||
code.shr(tmp, 31);
|
||||
code.xor_(y, x);
|
||||
code.mov(y.cvt32(), 0x7FFFFFFF);
|
||||
code.cmovns(y.cvt32(), tmp.cvt32());
|
||||
|
||||
if (overflow_inst) {
|
||||
code.sets(tmp.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, tmp);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, y);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const size_t N = args[1].GetImmediateU8();
|
||||
ASSERT(N >= 1 && N <= 32);
|
||||
|
||||
if (N == 32) {
|
||||
if (overflow_inst) {
|
||||
const auto no_overflow = IR::Value(false);
|
||||
overflow_inst->ReplaceUsesWith(no_overflow);
|
||||
}
|
||||
// TODO: DefineValue directly on Argument
|
||||
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
|
||||
const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]);
|
||||
code.mov(result.cvt32(), source.cvt32());
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 mask = (1u << N) - 1;
|
||||
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
|
||||
const u32 negative_saturated_value = 1u << (N - 1);
|
||||
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
|
||||
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
|
||||
|
||||
// Put the appropriate saturated value in result
|
||||
code.mov(result, reg_a);
|
||||
code.sar(result, 31);
|
||||
code.xor_(result, positive_saturated_value);
|
||||
|
||||
// Do the saturation
|
||||
code.cmp(overflow, mask);
|
||||
code.cmovbe(result, reg_a);
|
||||
|
||||
if (overflow_inst) {
|
||||
code.seta(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const size_t N = args[1].GetImmediateU8();
|
||||
ASSERT(N <= 31);
|
||||
|
||||
const u32 saturated_value = (1u << N) - 1;
|
||||
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
|
||||
code.xor_(overflow, overflow);
|
||||
code.cmp(reg_a, saturated_value);
|
||||
code.mov(result, saturated_value);
|
||||
code.cmovle(result, overflow);
|
||||
code.cmovbe(result, reg_a);
|
||||
|
||||
if (overflow_inst) {
|
||||
code.seta(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
20
externals/dynarmic/src/backend/x64/emit_x64_sm4.cpp
vendored
Executable file
20
externals/dynarmic/src/backend/x64/emit_x64_sm4.cpp
vendored
Executable file
@@ -0,0 +1,20 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/crypto/sm4.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
4907
externals/dynarmic/src/backend/x64/emit_x64_vector.cpp
vendored
Executable file
4907
externals/dynarmic/src/backend/x64/emit_x64_vector.cpp
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1686
externals/dynarmic/src/backend/x64/emit_x64_vector_floating_point.cpp
vendored
Executable file
1686
externals/dynarmic/src/backend/x64/emit_x64_vector_floating_point.cpp
vendored
Executable file
File diff suppressed because it is too large
Load Diff
324
externals/dynarmic/src/backend/x64/emit_x64_vector_saturation.cpp
vendored
Executable file
324
externals/dynarmic/src/backend/x64/emit_x64_vector_saturation.cpp
vendored
Executable file
@@ -0,0 +1,324 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/common_types.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
namespace {
|
||||
|
||||
void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*saturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*unsaturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*sub_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
|
||||
|
||||
code.movaps(xmm0, result);
|
||||
|
||||
(code.*saturated_fn)(result, addend);
|
||||
|
||||
(code.*unsaturated_fn)(xmm0, addend);
|
||||
(code.*sub_fn)(xmm0, result);
|
||||
if (code.HasSSE41()) {
|
||||
code.ptest(xmm0, xmm0);
|
||||
} else {
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
code.pxor(tmp, tmp);
|
||||
code.pcmpeqw(xmm0, tmp);
|
||||
code.pmovmskb(overflow.cvt32(), xmm0);
|
||||
code.xor_(overflow.cvt32(), 0xFFFF);
|
||||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
enum class Op {
|
||||
Add,
|
||||
Sub,
|
||||
};
|
||||
|
||||
template<Op op, size_t esize>
|
||||
void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
static_assert(esize == 32 || esize == 64);
|
||||
constexpr u64 msb_mask = esize == 32 ? 0x8000000080000000 : 0x8000000000000000;
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm arg = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
|
||||
|
||||
// TODO AVX-512: vpternlog, vpsraq
|
||||
// TODO AVX2 implementation
|
||||
|
||||
code.movaps(xmm0, result);
|
||||
code.movaps(tmp, result);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (esize == 32) {
|
||||
code.paddd(result, arg);
|
||||
} else {
|
||||
code.paddq(result, arg);
|
||||
}
|
||||
} else {
|
||||
if constexpr (esize == 32) {
|
||||
code.psubd(result, arg);
|
||||
} else {
|
||||
code.psubq(result, arg);
|
||||
}
|
||||
}
|
||||
|
||||
code.pxor(tmp, result);
|
||||
code.pxor(xmm0, arg);
|
||||
if constexpr (op == Op::Add) {
|
||||
code.pandn(xmm0, tmp);
|
||||
} else {
|
||||
code.pand(xmm0, tmp);
|
||||
}
|
||||
|
||||
code.movaps(tmp, result);
|
||||
code.psrad(tmp, 31);
|
||||
if constexpr (esize == 64) {
|
||||
code.pshufd(tmp, tmp, 0b11110101);
|
||||
}
|
||||
code.pxor(tmp, code.MConst(xword, msb_mask, msb_mask));
|
||||
|
||||
if (code.HasSSE41()) {
|
||||
code.ptest(xmm0, code.MConst(xword, msb_mask, msb_mask));
|
||||
} else {
|
||||
if constexpr (esize == 32) {
|
||||
code.movmskps(overflow.cvt32(), xmm0);
|
||||
} else {
|
||||
code.movmskpd(overflow.cvt32(), xmm0);
|
||||
}
|
||||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
if (code.HasSSE41()) {
|
||||
if constexpr (esize == 32) {
|
||||
code.blendvps(result, tmp);
|
||||
} else {
|
||||
code.blendvpd(result, tmp);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
} else {
|
||||
code.psrad(xmm0, 31);
|
||||
if constexpr (esize == 64) {
|
||||
code.pshufd(xmm0, xmm0, 0b11110101);
|
||||
}
|
||||
|
||||
code.pand(tmp, xmm0);
|
||||
code.pandn(xmm0, result);
|
||||
code.por(tmp, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddsb, &Xbyak::CodeGenerator::paddb, &Xbyak::CodeGenerator::psubb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddsw, &Xbyak::CodeGenerator::paddw, &Xbyak::CodeGenerator::psubw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::psubsb, &Xbyak::CodeGenerator::psubb, &Xbyak::CodeGenerator::psubb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::psubsw, &Xbyak::CodeGenerator::psubw, &Xbyak::CodeGenerator::psubw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddusb, &Xbyak::CodeGenerator::paddb, &Xbyak::CodeGenerator::psubb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::paddusw, &Xbyak::CodeGenerator::paddw, &Xbyak::CodeGenerator::psubw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
|
||||
|
||||
// TODO AVX2, AVX-512: vpternlog
|
||||
|
||||
code.movaps(tmp, result);
|
||||
code.movaps(xmm0, result);
|
||||
|
||||
code.pxor(xmm0, addend);
|
||||
code.pand(tmp, addend);
|
||||
code.paddd(result, addend);
|
||||
|
||||
code.psrld(xmm0, 1);
|
||||
code.paddd(tmp, xmm0);
|
||||
code.psrad(tmp, 31);
|
||||
|
||||
code.por(result, tmp);
|
||||
|
||||
if (code.HasSSE41()) {
|
||||
code.ptest(tmp, tmp);
|
||||
} else {
|
||||
code.movmskps(overflow.cvt32(), tmp);
|
||||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
|
||||
|
||||
// TODO AVX2, AVX-512: vpternlog
|
||||
|
||||
code.movaps(tmp, result);
|
||||
code.movaps(xmm0, result);
|
||||
|
||||
code.pxor(xmm0, addend);
|
||||
code.pand(tmp, addend);
|
||||
code.paddq(result, addend);
|
||||
|
||||
code.psrlq(xmm0, 1);
|
||||
code.paddq(tmp, xmm0);
|
||||
code.psrad(tmp, 31);
|
||||
code.pshufd(tmp, tmp, 0b11110101);
|
||||
|
||||
code.por(result, tmp);
|
||||
|
||||
if (code.HasSSE41()) {
|
||||
code.ptest(tmp, tmp);
|
||||
} else {
|
||||
code.movmskpd(overflow.cvt32(), tmp);
|
||||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::psubusb, &Xbyak::CodeGenerator::psubb, &Xbyak::CodeGenerator::psubb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSaturatedNative(code, ctx, inst, &Xbyak::CodeGenerator::psubusw, &Xbyak::CodeGenerator::psubw, &Xbyak::CodeGenerator::psubw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
|
||||
|
||||
// TODO AVX2, AVX-512: vpternlog
|
||||
|
||||
code.movaps(tmp, result);
|
||||
code.movaps(xmm0, subtrahend);
|
||||
|
||||
code.pxor(tmp, subtrahend);
|
||||
code.psubd(result, subtrahend);
|
||||
code.pand(xmm0, tmp);
|
||||
|
||||
code.psrld(tmp, 1);
|
||||
code.psubd(tmp, xmm0);
|
||||
code.psrad(tmp, 31);
|
||||
|
||||
if (code.HasSSE41()) {
|
||||
code.ptest(tmp, tmp);
|
||||
} else {
|
||||
code.movmskps(overflow.cvt32(), tmp);
|
||||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
code.pandn(tmp, result);
|
||||
ctx.reg_alloc.DefineValue(inst, tmp);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
|
||||
|
||||
// TODO AVX2, AVX-512: vpternlog
|
||||
|
||||
code.movaps(tmp, result);
|
||||
code.movaps(xmm0, subtrahend);
|
||||
|
||||
code.pxor(tmp, subtrahend);
|
||||
code.psubq(result, subtrahend);
|
||||
code.pand(xmm0, tmp);
|
||||
|
||||
code.psrlq(tmp, 1);
|
||||
code.psubq(tmp, xmm0);
|
||||
code.psrad(tmp, 31);
|
||||
code.pshufd(tmp, tmp, 0b11110101);
|
||||
|
||||
if (code.HasSSE41()) {
|
||||
code.ptest(tmp, tmp);
|
||||
} else {
|
||||
code.movmskpd(overflow.cvt32(), tmp);
|
||||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
code.pandn(tmp, result);
|
||||
ctx.reg_alloc.DefineValue(inst, tmp);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
37
externals/dynarmic/src/backend/x64/exception_handler.h
vendored
Executable file
37
externals/dynarmic/src/backend/x64/exception_handler.h
vendored
Executable file
@@ -0,0 +1,37 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
struct FakeCall {
|
||||
u64 call_rip;
|
||||
u64 ret_rip;
|
||||
};
|
||||
|
||||
class ExceptionHandler final {
|
||||
public:
|
||||
ExceptionHandler();
|
||||
~ExceptionHandler();
|
||||
|
||||
void Register(BlockOfCode& code);
|
||||
|
||||
bool SupportsFastmem() const noexcept;
|
||||
void SetFastmemCallback(std::function<FakeCall(u64)> cb);
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
28
externals/dynarmic/src/backend/x64/exception_handler_generic.cpp
vendored
Executable file
28
externals/dynarmic/src/backend/x64/exception_handler_generic.cpp
vendored
Executable file
@@ -0,0 +1,28 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "backend/x64/exception_handler.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
void ExceptionHandler::Register(BlockOfCode&) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return false;
|
||||
}
|
||||
|
||||
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
230
externals/dynarmic/src/backend/x64/exception_handler_macos.cpp
vendored
Executable file
230
externals/dynarmic/src/backend/x64/exception_handler_macos.cpp
vendored
Executable file
@@ -0,0 +1,230 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2019 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "backend/x64/exception_handler.h"
|
||||
|
||||
#include <mach/mach.h>
|
||||
#include <mach/message.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/cast_util.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
#define mig_external extern "C"
|
||||
#include "backend/x64/mig/mach_exc_server.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
namespace {
|
||||
|
||||
struct CodeBlockInfo {
|
||||
u64 code_begin, code_end;
|
||||
std::function<FakeCall(u64)> cb;
|
||||
};
|
||||
|
||||
struct MachMessage {
|
||||
mach_msg_header_t head;
|
||||
char data[2048]; ///< Arbitrary size
|
||||
};
|
||||
|
||||
class MachHandler final {
|
||||
public:
|
||||
MachHandler();
|
||||
~MachHandler();
|
||||
|
||||
kern_return_t HandleRequest(x86_thread_state64_t* thread_state);
|
||||
|
||||
void AddCodeBlock(CodeBlockInfo info);
|
||||
void RemoveCodeBlock(u64 rip);
|
||||
|
||||
private:
|
||||
auto FindCodeBlockInfo(u64 rip) {
|
||||
return std::find_if(code_block_infos.begin(), code_block_infos.end(), [&](const auto& x) { return x.code_begin <= rip && x.code_end > rip; });
|
||||
}
|
||||
|
||||
std::vector<CodeBlockInfo> code_block_infos;
|
||||
std::mutex code_block_infos_mutex;
|
||||
|
||||
std::thread thread;
|
||||
mach_port_t server_port;
|
||||
|
||||
void MessagePump();
|
||||
};
|
||||
|
||||
MachHandler::MachHandler() {
|
||||
#define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x)
|
||||
|
||||
KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port));
|
||||
KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND));
|
||||
KCHECK(task_set_exception_ports(mach_task_self(), EXC_MASK_BAD_ACCESS, server_port, EXCEPTION_STATE | MACH_EXCEPTION_CODES, x86_THREAD_STATE64));
|
||||
|
||||
// The below doesn't actually work, and I'm not sure why; since this doesn't work we'll have a spurious error message upon shutdown.
|
||||
mach_port_t prev;
|
||||
KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev));
|
||||
|
||||
#undef KCHECK
|
||||
|
||||
thread = std::thread(&MachHandler::MessagePump, this);
|
||||
}
|
||||
|
||||
MachHandler::~MachHandler() {
|
||||
mach_port_destroy(mach_task_self(), server_port);
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void MachHandler::MessagePump() {
|
||||
mach_msg_return_t mr;
|
||||
MachMessage request;
|
||||
MachMessage reply;
|
||||
|
||||
while (true) {
|
||||
mr = mach_msg(&request.head, MACH_RCV_MSG | MACH_RCV_LARGE, 0, sizeof(request), server_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
|
||||
if (mr != MACH_MSG_SUCCESS) {
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to receive mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
|
||||
return;
|
||||
}
|
||||
|
||||
if (!mach_exc_server(&request.head, &reply.head)) {
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Unexpected mach message\n");
|
||||
return;
|
||||
}
|
||||
|
||||
mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
|
||||
if (mr != MACH_MSG_SUCCESS){
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
|
||||
const auto iter = FindCodeBlockInfo(ts->__rip);
|
||||
if (iter == code_block_infos.end()) {
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Exception was not in registered code blocks (rip {:#016x})\n", ts->__rip);
|
||||
return KERN_FAILURE;
|
||||
}
|
||||
|
||||
FakeCall fc = iter->cb(ts->__rip);
|
||||
|
||||
ts->__rsp -= sizeof(u64);
|
||||
*Common::BitCast<u64*>(ts->__rsp) = fc.ret_rip;
|
||||
ts->__rip = fc.call_rip;
|
||||
|
||||
return KERN_SUCCESS;
|
||||
}
|
||||
|
||||
void MachHandler::AddCodeBlock(CodeBlockInfo cbi) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) {
|
||||
code_block_infos.erase(iter);
|
||||
}
|
||||
code_block_infos.push_back(cbi);
|
||||
}
|
||||
|
||||
void MachHandler::RemoveCodeBlock(u64 rip) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
const auto iter = FindCodeBlockInfo(rip);
|
||||
if (iter == code_block_infos.end()) {
|
||||
return;
|
||||
}
|
||||
code_block_infos.erase(iter);
|
||||
}
|
||||
|
||||
MachHandler mach_handler;
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
mig_external kern_return_t catch_mach_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t) {
|
||||
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise\n");
|
||||
return KERN_FAILURE;
|
||||
}
|
||||
|
||||
mig_external kern_return_t catch_mach_exception_raise_state_identity(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, int*, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t*) {
|
||||
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise_state_identity\n");
|
||||
return KERN_FAILURE;
|
||||
}
|
||||
|
||||
mig_external kern_return_t catch_mach_exception_raise_state(
|
||||
mach_port_t /*exception_port*/,
|
||||
exception_type_t exception,
|
||||
const mach_exception_data_t /*code*/, // code[0] is as per kern_return.h, code[1] is rip.
|
||||
mach_msg_type_number_t /*codeCnt*/,
|
||||
int* flavor,
|
||||
const thread_state_t old_state,
|
||||
mach_msg_type_number_t old_stateCnt,
|
||||
thread_state_t new_state,
|
||||
mach_msg_type_number_t* new_stateCnt
|
||||
) {
|
||||
if (!flavor || !new_stateCnt) {
|
||||
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n");
|
||||
return KERN_INVALID_ARGUMENT;
|
||||
}
|
||||
if (*flavor != x86_THREAD_STATE64 || old_stateCnt != x86_THREAD_STATE64_COUNT || *new_stateCnt < x86_THREAD_STATE64_COUNT) {
|
||||
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Unexpected flavor.\n");
|
||||
return KERN_INVALID_ARGUMENT;
|
||||
}
|
||||
if (exception != EXC_BAD_ACCESS) {
|
||||
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Unexpected exception type.\n");
|
||||
return KERN_FAILURE;
|
||||
}
|
||||
|
||||
x86_thread_state64_t* ts = reinterpret_cast<x86_thread_state64_t*>(new_state);
|
||||
std::memcpy(ts, reinterpret_cast<const x86_thread_state64_t*>(old_state), sizeof(x86_thread_state64_t));
|
||||
*new_stateCnt = x86_THREAD_STATE64_COUNT;
|
||||
|
||||
return mach_handler.HandleRequest(ts);
|
||||
}
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
Impl(BlockOfCode& code)
|
||||
: code_begin(Common::BitCast<u64>(code.getCode()))
|
||||
, code_end(code_begin + code.GetTotalCodeSize())
|
||||
{}
|
||||
|
||||
void SetCallback(std::function<FakeCall(u64)> cb) {
|
||||
CodeBlockInfo cbi;
|
||||
cbi.code_begin = code_begin;
|
||||
cbi.code_end = code_end;
|
||||
cbi.cb = cb;
|
||||
mach_handler.AddCodeBlock(cbi);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
mach_handler.RemoveCodeBlock(code_begin);
|
||||
}
|
||||
|
||||
private:
|
||||
u64 code_begin, code_end;
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
void ExceptionHandler::Register(BlockOfCode& code) {
|
||||
impl = std::make_unique<Impl>(code);
|
||||
}
|
||||
|
||||
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return static_cast<bool>(impl);
|
||||
}
|
||||
|
||||
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
|
||||
impl->SetCallback(cb);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
198
externals/dynarmic/src/backend/x64/exception_handler_posix.cpp
vendored
Executable file
198
externals/dynarmic/src/backend/x64/exception_handler_posix.cpp
vendored
Executable file
@@ -0,0 +1,198 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2019 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "backend/x64/exception_handler.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include <signal.h>
|
||||
#ifdef __APPLE__
|
||||
#include <sys/ucontext.h>
|
||||
#else
|
||||
#include <ucontext.h>
|
||||
#endif
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/cast_util.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
namespace {
|
||||
|
||||
struct CodeBlockInfo {
|
||||
u64 code_begin, code_end;
|
||||
std::function<FakeCall(u64)> cb;
|
||||
};
|
||||
|
||||
class SigHandler {
|
||||
public:
|
||||
SigHandler();
|
||||
|
||||
void AddCodeBlock(CodeBlockInfo info);
|
||||
void RemoveCodeBlock(u64 rip);
|
||||
|
||||
bool SupportsFastmem() const { return supports_fast_mem; }
|
||||
|
||||
private:
|
||||
auto FindCodeBlockInfo(u64 rip) {
|
||||
return std::find_if(code_block_infos.begin(), code_block_infos.end(), [&](const auto& x) { return x.code_begin <= rip && x.code_end > rip; });
|
||||
}
|
||||
|
||||
bool supports_fast_mem = true;
|
||||
|
||||
std::vector<CodeBlockInfo> code_block_infos;
|
||||
std::mutex code_block_infos_mutex;
|
||||
|
||||
struct sigaction old_sa_segv;
|
||||
struct sigaction old_sa_bus;
|
||||
|
||||
static void SigAction(int sig, siginfo_t* info, void* raw_context);
|
||||
};
|
||||
|
||||
SigHandler sig_handler;
|
||||
|
||||
SigHandler::SigHandler() {
|
||||
constexpr size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024);
|
||||
|
||||
stack_t signal_stack;
|
||||
signal_stack.ss_sp = std::malloc(signal_stack_size);
|
||||
signal_stack.ss_size = signal_stack_size;
|
||||
signal_stack.ss_flags = 0;
|
||||
if (sigaltstack(&signal_stack, nullptr) != 0) {
|
||||
fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n");
|
||||
supports_fast_mem = false;
|
||||
return;
|
||||
}
|
||||
|
||||
struct sigaction sa;
|
||||
sa.sa_handler = nullptr;
|
||||
sa.sa_sigaction = &SigHandler::SigAction;
|
||||
sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) {
|
||||
fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n");
|
||||
supports_fast_mem = false;
|
||||
return;
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) {
|
||||
fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n");
|
||||
supports_fast_mem = false;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void SigHandler::AddCodeBlock(CodeBlockInfo cbi) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) {
|
||||
code_block_infos.erase(iter);
|
||||
}
|
||||
code_block_infos.push_back(cbi);
|
||||
}
|
||||
|
||||
void SigHandler::RemoveCodeBlock(u64 rip) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
const auto iter = FindCodeBlockInfo(rip);
|
||||
if (iter == code_block_infos.end()) {
|
||||
return;
|
||||
}
|
||||
code_block_infos.erase(iter);
|
||||
}
|
||||
|
||||
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
||||
ASSERT(sig == SIGSEGV || sig == SIGBUS);
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rip)
|
||||
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext->__ss.__rsp)
|
||||
#elif defined(__linux__)
|
||||
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RIP])
|
||||
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.gregs[REG_RSP])
|
||||
#elif defined(__FreeBSD__)
|
||||
#define CTX_RIP (((ucontext_t*)raw_context)->uc_mcontext.mc_rip)
|
||||
#define CTX_RSP (((ucontext_t*)raw_context)->uc_mcontext.mc_rsp)
|
||||
#else
|
||||
#error "Unknown platform"
|
||||
#endif
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(sig_handler.code_block_infos_mutex);
|
||||
|
||||
const auto iter = sig_handler.FindCodeBlockInfo(CTX_RIP);
|
||||
if (iter != sig_handler.code_block_infos.end()) {
|
||||
FakeCall fc = iter->cb(CTX_RIP);
|
||||
|
||||
CTX_RSP -= sizeof(u64);
|
||||
*Common::BitCast<u64*>(CTX_RSP) = fc.ret_rip;
|
||||
CTX_RIP = fc.call_rip;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fmt::print(stderr, "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (rip {:#016x})\n", CTX_RIP);
|
||||
|
||||
struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus;
|
||||
if (retry_sa->sa_flags & SA_SIGINFO) {
|
||||
retry_sa->sa_sigaction(sig, info, raw_context);
|
||||
return;
|
||||
}
|
||||
if (retry_sa->sa_handler == SIG_DFL) {
|
||||
signal(sig, SIG_DFL);
|
||||
return;
|
||||
}
|
||||
if (retry_sa->sa_handler == SIG_IGN) {
|
||||
return;
|
||||
}
|
||||
retry_sa->sa_handler(sig);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
Impl(BlockOfCode& code)
|
||||
: code_begin(Common::BitCast<u64>(code.getCode()))
|
||||
, code_end(code_begin + code.GetTotalCodeSize())
|
||||
{}
|
||||
|
||||
void SetCallback(std::function<FakeCall(u64)> cb) {
|
||||
CodeBlockInfo cbi;
|
||||
cbi.code_begin = code_begin;
|
||||
cbi.code_end = code_end;
|
||||
cbi.cb = cb;
|
||||
sig_handler.AddCodeBlock(cbi);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
sig_handler.RemoveCodeBlock(code_begin);
|
||||
}
|
||||
|
||||
private:
|
||||
u64 code_begin, code_end;
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
void ExceptionHandler::Register(BlockOfCode& code) {
|
||||
impl = std::make_unique<Impl>(code);
|
||||
}
|
||||
|
||||
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return static_cast<bool>(impl) && sig_handler.SupportsFastmem();
|
||||
}
|
||||
|
||||
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
|
||||
impl->SetCallback(cb);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
262
externals/dynarmic/src/backend/x64/exception_handler_windows.cpp
vendored
Executable file
262
externals/dynarmic/src/backend/x64/exception_handler_windows.cpp
vendored
Executable file
@@ -0,0 +1,262 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/exception_handler.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/cast_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/safe_ops.h"
|
||||
|
||||
using UBYTE = u8;
|
||||
|
||||
enum UNWIND_REGISTER_CODES {
|
||||
UWRC_RAX,
|
||||
UWRC_RCX,
|
||||
UWRC_RDX,
|
||||
UWRC_RBX,
|
||||
UWRC_RSP,
|
||||
UWRC_RBP,
|
||||
UWRC_RSI,
|
||||
UWRC_RDI,
|
||||
UWRC_R8,
|
||||
UWRC_R9,
|
||||
UWRC_R10,
|
||||
UWRC_R11,
|
||||
UWRC_R12,
|
||||
UWRC_R13,
|
||||
UWRC_R14,
|
||||
UWRC_R15,
|
||||
};
|
||||
|
||||
enum UNWIND_OPCODE {
|
||||
UWOP_PUSH_NONVOL = 0,
|
||||
UWOP_ALLOC_LARGE = 1,
|
||||
UWOP_ALLOC_SMALL = 2,
|
||||
UWOP_SET_FPREG = 3,
|
||||
UWOP_SAVE_NONVOL = 4,
|
||||
UWOP_SAVE_NONVOL_FAR = 5,
|
||||
UWOP_SAVE_XMM128 = 8,
|
||||
UWOP_SAVE_XMM128_FAR = 9,
|
||||
UWOP_PUSH_MACHFRAME = 10,
|
||||
};
|
||||
|
||||
union UNWIND_CODE {
|
||||
struct {
|
||||
UBYTE CodeOffset;
|
||||
UBYTE UnwindOp : 4;
|
||||
UBYTE OpInfo : 4;
|
||||
} code;
|
||||
USHORT FrameOffset;
|
||||
};
|
||||
|
||||
// UNWIND_INFO is a tail-padded structure
|
||||
struct UNWIND_INFO {
|
||||
UBYTE Version : 3;
|
||||
UBYTE Flags : 5;
|
||||
UBYTE SizeOfProlog;
|
||||
UBYTE CountOfCodes;
|
||||
UBYTE FrameRegister : 4;
|
||||
UBYTE FrameOffset : 4;
|
||||
// UNWIND_CODE UnwindCode[];
|
||||
// With Flags == 0 there are no additional fields.
|
||||
// OPTIONAL UNW_EXCEPTION_INFO ExceptionInfo;
|
||||
};
|
||||
|
||||
struct UNW_EXCEPTION_INFO {
|
||||
ULONG ExceptionHandler;
|
||||
// OPTIONAL ARBITRARY HandlerData;
|
||||
};
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
struct PrologueInformation {
|
||||
std::vector<UNWIND_CODE> unwind_code;
|
||||
size_t number_of_unwind_code_entries;
|
||||
u8 prolog_size;
|
||||
};
|
||||
|
||||
static PrologueInformation GetPrologueInformation() {
|
||||
PrologueInformation ret;
|
||||
|
||||
const auto next_entry = [&]() -> UNWIND_CODE& {
|
||||
ret.unwind_code.emplace_back();
|
||||
return ret.unwind_code.back();
|
||||
};
|
||||
const auto push_nonvol = [&](u8 offset, UNWIND_REGISTER_CODES reg) {
|
||||
auto& entry = next_entry();
|
||||
entry.code.CodeOffset = offset;
|
||||
entry.code.UnwindOp = UWOP_PUSH_NONVOL;
|
||||
entry.code.OpInfo = reg;
|
||||
};
|
||||
const auto alloc_large = [&](u8 offset, size_t size) {
|
||||
ASSERT(size % 8 == 0);
|
||||
size /= 8;
|
||||
|
||||
auto& entry = next_entry();
|
||||
entry.code.CodeOffset = offset;
|
||||
entry.code.UnwindOp = UWOP_ALLOC_LARGE;
|
||||
if (size <= 0xFFFF) {
|
||||
entry.code.OpInfo = 0;
|
||||
auto& size_entry = next_entry();
|
||||
size_entry.FrameOffset = static_cast<USHORT>(size);
|
||||
} else {
|
||||
entry.code.OpInfo = 1;
|
||||
auto& size_entry_1 = next_entry();
|
||||
size_entry_1.FrameOffset = static_cast<USHORT>(size);
|
||||
auto& size_entry_2 = next_entry();
|
||||
size_entry_2.FrameOffset = static_cast<USHORT>(size >> 16);
|
||||
}
|
||||
};
|
||||
const auto save_xmm128 = [&](u8 offset, u8 reg, size_t frame_offset) {
|
||||
ASSERT(frame_offset % 16 == 0);
|
||||
|
||||
auto& entry = next_entry();
|
||||
entry.code.CodeOffset = offset;
|
||||
entry.code.UnwindOp = UWOP_SAVE_XMM128;
|
||||
entry.code.OpInfo = reg;
|
||||
auto& offset_entry = next_entry();
|
||||
offset_entry.FrameOffset = static_cast<USHORT>(frame_offset / 16);
|
||||
};
|
||||
|
||||
// This is a list of operations that occur in the prologue.
|
||||
// The debugger uses this information to retrieve register values and
|
||||
// to calculate the size of the stack frame.
|
||||
ret.prolog_size = 89;
|
||||
save_xmm128(89, 15, 0xB0); // +050 44 0F 29 BC 24 B0 00 00 00 movaps xmmword ptr [rsp+0B0h],xmm15
|
||||
save_xmm128(80, 14, 0xA0); // +047 44 0F 29 B4 24 A0 00 00 00 movaps xmmword ptr [rsp+0A0h],xmm14
|
||||
save_xmm128(71, 13, 0x90); // +03E 44 0F 29 AC 24 90 00 00 00 movaps xmmword ptr [rsp+90h],xmm13
|
||||
save_xmm128(62, 12, 0x80); // +035 44 0F 29 A4 24 80 00 00 00 movaps xmmword ptr [rsp+80h],xmm12
|
||||
save_xmm128(53, 11, 0x70); // +02F 44 0F 29 5C 24 70 movaps xmmword ptr [rsp+70h],xmm11
|
||||
save_xmm128(47, 10, 0x60); // +029 44 0F 29 54 24 60 movaps xmmword ptr [rsp+60h],xmm10
|
||||
save_xmm128(41, 9, 0x50); // +023 44 0F 29 4C 24 50 movaps xmmword ptr [rsp+50h],xmm9
|
||||
save_xmm128(35, 8, 0x40); // +01D 44 0F 29 44 24 40 movaps xmmword ptr [rsp+40h],xmm8
|
||||
save_xmm128(29, 7, 0x30); // +018 0F 29 7C 24 30 movaps xmmword ptr [rsp+30h],xmm7
|
||||
save_xmm128(24, 6, 0x20); // +013 0F 29 74 24 20 movaps xmmword ptr [rsp+20h],xmm6
|
||||
alloc_large(19, 0xC8); // +00C 48 81 EC C8 00 00 00 sub rsp,0C8h
|
||||
push_nonvol(12, UWRC_R15); // +00A 41 57 push r15
|
||||
push_nonvol(10, UWRC_R14); // +008 41 56 push r14
|
||||
push_nonvol(8, UWRC_R13); // +006 41 55 push r13
|
||||
push_nonvol(6, UWRC_R12); // +004 41 54 push r12
|
||||
push_nonvol(4, UWRC_RBP); // +003 55 push rbp
|
||||
push_nonvol(3, UWRC_RDI); // +002 57 push rdi
|
||||
push_nonvol(2, UWRC_RSI); // +001 56 push rsi
|
||||
push_nonvol(1, UWRC_RBX); // +000 53 push rbx
|
||||
|
||||
ret.number_of_unwind_code_entries = ret.unwind_code.size();
|
||||
|
||||
// The Windows API requires the size of the unwind_code array
|
||||
// to be a multiple of two for alignment reasons.
|
||||
if (ret.unwind_code.size() % 2 == 1) {
|
||||
auto& last_entry = next_entry();
|
||||
last_entry.FrameOffset = 0;
|
||||
}
|
||||
ASSERT(ret.unwind_code.size() % 2 == 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
Impl(BlockOfCode& code) {
|
||||
const auto prolog_info = GetPrologueInformation();
|
||||
|
||||
code.align(16);
|
||||
const u8* exception_handler_without_cb = code.getCurr<u8*>();
|
||||
code.mov(code.eax, static_cast<u32>(ExceptionContinueSearch));
|
||||
code.ret();
|
||||
|
||||
code.align(16);
|
||||
const u8* exception_handler_with_cb = code.getCurr<u8*>();
|
||||
// Our 3rd argument is a PCONTEXT.
|
||||
|
||||
// If not within our codeblock, ignore this exception.
|
||||
code.mov(code.rax, Safe::Negate(Common::BitCast<u64>(code.getCode())));
|
||||
code.add(code.rax, code.qword[code.ABI_PARAM3 + Xbyak::RegExp(offsetof(CONTEXT, Rip))]);
|
||||
code.cmp(code.rax, static_cast<u32>(code.GetTotalCodeSize()));
|
||||
code.ja(exception_handler_without_cb);
|
||||
|
||||
code.sub(code.rsp, 8);
|
||||
code.mov(code.ABI_PARAM1, Common::BitCast<u64>(&cb));
|
||||
code.mov(code.ABI_PARAM2, code.ABI_PARAM3);
|
||||
code.CallLambda(
|
||||
[](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx){
|
||||
FakeCall fc = cb_(ctx->Rip);
|
||||
|
||||
ctx->Rsp -= sizeof(u64);
|
||||
*Common::BitCast<u64*>(ctx->Rsp) = fc.ret_rip;
|
||||
ctx->Rip = fc.call_rip;
|
||||
}
|
||||
);
|
||||
code.add(code.rsp, 8);
|
||||
code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution));
|
||||
code.ret();
|
||||
|
||||
exception_handler_without_cb_offset = static_cast<ULONG>(exception_handler_without_cb - code.getCode<u8*>());
|
||||
exception_handler_with_cb_offset = static_cast<ULONG>(exception_handler_with_cb - code.getCode<u8*>());
|
||||
|
||||
code.align(16);
|
||||
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = UNW_FLAG_EHANDLER;
|
||||
unwind_info->SizeOfProlog = prolog_info.prolog_size;
|
||||
unwind_info->CountOfCodes = static_cast<UBYTE>(prolog_info.number_of_unwind_code_entries);
|
||||
unwind_info->FrameRegister = 0; // No frame register present
|
||||
unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
|
||||
// UNWIND_INFO::UnwindCode field:
|
||||
const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
|
||||
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code));
|
||||
memcpy(unwind_code, prolog_info.unwind_code.data(), size_of_unwind_code);
|
||||
// UNWIND_INFO::ExceptionInfo field:
|
||||
except_info = static_cast<UNW_EXCEPTION_INFO*>(code.AllocateFromCodeSpace(sizeof(UNW_EXCEPTION_INFO)));
|
||||
except_info->ExceptionHandler = exception_handler_without_cb_offset;
|
||||
|
||||
code.align(16);
|
||||
rfuncs = static_cast<RUNTIME_FUNCTION*>(code.AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION)));
|
||||
rfuncs->BeginAddress = static_cast<DWORD>(0);
|
||||
rfuncs->EndAddress = static_cast<DWORD>(code.GetTotalCodeSize());
|
||||
rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast<u8*>(unwind_info) - code.getCode());
|
||||
|
||||
RtlAddFunctionTable(rfuncs, 1, reinterpret_cast<DWORD64>(code.getCode()));
|
||||
}
|
||||
|
||||
void SetCallback(std::function<FakeCall(u64)> new_cb) {
|
||||
cb = new_cb;
|
||||
except_info->ExceptionHandler = cb ? exception_handler_with_cb_offset : exception_handler_without_cb_offset;
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
RtlDeleteFunctionTable(rfuncs);
|
||||
}
|
||||
|
||||
private:
|
||||
RUNTIME_FUNCTION* rfuncs;
|
||||
std::function<FakeCall(u64)> cb;
|
||||
UNW_EXCEPTION_INFO* except_info;
|
||||
ULONG exception_handler_without_cb_offset;
|
||||
ULONG exception_handler_with_cb_offset;
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
void ExceptionHandler::Register(BlockOfCode& code) {
|
||||
impl = std::make_unique<Impl>(code);
|
||||
}
|
||||
|
||||
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return static_cast<bool>(impl);
|
||||
}
|
||||
|
||||
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
|
||||
impl->SetCallback(cb);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
59
externals/dynarmic/src/backend/x64/exclusive_monitor.cpp
vendored
Executable file
59
externals/dynarmic/src/backend/x64/exclusive_monitor.cpp
vendored
Executable file
@@ -0,0 +1,59 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <dynarmic/exclusive_monitor.h>
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
|
||||
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) :
|
||||
exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
|
||||
Unlock();
|
||||
}
|
||||
|
||||
size_t ExclusiveMonitor::GetProcessorCount() const {
|
||||
return exclusive_addresses.size();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Lock() {
|
||||
while (is_locked.test_and_set(std::memory_order_acquire)) {}
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Unlock() {
|
||||
is_locked.clear(std::memory_order_release);
|
||||
}
|
||||
|
||||
bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
|
||||
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
|
||||
|
||||
Lock();
|
||||
if (exclusive_addresses[processor_id] != masked_address) {
|
||||
Unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
for (VAddr& other_address : exclusive_addresses) {
|
||||
if (other_address == masked_address) {
|
||||
other_address = INVALID_EXCLUSIVE_ADDRESS;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Clear() {
|
||||
Lock();
|
||||
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
|
||||
Unlock();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
|
||||
Lock();
|
||||
exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
|
||||
Unlock();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic
|
22
externals/dynarmic/src/backend/x64/hostloc.cpp
vendored
Executable file
22
externals/dynarmic/src/backend/x64/hostloc.cpp
vendored
Executable file
@@ -0,0 +1,22 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/hostloc.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
|
||||
ASSERT(HostLocIsGPR(loc));
|
||||
return Xbyak::Reg64(static_cast<int>(loc));
|
||||
}
|
||||
|
||||
Xbyak::Xmm HostLocToXmm(HostLoc loc) {
|
||||
ASSERT(HostLocIsXMM(loc));
|
||||
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
124
externals/dynarmic/src/backend/x64/hostloc.h
vendored
Executable file
124
externals/dynarmic/src/backend/x64/hostloc.h
vendored
Executable file
@@ -0,0 +1,124 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
enum class HostLoc {
|
||||
// Ordering of the registers is intentional. See also: HostLocToX64.
|
||||
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
||||
CF, PF, AF, ZF, SF, OF,
|
||||
FirstSpill,
|
||||
};
|
||||
|
||||
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
|
||||
|
||||
inline bool HostLocIsGPR(HostLoc reg) {
|
||||
return reg >= HostLoc::RAX && reg <= HostLoc::R15;
|
||||
}
|
||||
|
||||
inline bool HostLocIsXMM(HostLoc reg) {
|
||||
return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15;
|
||||
}
|
||||
|
||||
inline bool HostLocIsRegister(HostLoc reg) {
|
||||
return HostLocIsGPR(reg) || HostLocIsXMM(reg);
|
||||
}
|
||||
|
||||
inline bool HostLocIsFlag(HostLoc reg) {
|
||||
return reg >= HostLoc::CF && reg <= HostLoc::OF;
|
||||
}
|
||||
|
||||
inline HostLoc HostLocRegIdx(int idx) {
|
||||
ASSERT(idx >= 0 && idx <= 15);
|
||||
return static_cast<HostLoc>(idx);
|
||||
}
|
||||
|
||||
inline HostLoc HostLocXmmIdx(int idx) {
|
||||
ASSERT(idx >= 0 && idx <= 15);
|
||||
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
|
||||
}
|
||||
|
||||
inline HostLoc HostLocSpill(size_t i) {
|
||||
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
|
||||
}
|
||||
|
||||
inline bool HostLocIsSpill(HostLoc reg) {
|
||||
return reg >= HostLoc::FirstSpill;
|
||||
}
|
||||
|
||||
inline size_t HostLocBitWidth(HostLoc loc) {
|
||||
if (HostLocIsGPR(loc))
|
||||
return 64;
|
||||
if (HostLocIsXMM(loc))
|
||||
return 128;
|
||||
if (HostLocIsSpill(loc))
|
||||
return 128;
|
||||
if (HostLocIsFlag(loc))
|
||||
return 1;
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
using HostLocList = std::initializer_list<HostLoc>;
|
||||
|
||||
// RSP is preserved for function calls
|
||||
// R15 contains the JitState pointer
|
||||
const HostLocList any_gpr = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RBX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
};
|
||||
|
||||
// XMM0 is reserved for use by instructions that implicitly use it as an argument
|
||||
const HostLocList any_xmm = {
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
|
||||
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
|
||||
Xbyak::Xmm HostLocToXmm(HostLoc loc);
|
||||
|
||||
template <typename JitStateType>
|
||||
Xbyak::Address SpillToOpArg(HostLoc loc) {
|
||||
ASSERT(HostLocIsSpill(loc));
|
||||
|
||||
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
|
||||
ASSERT_MSG(i < JitStateType::SpillCount, "Spill index greater than number of available spill locations");
|
||||
|
||||
return JitStateType::GetSpillLocationFromIndex(i);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
43
externals/dynarmic/src/backend/x64/jitstate_info.h
vendored
Executable file
43
externals/dynarmic/src/backend/x64/jitstate_info.h
vendored
Executable file
@@ -0,0 +1,43 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
struct JitStateInfo {
|
||||
template <typename JitStateType>
|
||||
JitStateInfo(const JitStateType&)
|
||||
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
|
||||
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
|
||||
, offsetof_save_host_MXCSR(offsetof(JitStateType, save_host_MXCSR))
|
||||
, offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR))
|
||||
, offsetof_asimd_MXCSR(offsetof(JitStateType, asimd_MXCSR))
|
||||
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
|
||||
, rsb_ptr_mask(JitStateType::RSBPtrMask)
|
||||
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
|
||||
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
|
||||
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
|
||||
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
|
||||
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
|
||||
{}
|
||||
|
||||
const size_t offsetof_cycles_remaining;
|
||||
const size_t offsetof_cycles_to_run;
|
||||
const size_t offsetof_save_host_MXCSR;
|
||||
const size_t offsetof_guest_MXCSR;
|
||||
const size_t offsetof_asimd_MXCSR;
|
||||
const size_t offsetof_rsb_ptr;
|
||||
const size_t rsb_ptr_mask;
|
||||
const size_t offsetof_rsb_location_descriptors;
|
||||
const size_t offsetof_rsb_codeptrs;
|
||||
const size_t offsetof_cpsr_nzcv;
|
||||
const size_t offsetof_fpsr_exc;
|
||||
const size_t offsetof_fpsr_qc;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
53
externals/dynarmic/src/backend/x64/nzcv_util.h
vendored
Executable file
53
externals/dynarmic/src/backend/x64/nzcv_util.h
vendored
Executable file
@@ -0,0 +1,53 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/bit_util.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64::NZCV {
|
||||
|
||||
constexpr u32 arm_mask = 0xF000'0000;
|
||||
constexpr u32 x64_mask = 0xC101;
|
||||
|
||||
constexpr size_t x64_n_flag_bit = 15;
|
||||
constexpr size_t x64_z_flag_bit = 14;
|
||||
constexpr size_t x64_c_flag_bit = 8;
|
||||
constexpr size_t x64_v_flag_bit = 0;
|
||||
|
||||
/// This is a constant used to create the x64 flags format from the ARM format.
|
||||
/// NZCV * multiplier: NZCV0NZCV000NZCV
|
||||
/// x64_flags format: NZ-----C-------V
|
||||
constexpr u32 to_x64_multiplier = 0x1081;
|
||||
|
||||
/// This is a constant used to create the ARM format from the x64 flags format.
|
||||
constexpr u32 from_x64_multiplier = 0x1021'0000;
|
||||
|
||||
inline u32 ToX64(u32 nzcv) {
|
||||
/* Naive implementation:
|
||||
u32 x64_flags = 0;
|
||||
x64_flags |= Common::Bit<31>(cpsr) ? 1 << 15 : 0;
|
||||
x64_flags |= Common::Bit<30>(cpsr) ? 1 << 14 : 0;
|
||||
x64_flags |= Common::Bit<29>(cpsr) ? 1 << 8 : 0;
|
||||
x64_flags |= Common::Bit<28>(cpsr) ? 1 : 0;
|
||||
return x64_flags;
|
||||
*/
|
||||
return ((nzcv >> 28) * to_x64_multiplier) & x64_mask;
|
||||
}
|
||||
|
||||
inline u32 FromX64(u32 x64_flags) {
|
||||
/* Naive implementation:
|
||||
u32 nzcv = 0;
|
||||
nzcv |= Common::Bit<15>(x64_flags) ? 1 << 31 : 0;
|
||||
nzcv |= Common::Bit<14>(x64_flags) ? 1 << 30 : 0;
|
||||
nzcv |= Common::Bit<8>(x64_flags) ? 1 << 29 : 0;
|
||||
nzcv |= Common::Bit<0>(x64_flags) ? 1 << 28 : 0;
|
||||
return nzcv;
|
||||
*/
|
||||
return ((x64_flags & x64_mask) * from_x64_multiplier) & arm_mask;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64::NZCV
|
77
externals/dynarmic/src/backend/x64/oparg.h
vendored
Executable file
77
externals/dynarmic/src/backend/x64/oparg.h
vendored
Executable file
@@ -0,0 +1,77 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
struct OpArg {
|
||||
OpArg() : type(Type::Operand), inner_operand() {}
|
||||
/* implicit */ OpArg(const Xbyak::Address& address) : type(Type::Address), inner_address(address) {}
|
||||
/* implicit */ OpArg(const Xbyak::Reg& reg) : type(Type::Reg), inner_reg(reg) {}
|
||||
|
||||
Xbyak::Operand& operator*() {
|
||||
switch (type) {
|
||||
case Type::Address:
|
||||
return inner_address;
|
||||
case Type::Operand:
|
||||
return inner_operand;
|
||||
case Type::Reg:
|
||||
return inner_reg;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void setBit(int bits) {
|
||||
switch (type) {
|
||||
case Type::Address:
|
||||
inner_address.setBit(bits);
|
||||
return;
|
||||
case Type::Operand:
|
||||
inner_operand.setBit(bits);
|
||||
return;
|
||||
case Type::Reg:
|
||||
switch (bits) {
|
||||
case 8:
|
||||
inner_reg = inner_reg.cvt8();
|
||||
return;
|
||||
case 16:
|
||||
inner_reg = inner_reg.cvt16();
|
||||
return;
|
||||
case 32:
|
||||
inner_reg = inner_reg.cvt32();
|
||||
return;
|
||||
case 64:
|
||||
inner_reg = inner_reg.cvt64();
|
||||
return;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid bits");
|
||||
return;
|
||||
}
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
private:
|
||||
enum class Type {
|
||||
Operand,
|
||||
Address,
|
||||
Reg,
|
||||
};
|
||||
|
||||
Type type;
|
||||
|
||||
union {
|
||||
Xbyak::Operand inner_operand;
|
||||
Xbyak::Address inner_address;
|
||||
Xbyak::Reg inner_reg;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
90
externals/dynarmic/src/backend/x64/perf_map.cpp
vendored
Executable file
90
externals/dynarmic/src/backend/x64/perf_map.cpp
vendored
Executable file
@@ -0,0 +1,90 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
#include "backend/x64/perf_map.h"
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
namespace {
|
||||
std::mutex mutex;
|
||||
std::FILE* file = nullptr;
|
||||
|
||||
void OpenFile() {
|
||||
const char* perf_dir = std::getenv("PERF_BUILDID_DIR");
|
||||
if (!perf_dir) {
|
||||
file = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
const pid_t pid = getpid();
|
||||
const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid);
|
||||
|
||||
file = std::fopen(filename.c_str(), "w");
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::setvbuf(file, nullptr, _IONBF, 0);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
namespace detail {
|
||||
void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name) {
|
||||
std::lock_guard guard{mutex};
|
||||
|
||||
if (!file) {
|
||||
OpenFile();
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
|
||||
std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
void PerfMapClear() {
|
||||
std::lock_guard guard{mutex};
|
||||
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::fclose(file);
|
||||
file = nullptr;
|
||||
OpenFile();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
||||
#else
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
namespace detail {
|
||||
void PerfMapRegister(const void*, const void*, std::string_view) {}
|
||||
} // namespace detail
|
||||
|
||||
void PerfMapClear() {}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
||||
#endif
|
25
externals/dynarmic/src/backend/x64/perf_map.h
vendored
Executable file
25
externals/dynarmic/src/backend/x64/perf_map.h
vendored
Executable file
@@ -0,0 +1,25 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include "common/cast_util.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
namespace detail {
|
||||
void PerfMapRegister(const void* start, const void* end, std::string_view friendly_name);
|
||||
} // namespace detail
|
||||
|
||||
template<typename T>
|
||||
void PerfMapRegister(T start, const void* end, std::string_view friendly_name) {
|
||||
detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
|
||||
}
|
||||
|
||||
void PerfMapClear();
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
696
externals/dynarmic/src/backend/x64/reg_alloc.cpp
vendored
Executable file
696
externals/dynarmic/src/backend/x64/reg_alloc.cpp
vendored
Executable file
@@ -0,0 +1,696 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/ostream.h>
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/abi.h"
|
||||
#include "backend/x64/reg_alloc.h"
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
#define MAYBE_AVX(OPCODE, ...) \
|
||||
[&] { \
|
||||
if (code.HasAVX()) { \
|
||||
code.v##OPCODE(__VA_ARGS__); \
|
||||
} else { \
|
||||
code.OPCODE(__VA_ARGS__); \
|
||||
} \
|
||||
}()
|
||||
|
||||
static bool CanExchange(HostLoc a, HostLoc b) {
|
||||
return HostLocIsGPR(a) && HostLocIsGPR(b);
|
||||
}
|
||||
|
||||
// Minimum number of bits required to represent a type
|
||||
static size_t GetBitWidth(IR::Type type) {
|
||||
switch (type) {
|
||||
case IR::Type::A32Reg:
|
||||
case IR::Type::A32ExtReg:
|
||||
case IR::Type::A64Reg:
|
||||
case IR::Type::A64Vec:
|
||||
case IR::Type::CoprocInfo:
|
||||
case IR::Type::Cond:
|
||||
case IR::Type::Void:
|
||||
case IR::Type::Table:
|
||||
ASSERT_FALSE("Type {} cannot be represented at runtime", type);
|
||||
case IR::Type::Opaque:
|
||||
ASSERT_FALSE("Not a concrete type");
|
||||
case IR::Type::U1:
|
||||
return 8;
|
||||
case IR::Type::U8:
|
||||
return 8;
|
||||
case IR::Type::U16:
|
||||
return 16;
|
||||
case IR::Type::U32:
|
||||
return 32;
|
||||
case IR::Type::U64:
|
||||
return 64;
|
||||
case IR::Type::U128:
|
||||
return 128;
|
||||
case IR::Type::NZCVFlags:
|
||||
return 32; // TODO: Update to 16 when flags optimization is done
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
static bool IsValuelessType(IR::Type type) {
|
||||
switch (type) {
|
||||
case IR::Type::Table:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsLocked() const {
|
||||
return is_being_used_count > 0;
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsEmpty() const {
|
||||
return is_being_used_count == 0 && values.empty();
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsLastUse() const {
|
||||
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
|
||||
}
|
||||
|
||||
void HostLocInfo::ReadLock() {
|
||||
ASSERT(!is_scratch);
|
||||
is_being_used_count++;
|
||||
}
|
||||
|
||||
void HostLocInfo::WriteLock() {
|
||||
ASSERT(is_being_used_count == 0);
|
||||
is_being_used_count++;
|
||||
is_scratch = true;
|
||||
}
|
||||
|
||||
void HostLocInfo::AddArgReference() {
|
||||
current_references++;
|
||||
ASSERT(accumulated_uses + current_references <= total_uses);
|
||||
}
|
||||
|
||||
void HostLocInfo::ReleaseOne() {
|
||||
is_being_used_count--;
|
||||
is_scratch = false;
|
||||
|
||||
if (current_references == 0)
|
||||
return;
|
||||
|
||||
accumulated_uses++;
|
||||
current_references--;
|
||||
|
||||
if (current_references == 0)
|
||||
ReleaseAll();
|
||||
}
|
||||
|
||||
void HostLocInfo::ReleaseAll() {
|
||||
accumulated_uses += current_references;
|
||||
current_references = 0;
|
||||
|
||||
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
|
||||
|
||||
if (total_uses == accumulated_uses) {
|
||||
values.clear();
|
||||
accumulated_uses = 0;
|
||||
total_uses = 0;
|
||||
max_bit_width = 0;
|
||||
}
|
||||
|
||||
is_being_used_count = 0;
|
||||
is_scratch = false;
|
||||
}
|
||||
|
||||
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
|
||||
return std::find(values.begin(), values.end(), inst) != values.end();
|
||||
}
|
||||
|
||||
size_t HostLocInfo::GetMaxBitWidth() const {
|
||||
return max_bit_width;
|
||||
}
|
||||
|
||||
void HostLocInfo::AddValue(IR::Inst* inst) {
|
||||
values.push_back(inst);
|
||||
total_uses += inst->UseCount();
|
||||
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
|
||||
}
|
||||
|
||||
IR::Type Argument::GetType() const {
|
||||
return value.GetType();
|
||||
}
|
||||
|
||||
bool Argument::IsImmediate() const {
|
||||
return value.IsImmediate();
|
||||
}
|
||||
|
||||
bool Argument::IsVoid() const {
|
||||
return GetType() == IR::Type::Void;
|
||||
}
|
||||
|
||||
bool Argument::FitsInImmediateU32() const {
|
||||
if (!IsImmediate())
|
||||
return false;
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
return imm < 0x100000000;
|
||||
}
|
||||
|
||||
bool Argument::FitsInImmediateS32() const {
|
||||
if (!IsImmediate())
|
||||
return false;
|
||||
const s64 imm = static_cast<s64>(value.GetImmediateAsU64());
|
||||
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
|
||||
}
|
||||
|
||||
bool Argument::GetImmediateU1() const {
|
||||
return value.GetU1();
|
||||
}
|
||||
|
||||
u8 Argument::GetImmediateU8() const {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x100);
|
||||
return u8(imm);
|
||||
}
|
||||
|
||||
u16 Argument::GetImmediateU16() const {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x10000);
|
||||
return u16(imm);
|
||||
}
|
||||
|
||||
u32 Argument::GetImmediateU32() const {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x100000000);
|
||||
return u32(imm);
|
||||
}
|
||||
|
||||
u64 Argument::GetImmediateS32() const {
|
||||
ASSERT(FitsInImmediateS32());
|
||||
return value.GetImmediateAsU64();
|
||||
}
|
||||
|
||||
u64 Argument::GetImmediateU64() const {
|
||||
return value.GetImmediateAsU64();
|
||||
}
|
||||
|
||||
IR::Cond Argument::GetImmediateCond() const {
|
||||
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
|
||||
return value.GetCond();
|
||||
}
|
||||
|
||||
bool Argument::IsInGpr() const {
|
||||
if (IsImmediate())
|
||||
return false;
|
||||
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
bool Argument::IsInXmm() const {
|
||||
if (IsImmediate())
|
||||
return false;
|
||||
return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
bool Argument::IsInMemory() const {
|
||||
if (IsImmediate())
|
||||
return false;
|
||||
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
RegAlloc::RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order)
|
||||
: gpr_order(gpr_order)
|
||||
, xmm_order(xmm_order)
|
||||
, hostloc_info(NonSpillHostLocCount + num_spills)
|
||||
, code(code)
|
||||
, spill_to_addr(std::move(spill_to_addr))
|
||||
{}
|
||||
|
||||
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
|
||||
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
|
||||
for (size_t i = 0; i < inst->NumArgs(); i++) {
|
||||
const IR::Value arg = inst->GetArg(i);
|
||||
ret[i].value = arg;
|
||||
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
|
||||
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
|
||||
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToReg64(UseImpl(arg.value, gpr_order));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToXmm(UseImpl(arg.value, xmm_order));
|
||||
}
|
||||
|
||||
OpArg RegAlloc::UseOpArg(Argument& arg) {
|
||||
return UseGpr(arg);
|
||||
}
|
||||
|
||||
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
UseImpl(arg.value, {host_loc});
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
|
||||
}
|
||||
|
||||
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
UseScratchImpl(arg.value, {host_loc});
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) {
|
||||
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
|
||||
const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
|
||||
DefineValueImpl(inst, hostloc);
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
DefineValueImpl(inst, arg.value);
|
||||
}
|
||||
|
||||
void RegAlloc::Release(const Xbyak::Reg& reg) {
|
||||
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
|
||||
const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
|
||||
LocInfo(hostloc).ReleaseOne();
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::ScratchGpr() {
|
||||
return HostLocToReg64(ScratchImpl(gpr_order));
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::ScratchGpr(HostLoc desired_location) {
|
||||
return HostLocToReg64(ScratchImpl({desired_location}));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::ScratchXmm() {
|
||||
return HostLocToXmm(ScratchImpl(xmm_order));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::ScratchXmm(HostLoc desired_location) {
|
||||
return HostLocToXmm(ScratchImpl({desired_location}));
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::UseImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations) {
|
||||
if (use_value.IsImmediate()) {
|
||||
return LoadImmediate(use_value, ScratchImpl(desired_locations));
|
||||
}
|
||||
|
||||
const IR::Inst* use_inst = use_value.GetInst();
|
||||
const HostLoc current_location = *ValueLocation(use_inst);
|
||||
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
|
||||
|
||||
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||
if (can_use_current_location) {
|
||||
LocInfo(current_location).ReadLock();
|
||||
return current_location;
|
||||
}
|
||||
|
||||
if (LocInfo(current_location).IsLocked()) {
|
||||
return UseScratchImpl(use_value, desired_locations);
|
||||
}
|
||||
|
||||
const HostLoc destination_location = SelectARegister(desired_locations);
|
||||
if (max_bit_width > HostLocBitWidth(destination_location)) {
|
||||
return UseScratchImpl(use_value, desired_locations);
|
||||
} else if (CanExchange(destination_location, current_location)) {
|
||||
Exchange(destination_location, current_location);
|
||||
} else {
|
||||
MoveOutOfTheWay(destination_location);
|
||||
Move(destination_location, current_location);
|
||||
}
|
||||
LocInfo(destination_location).ReadLock();
|
||||
return destination_location;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations) {
|
||||
if (use_value.IsImmediate()) {
|
||||
return LoadImmediate(use_value, ScratchImpl(desired_locations));
|
||||
}
|
||||
|
||||
const IR::Inst* use_inst = use_value.GetInst();
|
||||
const HostLoc current_location = *ValueLocation(use_inst);
|
||||
const size_t bit_width = GetBitWidth(use_inst->GetType());
|
||||
|
||||
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
|
||||
if (!LocInfo(current_location).IsLastUse()) {
|
||||
MoveOutOfTheWay(current_location);
|
||||
}
|
||||
LocInfo(current_location).WriteLock();
|
||||
return current_location;
|
||||
}
|
||||
|
||||
const HostLoc destination_location = SelectARegister(desired_locations);
|
||||
MoveOutOfTheWay(destination_location);
|
||||
CopyToScratch(bit_width, destination_location, current_location);
|
||||
LocInfo(destination_location).WriteLock();
|
||||
return destination_location;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::ScratchImpl(const std::vector<HostLoc>& desired_locations) {
|
||||
const HostLoc location = SelectARegister(desired_locations);
|
||||
MoveOutOfTheWay(location);
|
||||
LocInfo(location).WriteLock();
|
||||
return location;
|
||||
}
|
||||
|
||||
void RegAlloc::HostCall(IR::Inst* result_def, std::optional<Argument::copyable_reference> arg0,
|
||||
std::optional<Argument::copyable_reference> arg1,
|
||||
std::optional<Argument::copyable_reference> arg2,
|
||||
std::optional<Argument::copyable_reference> arg3) {
|
||||
constexpr size_t args_count = 4;
|
||||
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 };
|
||||
const std::array<std::optional<Argument::copyable_reference>, args_count> args = { arg0, arg1, arg2, arg3 };
|
||||
|
||||
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
|
||||
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
|
||||
|
||||
ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
|
||||
for (auto hostloc : args_hostloc) {
|
||||
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}();
|
||||
|
||||
ScratchGpr(ABI_RETURN);
|
||||
if (result_def) {
|
||||
DefineValueImpl(result_def, ABI_RETURN);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < args_count; i++) {
|
||||
if (args[i]) {
|
||||
UseScratch(*args[i], args_hostloc[i]);
|
||||
#if defined(__llvm__) && !defined(_WIN32)
|
||||
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
|
||||
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
|
||||
switch (args[i]->get().GetType()) {
|
||||
case IR::Type::U8:
|
||||
code.movzx(reg.cvt32(), reg.cvt8());
|
||||
break;
|
||||
case IR::Type::U16:
|
||||
code.movzx(reg.cvt32(), reg.cvt16());
|
||||
break;
|
||||
default:
|
||||
break; // Nothing needs to be done
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < args_count; i++) {
|
||||
if (!args[i]) {
|
||||
// TODO: Force spill
|
||||
ScratchGpr(args_hostloc[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (HostLoc caller_saved : other_caller_save) {
|
||||
ScratchImpl({caller_saved});
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::EndOfAllocScope() {
|
||||
for (auto& iter : hostloc_info) {
|
||||
iter.ReleaseAll();
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::AssertNoMoreUses() {
|
||||
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::SelectARegister(const std::vector<HostLoc>& desired_locations) const {
|
||||
std::vector<HostLoc> candidates = desired_locations;
|
||||
|
||||
// Find all locations that have not been allocated..
|
||||
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
|
||||
return !this->LocInfo(loc).IsLocked();
|
||||
});
|
||||
candidates.erase(allocated_locs, candidates.end());
|
||||
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
|
||||
|
||||
// Selects the best location out of the available locations.
|
||||
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
|
||||
|
||||
std::partition(candidates.begin(), candidates.end(), [this](auto loc) {
|
||||
return this->LocInfo(loc).IsEmpty();
|
||||
});
|
||||
|
||||
return candidates.front();
|
||||
}
|
||||
|
||||
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
|
||||
for (size_t i = 0; i < hostloc_info.size(); i++) {
|
||||
if (hostloc_info[i].ContainsValue(value)) {
|
||||
return static_cast<HostLoc>(i);
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
|
||||
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
LocInfo(host_loc).AddValue(def_inst);
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
|
||||
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
|
||||
if (use_inst.IsImmediate()) {
|
||||
const HostLoc location = ScratchImpl(gpr_order);
|
||||
DefineValueImpl(def_inst, location);
|
||||
LoadImmediate(use_inst, location);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
|
||||
const HostLoc location = *ValueLocation(use_inst.GetInst());
|
||||
DefineValueImpl(def_inst, location);
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
|
||||
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
|
||||
|
||||
if (HostLocIsGPR(host_loc)) {
|
||||
const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
|
||||
const u64 imm_value = imm.GetImmediateAsU64();
|
||||
if (imm_value == 0) {
|
||||
code.xor_(reg.cvt32(), reg.cvt32());
|
||||
} else {
|
||||
code.mov(reg, imm_value);
|
||||
}
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
if (HostLocIsXMM(host_loc)) {
|
||||
const Xbyak::Xmm reg = HostLocToXmm(host_loc);
|
||||
const u64 imm_value = imm.GetImmediateAsU64();
|
||||
if (imm_value == 0) {
|
||||
MAYBE_AVX(xorps, reg, reg);
|
||||
} else {
|
||||
MAYBE_AVX(movaps, reg, code.MConst(code.xword, imm_value));
|
||||
}
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void RegAlloc::Move(HostLoc to, HostLoc from) {
|
||||
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
|
||||
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
|
||||
ASSERT(bit_width <= HostLocBitWidth(to));
|
||||
|
||||
if (LocInfo(from).IsEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
EmitMove(bit_width, to, from);
|
||||
|
||||
LocInfo(to) = std::exchange(LocInfo(from), {});
|
||||
}
|
||||
|
||||
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
|
||||
|
||||
EmitMove(bit_width, to, from);
|
||||
}
|
||||
|
||||
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
|
||||
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
|
||||
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
|
||||
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
|
||||
|
||||
if (LocInfo(a).IsEmpty()) {
|
||||
Move(a, b);
|
||||
return;
|
||||
}
|
||||
|
||||
if (LocInfo(b).IsEmpty()) {
|
||||
Move(b, a);
|
||||
return;
|
||||
}
|
||||
|
||||
EmitExchange(a, b);
|
||||
|
||||
std::swap(LocInfo(a), LocInfo(b));
|
||||
}
|
||||
|
||||
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
|
||||
ASSERT(!LocInfo(reg).IsLocked());
|
||||
if (!LocInfo(reg).IsEmpty()) {
|
||||
SpillRegister(reg);
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::SpillRegister(HostLoc loc) {
|
||||
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
|
||||
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
|
||||
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
|
||||
|
||||
const HostLoc new_loc = FindFreeSpill();
|
||||
Move(new_loc, loc);
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::FindFreeSpill() const {
|
||||
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
|
||||
const auto loc = static_cast<HostLoc>(i);
|
||||
if (LocInfo(loc).IsEmpty()) {
|
||||
return loc;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_FALSE("All spill locations are full");
|
||||
}
|
||||
|
||||
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
|
||||
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
|
||||
return hostloc_info[static_cast<size_t>(loc)];
|
||||
}
|
||||
|
||||
const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
|
||||
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
|
||||
return hostloc_info[static_cast<size_t>(loc)];
|
||||
}
|
||||
|
||||
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
|
||||
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
|
||||
MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from));
|
||||
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.mov(HostLocToReg64(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
MAYBE_AVX(movq, HostLocToXmm(to), HostLocToReg64(from));
|
||||
} else {
|
||||
MAYBE_AVX(movd, HostLocToXmm(to), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
MAYBE_AVX(movq, HostLocToReg64(to), HostLocToXmm(from));
|
||||
} else {
|
||||
MAYBE_AVX(movd, HostLocToReg64(to).cvt32(), HostLocToXmm(from));
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
|
||||
const Xbyak::Address spill_addr = spill_to_addr(from);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
MAYBE_AVX(movaps, HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
case 64:
|
||||
MAYBE_AVX(movsd, HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
case 32:
|
||||
case 16:
|
||||
case 8:
|
||||
MAYBE_AVX(movss, HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
|
||||
const Xbyak::Address spill_addr = spill_to_addr(to);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
MAYBE_AVX(movaps, spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
case 64:
|
||||
MAYBE_AVX(movsd, spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
case 32:
|
||||
case 16:
|
||||
case 8:
|
||||
MAYBE_AVX(movss, spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.mov(HostLocToReg64(to), spill_to_addr(from));
|
||||
} else {
|
||||
code.mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.mov(spill_to_addr(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code.mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid RegAlloc::EmitMove");
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
|
||||
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
|
||||
code.xchg(HostLocToReg64(a), HostLocToReg64(b));
|
||||
} else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
|
||||
ASSERT_FALSE("Check your code: Exchanging XMM registers is unnecessary");
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid RegAlloc::EmitExchange");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
168
externals/dynarmic/src/backend/x64/reg_alloc.h
vendored
Executable file
168
externals/dynarmic/src/backend/x64/reg_alloc.h
vendored
Executable file
@@ -0,0 +1,168 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/hostloc.h"
|
||||
#include "backend/x64/oparg.h"
|
||||
#include "common/common_types.h"
|
||||
#include "frontend/ir/cond.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/value.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
class RegAlloc;
|
||||
|
||||
struct HostLocInfo {
|
||||
public:
|
||||
bool IsLocked() const;
|
||||
bool IsEmpty() const;
|
||||
bool IsLastUse() const;
|
||||
|
||||
void ReadLock();
|
||||
void WriteLock();
|
||||
void AddArgReference();
|
||||
void ReleaseOne();
|
||||
void ReleaseAll();
|
||||
|
||||
bool ContainsValue(const IR::Inst* inst) const;
|
||||
size_t GetMaxBitWidth() const;
|
||||
|
||||
void AddValue(IR::Inst* inst);
|
||||
|
||||
private:
|
||||
// Current instruction state
|
||||
size_t is_being_used_count = 0;
|
||||
bool is_scratch = false;
|
||||
|
||||
// Block state
|
||||
size_t current_references = 0;
|
||||
size_t accumulated_uses = 0;
|
||||
size_t total_uses = 0;
|
||||
|
||||
// Value state
|
||||
std::vector<IR::Inst*> values;
|
||||
size_t max_bit_width = 0;
|
||||
};
|
||||
|
||||
struct Argument {
|
||||
public:
|
||||
using copyable_reference = std::reference_wrapper<Argument>;
|
||||
|
||||
IR::Type GetType() const;
|
||||
bool IsImmediate() const;
|
||||
bool IsVoid() const;
|
||||
|
||||
bool FitsInImmediateU32() const;
|
||||
bool FitsInImmediateS32() const;
|
||||
|
||||
bool GetImmediateU1() const;
|
||||
u8 GetImmediateU8() const;
|
||||
u16 GetImmediateU16() const;
|
||||
u32 GetImmediateU32() const;
|
||||
u64 GetImmediateS32() const;
|
||||
u64 GetImmediateU64() const;
|
||||
IR::Cond GetImmediateCond() const;
|
||||
|
||||
/// Is this value currently in a GPR?
|
||||
bool IsInGpr() const;
|
||||
/// Is this value currently in a XMM?
|
||||
bool IsInXmm() const;
|
||||
/// Is this value currently in memory?
|
||||
bool IsInMemory() const;
|
||||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
|
||||
|
||||
bool allocated = false;
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Value value;
|
||||
};
|
||||
|
||||
class RegAlloc final {
|
||||
public:
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
|
||||
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr, std::vector<HostLoc> gpr_order, std::vector<HostLoc> xmm_order);
|
||||
|
||||
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
|
||||
|
||||
Xbyak::Reg64 UseGpr(Argument& arg);
|
||||
Xbyak::Xmm UseXmm(Argument& arg);
|
||||
OpArg UseOpArg(Argument& arg);
|
||||
void Use(Argument& arg, HostLoc host_loc);
|
||||
|
||||
Xbyak::Reg64 UseScratchGpr(Argument& arg);
|
||||
Xbyak::Xmm UseScratchXmm(Argument& arg);
|
||||
void UseScratch(Argument& arg, HostLoc host_loc);
|
||||
|
||||
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
|
||||
void DefineValue(IR::Inst* inst, Argument& arg);
|
||||
|
||||
void Release(const Xbyak::Reg& reg);
|
||||
|
||||
Xbyak::Reg64 ScratchGpr();
|
||||
Xbyak::Reg64 ScratchGpr(HostLoc desired_location);
|
||||
Xbyak::Xmm ScratchXmm();
|
||||
Xbyak::Xmm ScratchXmm(HostLoc desired_location);
|
||||
|
||||
void HostCall(IR::Inst* result_def = nullptr,
|
||||
std::optional<Argument::copyable_reference> arg0 = {},
|
||||
std::optional<Argument::copyable_reference> arg1 = {},
|
||||
std::optional<Argument::copyable_reference> arg2 = {},
|
||||
std::optional<Argument::copyable_reference> arg3 = {});
|
||||
|
||||
// TODO: Values in host flags
|
||||
|
||||
void EndOfAllocScope();
|
||||
|
||||
void AssertNoMoreUses();
|
||||
|
||||
private:
|
||||
friend struct Argument;
|
||||
|
||||
std::vector<HostLoc> gpr_order;
|
||||
std::vector<HostLoc> xmm_order;
|
||||
|
||||
HostLoc SelectARegister(const std::vector<HostLoc>& desired_locations) const;
|
||||
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
|
||||
|
||||
HostLoc UseImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations);
|
||||
HostLoc UseScratchImpl(IR::Value use_value, const std::vector<HostLoc>& desired_locations);
|
||||
HostLoc ScratchImpl(const std::vector<HostLoc>& desired_locations);
|
||||
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
|
||||
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
|
||||
|
||||
HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc);
|
||||
void Move(HostLoc to, HostLoc from);
|
||||
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
|
||||
void Exchange(HostLoc a, HostLoc b);
|
||||
void MoveOutOfTheWay(HostLoc reg);
|
||||
|
||||
void SpillRegister(HostLoc loc);
|
||||
HostLoc FindFreeSpill() const;
|
||||
|
||||
std::vector<HostLocInfo> hostloc_info;
|
||||
HostLocInfo& LocInfo(HostLoc loc);
|
||||
const HostLocInfo& LocInfo(HostLoc loc) const;
|
||||
|
||||
BlockOfCode& code;
|
||||
std::function<Xbyak::Address(HostLoc)> spill_to_addr;
|
||||
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
|
||||
void EmitExchange(HostLoc a, HostLoc b);
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
21
externals/dynarmic/src/common/assert.cpp
vendored
Executable file
21
externals/dynarmic/src/common/assert.cpp
vendored
Executable file
@@ -0,0 +1,21 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <exception>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
|
||||
[[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args) {
|
||||
fmt::print(stderr, "dynarmic assertion failed: ");
|
||||
fmt::vprint(stderr, msg, args);
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Common
|
69
externals/dynarmic/src/common/assert.h
vendored
Executable file
69
externals/dynarmic/src/common/assert.h
vendored
Executable file
@@ -0,0 +1,69 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/unlikely.h"
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
|
||||
[[noreturn]] void Terminate(fmt::string_view msg, fmt::format_args args);
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <typename... Ts>
|
||||
[[noreturn]] void TerminateHelper(fmt::string_view msg, Ts... args) {
|
||||
Terminate(msg, fmt::make_format_args(args...));
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace Dynarmic::Common
|
||||
|
||||
#if defined(NDEBUG)
|
||||
#if defined(__clang) || defined(__GNUC__)
|
||||
#define UNREACHABLE() __builtin_unreachable()
|
||||
#define ASSUME(expr) [&]{ if (!(expr)) __builtin_unreachable(); }()
|
||||
#elif defined(_MSC_VER)
|
||||
#define UNREACHABLE() __assume(0)
|
||||
#define ASSUME(expr) __assume(expr)
|
||||
#else
|
||||
#define UNREACHABLE() ASSERT_FALSE("Unreachable code!")
|
||||
#define ASSUME(expr)
|
||||
#endif
|
||||
#else
|
||||
#define UNREACHABLE() ASSERT_FALSE("Unreachable code!")
|
||||
#define ASSUME(expr)
|
||||
#endif
|
||||
|
||||
#ifdef DYNARMIC_IGNORE_ASSERTS
|
||||
#define ASSERT(expr) ASSUME(expr)
|
||||
#define ASSERT_MSG(expr, ...) ASSUME(expr)
|
||||
#define ASSERT_FALSE(...) UNREACHABLE()
|
||||
#else
|
||||
#define ASSERT(expr) \
|
||||
[&]{ \
|
||||
if (UNLIKELY(!(expr))) { \
|
||||
::Dynarmic::Common::detail::TerminateHelper(#expr); \
|
||||
} \
|
||||
}()
|
||||
#define ASSERT_MSG(expr, ...) \
|
||||
[&]{ \
|
||||
if (UNLIKELY(!(expr))) { \
|
||||
::Dynarmic::Common::detail::TerminateHelper(#expr "\nMessage: " __VA_ARGS__); \
|
||||
} \
|
||||
}()
|
||||
#define ASSERT_FALSE(...) ::Dynarmic::Common::detail::TerminateHelper("false\nMessage: " __VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#if defined(NDEBUG) || defined(DYNARMIC_IGNORE_ASSERTS)
|
||||
#define DEBUG_ASSERT(expr) ASSUME(expr)
|
||||
#define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr)
|
||||
#else
|
||||
#define DEBUG_ASSERT(expr) ASSERT(expr)
|
||||
#define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__)
|
||||
#endif
|
243
externals/dynarmic/src/common/bit_util.h
vendored
Executable file
243
externals/dynarmic/src/common/bit_util.h
vendored
Executable file
@@ -0,0 +1,243 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
|
||||
/// The size of a type in terms of bits
|
||||
template<typename T>
|
||||
constexpr size_t BitSize() {
|
||||
return sizeof(T) * CHAR_BIT;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T Ones(size_t count) {
|
||||
ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
|
||||
if (count == BitSize<T>())
|
||||
return static_cast<T>(~static_cast<T>(0));
|
||||
return ~(static_cast<T>(~static_cast<T>(0)) << count);
|
||||
}
|
||||
|
||||
/// Extract bits [begin_bit, end_bit] inclusive from value of type T.
|
||||
template<typename T>
|
||||
constexpr T Bits(const size_t begin_bit, const size_t end_bit, const T value) {
|
||||
ASSERT_MSG(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
||||
ASSERT_MSG(begin_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
|
||||
ASSERT_MSG(end_bit < BitSize<T>(), "end_bit must be smaller than size of T");
|
||||
|
||||
return (value >> begin_bit) & Ones<T>(end_bit - begin_bit + 1);
|
||||
}
|
||||
|
||||
/// Extract bits [begin_bit, end_bit] inclusive from value of type T.
|
||||
template<size_t begin_bit, size_t end_bit, typename T>
|
||||
constexpr T Bits(const T value) {
|
||||
static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
||||
static_assert(begin_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
|
||||
static_assert(end_bit < BitSize<T>(), "end_bit must be smaller than size of T");
|
||||
|
||||
return (value >> begin_bit) & Ones<T>(end_bit - begin_bit + 1);
|
||||
}
|
||||
|
||||
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
|
||||
template<size_t begin_bit, size_t end_bit, typename T>
|
||||
constexpr T Mask() {
|
||||
static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
||||
static_assert(begin_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
|
||||
static_assert(end_bit < BitSize<T>(), "end_bit must be smaller than size of T");
|
||||
|
||||
return Ones<T>(end_bit - begin_bit + 1) << begin_bit;
|
||||
}
|
||||
|
||||
/// Clears bits [begin_bit, end_bit] inclusive of value of type T.
|
||||
template<size_t begin_bit, size_t end_bit, typename T>
|
||||
constexpr T ClearBits(const T value) {
|
||||
return value & ~Mask<begin_bit, end_bit, T>();
|
||||
}
|
||||
|
||||
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
|
||||
template<size_t begin_bit, size_t end_bit, typename T>
|
||||
constexpr T ModifyBits(const T value, const T new_bits) {
|
||||
return ClearBits<begin_bit, end_bit, T>(value) | ((new_bits << begin_bit) & Mask<begin_bit, end_bit, T>());
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4554)
|
||||
#endif
|
||||
/// Extracts a single bit at bit_position from value of type T.
|
||||
template<typename T>
|
||||
inline bool Bit(size_t bit_position, const T value) {
|
||||
ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
|
||||
|
||||
return ((value >> bit_position) & 1) != 0;
|
||||
}
|
||||
|
||||
/// Extracts a single bit at bit_position from value of type T.
|
||||
template<size_t bit_position, typename T>
|
||||
constexpr bool Bit(const T value) {
|
||||
static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
|
||||
|
||||
return Bit<T>(bit_position, value);
|
||||
}
|
||||
|
||||
/// Clears a single bit at bit_position from value of type T.
|
||||
template<typename T>
|
||||
inline T ClearBit(size_t bit_position, const T value) {
|
||||
ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
|
||||
|
||||
return value & ~(static_cast<T>(1) << bit_position);
|
||||
}
|
||||
|
||||
/// Clears a single bit at bit_position from value of type T.
|
||||
template<size_t bit_position, typename T>
|
||||
constexpr T ClearBit(const T value) {
|
||||
static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
|
||||
|
||||
return ClearBit<T>(bit_position, value);
|
||||
}
|
||||
|
||||
/// Modifies a single bit at bit_position from value of type T.
|
||||
template<typename T>
|
||||
inline T ModifyBit(size_t bit_position, const T value, bool new_bit) {
|
||||
ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
|
||||
|
||||
return ClearBit<T>(bit_position, value) | (static_cast<T>(new_bit) << bit_position);
|
||||
}
|
||||
|
||||
/// Modifies a single bit at bit_position from value of type T.
|
||||
template<size_t bit_position, typename T>
|
||||
constexpr T ModifyBit(const T value, bool new_bit) {
|
||||
static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
|
||||
|
||||
return ModifyBit<T>(bit_position, value, new_bit);
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
||||
template<size_t bit_count, typename T>
|
||||
constexpr T SignExtend(const T value) {
|
||||
static_assert(bit_count <= BitSize<T>(), "bit_count larger than bitsize of T");
|
||||
|
||||
constexpr T mask = static_cast<T>(1ULL << bit_count) - 1;
|
||||
const bool signbit = Bit<bit_count - 1, T>(value);
|
||||
if (signbit) {
|
||||
return value | ~mask;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
||||
template<typename T>
|
||||
inline T SignExtend(const size_t bit_count, const T value) {
|
||||
ASSERT_MSG(bit_count <= BitSize<T>(), "bit_count larger than bitsize of T");
|
||||
|
||||
const T mask = static_cast<T>(1ULL << bit_count) - 1;
|
||||
const bool signbit = Bit<T>(bit_count - 1, value);
|
||||
if (signbit) {
|
||||
return value | ~mask;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
template <typename Integral>
|
||||
inline size_t BitCount(Integral value) {
|
||||
return std::bitset<BitSize<Integral>()>(value).count();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr size_t CountLeadingZeros(T value) {
|
||||
auto x = static_cast<std::make_unsigned_t<T>>(value);
|
||||
size_t result = BitSize<T>();
|
||||
while (x != 0) {
|
||||
x >>= 1;
|
||||
result--;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr int HighestSetBit(T value) {
|
||||
auto x = static_cast<std::make_unsigned_t<T>>(value);
|
||||
int result = -1;
|
||||
while (x != 0) {
|
||||
x >>= 1;
|
||||
result++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr size_t LowestSetBit(T value) {
|
||||
auto x = static_cast<std::make_unsigned_t<T>>(value);
|
||||
if (x == 0)
|
||||
return BitSize<T>();
|
||||
|
||||
size_t result = 0;
|
||||
while ((x & 1) == 0) {
|
||||
x >>= 1;
|
||||
result++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool MostSignificantBit(T value) {
|
||||
return Bit<BitSize<T>() - 1, T>(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T Replicate(T value, size_t element_size) {
|
||||
ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size");
|
||||
if (element_size == BitSize<T>())
|
||||
return value;
|
||||
return Replicate(value | (value << element_size), element_size * 2);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T RotateRight(T value, size_t amount) {
|
||||
amount %= BitSize<T>();
|
||||
|
||||
if (amount == 0) {
|
||||
return value;
|
||||
}
|
||||
|
||||
auto x = static_cast<std::make_unsigned_t<T>>(value);
|
||||
return static_cast<T>((x >> amount) | (x << (BitSize<T>() - amount)));
|
||||
}
|
||||
|
||||
constexpr u16 Swap16(u16 value) {
|
||||
return static_cast<u16>(u32{value} >> 8 | u32{value} << 8);
|
||||
}
|
||||
|
||||
constexpr u32 Swap32(u32 value) {
|
||||
return ((value & 0xFF000000U) >> 24) |
|
||||
((value & 0x00FF0000U) >> 8) |
|
||||
((value & 0x0000FF00U) << 8) |
|
||||
((value & 0x000000FFU) << 24);
|
||||
}
|
||||
|
||||
constexpr u64 Swap64(u64 value) {
|
||||
return ((value & 0xFF00000000000000ULL) >> 56) |
|
||||
((value & 0x00FF000000000000ULL) >> 40) |
|
||||
((value & 0x0000FF0000000000ULL) >> 24) |
|
||||
((value & 0x000000FF00000000ULL) >> 8) |
|
||||
((value & 0x00000000FF000000ULL) << 8) |
|
||||
((value & 0x0000000000FF0000ULL) << 24) |
|
||||
((value & 0x000000000000FF00ULL) << 40) |
|
||||
((value & 0x00000000000000FFULL) << 56);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Common
|
45
externals/dynarmic/src/common/cast_util.h
vendored
Executable file
45
externals/dynarmic/src/common/cast_util.h
vendored
Executable file
@@ -0,0 +1,45 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
|
||||
#include <mp/traits/function_info.h>
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
|
||||
/// Reinterpret objects of one type as another by bit-casting between object representations.
|
||||
template <class Dest, class Source>
|
||||
inline Dest BitCast(const Source& source) noexcept {
|
||||
static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
|
||||
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
|
||||
static_assert(std::is_trivially_copyable_v<Source>, "source type must be trivially copyable");
|
||||
|
||||
std::aligned_storage_t<sizeof(Dest), alignof(Dest)> dest;
|
||||
std::memcpy(&dest, &source, sizeof(dest));
|
||||
return reinterpret_cast<Dest&>(dest);
|
||||
}
|
||||
|
||||
/// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
|
||||
/// Note that here we do not verify if source has enough bytes to read from.
|
||||
template <class Dest, class SourcePtr>
|
||||
inline Dest BitCastPointee(const SourcePtr source) noexcept {
|
||||
static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
|
||||
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
|
||||
|
||||
std::aligned_storage_t<sizeof(Dest), alignof(Dest)> dest;
|
||||
std::memcpy(&dest, BitCast<void*>(source), sizeof(dest));
|
||||
return reinterpret_cast<Dest&>(dest);
|
||||
}
|
||||
|
||||
/// Cast a lambda into an equivalent function pointer.
|
||||
template <class Function>
|
||||
inline auto FptrCast(Function f) noexcept {
|
||||
return static_cast<mp::equivalent_function_type<Function>*>(f);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Common
|
28
externals/dynarmic/src/common/common_types.h
vendored
Executable file
28
externals/dynarmic/src/common/common_types.h
vendored
Executable file
@@ -0,0 +1,28 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
using u8 = std::uint8_t;
|
||||
using u16 = std::uint16_t;
|
||||
using u32 = std::uint32_t;
|
||||
using u64 = std::uint64_t;
|
||||
using uptr = std::uintptr_t;
|
||||
|
||||
using s8 = std::int8_t;
|
||||
using s16 = std::int16_t;
|
||||
using s32 = std::int32_t;
|
||||
using s64 = std::int64_t;
|
||||
using sptr = std::intptr_t;
|
||||
|
||||
using size_t = std::size_t;
|
||||
|
||||
using f32 = float;
|
||||
using f64 = double;
|
||||
static_assert(sizeof(f32) == sizeof(u32), "f32 must be 32 bits wide");
|
||||
static_assert(sizeof(f64) == sizeof(u64), "f64 must be 64 bits wide");
|
181
externals/dynarmic/src/common/crypto/aes.cpp
vendored
Executable file
181
externals/dynarmic/src/common/crypto/aes.cpp
vendored
Executable file
@@ -0,0 +1,181 @@
|
||||
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <array>

#include "common/common_types.h"
#include "common/crypto/aes.h"

namespace Dynarmic::Common::Crypto::AES {

using SubstitutionTable = std::array<u8, 256>;

// See section 5.1.1 Figure 7 in FIPS 197
constexpr SubstitutionTable substitution_box{{
    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
}};

// See section 5.3.2 Figure 14 in FIPS 197
constexpr SubstitutionTable inverse_substitution_box{{
    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
    0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
    0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
    0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
    0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
    0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
    0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
    0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
    0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
    0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
    0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
    0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
    0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
    0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
    0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
}};

// See section 4.2.1 in FIPS 197: multiplication by x (0x02) in GF(2^8),
// reduced modulo the AES polynomial x^8 + x^4 + x^3 + x + 1 (0x11B).
static constexpr u8 xtime(u8 x) {
    return static_cast<u8>((x << 1) ^ (((x >> 7) & 1) * 0x1B));
}

// GF(2^8) multiplication. Only the low five bits of y are considered, which is
// sufficient for the InverseMixColumns coefficients (0x09, 0x0B, 0x0D, 0x0E).
static constexpr u8 Multiply(u8 x, u8 y) {
    return static_cast<u8>(((y & 1) * x) ^
                           ((y >> 1 & 1) * xtime(x)) ^
                           ((y >> 2 & 1) * xtime(xtime(x))) ^
                           ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^
                           ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
}

static void ShiftRows(State& out_state, const State& state) {
    // Move zeroth row over.
    out_state[0] = state[0];
    out_state[4] = state[4];
    out_state[8] = state[8];
    out_state[12] = state[12];

    // Rotate first row 1 column left.
    u8 temp = state[1];
    out_state[1] = state[5];
    out_state[5] = state[9];
    out_state[9] = state[13];
    out_state[13] = temp;

    // Rotate second row 2 columns left.
    temp = state[2];
    out_state[2] = state[10];
    out_state[10] = temp;

    temp = state[6];
    out_state[6] = state[14];
    out_state[14] = temp;

    // Rotate third row 3 columns left.
    temp = state[3];
    out_state[3] = state[15];
    out_state[15] = state[11];
    out_state[11] = state[7];
    out_state[7] = temp;
}

static void InverseShiftRows(State& out_state, const State& state) {
    // Move zeroth row over.
    out_state[0] = state[0];
    out_state[4] = state[4];
    out_state[8] = state[8];
    out_state[12] = state[12];

    // Rotate first row 1 column right.
    u8 temp = state[13];
    out_state[13] = state[9];
    out_state[9] = state[5];
    out_state[5] = state[1];
    out_state[1] = temp;

    // Rotate second row 2 columns right.
    temp = state[2];
    out_state[2] = state[10];
    out_state[10] = temp;

    temp = state[6];
    out_state[6] = state[14];
    out_state[14] = temp;

    // Rotate third row 3 columns right.
    temp = state[3];
    out_state[3] = state[7];
    out_state[7] = state[11];
    out_state[11] = state[15];
    out_state[15] = temp;
}

static void SubBytes(State& state, const SubstitutionTable& table) {
    for (size_t i = 0; i < 4; i++) {
        for (size_t j = 0; j < 4; j++) {
            state[4 * i + j] = table[state[4 * i + j]];
        }
    }
}

void DecryptSingleRound(State& out_state, const State& state) {
    InverseShiftRows(out_state, state);
    SubBytes(out_state, inverse_substitution_box);
}

void EncryptSingleRound(State& out_state, const State& state) {
    ShiftRows(out_state, state);
    SubBytes(out_state, substitution_box);
}

void MixColumns(State& out_state, const State& state) {
    for (size_t i = 0; i < out_state.size(); i += 4) {
        const u8 a = state[i];
        const u8 b = state[i + 1];
        const u8 c = state[i + 2];
        const u8 d = state[i + 3];

        const u8 tmp = a ^ b ^ c ^ d;

        out_state[i + 0] = a ^ xtime(a ^ b) ^ tmp;
        out_state[i + 1] = b ^ xtime(b ^ c) ^ tmp;
        out_state[i + 2] = c ^ xtime(c ^ d) ^ tmp;
        out_state[i + 3] = d ^ xtime(d ^ a) ^ tmp;
    }
}

void InverseMixColumns(State& out_state, const State& state) {
    for (size_t i = 0; i < out_state.size(); i += 4) {
        const u8 a = state[i];
        const u8 b = state[i + 1];
        const u8 c = state[i + 2];
        const u8 d = state[i + 3];

        out_state[i + 0] = Multiply(a, 0x0E) ^ Multiply(b, 0x0B) ^ Multiply(c, 0x0D) ^ Multiply(d, 0x09);
        out_state[i + 1] = Multiply(a, 0x09) ^ Multiply(b, 0x0E) ^ Multiply(c, 0x0B) ^ Multiply(d, 0x0D);
        out_state[i + 2] = Multiply(a, 0x0D) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0E) ^ Multiply(d, 0x0B);
        out_state[i + 3] = Multiply(a, 0x0B) ^ Multiply(b, 0x0D) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0E);
    }
}

} // namespace Dynarmic::Common::Crypto::AES
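The xtime/Multiply helpers above implement GF(2^8) arithmetic modulo the AES polynomial, and MixColumns/InverseMixColumns apply inverse coefficient matrices built from them. A minimal standalone check (illustrative, not part of this commit) that the two matrices really do invert each other, round-tripping one state column:

#include <cassert>
#include <cstdint>

using u8 = std::uint8_t;

// Same definitions as in aes.cpp above.
constexpr u8 xtime(u8 x) {
    return static_cast<u8>((x << 1) ^ (((x >> 7) & 1) * 0x1B));
}
constexpr u8 Multiply(u8 x, u8 y) {
    return static_cast<u8>(((y & 1) * x) ^
                           ((y >> 1 & 1) * xtime(x)) ^
                           ((y >> 2 & 1) * xtime(xtime(x))) ^
                           ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^
                           ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
}

int main() {
    for (int v = 0; v < 256; ++v) {
        // One state column; b, c, d are arbitrary fixed bytes.
        const u8 a = static_cast<u8>(v), b = 0x12, c = 0x34, d = 0x56;

        // Forward MixColumns, written exactly as in MixColumns() above.
        const u8 tmp = a ^ b ^ c ^ d;
        const u8 a2 = a ^ xtime(a ^ b) ^ tmp;
        const u8 b2 = b ^ xtime(b ^ c) ^ tmp;
        const u8 c2 = c ^ xtime(c ^ d) ^ tmp;
        const u8 d2 = d ^ xtime(d ^ a) ^ tmp;

        // The InverseMixColumns row for the first byte must recover a.
        assert(a == (Multiply(a2, 0x0E) ^ Multiply(b2, 0x0B) ^ Multiply(c2, 0x0D) ^ Multiply(d2, 0x09)));
    }
}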
22
externals/dynarmic/src/common/crypto/aes.h
vendored
Executable file
@@ -0,0 +1,22 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <array>
#include "common/common_types.h"

namespace Dynarmic::Common::Crypto::AES {

using State = std::array<u8, 16>;

// Assumes the state has already been XORed by the round key.
void DecryptSingleRound(State& out_state, const State& state);
void EncryptSingleRound(State& out_state, const State& state);

void MixColumns(State& out_state, const State& state);
void InverseMixColumns(State& out_state, const State& state);

} // namespace Dynarmic::Common::Crypto::AES
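As the header note says, the single-round helpers expect AddRoundKey to have happened already. A hedged usage sketch (not from the commit; the AESERound helper name is hypothetical) showing one encryption round the way the A64 AESE instruction performs it, with MixColumns left to the separate AESMC step:

#include <array>
#include <cstddef>
#include "common/crypto/aes.h"

using Dynarmic::Common::Crypto::AES::State;

State AESERound(State block, const State& round_key) {
    for (size_t i = 0; i < block.size(); ++i) {
        block[i] ^= round_key[i];  // AddRoundKey happens before the call.
    }
    State out{};
    Dynarmic::Common::Crypto::AES::EncryptSingleRound(out, block);
    return out;  // Follow with MixColumns() to mirror AESMC.
}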
169
externals/dynarmic/src/common/crypto/crc32.cpp
vendored
Executable file
@@ -0,0 +1,169 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <array>

#include "common/common_types.h"
#include "common/crypto/crc32.h"

namespace Dynarmic::Common::Crypto::CRC32 {

using CRC32Table = std::array<u32, 256>;

// Byte-at-a-time table for a bit-reflected CRC32 using the Castagnoli polynomial 0x1EDC6F41
constexpr CRC32Table castagnoli_table{{
    0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
    0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
    0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
    0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
    0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
    0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
    0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
    0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
    0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
    0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
    0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
    0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
    0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
    0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
    0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
    0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
    0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
    0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
    0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
    0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
    0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
    0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
    0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
    0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
    0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
    0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
    0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
    0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
    0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
    0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
    0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
    0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
    0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
    0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
    0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
    0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
    0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
    0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
    0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
    0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
    0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
    0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
    0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
    0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
    0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
    0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
    0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
    0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
    0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
    0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
    0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
    0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
    0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
    0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
    0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
    0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
    0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
    0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
    0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
    0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
    0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
    0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
    0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
    0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
}};

// Byte-at-a-time table for a bit-reflected CRC32 using the ISO polynomial 0x04C11DB7
constexpr CRC32Table iso_table{{
    0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
    0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
    0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
    0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
    0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
    0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
    0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
    0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
    0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
    0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
    0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
    0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
    0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
    0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
    0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
    0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
    0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
    0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
    0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
    0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
    0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
    0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
    0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
    0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
    0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
    0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
    0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
    0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
    0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
    0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
    0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
    0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
    0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
    0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
    0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
    0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
    0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
    0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
    0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
    0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
    0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
    0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
    0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
    0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
    0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
    0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
    0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
    0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
    0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
    0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
    0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
    0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
    0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
    0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
    0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
    0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
    0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
    0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
    0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
    0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
    0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
    0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
    0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
    0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
}};

static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) {
    const auto* data = reinterpret_cast<const unsigned char*>(&value);

    while (length-- > 0) {
        crc = (crc >> 8) ^ table[(crc ^ (*data++)) & 0xFF];
    }

    return crc;
}

u32 ComputeCRC32Castagnoli(u32 crc, u64 value, int length) {
    return ComputeCRC32(castagnoli_table, crc, value, length);
}

u32 ComputeCRC32ISO(u32 crc, u64 value, int length) {
    return ComputeCRC32(iso_table, crc, value, length);
}

} // namespace Dynarmic::Common::Crypto::CRC32
40
externals/dynarmic/src/common/crypto/crc32.h
vendored
Executable file
@@ -0,0 +1,40 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/common_types.h"

namespace Dynarmic::Common::Crypto::CRC32 {

/**
 * Computes a CRC32 value using the Castagnoli polynomial (0x1EDC6F41).
 *
 * @param crc    The initial CRC value.
 * @param value  The value to compute the CRC of.
 * @param length The number of bytes of @p value to process.
 *
 * @remark @p value is interpreted internally as an array of
 *         unsigned char containing @p length bytes.
 *
 * @return The computed CRC32 value.
 */
u32 ComputeCRC32Castagnoli(u32 crc, u64 value, int length);

/**
 * Computes a CRC32 value using the ISO polynomial (0x04C11DB7).
 *
 * @param crc    The initial CRC value.
 * @param value  The value to compute the CRC of.
 * @param length The number of bytes of @p value to process.
 *
 * @remark @p value is interpreted internally as an array of
 *         unsigned char containing @p length bytes.
 *
 * @return The computed CRC32 value.
 */
u32 ComputeCRC32ISO(u32 crc, u64 value, int length);

} // namespace Dynarmic::Common::Crypto::CRC32
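A hedged usage sketch (not from the commit; Crc32cOfBuffer is a hypothetical helper): computing the CRC32C of a byte buffer by feeding it through ComputeCRC32Castagnoli eight bytes at a time, as guest code using the A64 CRC32CX instruction would. It assumes a little-endian host, matching the reinterpret_cast in crc32.cpp, and uses the usual reflected-CRC convention of starting from 0xFFFFFFFF and inverting at the end.

#include <cstring>
#include "common/common_types.h"
#include "common/crypto/crc32.h"

u32 Crc32cOfBuffer(const u8* data, size_t size) {
    u32 crc = 0xFFFFFFFF;
    while (size >= 8) {
        u64 chunk;
        std::memcpy(&chunk, data, sizeof(chunk));  // little-endian host assumed
        crc = Dynarmic::Common::Crypto::CRC32::ComputeCRC32Castagnoli(crc, chunk, 8);
        data += 8;
        size -= 8;
    }
    for (; size > 0; --size, ++data) {
        crc = Dynarmic::Common::Crypto::CRC32::ComputeCRC32Castagnoli(crc, *data, 1);
    }
    return ~crc;
}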
54
externals/dynarmic/src/common/crypto/sm4.cpp
vendored
Executable file
@@ -0,0 +1,54 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <array>

#include "common/common_types.h"
#include "common/crypto/sm4.h"

namespace Dynarmic::Common::Crypto::SM4 {

using SubstitutionTable = std::array<u8, 256>;

constexpr SubstitutionTable substitution_box{{
    0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7,
    0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
    0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
    0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A,
    0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
    0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
    0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
    0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
    0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
    0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B,
    0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
    0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2,
    0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
    0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
    0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
    0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5,
    0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
    0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55,
    0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
    0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
    0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
    0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F,
    0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
    0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F,
    0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
    0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
    0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
    0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E,
    0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
    0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20,
    0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
}};

u8 AccessSubstitutionBox(u8 index) {
    return substitution_box[index];
}

} // namespace Dynarmic::Common::Crypto::SM4
14
externals/dynarmic/src/common/crypto/sm4.h
vendored
Executable file
@@ -0,0 +1,14 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/common_types.h"

namespace Dynarmic::Common::Crypto::SM4 {

u8 AccessSubstitutionBox(u8 index);

} // namespace Dynarmic::Common::Crypto::SM4
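A hedged sketch (not from the commit; SM4Tau is a hypothetical helper): the SM4 nonlinear substitution tau applies the exposed S-box to each byte of a 32-bit word, which is the step the guest SM4 instructions need this table for.

#include "common/common_types.h"
#include "common/crypto/sm4.h"

u32 SM4Tau(u32 word) {
    u32 result = 0;
    for (int i = 0; i < 4; ++i) {
        const u8 byte = static_cast<u8>(word >> (8 * i));
        result |= static_cast<u32>(Dynarmic::Common::Crypto::SM4::AccessSubstitutionBox(byte)) << (8 * i);
    }
    return result;
}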
207
externals/dynarmic/src/common/fp/fpcr.h
vendored
Executable file
@@ -0,0 +1,207 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <optional>

#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/fp/rounding_mode.h"

namespace Dynarmic::FP {

/**
 * Representation of the Floating-Point Control Register.
 */
class FPCR final {
public:
    FPCR() = default;
    FPCR(const FPCR&) = default;
    FPCR(FPCR&&) = default;
    explicit FPCR(u32 data) : value{data & mask} {}

    FPCR& operator=(const FPCR&) = default;
    FPCR& operator=(FPCR&&) = default;
    FPCR& operator=(u32 data) {
        value = data & mask;
        return *this;
    }

    /// Get alternate half-precision control flag.
    bool AHP() const {
        return Common::Bit<26>(value);
    }

    /// Set alternate half-precision control flag.
    void AHP(bool ahp) {
        value = Common::ModifyBit<26>(value, ahp);
    }

    /// Get default NaN mode control bit.
    bool DN() const {
        return Common::Bit<25>(value);
    }

    /// Set default NaN mode control bit.
    void DN(bool dn) {
        value = Common::ModifyBit<25>(value, dn);
    }

    /// Get flush-to-zero mode control bit.
    bool FZ() const {
        return Common::Bit<24>(value);
    }

    /// Set flush-to-zero mode control bit.
    void FZ(bool fz) {
        value = Common::ModifyBit<24>(value, fz);
    }

    /// Get rounding mode control field.
    FP::RoundingMode RMode() const {
        return static_cast<FP::RoundingMode>(Common::Bits<22, 23>(value));
    }

    /// Set rounding mode control field.
    void RMode(FP::RoundingMode rounding_mode) {
        ASSERT_MSG(static_cast<u32>(rounding_mode) <= 0b11, "FPCR: Invalid rounding mode");
        value = Common::ModifyBits<22, 23>(value, static_cast<u32>(rounding_mode));
    }

    /// Get the stride of a vector when executing AArch32 VFP instructions.
    /// This field has no function in AArch64 state.
    std::optional<size_t> Stride() const {
        switch (Common::Bits<20, 21>(value)) {
        case 0b00:
            return 1;
        case 0b11:
            return 2;
        default:
            return std::nullopt;
        }
    }

    /// Set the stride of a vector when executing AArch32 VFP instructions.
    /// This field has no function in AArch64 state.
    void Stride(size_t stride) {
        ASSERT_MSG(stride >= 1 && stride <= 2, "FPCR: Invalid stride");
        value = Common::ModifyBits<20, 21>(value, stride == 1 ? 0b00u : 0b11u);
    }

    /// Get flush-to-zero (half-precision specific) mode control bit.
    bool FZ16() const {
        return Common::Bit<19>(value);
    }

    /// Set flush-to-zero (half-precision specific) mode control bit.
    void FZ16(bool fz16) {
        value = Common::ModifyBit<19>(value, fz16);
    }

    /// Gets the length of a vector when executing AArch32 VFP instructions.
    /// This field has no function in AArch64 state.
    size_t Len() const {
        return Common::Bits<16, 18>(value) + 1;
    }

    /// Sets the length of a vector when executing AArch32 VFP instructions.
    /// This field has no function in AArch64 state.
    void Len(size_t len) {
        ASSERT_MSG(len >= 1 && len <= 8, "FPCR: Invalid len");
        value = Common::ModifyBits<16, 18>(value, static_cast<u32>(len - 1));
    }

    /// Get input denormal exception trap enable flag.
    bool IDE() const {
        return Common::Bit<15>(value);
    }

    /// Set input denormal exception trap enable flag.
    void IDE(bool ide) {
        value = Common::ModifyBit<15>(value, ide);
    }

    /// Get inexact exception trap enable flag.
    bool IXE() const {
        return Common::Bit<12>(value);
    }

    /// Set inexact exception trap enable flag.
    void IXE(bool ixe) {
        value = Common::ModifyBit<12>(value, ixe);
    }

    /// Get underflow exception trap enable flag.
    bool UFE() const {
        return Common::Bit<11>(value);
    }

    /// Set underflow exception trap enable flag.
    void UFE(bool ufe) {
        value = Common::ModifyBit<11>(value, ufe);
    }

    /// Get overflow exception trap enable flag.
    bool OFE() const {
        return Common::Bit<10>(value);
    }

    /// Set overflow exception trap enable flag.
    void OFE(bool ofe) {
        value = Common::ModifyBit<10>(value, ofe);
    }

    /// Get division by zero exception trap enable flag.
    bool DZE() const {
        return Common::Bit<9>(value);
    }

    /// Set division by zero exception trap enable flag.
    void DZE(bool dze) {
        value = Common::ModifyBit<9>(value, dze);
    }

    /// Get invalid operation exception trap enable flag.
    bool IOE() const {
        return Common::Bit<8>(value);
    }

    /// Set invalid operation exception trap enable flag.
    void IOE(bool ioe) {
        value = Common::ModifyBit<8>(value, ioe);
    }

    /// Gets the underlying raw value within the FPCR.
    u32 Value() const {
        return value;
    }

    /// Gets the StandardFPSCRValue (A32 ASIMD).
    FPCR ASIMDStandardValue() const {
        FPCR stdvalue;
        stdvalue.AHP(AHP());
        stdvalue.FZ16(FZ16());
        stdvalue.FZ(true);
        stdvalue.DN(true);
        return stdvalue;
    }

private:
    // Bits 0-7, 13-14, and 27-31 are reserved.
    static constexpr u32 mask = 0x07FF9F00;
    u32 value = 0;
};

inline bool operator==(FPCR lhs, FPCR rhs) {
    return lhs.Value() == rhs.Value();
}

inline bool operator!=(FPCR lhs, FPCR rhs) {
    return !operator==(lhs, rhs);
}

} // namespace Dynarmic::FP
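A hedged usage sketch (not from the commit): building an FPCR value and reading fields back. ASIMDStandardValue() mirrors the architectural StandardFPSCRValue pseudocode: A32 ASIMD always runs with FZ and DN set, inheriting only AHP and FZ16 from the thread's FPCR.

#include "common/fp/fpcr.h"
#include "common/fp/rounding_mode.h"

void Example() {
    Dynarmic::FP::FPCR fpcr;
    fpcr.RMode(Dynarmic::FP::RoundingMode::TowardsZero);
    fpcr.FZ(true);

    const Dynarmic::FP::FPCR standard = fpcr.ASIMDStandardValue();
    // standard.FZ() and standard.DN() are true; standard.RMode() is
    // ToNearest_TieEven (0b00) because RMode is not copied across.
}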
159
externals/dynarmic/src/common/fp/fpsr.h
vendored
Executable file
@@ -0,0 +1,159 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/bit_util.h"
#include "common/common_types.h"

namespace Dynarmic::FP {

/**
 * Representation of the Floating-Point Status Register.
 */
class FPSR final {
public:
    FPSR() = default;
    FPSR(const FPSR&) = default;
    FPSR(FPSR&&) = default;
    explicit FPSR(u32 data) : value{data & mask} {}

    FPSR& operator=(const FPSR&) = default;
    FPSR& operator=(FPSR&&) = default;
    FPSR& operator=(u32 data) {
        value = data & mask;
        return *this;
    }

    /// Get negative condition flag
    bool N() const {
        return Common::Bit<31>(value);
    }

    /// Set negative condition flag
    void N(bool N_) {
        value = Common::ModifyBit<31>(value, N_);
    }

    /// Get zero condition flag
    bool Z() const {
        return Common::Bit<30>(value);
    }

    /// Set zero condition flag
    void Z(bool Z_) {
        value = Common::ModifyBit<30>(value, Z_);
    }

    /// Get carry condition flag
    bool C() const {
        return Common::Bit<29>(value);
    }

    /// Set carry condition flag
    void C(bool C_) {
        value = Common::ModifyBit<29>(value, C_);
    }

    /// Get overflow condition flag
    bool V() const {
        return Common::Bit<28>(value);
    }

    /// Set overflow condition flag
    void V(bool V_) {
        value = Common::ModifyBit<28>(value, V_);
    }

    /// Get cumulative saturation bit
    bool QC() const {
        return Common::Bit<27>(value);
    }

    /// Set cumulative saturation bit
    void QC(bool QC_) {
        value = Common::ModifyBit<27>(value, QC_);
    }

    /// Get input denormal floating-point exception bit
    bool IDC() const {
        return Common::Bit<7>(value);
    }

    /// Set input denormal floating-point exception bit
    void IDC(bool IDC_) {
        value = Common::ModifyBit<7>(value, IDC_);
    }

    /// Get inexact cumulative floating-point exception bit
    bool IXC() const {
        return Common::Bit<4>(value);
    }

    /// Set inexact cumulative floating-point exception bit
    void IXC(bool IXC_) {
        value = Common::ModifyBit<4>(value, IXC_);
    }

    /// Get underflow cumulative floating-point exception bit
    bool UFC() const {
        return Common::Bit<3>(value);
    }

    /// Set underflow cumulative floating-point exception bit
    void UFC(bool UFC_) {
        value = Common::ModifyBit<3>(value, UFC_);
    }

    /// Get overflow cumulative floating-point exception bit
    bool OFC() const {
        return Common::Bit<2>(value);
    }

    /// Set overflow cumulative floating-point exception bit
    void OFC(bool OFC_) {
        value = Common::ModifyBit<2>(value, OFC_);
    }

    /// Get divide by zero cumulative floating-point exception bit
    bool DZC() const {
        return Common::Bit<1>(value);
    }

    /// Set divide by zero cumulative floating-point exception bit
    void DZC(bool DZC_) {
        value = Common::ModifyBit<1>(value, DZC_);
    }

    /// Get invalid operation cumulative floating-point exception bit
    bool IOC() const {
        return Common::Bit<0>(value);
    }

    /// Set invalid operation cumulative floating-point exception bit
    void IOC(bool IOC_) {
        value = Common::ModifyBit<0>(value, IOC_);
    }

    /// Gets the underlying raw value within the FPSR.
    u32 Value() const {
        return value;
    }

private:
    // Bits 5-6 and 8-26 are reserved.
    static constexpr u32 mask = 0xF800009F;
    u32 value = 0;
};

inline bool operator==(FPSR lhs, FPSR rhs) {
    return lhs.Value() == rhs.Value();
}

inline bool operator!=(FPSR lhs, FPSR rhs) {
    return !operator==(lhs, rhs);
}

} // namespace Dynarmic::FP
89
externals/dynarmic/src/common/fp/fused.cpp
vendored
Executable file
@@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/fp/fused.h"
#include "common/fp/mantissa_util.h"
#include "common/fp/unpacked.h"
#include "common/u128.h"

namespace Dynarmic::FP {

constexpr size_t product_point_position = normalized_point_position * 2;

static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa) {
    constexpr int point_position_correction = normalized_point_position - (product_point_position - 64);
    // We round-to-odd here when reducing the bitwidth of the mantissa so that subsequent roundings are accurate.
    return {sign, exponent + point_position_correction, mantissa.upper | static_cast<u64>(mantissa.lower != 0)};
}

FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
    const bool product_sign = op1.sign != op2.sign;
    const auto [product_exponent, product_value] = [op1, op2]{
        int exponent = op1.exponent + op2.exponent;
        u128 value = Multiply64To128(op1.mantissa, op2.mantissa);
        if (value.Bit<product_point_position + 1>()) {
            value = value >> 1;
            exponent++;
        }
        return std::make_tuple(exponent, value);
    }();

    if (product_value == 0) {
        return addend;
    }

    if (addend.mantissa == 0) {
        return ReduceMantissa(product_sign, product_exponent, product_value);
    }

    const int exp_diff = product_exponent - addend.exponent;

    if (product_sign == addend.sign) {
        // Addition

        if (exp_diff <= 0) {
            // addend > product
            const u64 result = addend.mantissa + StickyLogicalShiftRight(product_value, normalized_point_position - exp_diff).lower;
            return FPUnpacked{addend.sign, addend.exponent, result};
        }

        // addend < product
        const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
        return ReduceMantissa(product_sign, product_exponent, result);
    }

    // Subtraction

    const u128 addend_long = u128(addend.mantissa) << normalized_point_position;

    bool result_sign;
    u128 result;
    int result_exponent;

    if (exp_diff == 0 && product_value > addend_long) {
        result_sign = product_sign;
        result_exponent = product_exponent;
        result = product_value - addend_long;
    } else if (exp_diff <= 0) {
        result_sign = !product_sign;
        result_exponent = addend.exponent;
        result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff);
    } else {
        result_sign = product_sign;
        result_exponent = product_exponent;
        result = product_value - StickyLogicalShiftRight(addend_long, exp_diff);
    }

    if (result.upper == 0) {
        return FPUnpacked{result_sign, result_exponent, result.lower};
    }

    const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper);
    result = result << required_shift;
    result_exponent -= required_shift;
    return ReduceMantissa(result_sign, result_exponent, result);
}

} // namespace Dynarmic::FP
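A hedged illustration (not from the commit) of the round-to-odd narrowing that ReduceMantissa performs: when the low 64 bits of the 128-bit product are dropped, any lost bits are ORed into the new least significant bit. Keeping that sticky bit means a later round-to-nearest cannot land exactly on a halfway case that the narrowing had hidden, which is what makes the final rounding correct.

#include <cstdint>

// Same expression as mantissa.upper | static_cast<u64>(mantissa.lower != 0) above.
std::uint64_t NarrowRoundToOdd(std::uint64_t upper, std::uint64_t lower) {
    return upper | static_cast<std::uint64_t>(lower != 0);
}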
15
externals/dynarmic/src/common/fp/fused.h
vendored
Executable file
@@ -0,0 +1,15 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

struct FPUnpacked;

/// This function assumes all arguments have been normalized.
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);

} // namespace Dynarmic::FP
138
externals/dynarmic/src/common/fp/info.h
vendored
Executable file
@@ -0,0 +1,138 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/bit_util.h"
#include "common/common_types.h"

namespace Dynarmic::FP {

template<typename FPT>
struct FPInfo {};

template<>
struct FPInfo<u16> {
    static constexpr size_t total_width = 16;
    static constexpr size_t exponent_width = 5;
    static constexpr size_t explicit_mantissa_width = 10;
    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;

    static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
    static constexpr u32 sign_mask = 0x8000;
    static constexpr u32 exponent_mask = 0x7C00;
    static constexpr u32 mantissa_mask = 0x3FF;
    static constexpr u32 mantissa_msb = 0x200;

    static constexpr int exponent_min = -14;
    static constexpr int exponent_max = 15;
    static constexpr int exponent_bias = 15;

    static constexpr u16 Zero(bool sign) {
        return sign ? static_cast<u16>(sign_mask) : u16{0};
    }

    static constexpr u16 Infinity(bool sign) {
        return static_cast<u16>(exponent_mask | Zero(sign));
    }

    static constexpr u16 MaxNormal(bool sign) {
        return static_cast<u16>((exponent_mask - 1) | Zero(sign));
    }

    static constexpr u16 DefaultNaN() {
        return static_cast<u16>(exponent_mask | (u32(1) << (explicit_mantissa_width - 1)));
    }
};

template<>
struct FPInfo<u32> {
    static constexpr size_t total_width = 32;
    static constexpr size_t exponent_width = 8;
    static constexpr size_t explicit_mantissa_width = 23;
    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;

    static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
    static constexpr u32 sign_mask = 0x80000000;
    static constexpr u32 exponent_mask = 0x7F800000;
    static constexpr u32 mantissa_mask = 0x007FFFFF;
    static constexpr u32 mantissa_msb = 0x00400000;

    static constexpr int exponent_min = -126;
    static constexpr int exponent_max = 127;
    static constexpr int exponent_bias = 127;

    static constexpr u32 Zero(bool sign) {
        return sign ? sign_mask : 0;
    }

    static constexpr u32 Infinity(bool sign) {
        return exponent_mask | Zero(sign);
    }

    static constexpr u32 MaxNormal(bool sign) {
        return (exponent_mask - 1) | Zero(sign);
    }

    static constexpr u32 DefaultNaN() {
        return exponent_mask | (u32(1) << (explicit_mantissa_width - 1));
    }
};

template<>
struct FPInfo<u64> {
    static constexpr size_t total_width = 64;
    static constexpr size_t exponent_width = 11;
    static constexpr size_t explicit_mantissa_width = 52;
    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;

    static constexpr u64 implicit_leading_bit = u64(1) << explicit_mantissa_width;
    static constexpr u64 sign_mask = 0x8000'0000'0000'0000;
    static constexpr u64 exponent_mask = 0x7FF0'0000'0000'0000;
    static constexpr u64 mantissa_mask = 0x000F'FFFF'FFFF'FFFF;
    static constexpr u64 mantissa_msb = 0x0008'0000'0000'0000;

    static constexpr int exponent_min = -1022;
    static constexpr int exponent_max = 1023;
    static constexpr int exponent_bias = 1023;

    static constexpr u64 Zero(bool sign) {
        return sign ? sign_mask : 0;
    }

    static constexpr u64 Infinity(bool sign) {
        return exponent_mask | Zero(sign);
    }

    static constexpr u64 MaxNormal(bool sign) {
        return (exponent_mask - 1) | Zero(sign);
    }

    static constexpr u64 DefaultNaN() {
        return exponent_mask | (u64(1) << (explicit_mantissa_width - 1));
    }
};

/// value = (sign ? -1 : +1) * 2^exponent * value
/// @note We do not handle denormals. Denormals will static_assert.
template<typename FPT, bool sign, int exponent, FPT value>
constexpr FPT FPValue() {
    if constexpr (value == 0) {
        return FPInfo<FPT>::Zero(sign);
    }

    constexpr int point_position = static_cast<int>(FPInfo<FPT>::explicit_mantissa_width);
    constexpr int highest_bit = Common::HighestSetBit(value);
    constexpr int offset = point_position - highest_bit;
    constexpr int normalized_exponent = exponent - offset + point_position;
    static_assert(offset >= 0);
    static_assert(normalized_exponent >= FPInfo<FPT>::exponent_min && normalized_exponent <= FPInfo<FPT>::exponent_max);

    constexpr FPT mantissa = (value << offset) & FPInfo<FPT>::mantissa_mask;
    constexpr FPT biased_exponent = static_cast<FPT>(normalized_exponent + FPInfo<FPT>::exponent_bias);
    return FPT(FPInfo<FPT>::Zero(sign) | mantissa | (biased_exponent << FPInfo<FPT>::explicit_mantissa_width));
}

} // namespace Dynarmic::FP
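A hedged compile-time check (not from the commit): FPValue assembles an IEEE encoding from a sign, an exponent, and an integer mantissa scaled by 2^exponent, so 1.0f is value=1 with exponent=0 and 1.5f is value=3 with exponent=-1 (the same encoding FPRSqrtStepFused uses for its +1.5 constant below).

#include "common/fp/info.h"

using namespace Dynarmic::FP;

static_assert(FPValue<u32, false, 0, 1>() == 0x3F800000);   // 1.0f
static_assert(FPValue<u32, false, -1, 3>() == 0x3FC00000);  // 1.5f
static_assert(FPValue<u32, true, 0, 1>() == 0xBF800000);    // -1.0f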
46
externals/dynarmic/src/common/fp/mantissa_util.h
vendored
Executable file
@@ -0,0 +1,46 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/bit_util.h"
#include "common/common_types.h"

namespace Dynarmic::FP {

enum class ResidualError {
    Zero,
    LessThanHalf,
    Half,
    GreaterThanHalf,
};

inline ResidualError ResidualErrorOnRightShift(u64 mantissa, int shift_amount) {
    if (shift_amount <= 0 || mantissa == 0) {
        return ResidualError::Zero;
    }

    if (shift_amount > static_cast<int>(Common::BitSize<u64>())) {
        return Common::MostSignificantBit(mantissa) ? ResidualError::GreaterThanHalf : ResidualError::LessThanHalf;
    }

    const size_t half_bit_position = static_cast<size_t>(shift_amount - 1);
    const u64 half = static_cast<u64>(1) << half_bit_position;
    const u64 error_mask = Common::Ones<u64>(static_cast<size_t>(shift_amount));
    const u64 error = mantissa & error_mask;

    if (error == 0) {
        return ResidualError::Zero;
    }
    if (error < half) {
        return ResidualError::LessThanHalf;
    }
    if (error == half) {
        return ResidualError::Half;
    }
    return ResidualError::GreaterThanHalf;
}

} // namespace Dynarmic::FP
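A hedged example (not from the commit): ResidualErrorOnRightShift classifies what a right shift discards relative to half the weight of the new least significant bit, which is exactly what a rounding step needs to decide tie direction.

#include <cassert>
#include "common/fp/mantissa_util.h"

using Dynarmic::FP::ResidualError;
using Dynarmic::FP::ResidualErrorOnRightShift;

void Example() {
    // 0b1011 >> 2 discards 0b11: more than half of the new LSB weight (half is 0b10).
    assert(ResidualErrorOnRightShift(0b1011, 2) == ResidualError::GreaterThanHalf);
    // 0b1010 >> 2 discards 0b10: exactly half.
    assert(ResidualErrorOnRightShift(0b1010, 2) == ResidualError::Half);
    // 0b1001 >> 2 discards 0b01: less than half.
    assert(ResidualErrorOnRightShift(0b1001, 2) == ResidualError::LessThanHalf);
}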
17
externals/dynarmic/src/common/fp/op.h
vendored
Executable file
@@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/fp/op/FPCompare.h"
#include "common/fp/op/FPConvert.h"
#include "common/fp/op/FPMulAdd.h"
#include "common/fp/op/FPRecipEstimate.h"
#include "common/fp/op/FPRecipExponent.h"
#include "common/fp/op/FPRecipStepFused.h"
#include "common/fp/op/FPRoundInt.h"
#include "common/fp/op/FPRSqrtEstimate.h"
#include "common/fp/op/FPRSqrtStepFused.h"
#include "common/fp/op/FPToFixed.h"
40
externals/dynarmic/src/common/fp/op/FPCompare.cpp
vendored
Executable file
@@ -0,0 +1,40 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2019 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/op/FPCompare.h"
#include "common/fp/process_exception.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {

template <typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr) {
    const auto unpacked1 = FPUnpack(lhs, fpcr, fpsr);
    const auto unpacked2 = FPUnpack(rhs, fpcr, fpsr);
    const auto type1 = std::get<FPType>(unpacked1);
    const auto type2 = std::get<FPType>(unpacked2);
    const auto& value1 = std::get<FPUnpacked>(unpacked1);
    const auto& value2 = std::get<FPUnpacked>(unpacked2);

    if (type1 == FPType::QNaN || type1 == FPType::SNaN ||
        type2 == FPType::QNaN || type2 == FPType::SNaN) {
        if (type1 == FPType::SNaN || type2 == FPType::SNaN) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        }

        // Comparisons against NaN are never equal.
        return false;
    }

    return value1 == value2 || (type1 == FPType::Zero && type2 == FPType::Zero);
}

template bool FPCompareEQ<u16>(u16 lhs, u16 rhs, FPCR fpcr, FPSR& fpsr);
template bool FPCompareEQ<u32>(u32 lhs, u32 rhs, FPCR fpcr, FPSR& fpsr);
template bool FPCompareEQ<u64>(u64 lhs, u64 rhs, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
16
externals/dynarmic/src/common/fp/op/FPCompare.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2019 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template <typename FPT>
bool FPCompareEQ(FPT lhs, FPT rhs, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
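A hedged usage sketch (not from the commit): IEEE equality via FPCompareEQ. +0 and -0 compare equal; a quiet NaN compares unequal without raising InvalidOp, while a signaling NaN sets the cumulative IOC bit in the FPSR.

#include <cassert>
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/op/FPCompare.h"

void Example() {
    Dynarmic::FP::FPCR fpcr;
    Dynarmic::FP::FPSR fpsr;

    assert(Dynarmic::FP::FPCompareEQ<u32>(0x00000000, 0x80000000, fpcr, fpsr));   // +0 == -0
    assert(!Dynarmic::FP::FPCompareEQ<u32>(0x7FC00000, 0x7FC00000, fpcr, fpsr));  // qNaN != qNaN
    assert(!fpsr.IOC());                                                          // qNaN raised nothing

    Dynarmic::FP::FPCompareEQ<u32>(0x7F800001, 0x3F800000, fpcr, fpsr);           // sNaN operand
    assert(fpsr.IOC());                                                           // InvalidOp accumulated
}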
89
externals/dynarmic/src/common/fp/op/FPConvert.cpp
vendored
Executable file
@@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2019 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op/FPConvert.h"
#include "common/fp/process_exception.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {
namespace {
template <typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvertNaN(FPT_FROM op) {
    const bool sign = Common::Bit<Common::BitSize<FPT_FROM>() - 1>(op);
    const u64 frac = [op] {
        if constexpr (sizeof(FPT_FROM) == sizeof(u64)) {
            return Common::Bits<0, 50>(op);
        } else if constexpr (sizeof(FPT_FROM) == sizeof(u32)) {
            return u64{Common::Bits<0, 21>(op)} << 29;
        } else {
            return u64{Common::Bits<0, 8>(op)} << 42;
        }
    }();

    const size_t dest_bit_size = Common::BitSize<FPT_TO>();
    const u64 shifted_sign = u64{sign} << (dest_bit_size - 1);
    const u64 exponent = Common::Ones<u64>(dest_bit_size - FPInfo<FPT_TO>::explicit_mantissa_width);

    if constexpr (sizeof(FPT_TO) == sizeof(u64)) {
        return FPT_TO(shifted_sign | exponent << 51 | frac);
    } else if constexpr (sizeof(FPT_TO) == sizeof(u32)) {
        return FPT_TO(shifted_sign | exponent << 22 | Common::Bits<29, 50>(frac));
    } else {
        return FPT_TO(shifted_sign | exponent << 9 | Common::Bits<42, 50>(frac));
    }
}
} // Anonymous namespace

template <typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr) {
    const auto [type, sign, value] = FPUnpackCV<FPT_FROM>(op, fpcr, fpsr);
    const bool is_althp = Common::BitSize<FPT_TO>() == 16 && fpcr.AHP();

    if (type == FPType::SNaN || type == FPType::QNaN) {
        std::uintmax_t result{};

        if (is_althp) {
            result = FPInfo<FPT_TO>::Zero(sign);
        } else if (fpcr.DN()) {
            result = FPInfo<FPT_TO>::DefaultNaN();
        } else {
            result = FPConvertNaN<FPT_TO>(op);
        }

        if (type == FPType::SNaN || is_althp) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        }

        return FPT_TO(result);
    }

    if (type == FPType::Infinity) {
        if (is_althp) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
            return FPT_TO(u32{sign} << 15 | 0b111111111111111);
        }

        return FPInfo<FPT_TO>::Infinity(sign);
    }

    if (type == FPType::Zero) {
        return FPInfo<FPT_TO>::Zero(sign);
    }

    return FPRoundCV<FPT_TO>(value, fpcr, rounding_mode, fpsr);
}

template u16 FPConvert<u16, u32>(u32 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
template u16 FPConvert<u16, u64>(u64 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
template u32 FPConvert<u32, u16>(u16 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
template u32 FPConvert<u32, u64>(u64 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
template u64 FPConvert<u64, u16>(u16 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);
template u64 FPConvert<u64, u32>(u32 op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);

} // namespace Dynarmic::FP
17
externals/dynarmic/src/common/fp/op/FPConvert.h
vendored
Executable file
@@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2019 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;
enum class RoundingMode;

template <typename FPT_TO, typename FPT_FROM>
FPT_TO FPConvert(FPT_FROM op, FPCR fpcr, RoundingMode rounding_mode, FPSR& fpsr);

} // namespace Dynarmic::FP
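A hedged usage sketch (not from the commit): widening a half-precision value to single precision. 0x3C00 is 1.0 in IEEE half; widening a normal value is always exact, so no cumulative exception bits are set.

#include <cassert>
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/op/FPConvert.h"

void Example() {
    Dynarmic::FP::FPCR fpcr;
    Dynarmic::FP::FPSR fpsr;

    const u32 single = Dynarmic::FP::FPConvert<u32, u16>(0x3C00, fpcr, fpcr.RMode(), fpsr);
    assert(single == 0x3F800000);  // 1.0f
    assert(fpsr.Value() == 0);     // exact conversion, no flags accumulated
}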
79
externals/dynarmic/src/common/fp/op/FPMulAdd.cpp
vendored
Executable file
@@ -0,0 +1,79 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/fused.h"
#include "common/fp/op/FPMulAdd.h"
#include "common/fp/process_exception.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {

template<typename FPT>
FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
    const RoundingMode rounding = fpcr.RMode();

    const auto [typeA, signA, valueA] = FPUnpack(addend, fpcr, fpsr);
    const auto [type1, sign1, value1] = FPUnpack(op1, fpcr, fpsr);
    const auto [type2, sign2, value2] = FPUnpack(op2, fpcr, fpsr);

    const bool infA = typeA == FPType::Infinity;
    const bool inf1 = type1 == FPType::Infinity;
    const bool inf2 = type2 == FPType::Infinity;
    const bool zeroA = typeA == FPType::Zero;
    const bool zero1 = type1 == FPType::Zero;
    const bool zero2 = type2 == FPType::Zero;

    const auto maybe_nan = FPProcessNaNs3<FPT>(typeA, type1, type2, addend, op1, op2, fpcr, fpsr);

    if (typeA == FPType::QNaN && ((inf1 && zero2) || (zero1 && inf2))) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        return FPInfo<FPT>::DefaultNaN();
    }

    if (maybe_nan) {
        return *maybe_nan;
    }

    // Calculate properties of product (op1 * op2).
    const bool signP = sign1 != sign2;
    const bool infP = inf1 || inf2;
    const bool zeroP = zero1 || zero2;

    // Raise InvalidOp on (inf * zero) or when adding infinities of opposite signs (inf - inf).
    if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        return FPInfo<FPT>::DefaultNaN();
    }

    // Handle infinities
    if ((infA && !signA) || (infP && !signP)) {
        return FPInfo<FPT>::Infinity(false);
    }
    if ((infA && signA) || (infP && signP)) {
        return FPInfo<FPT>::Infinity(true);
    }

    // Result is exactly zero
    if (zeroA && zeroP && signA == signP) {
        return FPInfo<FPT>::Zero(signA);
    }

    const FPUnpacked result_value = FusedMulAdd(valueA, value1, value2);
    if (result_value.mantissa == 0) {
        return FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity);
    }
    return FPRound<FPT>(result_value, fpcr, fpsr);
}

template u16 FPMulAdd<u16>(u16 addend, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPMulAdd<u32>(u32 addend, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPMulAdd<u64>(u64 addend, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
16
externals/dynarmic/src/common/fp/op/FPMulAdd.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template<typename FPT>
FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
17
externals/dynarmic/src/common/fp/op/FPNeg.h
vendored
Executable file
@@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/fp/info.h"

namespace Dynarmic::FP {

template<typename FPT>
constexpr FPT FPNeg(FPT op) {
    return op ^ FPInfo<FPT>::sign_mask;
}

} // namespace Dynarmic::FP
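A hedged note (not from the commit): FPNeg is a raw sign-bit flip, matching the AArch64 FNEG behaviour of negating even NaNs and zeros without raising any exception. Two compile-time checks:

#include "common/common_types.h"
#include "common/fp/op/FPNeg.h"

static_assert(Dynarmic::FP::FPNeg<u32>(0x3F800000) == 0xBF800000);  // 1.0f -> -1.0f
static_assert(Dynarmic::FP::FPNeg<u32>(0x00000000) == 0x80000000);  // +0.0f -> -0.0f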
56
externals/dynarmic/src/common/fp/op/FPRSqrtEstimate.cpp
vendored
Executable file
@@ -0,0 +1,56 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op/FPRSqrtEstimate.h"
#include "common/fp/process_exception.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"
#include "common/math_util.h"
#include "common/safe_ops.h"

namespace Dynarmic::FP {

template<typename FPT>
FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
    const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);

    if (type == FPType::SNaN || type == FPType::QNaN) {
        return FPProcessNaN(type, op, fpcr, fpsr);
    }

    if (type == FPType::Zero) {
        FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
        return FPInfo<FPT>::Infinity(sign);
    }

    if (sign) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        return FPInfo<FPT>::DefaultNaN();
    }

    if (type == FPType::Infinity) {
        return FPInfo<FPT>::Zero(false);
    }

    const int result_exponent = (-(value.exponent + 1)) >> 1;
    const bool was_exponent_odd = (value.exponent) % 2 == 0;

    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - (was_exponent_odd ? 7 : 8));
    const u64 estimate = Common::RecipSqrtEstimate(scaled);

    const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
    const FPT bits_mantissa = static_cast<FPT>(estimate << (FPInfo<FPT>::explicit_mantissa_width - 8));
    return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask);
}

template u16 FPRSqrtEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
16
externals/dynarmic/src/common/fp/op/FPRSqrtEstimate.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template<typename FPT>
FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
56
externals/dynarmic/src/common/fp/op/FPRSqrtStepFused.cpp
vendored
Executable file
@@ -0,0 +1,56 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/fused.h"
#include "common/fp/info.h"
#include "common/fp/op/FPNeg.h"
#include "common/fp/op/FPRSqrtStepFused.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {

template<typename FPT>
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
    op1 = FPNeg(op1);

    const auto [type1, sign1, value1] = FPUnpack(op1, fpcr, fpsr);
    const auto [type2, sign2, value2] = FPUnpack(op2, fpcr, fpsr);

    if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
        return *maybe_nan;
    }

    const bool inf1 = type1 == FPType::Infinity;
    const bool inf2 = type2 == FPType::Infinity;
    const bool zero1 = type1 == FPType::Zero;
    const bool zero2 = type2 == FPType::Zero;

    if ((inf1 && zero2) || (zero1 && inf2)) {
        // return +1.5
        return FPValue<FPT, false, -1, 3>();
    }

    if (inf1 || inf2) {
        return FPInfo<FPT>::Infinity(sign1 != sign2);
    }

    // result_value = (3.0 + (value1 * value2)) / 2.0
    FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 3), value1, value2);
    result_value.exponent--;

    if (result_value.mantissa == 0) {
        return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
    }
    return FPRound<FPT>(result_value, fpcr, fpsr);
}

template u16 FPRSqrtStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
|
16
externals/dynarmic/src/common/fp/op/FPRSqrtStepFused.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template<typename FPT>
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
98
externals/dynarmic/src/common/fp/op/FPRecipEstimate.cpp
vendored
Executable file
@@ -0,0 +1,98 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <tuple>

#include "common/assert.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op/FPRecipEstimate.h"
#include "common/fp/process_exception.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"
#include "common/math_util.h"

namespace Dynarmic::FP {

template<typename FPT>
FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
    FPType type;
    bool sign;
    FPUnpacked value;
    std::tie(type, sign, value) = FPUnpack<FPT>(op, fpcr, fpsr);

    if (type == FPType::SNaN || type == FPType::QNaN) {
        return FPProcessNaN(type, op, fpcr, fpsr);
    }

    if (type == FPType::Infinity) {
        return FPInfo<FPT>::Zero(sign);
    }

    if (type == FPType::Zero) {
        FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
        return FPInfo<FPT>::Infinity(sign);
    }

    if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
        const bool overflow_to_inf = [&]{
            switch (fpcr.RMode()) {
            case RoundingMode::ToNearest_TieEven:
                return true;
            case RoundingMode::TowardsPlusInfinity:
                return !sign;
            case RoundingMode::TowardsMinusInfinity:
                return sign;
            case RoundingMode::TowardsZero:
                return false;
            default:
                UNREACHABLE();
            }
        }();

        FPProcessException(FPExc::Overflow, fpcr, fpsr);
        FPProcessException(FPExc::Inexact, fpcr, fpsr);
        return overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
    }

    if ((fpcr.FZ() && !std::is_same_v<FPT, u16>) || (fpcr.FZ16() && std::is_same_v<FPT, u16>)) {
        if (value.exponent >= -FPInfo<FPT>::exponent_min) {
            fpsr.UFC(true);
            return FPInfo<FPT>::Zero(sign);
        }
    }

    const u64 scaled = value.mantissa >> (normalized_point_position - 8);
    u64 estimate = static_cast<u64>(Common::RecipEstimate(scaled)) << (FPInfo<FPT>::explicit_mantissa_width - 8);
    int result_exponent = -(value.exponent + 1);
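    // A large finite input reciprocates to a value below the minimum normal;
    // the estimate is then encoded as a denormal by folding in the implicit leading bit.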
    if (result_exponent < FPInfo<FPT>::exponent_min) {
        switch (result_exponent) {
        case (FPInfo<FPT>::exponent_min - 1):
            estimate |= FPInfo<FPT>::implicit_leading_bit;
            estimate >>= 1;
            break;
        case (FPInfo<FPT>::exponent_min - 2):
            estimate |= FPInfo<FPT>::implicit_leading_bit;
            estimate >>= 2;
            result_exponent++;
            break;
        default:
            UNREACHABLE();
        }
    }

    const FPT bits_sign = FPInfo<FPT>::Zero(sign);
    const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
    const FPT bits_mantissa = static_cast<FPT>(estimate);
    return FPT((bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask) | bits_sign);
}

template u16 FPRecipEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
16
externals/dynarmic/src/common/fp/op/FPRecipEstimate.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template<typename FPT>
FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
57
externals/dynarmic/src/common/fp/op/FPRecipExponent.cpp
vendored
Executable file
@@ -0,0 +1,57 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/common_types.h"
#include "common/bit_util.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/op/FPRecipExponent.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {
namespace {
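// Extracts the raw biased exponent field: bits 23-30 for binary32,
// bits 52-62 for binary64, bits 10-14 for binary16.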
template <typename FPT>
FPT DetermineExponentValue(size_t value) {
    if constexpr (sizeof(FPT) == sizeof(u32)) {
        return static_cast<FPT>(Common::Bits<23, 30>(value));
    } else if constexpr (sizeof(FPT) == sizeof(u64)) {
        return static_cast<FPT>(Common::Bits<52, 62>(value));
    } else {
        return static_cast<FPT>(Common::Bits<10, 14>(value));
    }
}
} // Anonymous namespace

template <typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
    const auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
    (void)value;

    if (type == FPType::SNaN || type == FPType::QNaN) {
        return FPProcessNaN(type, op, fpcr, fpsr);
    }

    const FPT sign_bits = FPInfo<FPT>::Zero(sign);
    const FPT exponent = DetermineExponentValue<FPT>(op);

    // Zero and denormals
    if (exponent == 0) {
        const FPT max_exponent = Common::Ones<FPT>(FPInfo<FPT>::exponent_width) - 1;
        return FPT(sign_bits | (max_exponent << FPInfo<FPT>::explicit_mantissa_width));
    }

    // Infinities and normals
    const FPT negated_exponent = FPT(~exponent);
    const FPT adjusted_exponent = FPT(negated_exponent << FPInfo<FPT>::explicit_mantissa_width) & FPInfo<FPT>::exponent_mask;
    return FPT(sign_bits | adjusted_exponent);
}

template u16 FPRecipExponent<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
16
externals/dynarmic/src/common/fp/op/FPRecipExponent.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2019 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template <typename FPT>
FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
55
externals/dynarmic/src/common/fp/op/FPRecipStepFused.cpp
vendored
Executable file
@@ -0,0 +1,55 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/fused.h"
#include "common/fp/info.h"
#include "common/fp/op/FPNeg.h"
#include "common/fp/op/FPRecipStepFused.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {

template<typename FPT>
FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
    op1 = FPNeg(op1);

    const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
    const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);

    if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
        return *maybe_nan;
    }

    const bool inf1 = type1 == FPType::Infinity;
    const bool inf2 = type2 == FPType::Infinity;
    const bool zero1 = type1 == FPType::Zero;
    const bool zero2 = type2 == FPType::Zero;

    if ((inf1 && zero2) || (zero1 && inf2)) {
        // return +2.0
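        // (FPValue<FPT, false, 0, 2> encodes the constant 2 * 2^0.)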
        return FPValue<FPT, false, 0, 2>();
    }

    if (inf1 || inf2) {
        return FPInfo<FPT>::Infinity(sign1 != sign2);
    }

    // result_value = 2.0 + (value1 * value2)
    const FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2);

    if (result_value.mantissa == 0) {
        return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
    }
    return FPRound<FPT>(result_value, fpcr, fpsr);
}

template u16 FPRecipStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
16
externals/dynarmic/src/common/fp/op/FPRecipStepFused.h
vendored
Executable file
@@ -0,0 +1,16 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

template<typename FPT>
FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
95
externals/dynarmic/src/common/fp/op/FPRoundInt.cpp
vendored
Executable file
@@ -0,0 +1,95 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/mantissa_util.h"
#include "common/fp/op/FPRoundInt.h"
#include "common/fp/process_exception.h"
#include "common/fp/process_nan.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/unpacked.h"
#include "common/safe_ops.h"

namespace Dynarmic::FP {

template<typename FPT>
u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr) {
    ASSERT(rounding != RoundingMode::ToOdd);

    auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);

    if (type == FPType::SNaN || type == FPType::QNaN) {
        return FPProcessNaN(type, op, fpcr, fpsr);
    }

    if (type == FPType::Infinity) {
        return FPInfo<FPT>::Infinity(sign);
    }

    if (type == FPType::Zero) {
        return FPInfo<FPT>::Zero(sign);
    }

    // Reshift decimal point back to bit zero.
    const int exponent = value.exponent - normalized_point_position;

    if (exponent >= 0) {
        // Guaranteed to be an integer
        return op;
    }

    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
    const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
    int_result = Safe::ArithmeticShiftLeft(int_result, exponent);
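
    // The ResidualError from the shift records what was discarded (exact, less
    // than half, exactly half, or more than half a unit) and drives the rounding.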
    bool round_up = false;
    switch (rounding) {
    case RoundingMode::ToNearest_TieEven:
        round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result));
        break;
    case RoundingMode::TowardsPlusInfinity:
        round_up = error != ResidualError::Zero;
        break;
    case RoundingMode::TowardsMinusInfinity:
        round_up = false;
        break;
    case RoundingMode::TowardsZero:
        round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result);
        break;
    case RoundingMode::ToNearest_TieAwayFromZero:
        round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result));
        break;
    case RoundingMode::ToOdd:
        UNREACHABLE();
    }

    if (round_up) {
        int_result++;
    }

    const bool new_sign = Common::MostSignificantBit(int_result);
    const u64 abs_int_result = new_sign ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);

    const FPT result = int_result == 0
                     ? FPInfo<FPT>::Zero(sign)
                     : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);

    if (error != ResidualError::Zero && exact) {
        FPProcessException(FPExc::Inexact, fpcr, fpsr);
    }

    return result;
}

template u64 FPRoundInt<u16>(u16 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
template u64 FPRoundInt<u32>(u32 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);
template u64 FPRoundInt<u64>(u64 op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);

} // namespace Dynarmic::FP
19
externals/dynarmic/src/common/fp/op/FPRoundInt.h
vendored
Executable file
@@ -0,0 +1,19 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/common_types.h"

namespace Dynarmic::FP {

class FPCR;
class FPSR;
enum class RoundingMode;

template<typename FPT>
u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr);

} // namespace Dynarmic::FP
101
externals/dynarmic/src/common/fp/op/FPToFixed.cpp
vendored
Executable file
@@ -0,0 +1,101 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/mantissa_util.h"
#include "common/fp/op/FPToFixed.h"
#include "common/fp/process_exception.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/unpacked.h"
#include "common/safe_ops.h"

namespace Dynarmic::FP {

template<typename FPT>
u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    ASSERT(rounding != RoundingMode::ToOdd);
    ASSERT(ibits <= 64);
    ASSERT(fbits <= ibits);

    auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);

    if (type == FPType::SNaN || type == FPType::QNaN) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
    }

    // Handle zero
    if (value.mantissa == 0) {
        return 0;
    }

    if (sign && unsigned_) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        return 0;
    }

    // value *= 2.0^fbits and reshift the decimal point back to bit zero.
    int exponent = value.exponent + static_cast<int>(fbits) - normalized_point_position;

    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
    const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
    int_result = Safe::ArithmeticShiftLeft(int_result, exponent);

    bool round_up = false;
    switch (rounding) {
    case RoundingMode::ToNearest_TieEven:
        round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result));
        break;
    case RoundingMode::TowardsPlusInfinity:
        round_up = error != ResidualError::Zero;
        break;
    case RoundingMode::TowardsMinusInfinity:
        round_up = false;
        break;
    case RoundingMode::TowardsZero:
        round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result);
        break;
    case RoundingMode::ToNearest_TieAwayFromZero:
        round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result));
        break;
    case RoundingMode::ToOdd:
        UNREACHABLE();
    }

    if (round_up) {
        int_result++;
    }

    // Detect Overflow
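    // Overflow occurs when the scaled magnitude needs more than ibits bits
    // (unsigned) or ibits-1 magnitude bits plus a sign bit (signed).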
    const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1);
    if (exponent >= min_exponent_for_overflow) {
        // Positive overflow
        if (unsigned_ || !sign) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
            return Common::Ones<u64>(ibits - (unsigned_ ? 0 : 1));
        }

        // Negative overflow
        const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
        if (!(exponent == min_exponent_for_overflow && int_result == min_value)) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
            return static_cast<u64>(1) << (ibits - 1);
        }
    }

    if (error != ResidualError::Zero) {
        FPProcessException(FPExc::Inexact, fpcr, fpsr);
    }
    return int_result & Common::Ones<u64>(ibits);
}

template u64 FPToFixed<u16>(size_t ibits, u16 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);

} // namespace Dynarmic::FP
19
externals/dynarmic/src/common/fp/op/FPToFixed.h
vendored
Executable file
@@ -0,0 +1,19 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/common_types.h"

namespace Dynarmic::FP {

class FPCR;
class FPSR;
enum class RoundingMode;

template<typename FPT>
u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);

} // namespace Dynarmic::FP
57
externals/dynarmic/src/common/fp/process_exception.cpp
vendored
Executable file
@@ -0,0 +1,57 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/assert.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/process_exception.h"

namespace Dynarmic::FP {

void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) {
    switch (exception) {
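    // Each FPCR trap-enable bit would route the exception to a trap handler.
    // Trapping is unimplemented, so only the cumulative FPSR status flag is set.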
    case FPExc::InvalidOp:
        if (fpcr.IOE()) {
            ASSERT_FALSE("Raising floating point exceptions unimplemented");
        }
        fpsr.IOC(true);
        break;
    case FPExc::DivideByZero:
        if (fpcr.DZE()) {
            ASSERT_FALSE("Raising floating point exceptions unimplemented");
        }
        fpsr.DZC(true);
        break;
    case FPExc::Overflow:
        if (fpcr.OFE()) {
            ASSERT_FALSE("Raising floating point exceptions unimplemented");
        }
        fpsr.OFC(true);
        break;
    case FPExc::Underflow:
        if (fpcr.UFE()) {
            ASSERT_FALSE("Raising floating point exceptions unimplemented");
        }
        fpsr.UFC(true);
        break;
    case FPExc::Inexact:
        if (fpcr.IXE()) {
            ASSERT_FALSE("Raising floating point exceptions unimplemented");
        }
        fpsr.IXC(true);
        break;
    case FPExc::InputDenorm:
        if (fpcr.IDE()) {
            ASSERT_FALSE("Raising floating point exceptions unimplemented");
        }
        fpsr.IDC(true);
        break;
    default:
        UNREACHABLE();
        break;
    }
}

} // namespace Dynarmic::FP
24
externals/dynarmic/src/common/fp/process_exception.h
vendored
Executable file
@@ -0,0 +1,24 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

class FPCR;
class FPSR;

enum class FPExc {
    InvalidOp,
    DivideByZero,
    Overflow,
    Underflow,
    Inexact,
    InputDenorm,
};

void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
91
externals/dynarmic/src/common/fp/process_nan.cpp
vendored
Executable file
@@ -0,0 +1,91 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <optional>

#include "common/assert.h"
#include "common/bit_util.h"
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/process_exception.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"

namespace Dynarmic::FP {

template<typename FPT>
FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) {
    ASSERT(type == FPType::QNaN || type == FPType::SNaN);

    constexpr size_t topfrac = FPInfo<FPT>::explicit_mantissa_width - 1;

    FPT result = op;

    if (type == FPType::SNaN) {
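        // Quieting an SNaN sets its most significant fraction bit (IEEE 754 / ARM convention).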
        result = Common::ModifyBit<topfrac>(op, true);
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
    }

    if (fpcr.DN()) {
        result = FPInfo<FPT>::DefaultNaN();
    }

    return result;
}

template u16 FPProcessNaN<u16>(FPType type, u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPProcessNaN<u32>(FPType type, u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPProcessNaN<u64>(FPType type, u64 op, FPCR fpcr, FPSR& fpsr);
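
// ARM NaN propagation priority: any SNaN beats any QNaN, and earlier operands
// beat later ones (the three-operand variant below follows the same rule).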
template<typename FPT>
std::optional<FPT> FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
    if (type1 == FPType::SNaN) {
        return FPProcessNaN<FPT>(type1, op1, fpcr, fpsr);
    }
    if (type2 == FPType::SNaN) {
        return FPProcessNaN<FPT>(type2, op2, fpcr, fpsr);
    }
    if (type1 == FPType::QNaN) {
        return FPProcessNaN<FPT>(type1, op1, fpcr, fpsr);
    }
    if (type2 == FPType::QNaN) {
        return FPProcessNaN<FPT>(type2, op2, fpcr, fpsr);
    }
    return std::nullopt;
}

template std::optional<u16> FPProcessNaNs<u16>(FPType type1, FPType type2, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template std::optional<u32> FPProcessNaNs<u32>(FPType type1, FPType type2, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template std::optional<u64> FPProcessNaNs<u64>(FPType type1, FPType type2, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);

template<typename FPT>
std::optional<FPT> FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT op1, FPT op2, FPT op3, FPCR fpcr, FPSR& fpsr) {
    if (type1 == FPType::SNaN) {
        return FPProcessNaN<FPT>(type1, op1, fpcr, fpsr);
    }
    if (type2 == FPType::SNaN) {
        return FPProcessNaN<FPT>(type2, op2, fpcr, fpsr);
    }
    if (type3 == FPType::SNaN) {
        return FPProcessNaN<FPT>(type3, op3, fpcr, fpsr);
    }
    if (type1 == FPType::QNaN) {
        return FPProcessNaN<FPT>(type1, op1, fpcr, fpsr);
    }
    if (type2 == FPType::QNaN) {
        return FPProcessNaN<FPT>(type2, op2, fpcr, fpsr);
    }
    if (type3 == FPType::QNaN) {
        return FPProcessNaN<FPT>(type3, op3, fpcr, fpsr);
    }
    return std::nullopt;
}

template std::optional<u16> FPProcessNaNs3<u16>(FPType type1, FPType type2, FPType type3, u16 op1, u16 op2, u16 op3, FPCR fpcr, FPSR& fpsr);
template std::optional<u32> FPProcessNaNs3<u32>(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr);
template std::optional<u64> FPProcessNaNs3<u64>(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
25
externals/dynarmic/src/common/fp/process_nan.h
vendored
Executable file
@@ -0,0 +1,25 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <optional>

namespace Dynarmic::FP {

class FPCR;
class FPSR;
enum class FPType;

template<typename FPT>
FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr);

template<typename FPT>
std::optional<FPT> FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);

template<typename FPT>
std::optional<FPT> FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT op1, FPT op2, FPT op3, FPCR fpcr, FPSR& fpsr);

} // namespace Dynarmic::FP
27
externals/dynarmic/src/common/fp/rounding_mode.h
vendored
Executable file
@@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::FP {

/// Ordering of first four values is important as they correspond to bits in FPCR.
enum class RoundingMode {
    /// Round to nearest floating point. If there is a tie, round to nearest even digit in required position.
    ToNearest_TieEven,
    /// Round up towards positive infinity.
    TowardsPlusInfinity,
    /// Round downwards towards negative infinity.
    TowardsMinusInfinity,
    /// Truncate towards zero.
    TowardsZero,
    /// Round to nearest floating point. If there is a tie, round away from zero.
    ToNearest_TieAwayFromZero,
    /// Von Neumann rounding (as modified by Brent). Also known as sticky rounding.
    /// Set the least significant bit to 1 if the result is not exact.
    ToOdd,
};
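
// For reference: FPCR.RMode encodes the first four values directly
// (0b00 ToNearest_TieEven, 0b01 TowardsPlusInfinity, 0b10 TowardsMinusInfinity, 0b11 TowardsZero).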

} // namespace Dynarmic::FP
191
externals/dynarmic/src/common/fp/unpacked.cpp
vendored
Executable file
@@ -0,0 +1,191 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "common/fp/fpsr.h"
#include "common/fp/info.h"
#include "common/fp/mantissa_util.h"
#include "common/fp/process_exception.h"
#include "common/fp/rounding_mode.h"
#include "common/fp/unpacked.h"
#include "common/safe_ops.h"

namespace Dynarmic::FP {

template<typename FPT>
std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, [[maybe_unused]] FPSR& fpsr) {
    constexpr size_t sign_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width;
    constexpr size_t exponent_high_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width - 1;
    constexpr size_t exponent_low_bit = FPInfo<FPT>::explicit_mantissa_width;
    constexpr size_t mantissa_high_bit = FPInfo<FPT>::explicit_mantissa_width - 1;
    constexpr size_t mantissa_low_bit = 0;
    constexpr int denormal_exponent = FPInfo<FPT>::exponent_min - int(FPInfo<FPT>::explicit_mantissa_width);

    constexpr bool is_half_precision = std::is_same_v<FPT, u16>;
    const bool sign = Common::Bit<sign_bit>(op);
    const FPT exp_raw = Common::Bits<exponent_low_bit, exponent_high_bit>(op);
    const FPT frac_raw = Common::Bits<mantissa_low_bit, mantissa_high_bit>(op);

    if (exp_raw == 0) {
        if constexpr (is_half_precision) {
            if (frac_raw == 0 || fpcr.FZ16()) {
                return {FPType::Zero, sign, {sign, 0, 0}};
            }
            return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
        } else {
            if (frac_raw == 0 || fpcr.FZ()) {
                if (frac_raw != 0) {
                    FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
                }
                return {FPType::Zero, sign, {sign, 0, 0}};
            }

            return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
        }
    }
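
    // In the Alternative Half-Precision format (FPCR.AHP) an all-ones exponent
    // encodes ordinary normals, so infinities and NaNs exist only when AHP is
    // disabled or the type is wider than half-precision.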
    const bool exp_all_ones = exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width);
    const bool ahp_disabled = is_half_precision && !fpcr.AHP();
    if ((exp_all_ones && !is_half_precision) || (exp_all_ones && ahp_disabled)) {
        if (frac_raw == 0) {
            return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
        }

        const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
        return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
    }

    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias;
    const u64 frac = static_cast<u64>(frac_raw | FPInfo<FPT>::implicit_leading_bit) << (normalized_point_position - FPInfo<FPT>::explicit_mantissa_width);
    return {FPType::Nonzero, sign, {sign, exp, frac}};
}

template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

template<size_t F>
std::tuple<bool, int, u64, ResidualError> Normalize(FPUnpacked op, int extra_right_shift = 0) {
    const int highest_set_bit = Common::HighestSetBit(op.mantissa);
    const int shift_amount = highest_set_bit - static_cast<int>(F) + extra_right_shift;
    const u64 mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount);
    const ResidualError error = ResidualErrorOnRightShift(op.mantissa, shift_amount);
    const int exponent = op.exponent + highest_set_bit - normalized_point_position;
    return std::make_tuple(op.sign, exponent, mantissa, error);
}

template<typename FPT>
FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    ASSERT(op.mantissa != 0);
    ASSERT(rounding != RoundingMode::ToNearest_TieAwayFromZero);

    constexpr int minimum_exp = FPInfo<FPT>::exponent_min;
    constexpr size_t E = FPInfo<FPT>::exponent_width;
    constexpr size_t F = FPInfo<FPT>::explicit_mantissa_width;
    constexpr bool isFP16 = FPInfo<FPT>::total_width == 16;

    auto [sign, exponent, mantissa, error] = Normalize<F>(op);

    if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
        fpsr.UFC(true);
        return FPInfo<FPT>::Zero(sign);
    }

    int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
    if (biased_exp == 0) {
        std::tie(sign, exponent, mantissa, error) = Normalize<F>(op, minimum_exp - exponent);
    }

    if (biased_exp == 0 && (error != ResidualError::Zero || fpcr.UFE())) {
        FPProcessException(FPExc::Underflow, fpcr, fpsr);
    }
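
    // ToNearest_TieAwayFromZero was excluded by the ASSERT above; ToOdd takes the
    // default branch and is applied as a sticky bit after the increment below.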
    bool round_up = false, overflow_to_inf = false;
    switch (rounding) {
    case RoundingMode::ToNearest_TieEven: {
        round_up = (error > ResidualError::Half) || (error == ResidualError::Half && Common::Bit<0>(mantissa));
        overflow_to_inf = true;
        break;
    }
    case RoundingMode::TowardsPlusInfinity:
        round_up = error != ResidualError::Zero && !sign;
        overflow_to_inf = !sign;
        break;
    case RoundingMode::TowardsMinusInfinity:
        round_up = error != ResidualError::Zero && sign;
        overflow_to_inf = sign;
        break;
    default:
        break;
    }

    if (round_up) {
        if ((mantissa & FPInfo<FPT>::mantissa_mask) == FPInfo<FPT>::mantissa_mask) {
            // Overflow on rounding up is going to happen
            if (mantissa == FPInfo<FPT>::mantissa_mask) {
                // Rounding up from denormal to normal
                mantissa++;
                biased_exp++;
            } else {
                // Rounding up to next exponent
                mantissa = (mantissa + 1) / 2;
                biased_exp++;
            }
        } else {
            mantissa++;
        }
    }

    if (error != ResidualError::Zero && rounding == RoundingMode::ToOdd) {
        mantissa = Common::ModifyBit<0>(mantissa, true);
    }

    FPT result = 0;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4127) // C4127: conditional expression is constant
#endif
    if (!isFP16 || !fpcr.AHP()) {
#ifdef _MSC_VER
#pragma warning(pop)
#endif
        constexpr int max_biased_exp = (1 << E) - 1;
        if (biased_exp >= max_biased_exp) {
            result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
            FPProcessException(FPExc::Overflow, fpcr, fpsr);
            FPProcessException(FPExc::Inexact, fpcr, fpsr);
        } else {
            result = sign ? 1 : 0;
            result <<= E;
            result += FPT(biased_exp);
            result <<= F;
            result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
            if (error != ResidualError::Zero) {
                FPProcessException(FPExc::Inexact, fpcr, fpsr);
            }
        }
    } else {
        constexpr int max_biased_exp = (1 << E);
        if (biased_exp >= max_biased_exp) {
            result = sign ? 0xFFFF : 0x7FFF;
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        } else {
            result = sign ? 1 : 0;
            result <<= E;
            result += FPT(biased_exp);
            result <<= F;
            result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
            if (error != ResidualError::Zero) {
                FPProcessException(FPExc::Inexact, fpcr, fpsr);
            }
        }
    }
    return result;
}

template u16 FPRoundBase<u16>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u32 FPRoundBase<u32>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
template u64 FPRoundBase<u64>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);

} // namespace Dynarmic::FP
88
externals/dynarmic/src/common/fp/unpacked.h
vendored
Executable file
@@ -0,0 +1,88 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <tuple>

#include "common/common_types.h"
#include "common/fp/fpcr.h"

namespace Dynarmic::FP {

class FPSR;
enum class RoundingMode;

enum class FPType {
    Nonzero,
    Zero,
    Infinity,
    QNaN,
    SNaN,
};

constexpr size_t normalized_point_position = 62;

/// value = (sign ? -1 : +1) * mantissa/(2^62) * 2^exponent
/// 63rd bit of mantissa is always set (unless value is zero)
struct FPUnpacked {
    bool sign;
    int exponent;
    u64 mantissa;
};
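
// Example: 1.0 is {sign=false, exponent=0, mantissa=u64(1) << 62};
// 0.75 is {sign=false, exponent=-1, mantissa=u64(0b11) << 61}.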

inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) {
    return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa);
}

/// return value = (sign ? -1 : +1) * value * 2^exponent
constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) {
    if (value == 0) {
        return {sign, 0, 0};
    }

    const int highest_bit = Common::HighestSetBit(value);
    const int offset = static_cast<int>(normalized_point_position) - highest_bit;
    value <<= offset;
    exponent -= offset - normalized_point_position;
    return {sign, exponent, value};
}

template<typename FPT>
std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr);

template<typename FPT>
std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
    fpcr.AHP(false);
    return FPUnpackBase(op, fpcr, fpsr);
}

template<typename FPT>
std::tuple<FPType, bool, FPUnpacked> FPUnpackCV(FPT op, FPCR fpcr, FPSR& fpsr) {
    fpcr.FZ16(false);
    return FPUnpackBase(op, fpcr, fpsr);
}

template<typename FPT>
FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);

template<typename FPT>
FPT FPRound(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    fpcr.AHP(false);
    return FPRoundBase<FPT>(op, fpcr, rounding, fpsr);
}

template<typename FPT>
FPT FPRoundCV(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    fpcr.FZ16(false);
    return FPRoundBase<FPT>(op, fpcr, rounding, fpsr);
}

template<typename FPT>
FPT FPRound(FPUnpacked op, FPCR fpcr, FPSR& fpsr) {
    return FPRound<FPT>(op, fpcr, fpcr.RMode(), fpsr);
}

} // namespace Dynarmic::FP
99
externals/dynarmic/src/common/fp/util.h
vendored
Executable file
@@ -0,0 +1,99 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <optional>

#include "common/fp/fpcr.h"
#include "common/fp/info.h"

namespace Dynarmic::FP {

/// Is floating point value a zero?
template<typename FPT>
inline bool IsZero(FPT value, FPCR fpcr) {
    if (fpcr.FZ()) {
        return (value & FPInfo<FPT>::exponent_mask) == 0;
    }
    return (value & ~FPInfo<FPT>::sign_mask) == 0;
}

/// Is floating point value an infinity?
template<typename FPT>
constexpr bool IsInf(FPT value) {
    return (value & ~FPInfo<FPT>::sign_mask) == FPInfo<FPT>::Infinity(false);
}

/// Is floating point value a QNaN?
template<typename FPT>
constexpr bool IsQNaN(FPT value) {
    constexpr FPT qnan_bits = FPInfo<FPT>::exponent_mask | FPInfo<FPT>::mantissa_msb;
    return (value & qnan_bits) == qnan_bits;
}

/// Is floating point value a SNaN?
template<typename FPT>
constexpr bool IsSNaN(FPT value) {
    constexpr FPT qnan_bits = FPInfo<FPT>::exponent_mask | FPInfo<FPT>::mantissa_msb;
    constexpr FPT snan_bits = FPInfo<FPT>::exponent_mask;
    return (value & qnan_bits) == snan_bits && (value & FPInfo<FPT>::mantissa_mask) != 0;
}

/// Is floating point value a NaN?
template<typename FPT>
constexpr bool IsNaN(FPT value) {
    return IsQNaN(value) || IsSNaN(value);
}

/// Given a single argument, return the NaN value which would be returned by an ARM processor.
/// If the argument isn't a NaN, returns std::nullopt.
template<typename FPT>
constexpr std::optional<FPT> ProcessNaNs(FPT a) {
    if (IsSNaN(a)) {
        return a | FPInfo<FPT>::mantissa_msb;
    } else if (IsQNaN(a)) {
        return a;
    }
    return std::nullopt;
}
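
// mantissa_msb is the quiet bit: OR-ing it into an SNaN yields the corresponding QNaN.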

/// Given a pair of arguments, return the NaN value which would be returned by an ARM processor.
/// If neither argument is a NaN, returns std::nullopt.
template<typename FPT>
constexpr std::optional<FPT> ProcessNaNs(FPT a, FPT b) {
    if (IsSNaN(a)) {
        return a | FPInfo<FPT>::mantissa_msb;
    } else if (IsSNaN(b)) {
        return b | FPInfo<FPT>::mantissa_msb;
    } else if (IsQNaN(a)) {
        return a;
    } else if (IsQNaN(b)) {
        return b;
    }
    return std::nullopt;
}

/// Given three arguments, return the NaN value which would be returned by an ARM processor.
/// If none of the arguments is a NaN, returns std::nullopt.
template<typename FPT>
constexpr std::optional<FPT> ProcessNaNs(FPT a, FPT b, FPT c) {
    if (IsSNaN(a)) {
        return a | FPInfo<FPT>::mantissa_msb;
    } else if (IsSNaN(b)) {
        return b | FPInfo<FPT>::mantissa_msb;
    } else if (IsSNaN(c)) {
        return c | FPInfo<FPT>::mantissa_msb;
    } else if (IsQNaN(a)) {
        return a;
    } else if (IsQNaN(b)) {
        return b;
    } else if (IsQNaN(c)) {
        return c;
    }
    return std::nullopt;
}

} // namespace Dynarmic::FP
375
externals/dynarmic/src/common/intrusive_list.h
vendored
Executable file
@@ -0,0 +1,375 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <cstddef>
#include <iterator>
#include <memory>
#include <type_traits>

#include "common/assert.h"

namespace Dynarmic::Common {

template <typename T> class IntrusiveList;
template <typename T> class IntrusiveListIterator;

template <typename T>
class IntrusiveListNode {
public:
    bool IsSentinel() const {
        return is_sentinel;
    }

protected:
    IntrusiveListNode* next = nullptr;
    IntrusiveListNode* prev = nullptr;
    bool is_sentinel = false;

    friend class IntrusiveList<T>;
    friend class IntrusiveListIterator<T>;
    friend class IntrusiveListIterator<const T>;
};

template <typename T>
class IntrusiveListSentinel final : public IntrusiveListNode<T>
{
    using IntrusiveListNode<T>::next;
    using IntrusiveListNode<T>::prev;
    using IntrusiveListNode<T>::is_sentinel;

public:
    IntrusiveListSentinel() {
        next = this;
        prev = this;
        is_sentinel = true;
    }
};

template <typename T>
class IntrusiveListIterator {
public:
    using iterator_category = std::bidirectional_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = T;
    using pointer = value_type*;
    using const_pointer = const value_type*;
    using reference = value_type&;
    using const_reference = const value_type&;

    // If value_type is const, we want "const IntrusiveListNode<value_type>", not "const IntrusiveListNode<const value_type>"
    using node_type = std::conditional_t<std::is_const<value_type>::value,
                                         const IntrusiveListNode<std::remove_const_t<value_type>>,
                                         IntrusiveListNode<value_type>>;
    using node_pointer = node_type*;
    using node_reference = node_type&;

    IntrusiveListIterator() = default;
    IntrusiveListIterator(const IntrusiveListIterator& other) = default;
    IntrusiveListIterator& operator=(const IntrusiveListIterator& other) = default;

    explicit IntrusiveListIterator(node_pointer list_node) : node(list_node) {
    }
    explicit IntrusiveListIterator(pointer data) : node(data) {
    }
    explicit IntrusiveListIterator(reference data) : node(&data) {
    }

    IntrusiveListIterator& operator++() {
        node = node->next;
        return *this;
    }
    IntrusiveListIterator& operator--() {
        node = node->prev;
        return *this;
    }
    IntrusiveListIterator operator++(int) {
        IntrusiveListIterator it(*this);
        ++*this;
        return it;
    }
    IntrusiveListIterator operator--(int) {
        IntrusiveListIterator it(*this);
        --*this;
        return it;
    }

    bool operator==(const IntrusiveListIterator& other) const {
        return node == other.node;
    }
    bool operator!=(const IntrusiveListIterator& other) const {
        return !operator==(other);
    }

    reference operator*() const {
        DEBUG_ASSERT(!node->IsSentinel());
        return static_cast<reference>(*node);
    }
    pointer operator->() const {
        return std::addressof(operator*());
    }

    node_pointer AsNodePointer() const {
        return node;
    }

private:
    friend class IntrusiveList<T>;
    node_pointer node = nullptr;
};

template <typename T>
class IntrusiveList {
public:
    using difference_type = std::ptrdiff_t;
    using size_type = std::size_t;
    using value_type = T;
    using pointer = value_type*;
    using const_pointer = const value_type*;
    using reference = value_type&;
    using const_reference = const value_type&;
    using iterator = IntrusiveListIterator<value_type>;
    using const_iterator = IntrusiveListIterator<const value_type>;
    using reverse_iterator = std::reverse_iterator<iterator>;
    using const_reverse_iterator = std::reverse_iterator<const_iterator>;

    /**
     * Inserts a node at the given location indicated by an iterator.
     *
     * @param location The location to insert the node.
     * @param new_node The node to add.
     */
    iterator insert(iterator location, pointer new_node) {
        return insert_before(location, new_node);
    }

    /**
     * Inserts a node at the given location, moving the previous
     * node occupant ahead of the one inserted.
     *
     * @param location The location to insert the new node.
     * @param new_node The node to insert into the list.
     */
    iterator insert_before(iterator location, pointer new_node) {
        auto existing_node = location.AsNodePointer();

        new_node->next = existing_node;
        new_node->prev = existing_node->prev;
        existing_node->prev->next = new_node;
        existing_node->prev = new_node;

        return iterator(new_node);
    }

    /**
     * Inserts a new node into the list ahead of the position indicated.
     *
     * @param position Location to insert the node in front of.
     * @param new_node The node to be inserted into the list.
     */
    iterator insert_after(iterator position, pointer new_node) {
        if (empty())
            return insert(begin(), new_node);

        return insert(++position, new_node);
    }

    /**
     * Add an entry to the start of the list.
     * @param node Node to add to the list.
     */
    void push_front(pointer node) {
        insert(begin(), node);
    }

    /**
     * Add an entry to the end of the list
     * @param node Node to add to the list.
     */
    void push_back(pointer node) {
        insert(end(), node);
    }

    /**
     * Erases the node at the front of the list.
     * @note Must not be called on an empty list.
     */
    void pop_front() {
        DEBUG_ASSERT(!empty());
        erase(begin());
    }

    /**
     * Erases the node at the back of the list.
     * @note Must not be called on an empty list.
     */
    void pop_back() {
        DEBUG_ASSERT(!empty());
        erase(--end());
    }

    /**
     * Removes a node from this list
     * @param it An iterator that points to the node to remove from list.
     */
    pointer remove(iterator& it) {
        DEBUG_ASSERT(it != end());

        pointer node = &*it++;

        node->prev->next = node->next;
        node->next->prev = node->prev;
#if !defined(NDEBUG)
        node->next = nullptr;
        node->prev = nullptr;
#endif

        return node;
    }

    /**
     * Removes a node from this list
     * @param it A constant iterator that points to the node to remove from list.
     */
    pointer remove(const iterator& it) {
        iterator copy = it;
        return remove(copy);
    }

    /**
     * Removes a node from this list.
     * @param node A pointer to the node to remove.
     */
    pointer remove(pointer node) {
        return remove(iterator(node));
    }

    /**
     * Removes a node from this list.
     * @param node A reference to the node to remove.
     */
    pointer remove(reference node) {
        return remove(iterator(node));
    }

    /**
     * Is this list empty?
     * @returns true if there are no nodes in this list.
     */
    bool empty() const {
        return root->next == root.get();
    }

    /**
     * Gets the total number of elements within this list.
     * @return the number of elements in this list.
     */
    size_type size() const {
        return static_cast<size_type>(std::distance(begin(), end()));
    }

    /**
     * Retrieves a reference to the node at the front of the list.
     * @note Must not be called on an empty list.
     */
    reference front() {
        DEBUG_ASSERT(!empty());
        return *begin();
    }

    /**
     * Retrieves a constant reference to the node at the front of the list.
     * @note Must not be called on an empty list.
     */
    const_reference front() const {
        DEBUG_ASSERT(!empty());
        return *begin();
    }

    /**
     * Retrieves a reference to the node at the back of the list.
     * @note Must not be called on an empty list.
     */
    reference back() {
        DEBUG_ASSERT(!empty());
        return *--end();
    }

    /**
     * Retrieves a constant reference to the node at the back of the list.
     * @note Must not be called on an empty list.
     */
    const_reference back() const {
        DEBUG_ASSERT(!empty());
        return *--end();
    }

    // Iterator interface
    iterator begin() { return iterator(root->next); }
    const_iterator begin() const { return const_iterator(root->next); }
    const_iterator cbegin() const { return begin(); }

    iterator end() { return iterator(root.get()); }
    const_iterator end() const { return const_iterator(root.get()); }
    const_iterator cend() const { return end(); }

    reverse_iterator rbegin() { return reverse_iterator(end()); }
    const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
    const_reverse_iterator crbegin() const { return rbegin(); }

    reverse_iterator rend() { return reverse_iterator(begin()); }
    const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
    const_reverse_iterator crend() const { return rend(); }

    /**
     * Erases a node from the list, indicated by an iterator.
     * @param it The iterator that points to the node to erase.
     */
    iterator erase(iterator it) {
        remove(it);
        return it;
    }

    /**
     * Erases a node from this list.
     * @param node A pointer to the node to erase from this list.
     */
    iterator erase(pointer node) {
        return erase(iterator(node));
    }

    /**
     * Erases a node from this list.
     * @param node A reference to the node to erase from this list.
     */
    iterator erase(reference node) {
        return erase(iterator(node));
    }

    /**
     * Exchanges contents of this list with another list instance.
     * @param other The other list to swap with.
     */
    void swap(IntrusiveList& other) noexcept {
        root.swap(other.root);
    }

private:
    std::shared_ptr<IntrusiveListNode<T>> root = std::make_shared<IntrusiveListSentinel<T>>();
};
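
// Usage sketch (illustrative only, not part of the upstream header): a type
// links itself into a list by deriving from IntrusiveListNode; the list never
// owns the nodes, so the caller manages their lifetimes.
//
//     struct Job : IntrusiveListNode<Job> { int id; };
//
//     IntrusiveList<Job> queue;
//     Job a, b;
//     queue.push_back(&a);
//     queue.push_front(&b);
//     for (Job& job : queue) { /* visits b, then a */ }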

/**
 * Exchanges contents of an intrusive list with another intrusive list.
 * @tparam T The type of data being kept track of by the lists.
 * @param lhs The first list.
 * @param rhs The second list.
 */
template <typename T>
void swap(IntrusiveList<T>& lhs, IntrusiveList<T>& rhs) noexcept {
    lhs.swap(rhs);
}

} // namespace Dynarmic::Common
35
externals/dynarmic/src/common/iterator_util.h
vendored
Executable file
@@ -0,0 +1,35 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <iterator>

namespace Dynarmic::Common {
namespace detail {

template<typename T>
struct ReverseAdapter {
    T& iterable;

    constexpr auto begin() {
        using namespace std;
        return rbegin(iterable);
    }

    constexpr auto end() {
        using namespace std;
        return rend(iterable);
    }
};

} // namespace detail

template<typename T>
constexpr detail::ReverseAdapter<T> Reverse(T&& iterable) {
    return detail::ReverseAdapter<T>{iterable};
}
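
// Usage sketch: for (auto& elem : Common::Reverse(container)) { ... } walks the
// container back-to-front; the unqualified rbegin/rend calls resolve via ADL or
// std::rbegin/std::rend, so plain arrays work too.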

} // namespace Dynarmic::Common
103
externals/dynarmic/src/common/llvm_disassemble.cpp
vendored
Executable file
@@ -0,0 +1,103 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <string>

#include <fmt/format.h>

#ifdef DYNARMIC_USE_LLVM
#include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h>
#endif

#include "common/assert.h"
#include "common/common_types.h"
#include "common/llvm_disassemble.h"

namespace Dynarmic::Common {

std::string DisassembleX64(const void* begin, const void* end) {
    std::string result;

#ifdef DYNARMIC_USE_LLVM
    LLVMInitializeX86TargetInfo();
    LLVMInitializeX86TargetMC();
    LLVMInitializeX86Disassembler();
    LLVMDisasmContextRef llvm_ctx = LLVMCreateDisasm("x86_64", nullptr, 0, nullptr, nullptr);
    LLVMSetDisasmOptions(llvm_ctx, LLVMDisassembler_Option_AsmPrinterVariant);
|
||||
|
||||
const u8* pos = reinterpret_cast<const u8*>(begin);
|
||||
size_t remaining = reinterpret_cast<size_t>(end) - reinterpret_cast<size_t>(pos);
|
||||
while (pos < end) {
|
||||
char buffer[80];
|
||||
size_t inst_size = LLVMDisasmInstruction(llvm_ctx, const_cast<u8*>(pos), remaining, reinterpret_cast<u64>(pos), buffer, sizeof(buffer));
|
||||
ASSERT(inst_size);
|
||||
for (const u8* i = pos; i < pos + inst_size; i++)
|
||||
result += fmt::format("{:02x} ", *i);
|
||||
for (size_t i = inst_size; i < 10; i++)
|
||||
result += " ";
|
||||
result += buffer;
|
||||
result += '\n';
|
||||
|
||||
pos += inst_size;
|
||||
remaining -= inst_size;
|
||||
}
|
||||
|
||||
LLVMDisasmDispose(llvm_ctx);
|
||||
#else
|
||||
result += fmt::format("(recompile with DYNARMIC_USE_LLVM=ON to disassemble the generated x86_64 code)\n");
|
||||
result += fmt::format("start: {:016x}, end: {:016x}\n", begin, end);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
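A hedged sketch of calling the function above on a hand-assembled buffer (the byte values are illustrative; an LLVM-enabled build prints one hex-dumped instruction per line, a plain build prints the fallback notice):

#include <cstdio>

#include "common/common_types.h"
#include "common/llvm_disassemble.h"

int main() {
    // Two hand-encoded x86-64 instructions: add rax, rbx; ret.
    const u8 code[] = {0x48, 0x01, 0xd8, 0xc3};
    std::puts(Dynarmic::Common::DisassembleX64(code, code + sizeof(code)).c_str());
}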
std::string DisassembleAArch32([[maybe_unused]] u32 instruction, [[maybe_unused]] u64 pc) {
    std::string result;

#ifdef DYNARMIC_USE_LLVM
    LLVMInitializeARMTargetInfo();
    LLVMInitializeARMTargetMC();
    LLVMInitializeARMDisassembler();
    LLVMDisasmContextRef llvm_ctx = LLVMCreateDisasm("armv8-arm", nullptr, 0, nullptr, nullptr);
    LLVMSetDisasmOptions(llvm_ctx, LLVMDisassembler_Option_AsmPrinterVariant);

    char buffer[80];
    size_t inst_size = LLVMDisasmInstruction(llvm_ctx, reinterpret_cast<u8*>(&instruction), sizeof(instruction), pc, buffer, sizeof(buffer));
    result = inst_size > 0 ? buffer : "<invalid instruction>";
    result += '\n';

    LLVMDisasmDispose(llvm_ctx);
#else
    result += fmt::format("(disassembly disabled)\n");
#endif

    return result;
}
std::string DisassembleAArch64([[maybe_unused]] u32 instruction, [[maybe_unused]] u64 pc) {
    std::string result;

#ifdef DYNARMIC_USE_LLVM
    LLVMInitializeAArch64TargetInfo();
    LLVMInitializeAArch64TargetMC();
    LLVMInitializeAArch64Disassembler();
    LLVMDisasmContextRef llvm_ctx = LLVMCreateDisasm("aarch64", nullptr, 0, nullptr, nullptr);
    LLVMSetDisasmOptions(llvm_ctx, LLVMDisassembler_Option_AsmPrinterVariant);

    char buffer[80];
    size_t inst_size = LLVMDisasmInstruction(llvm_ctx, reinterpret_cast<u8*>(&instruction), sizeof(instruction), pc, buffer, sizeof(buffer));
    result = inst_size > 0 ? buffer : "<invalid instruction>";
    result += '\n';

    LLVMDisasmDispose(llvm_ctx);
#else
    result += fmt::format("(disassembly disabled)\n");
#endif

    return result;
}

} // namespace Dynarmic::Common
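For completeness, a sketch exercising both single-instruction entry points. The encodings were hand-checked (0xE0810002 is A32 "add r0, r1, r2"; 0x8B020020 is A64 "add x0, x1, x2"); exact mnemonic formatting depends on the LLVM version, and a build without DYNARMIC_USE_LLVM returns "(disassembly disabled)":

#include <cstdio>

#include "common/llvm_disassemble.h"

int main() {
    using namespace Dynarmic::Common;
    std::puts(DisassembleAArch32(0xE0810002).c_str());  // add r0, r1, r2
    std::puts(DisassembleAArch64(0x8B020020).c_str());  // add x0, x1, x2
}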
18
externals/dynarmic/src/common/llvm_disassemble.h
vendored
Executable file
@@ -0,0 +1,18 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <string>

#include "common/common_types.h"

namespace Dynarmic::Common {

std::string DisassembleX64(const void* pos, const void* end);
std::string DisassembleAArch32(u32 instruction, u64 pc = 0);
std::string DisassembleAArch64(u32 instruction, u64 pc = 0);

} // namespace Dynarmic::Common
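One note on the pc = 0 defaults above: pc is forwarded to LLVMDisasmInstruction as the instruction's address, so it only affects the rendering of PC-relative operands. A hedged example, assuming an LLVM-enabled build:

#include <cstdio>

#include "common/llvm_disassemble.h"

int main() {
    // 0x14000001 encodes A64 "b #4", a PC-relative branch; supplying a real
    // pc lets the disassembler render the branch relative to that address.
    // Output formatting varies with the LLVM version in use.
    std::puts(Dynarmic::Common::DisassembleAArch64(0x14000001, 0x1000).c_str());
}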
Some files were not shown because too many files have changed in this diff.