early-access version 1262

pineappleEA
2020-12-30 00:04:04 +00:00
parent a5ba8b4937
commit ac593731e0
118 changed files with 8331 additions and 4646 deletions

View File

@@ -935,7 +935,11 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A32EmitContext& ctx, size_t bit
code.mov(tmp, vaddr.cvt32());
code.shr(tmp, static_cast<int>(page_bits));
code.mov(page, qword[r14 + tmp.cvt64() * sizeof(void*)]);
-code.test(page, page);
+if (ctx.conf.page_table_pointer_mask_bits == 0) {
+    code.test(page, page);
+} else {
+    code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits);
+}
code.jz(abort, code.T_NEAR);
if (ctx.conf.absolute_offset_page_table) {
return page + vaddr;
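The change above replaces a plain null test with a mask when `page_table_pointer_mask_bits` is nonzero: the low bits of a page-table entry are reserved for metadata and must be stripped before the pointer is used. Conveniently, `and_` sets ZF exactly when the remaining pointer bits are zero, so the following `jz` still catches unmapped pages (the 32-bit immediate sign-extends to 64 bits, so the high pointer bits survive the and). A standalone sketch of the intended semantics, not part of the diff (the names and mask width are illustrative):

#include <cstdint>

// Hypothetical scalar model of the masked lookup. MASK_BITS stands in for
// ctx.conf.page_table_pointer_mask_bits; 0 would mean "entries are plain pointers".
constexpr unsigned MASK_BITS = 2;

const std::uint8_t* LookupPage(std::uintptr_t entry) {
    // Mirrors code.and_(page, ~u32(0) << MASK_BITS): clear the tag bits,
    // then treat an all-zero result as "no page mapped".
    const std::uintptr_t page = entry & (~std::uintptr_t{0} << MASK_BITS);
    return page ? reinterpret_cast<const std::uint8_t*>(page) : nullptr;
}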

View File

@@ -815,7 +815,11 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bit
code.jnz(abort, code.T_NEAR);
}
code.mov(page, qword[r14 + tmp * sizeof(void*)]);
-code.test(page, page);
+if (ctx.conf.page_table_pointer_mask_bits == 0) {
+    code.test(page, page);
+} else {
+    code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits);
+}
code.jz(abort, code.T_NEAR);
if (ctx.conf.absolute_offset_page_table) {
return page + vaddr;

View File

@@ -384,6 +384,21 @@ bool BlockOfCode::HasAVX512_Skylake() const {
&& DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL);
}
+bool BlockOfCode::HasAVX512_Icelake() const {
+    return DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512CD)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512BW)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512_VPOPCNTDQ)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512_VNNI)
+        && DoesCpuSupport(Xbyak::util::Cpu::tGFNI)
+        && DoesCpuSupport(Xbyak::util::Cpu::tVAES)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512_VBMI2)
+        && DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG)
+        && DoesCpuSupport(Xbyak::util::Cpu::tVPCLMULQDQ);
+}
bool BlockOfCode::HasAVX512_BITALG() const {
return DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG);
}
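`HasAVX512_Icelake()` gates the GFNI fast paths added below on the whole Ice Lake feature cluster rather than probing GFNI alone at each use site. A minimal sketch of the same detection pattern directly against Xbyak's CPU query (assuming only xbyak_util.h, which this file already uses via `DoesCpuSupport`; trimmed to three bits for brevity):

#include <xbyak/xbyak_util.h>

// Query host features once; aggregate the bits treated here as "Ice Lake class".
inline bool HostIsIcelakeClass() {
    static const Xbyak::util::Cpu cpu;
    return cpu.has(Xbyak::util::Cpu::tAVX512F)
        && cpu.has(Xbyak::util::Cpu::tGFNI)
        && cpu.has(Xbyak::util::Cpu::tAVX512_VBMI2);
}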

View File

@@ -158,6 +158,7 @@ public:
bool HasFMA() const;
bool HasAVX2() const;
bool HasAVX512_Skylake() const;
+bool HasAVX512_Icelake() const;
bool HasAVX512_BITALG() const;
private:

View File

@@ -291,8 +291,6 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
auto& shift_arg = args[1];
auto& carry_arg = args[2];
-// TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented.
if (!carry_inst) {
if (shift_arg.IsImmediate()) {
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -304,6 +302,18 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
code.xor_(result, result);
}
ctx.reg_alloc.DefineValue(inst, result);
+} else if (code.HasBMI2()) {
+    const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32();
+    const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32();
+    code.shlx(result, operand, shift);
+    code.xor_(zero, zero);
+    code.cmp(shift.cvt8(), 32);
+    code.cmovnb(result, zero);
+    ctx.reg_alloc.DefineValue(inst, result);
} else {
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
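The BMI2 branches added above exploit `shlx`, which takes the count from an arbitrary register (no forced round-trip through `cl`) and leaves flags untouched. Like `shl`, it masks a 32-bit count to 5 bits, so the ARM requirement that counts of 32 and above produce zero is patched up by the `cmp`/`cmovnb` pair. A scalar model of the semantics being emitted (hypothetical helper, not from the diff):

#include <cstdint>

// ARM LSL semantics on a 32-bit value. x86 SHL/SHLX mask the count by 0x1F,
// so value << 32 would leave value unchanged; ARM instead demands zero.
std::uint32_t ArmLsl32(std::uint32_t value, std::uint8_t shift) {
    if (shift >= 32) {
        return 0;              // cmp shift, 32 ; cmovnb result, zero
    }
    return value << shift;     // shlx result, operand, shift
}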
@@ -398,6 +408,18 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
+} else if (code.HasBMI2()) {
+    const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg);
+    const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg);
+    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();
+    code.shlx(result, operand, shift);
+    code.xor_(zero.cvt32(), zero.cvt32());
+    code.cmp(shift.cvt8(), 64);
+    code.cmovnb(result, zero);
+    ctx.reg_alloc.DefineValue(inst, result);
} else {
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
@@ -405,7 +427,7 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();
// The x64 SHL instruction masks the shift count by 0x3F before performing the shift.
-// ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros.
+// ARM differs from the behaviour: It does not mask the count, so shifts above 63 result in zeros.
code.shl(result, code.cl);
code.xor_(zero.cvt32(), zero.cvt32());
@@ -435,6 +457,18 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
code.xor_(result, result);
}
ctx.reg_alloc.DefineValue(inst, result);
+} else if (code.HasBMI2()) {
+    const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32();
+    const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32();
+    code.shrx(result, operand, shift);
+    code.xor_(zero, zero);
+    code.cmp(shift.cvt8(), 32);
+    code.cmovnb(result, zero);
+    ctx.reg_alloc.DefineValue(inst, result);
} else {
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
@@ -530,6 +564,18 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
+} else if (code.HasBMI2()) {
+    const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg);
+    const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg);
+    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();
+    code.shrx(result, operand, shift);
+    code.xor_(zero.cvt32(), zero.cvt32());
+    code.cmp(shift.cvt8(), 64);
+    code.cmovnb(result, zero);
+    ctx.reg_alloc.DefineValue(inst, result);
} else {
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
@@ -537,7 +583,7 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();
// The x64 SHR instruction masks the shift count by 0x3F before performing the shift.
-// ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros.
+// ARM differs from the behaviour: It does not mask the count, so shifts above 63 result in zeros.
code.shr(result, code.cl);
code.xor_(zero.cvt32(), zero.cvt32());
@@ -563,6 +609,22 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) {
code.sar(result, u8(shift < 31 ? shift : 31));
ctx.reg_alloc.DefineValue(inst, result);
+} else if (code.HasBMI2()) {
+    const Xbyak::Reg32 shift = ctx.reg_alloc.UseScratchGpr(shift_arg).cvt32();
+    const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 const31 = ctx.reg_alloc.ScratchGpr().cvt32();
+    // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift.
+    // ARM differs from the behaviour: It does not mask the count.
+    // We note that all shift values above 31 have the same behaviour as 31 does, so we saturate `shift` to 31.
+    code.mov(const31, 31);
+    code.cmp(shift.cvt8(), 31);
+    code.cmovnb(shift, const31);
+    code.sarx(result, operand, shift);
+    ctx.reg_alloc.DefineValue(inst, result);
} else {
ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
@@ -574,9 +636,8 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) {
// We note that all shift values above 31 have the same behaviour as 31 does, so we saturate `shift` to 31.
code.mov(const31, 31);
code.movzx(code.ecx, code.cl);
-code.cmp(code.ecx, u32(31));
-code.cmovg(code.ecx, const31);
+code.cmp(code.cl, u32(31));
+code.cmova(code.ecx, const31);
code.sar(result, code.cl);
ctx.reg_alloc.DefineValue(inst, result);
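Arithmetic shifts differ from the logical ones above: every count past 31 behaves exactly like 31 (the result is all copies of the sign bit), so both new paths clamp the count rather than zeroing the result. That is also why the `cl` path's compare/cmov was switched to an unsigned `cmova`: the count register holds an unsigned byte. A scalar model (hypothetical helper):

#include <cstdint>

// ARM ASR semantics on a 32-bit value: saturate the count at 31, then do a
// signed shift (a well-defined arithmetic shift since C++20).
std::uint32_t ArmAsr32(std::uint32_t value, std::uint8_t shift) {
    const std::uint8_t clamped = shift < 31 ? shift : 31;   // mov const31 / cmp / cmov
    return static_cast<std::uint32_t>(static_cast<std::int32_t>(value) >> clamped);
}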
@@ -647,6 +708,18 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) {
code.sar(result, u8(shift < 63 ? shift : 63));
ctx.reg_alloc.DefineValue(inst, result);
+} else if (code.HasBMI2()) {
+    const Xbyak::Reg64 shift = ctx.reg_alloc.UseScratchGpr(shift_arg);
+    const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg);
+    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr();
+    code.mov(const63.cvt32(), 63);
+    code.cmp(shift.cvt8(), 63);
+    code.cmovnb(shift, const63);
+    code.sarx(result, operand, shift);
+    ctx.reg_alloc.DefineValue(inst, result);
} else {
ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
@@ -658,8 +731,7 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) {
// We note that all shift values above 63 have the same behaviour as 63 does, so we saturate `shift` to 63.
code.mov(const63, 63);
code.movzx(code.ecx, code.cl);
-code.cmp(code.ecx, u32(63));
-code.cmovg(code.ecx, const63);
+code.cmp(code.cl, u32(63));
+code.cmova(code.ecx, const63);
code.sar(result, code.cl);
@@ -676,7 +748,15 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
auto& carry_arg = args[2];
if (!carry_inst) {
-if (shift_arg.IsImmediate()) {
+if (shift_arg.IsImmediate() && code.HasBMI2()) {
+    const u8 shift = shift_arg.GetImmediateU8();
+    const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32();
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    code.rorx(result, operand, shift);
+    ctx.reg_alloc.DefineValue(inst, result);
+} else if (shift_arg.IsImmediate()) {
const u8 shift = shift_arg.GetImmediateU8();
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -751,7 +831,15 @@ void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
auto& operand_arg = args[0];
auto& shift_arg = args[1];
-if (shift_arg.IsImmediate()) {
+if (shift_arg.IsImmediate() && code.HasBMI2()) {
+    const u8 shift = shift_arg.GetImmediateU8();
+    const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg);
+    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
+    code.rorx(result, operand, shift);
+    ctx.reg_alloc.DefineValue(inst, result);
+} else if (shift_arg.IsImmediate()) {
const u8 shift = shift_arg.GetImmediateU8();
const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
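`rorx` rotates by an immediate through a non-destructive three-operand form and never touches flags, which is why the immediate case can now keep the operand in a plain `UseGpr` instead of a scratch copy. No count fixup is needed because a rotate is total in its count (taken modulo the width). Scalar model (hypothetical helper; C++20's std::rotr is the library equivalent):

#include <cstdint>

// ROR on a 32-bit value; counts are taken modulo 32, matching rorx.
std::uint32_t Ror32(std::uint32_t value, unsigned amount) {
    amount %= 32;
    return amount == 0 ? value
                       : (value >> amount) | (value << (32 - amount));
}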

View File

@@ -439,6 +439,14 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
}
static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
+if (code.HasAVX512_Icelake()) {
+    // Do a logical shift-right of the bits via the 8x8 bit-matrix, but shift
+    // `0x80` rows in so the vacated positions repeat the most significant bit.
+    const u64 zero_extend = ~(0xFFFFFFFFFFFFFFFF << (shift_amount * 8)) & 0x8080808080808080;
+    const u64 shift_matrix = (0x0102040810204080 << (shift_amount * 8)) | zero_extend;
+    code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+    return;
+}
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.punpckhbw(tmp, result);
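The GFNI paths in this file all lean on one fact: `vgf2p8affineqb` multiplies every byte of the source, viewed as a vector of 8 bits over GF(2), by an 8x8 bit-matrix packed into a 64-bit constant (AND for multiply, XOR, i.e. parity, for add), so any fixed per-byte bit permutation or shift is a single instruction. The arithmetic variant above additionally ORs `0x80` rows into the vacated bytes so the emptied destination bits replicate the sign bit. A scalar model of one byte lane with imm8 = 0, following Intel's row ordering (row j, counted from the least significant byte of the matrix, produces destination bit 7 - j):

#include <bit>
#include <cstdint>

// One byte lane of GF2P8AFFINEQB with imm8 = 0.
std::uint8_t Gf2p8Affine(std::uint8_t x, std::uint64_t matrix) {
    std::uint8_t result = 0;
    for (int j = 0; j < 8; ++j) {
        const auto row = static_cast<std::uint8_t>(matrix >> (8 * j));
        const int bit = std::popcount(static_cast<unsigned>(row & x)) & 1;  // GF(2) dot product
        result |= static_cast<std::uint8_t>(bit << (7 - j));
    }
    return result;
}
// matrix == 0x0102040810204080 is the identity; shifting that constant by
// whole bytes shifts every bit of x, which is exactly what these hunks use.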
@@ -1460,11 +1468,17 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
if (shift_amount == 1) {
code.paddb(result, result);
} else if (shift_amount > 0) {
-const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
-const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
+if (code.HasAVX512_Icelake()) {
+    // Galois 8x8 identity matrix, bit-shifted by the shift-amount
+    const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
+    code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+} else {
+    const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
+    const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
-code.psllw(result, shift_amount);
-code.pand(result, code.MConst(xword, mask, mask));
+    code.psllw(result, shift_amount);
+    code.pand(result, code.MConst(xword, mask, mask));
+}
}
ctx.reg_alloc.DefineValue(inst, result);
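On the pre-Ice-Lake path there is no byte-granular SIMD shift, so the code shifts 16-bit lanes and masks away the bits that crossed a byte boundary; `Common::Replicate` tiles the per-byte mask across the qword. A sketch of the mask computation (the `ReplicateByte` helper is a hypothetical stand-in for `Common::Replicate`):

#include <cstdint>

// Stand-in for Common::Replicate(value, 8): tile one byte across a qword.
constexpr std::uint64_t ReplicateByte(std::uint8_t pattern) {
    return 0x0101010101010101ULL * pattern;
}

// Example, shift_amount = 3: psllw shifts each 16-bit lane, then the mask
// (0xFF << 3) & 0xFF = 0xF8 clears the bits that leaked in from the byte below.
constexpr std::uint64_t mask = ReplicateByte((0xFFu << 3) & 0xFFu);
static_assert(mask == 0xF8F8F8F8F8F8F8F8ULL);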
@@ -1510,11 +1524,17 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
const u8 shift_amount = args[1].GetImmediateU8();
if (shift_amount > 0) {
-const u64 replicand = 0xFEULL >> shift_amount;
-const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
+if (code.HasAVX512_Icelake()) {
+    // Galois 8x8 identity matrix, bit-shifted by the shift-amount
+    const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
+    code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
+} else {
+    const u64 replicand = 0xFEULL >> shift_amount;
+    const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
-code.psrlw(result, shift_amount);
-code.pand(result, code.MConst(xword, mask, mask));
+    code.psrlw(result, shift_amount);
+    code.pand(result, code.MConst(xword, mask, mask));
+}
}
ctx.reg_alloc.DefineValue(inst, result);
@@ -2747,40 +2767,48 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
-const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
-code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
-code.pand(high_nibble_reg, data);
-code.pxor(data, high_nibble_reg);
-code.psrld(high_nibble_reg, 4);
-if (code.HasSSSE3()) {
-    // High lookup
-    const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm();
-    code.movdqa(high_reversed_reg, code.MConst(xword, 0xE060A020C0408000, 0xF070B030D0509010));
-    code.pshufb(high_reversed_reg, data);
-    // Low lookup (low nibble equivalent of the above)
-    code.movdqa(data, code.MConst(xword, 0x0E060A020C040800, 0x0F070B030D050901));
-    code.pshufb(data, high_nibble_reg);
-    code.por(data, high_reversed_reg);
+if (code.HasAVX512_Icelake() && code.HasSSSE3()) {
+    // GFNI (vgf2p8affineqb) and SSSE3 (pshufb)
+    // Reverse bits within bytes
+    code.vgf2p8affineqb(data, data, code.MConst(xword_b, 0x8040201008040201), 0);
+    // Reverse bytes within vector
+    code.pshufb(data, code.MConst(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f));
} else {
-    code.pslld(data, 4);
-    code.por(data, high_nibble_reg);
-    code.movdqa(high_nibble_reg, code.MConst(xword, 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC));
+    const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
+    code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
+    code.pand(high_nibble_reg, data);
+    code.pxor(data, high_nibble_reg);
-    code.psrld(high_nibble_reg, 2);
-    code.pslld(data, 2);
-    code.por(data, high_nibble_reg);
+    code.psrld(high_nibble_reg, 4);
-    code.movdqa(high_nibble_reg, code.MConst(xword, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA));
-    code.pand(high_nibble_reg, data);
-    code.pxor(data, high_nibble_reg);
-    code.psrld(high_nibble_reg, 1);
-    code.paddd(data, data);
-    code.por(data, high_nibble_reg);
+    if (code.HasSSSE3()) {
+        // High lookup
+        const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm();
+        code.movdqa(high_reversed_reg, code.MConst(xword, 0xE060A020C0408000, 0xF070B030D0509010));
+        code.pshufb(high_reversed_reg, data);
+        // Low lookup (low nibble equivalent of the above)
+        code.movdqa(data, code.MConst(xword, 0x0E060A020C040800, 0x0F070B030D050901));
+        code.pshufb(data, high_nibble_reg);
+        code.por(data, high_reversed_reg);
+    } else {
+        code.pslld(data, 4);
+        code.por(data, high_nibble_reg);
+        code.movdqa(high_nibble_reg, code.MConst(xword, 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC));
+        code.pand(high_nibble_reg, data);
+        code.pxor(data, high_nibble_reg);
+        code.psrld(high_nibble_reg, 2);
+        code.pslld(data, 2);
+        code.por(data, high_nibble_reg);
+        code.movdqa(high_nibble_reg, code.MConst(xword, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA));
+        code.pand(high_nibble_reg, data);
+        code.pxor(data, high_nibble_reg);
+        code.psrld(high_nibble_reg, 1);
+        code.paddd(data, data);
+        code.por(data, high_nibble_reg);
+    }
}
}
ctx.reg_alloc.DefineValue(inst, data);
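The fast path factors full bit reversal into two halves: the affine multiply with the anti-diagonal matrix 0x8040201008040201 reverses the bits inside every byte, and `pshufb` with a descending index vector then reverses the byte order. Using the `Gf2p8Affine` model sketched earlier, the matrix constant can be checked against a plain loop:

#include <cstdint>

// Reference bit reversal for one byte.
std::uint8_t ReverseBits8(std::uint8_t x) {
    std::uint8_t r = 0;
    for (int i = 0; i < 8; ++i) {
        r = static_cast<std::uint8_t>(r | (((x >> i) & 1u) << (7 - i)));
    }
    return r;
}

// For every byte b: Gf2p8Affine(b, 0x8040201008040201) == ReverseBits8(b).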

View File

@@ -35,6 +35,7 @@ struct CodeBlockInfo {
class SigHandler {
public:
SigHandler();
+~SigHandler();
void AddCodeBlock(CodeBlockInfo info);
void RemoveCodeBlock(u64 rip);
@@ -48,6 +49,8 @@ private:
bool supports_fast_mem = true;
+void* signal_stack_memory = nullptr;
std::vector<CodeBlockInfo> code_block_infos;
std::mutex code_block_infos_mutex;
@@ -62,8 +65,10 @@ SigHandler sig_handler;
SigHandler::SigHandler() {
constexpr size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024);
+signal_stack_memory = std::malloc(signal_stack_size);
stack_t signal_stack;
-signal_stack.ss_sp = std::malloc(signal_stack_size);
+signal_stack.ss_sp = signal_stack_memory;
signal_stack.ss_size = signal_stack_size;
signal_stack.ss_flags = 0;
if (sigaltstack(&signal_stack, nullptr) != 0) {
@@ -91,6 +96,10 @@ SigHandler::SigHandler() {
#endif
}
+SigHandler::~SigHandler() {
+    std::free(signal_stack_memory);
+}
void SigHandler::AddCodeBlock(CodeBlockInfo cbi) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) {
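The new member plus destructor fix a small leak: the malloc'd alternate signal stack used to live only in the local `stack_t`, so nothing could free it; since `sig_handler` is a static object, the memory was held for the whole process lifetime and is now released at shutdown. A minimal sketch of the ownership pattern (standalone, POSIX only; production code would also install SS_DISABLE before freeing):

#include <algorithm>
#include <csignal>
#include <cstddef>
#include <cstdlib>

// Own the alternate signal stack so teardown can release it.
class AltStack {
public:
    AltStack() {
        const std::size_t size = std::max<std::size_t>(SIGSTKSZ, 2 * 1024 * 1024);
        memory = std::malloc(size);
        stack_t ss{};
        ss.ss_sp = memory;      // the kept member is what makes ~AltStack possible
        ss.ss_size = size;
        ss.ss_flags = 0;
        sigaltstack(&ss, nullptr);
    }
    ~AltStack() {
        std::free(memory);
    }
private:
    void* memory = nullptr;
};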

View File

@@ -55,7 +55,7 @@ public:
bool SingleStepping() const { return single_stepping; }
bool operator == (const LocationDescriptor& o) const {
-return std::tie(arm_pc, cpsr, fpscr, single_stepping) == std::tie(o.arm_pc, o.cpsr, o.fpscr, single_stepping);
+return std::tie(arm_pc, cpsr, fpscr, single_stepping) == std::tie(o.arm_pc, o.cpsr, o.fpscr, o.single_stepping);
}
bool operator != (const LocationDescriptor& o) const {

View File

@@ -45,7 +45,7 @@ public:
bool SingleStepping() const { return single_stepping; }
bool operator == (const LocationDescriptor& o) const {
-return std::tie(pc, fpcr, single_stepping) == std::tie(o.pc, o.fpcr, single_stepping);
+return std::tie(pc, fpcr, single_stepping) == std::tie(o.pc, o.fpcr, o.single_stepping);
}
bool operator != (const LocationDescriptor& o) const {
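Both LocationDescriptor fixes correct the same copy-paste slip: the right-hand `std::tie` captured the object's own `single_stepping` instead of `o.single_stepping`, and a value always compares equal to itself, so two descriptors differing only in single-stepping state compared as equal. In miniature (hypothetical struct):

#include <tuple>

struct Desc {
    int pc;
    bool single_stepping;

    bool BuggyEq(const Desc& o) const {
        // Last element compares single_stepping with itself: always true.
        return std::tie(pc, single_stepping) == std::tie(o.pc, single_stepping);
    }
    bool FixedEq(const Desc& o) const {
        return std::tie(pc, single_stepping) == std::tie(o.pc, o.single_stepping);
    }
};

// Desc a{0x1000, false}, b{0x1000, true};
// a.BuggyEq(b) -> true (wrong), a.FixedEq(b) -> false (right).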

View File

@@ -63,15 +63,12 @@ private:
*/
template<size_t N>
static auto GetArgInfo(const char* const bitstring) {
-const auto one = static_cast<opcode_type>(1);
std::array<opcode_type, N> masks = {};
std::array<size_t, N> shifts = {};
size_t arg_index = 0;
char ch = 0;
for (size_t i = 0; i < opcode_bitsize; i++) {
-const size_t bit_position = opcode_bitsize - i - 1;
if (bitstring[i] == '0' || bitstring[i] == '1' || bitstring[i] == '-') {
if (ch != 0) {
ch = 0;
@@ -85,9 +82,15 @@ private:
arg_index++;
}
-ASSERT(arg_index < N);
-masks[arg_index] |= one << bit_position;
-shifts[arg_index] = bit_position;
+if constexpr (N > 0) {
+    const size_t bit_position = opcode_bitsize - i - 1;
+    ASSERT(arg_index < N);
+    masks[arg_index] |= static_cast<opcode_type>(1) << bit_position;
+    shifts[arg_index] = bit_position;
+} else {
+    ASSERT_FALSE();
+}
}
}
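For context on what `GetArgInfo` computes: each distinct letter in a decoder bitstring names one argument; the loop ORs a bit into that argument's mask for every occurrence and records the lowest bit position as its shift, so an argument is later recovered as `(opcode & mask) >> shift`. The `if constexpr (N > 0)` guard keeps the array stores out of instantiations with zero arguments, where `masks`/`shifts` are zero-length and any indexing would be ill-formed. A worked miniature (hypothetical 8-bit pattern):

#include <cstdint>

// For the 8-bit bitstring "01vvv0ss" (made up for illustration), the loop
// would produce:
//   masks  = { 0b0011'1000 /* v */, 0b0000'0011 /* s */ }
//   shifts = { 3, 0 }
constexpr std::uint8_t ExtractV(std::uint8_t opcode) {
    return static_cast<std::uint8_t>((opcode & 0b0011'1000) >> 3);
}

// ExtractV(0b0110'1001) == 0b101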