early-access version 1432

2021-02-09 04:25:58 +01:00
parent de64eab4b4
commit 3d5a9d908a
7336 changed files with 1773492 additions and 111 deletions
--- a/externals/ffmpeg/tests/checkasm/.gitignore
+++ b/externals/ffmpeg/tests/checkasm/.gitignore
@@ -0,0 +1 @@
+/checkasm
--- a/externals/ffmpeg/tests/checkasm/Makefile
+++ b/externals/ffmpeg/tests/checkasm/Makefile
@@ -0,0 +1,85 @@
+# libavcodec tests: an object lands in AVCODECOBJS-yes only when its CONFIG_* option is "yes"
+# subsystems
+AVCODECOBJS-$(CONFIG_AUDIODSP)          += audiodsp.o
+AVCODECOBJS-$(CONFIG_BLOCKDSP)          += blockdsp.o
+AVCODECOBJS-$(CONFIG_BSWAPDSP)          += bswapdsp.o
+AVCODECOBJS-$(CONFIG_FLACDSP)           += flacdsp.o
+AVCODECOBJS-$(CONFIG_FMTCONVERT)        += fmtconvert.o
+AVCODECOBJS-$(CONFIG_G722DSP)           += g722dsp.o
+AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
+AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
+AVCODECOBJS-$(CONFIG_H264QPEL)          += h264qpel.o
+AVCODECOBJS-$(CONFIG_LLVIDDSP)          += llviddsp.o
+AVCODECOBJS-$(CONFIG_LLVIDENCDSP)       += llviddspenc.o
+AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
+AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
+
+# decoders/encoders
+AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
+                                           sbrdsp.o
+AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
+AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
+AVCODECOBJS-$(CONFIG_EXR_DECODER)       += exrdsp.o
+AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER)   += huffyuvdsp.o
+AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
+AVCODECOBJS-$(CONFIG_OPUS_DECODER)      += opusdsp.o
+AVCODECOBJS-$(CONFIG_PIXBLOCKDSP)       += pixblockdsp.o
+AVCODECOBJS-$(CONFIG_HEVC_DECODER)      += hevc_add_res.o hevc_idct.o hevc_sao.o
+AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER)   += utvideodsp.o
+AVCODECOBJS-$(CONFIG_V210_DECODER)      += v210dec.o
+AVCODECOBJS-$(CONFIG_V210_ENCODER)      += v210enc.o
+AVCODECOBJS-$(CONFIG_VP9_DECODER)       += vp9dsp.o
+
+CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)
+
+# libavfilter tests (listing order is irrelevant: the combined list is sorted later)
+AVFILTEROBJS-$(CONFIG_AFIR_FILTER)       += af_afir.o
+AVFILTEROBJS-$(CONFIG_BLEND_FILTER)      += vf_blend.o
+AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
+AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
+AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)      += vf_hflip.o
+AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)    += vf_nlmeans.o
+AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
+
+CHECKASMOBJS-$(CONFIG_AVFILTER)          += $(AVFILTEROBJS-yes)
+
+# swscale tests (not gated per component)
+SWSCALEOBJS                    += sw_rgb.o
+SWSCALEOBJS                    += sw_scale.o
+
+CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS)
+
+# libavutil tests (not gated per component)
+AVUTILOBJS                     += fixed_dsp.o float_dsp.o
+
+CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS)
+
+CHECKASMOBJS-$(ARCH_AARCH64)            += aarch64/checkasm.o
+CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL)   += arm/checkasm.o
+CHECKASMOBJS-$(HAVE_X86ASM)             += x86/checkasm.o
+# Final list: every enabled object plus checkasm.o, prefixed with tests/checkasm/ and sorted.
+CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
+CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
+
+-include $(CHECKASMOBJS:.o=.d)
+# The object directories are order-only prerequisites and are appended to OUTDIRS.
+CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
+$(CHECKASMOBJS): | $(CHECKASMDIRS)
+OUTDIRS += $(CHECKASMDIRS)
+# Target-specific flag: -Umain applies to checkasm.o only.
+tests/checkasm/checkasm.o: CFLAGS += -Umain
+
+CHECKASM := tests/checkasm/checkasm$(EXESUF)
+
+$(CHECKASM): $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS)
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS-avcodec) $(EXTRALIBS-avfilter) $(EXTRALIBS-avformat) $(EXTRALIBS-avutil) $(EXTRALIBS-swresample) $(EXTRALIBS)
+
+checkasm: $(CHECKASM)
+
+testclean:: checkasmclean
+
+checkasmclean:
+	$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)
+# Neither target creates a file named after itself, so both are declared phony.
+.PHONY: checkasm checkasmclean
--- a/externals/ffmpeg/tests/checkasm/aacpsdsp.c
+++ b/externals/ffmpeg/tests/checkasm/aacpsdsp.c
@@ -0,0 +1,261 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavcodec/aacpsdsp.h"
+#include "libavutil/intfloat.h"
+
+#include "checkasm.h"
+
+#define N 32
+#define STRIDE 128
+#define BUF_SIZE (N * STRIDE)
+
+#define randomize(buf, len) do {                                \
+    int i;                                                      \
+    /* rnd() / UINT_MAX per element; len may be any expression */ \
+    for (i = 0; i < (len); i++) {                               \
+        (buf)[i] = (INTFLOAT)rnd() / UINT_MAX;                  \
+    }                                                           \
+} while (0)
+
+#define EPS 0.005
+
+static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
+{
+    /* mask the low `bits` bits of each value's 32-bit pattern via the union */
+    for (; len > 0; len--, buf++) {
+        union av_intfloat32 pun = { .f = *buf };
+        pun.i &= 0xffffffff << bits;
+        *buf   = pun.f;
+    }
+}
+
+static void test_add_squares(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, out_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(INTFLOAT, out_new, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(INTFLOAT, input, [BUF_SIZE], [2]);
+
+    declare_func(void, INTFLOAT *dst, const INTFLOAT (*src)[2], int n);
+
+    /* random input; both outputs start from the same random contents */
+    randomize((INTFLOAT *)input, BUF_SIZE * 2);
+    randomize(out_ref, BUF_SIZE);
+    memcpy(out_new, out_ref, sizeof(INTFLOAT) * BUF_SIZE);
+    call_ref(out_ref, input, BUF_SIZE);
+    call_new(out_new, input, BUF_SIZE);
+    if (!float_near_abs_eps_array(out_ref, out_new, EPS, BUF_SIZE))
+        fail();
+    bench_new(out_new, input, BUF_SIZE);
+}
+
+static void test_mul_pair_single(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, out_ref, [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, out_new, [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, pairs,   [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, singles, [BUF_SIZE]);
+
+    declare_func(void, INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
+
+    /* only the two inputs are randomized; the outputs are not pre-filled */
+    randomize((INTFLOAT *)pairs, BUF_SIZE * 2);
+    randomize(singles, BUF_SIZE);
+    call_ref(out_ref, pairs, singles, BUF_SIZE);
+    call_new(out_new, pairs, singles, BUF_SIZE);
+    if (!float_near_abs_eps_array((float *)out_ref, (float *)out_new, EPS, BUF_SIZE * 2))
+        fail();
+    bench_new(out_new, pairs, singles, BUF_SIZE);
+}
+
+static void test_hybrid_analysis(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, out_ref, [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, out_new, [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, samples, [13], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, coeffs, [N], [8][2]);
+
+    declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
+                 const INTFLOAT (*filter)[8][2], ptrdiff_t stride, int n);
+
+    randomize((INTFLOAT *)samples, 13 * 2);
+    randomize((INTFLOAT *)coeffs, N * 8 * 2);
+
+    /* identical random output contents for the reference and the new version */
+    randomize((INTFLOAT *)out_ref, BUF_SIZE * 2);
+    memcpy(out_new, out_ref, sizeof(INTFLOAT) * 2 * BUF_SIZE);
+
+    call_ref(out_ref, samples, coeffs, STRIDE, N);
+    call_new(out_new, samples, coeffs, STRIDE, N);
+
+    if (!float_near_abs_eps_array((float *)out_ref, (float *)out_new, EPS, BUF_SIZE * 2))
+        fail();
+    bench_new(out_new, samples, coeffs, STRIDE, N);
+}
+
+static void test_hybrid_analysis_ileave(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src,     [2], [38][64]);
+    LOCAL_ALIGNED_16(INTFLOAT, out_ref, [91], [32][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, out_new, [91], [32][2]);
+    const size_t out_bytes = 91 * 32 * 2 * sizeof(INTFLOAT);
+
+    declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
+                       int i, int len);
+
+    randomize((INTFLOAT *)out_ref, 91 * 32 * 2);
+    randomize((INTFLOAT *)src,      2 * 38 * 64);
+    memcpy(out_new, out_ref, out_bytes);
+
+    /* Only len == 32 is exercised: libavcodec never passes any other
+       value, so asm versions are likely to hardcode it in their loops
+       and could fail with anything else.
+       Likewise i only takes the two values currently used by the aac
+       decoder (3 and 5), because the arm neon implementation is
+       micro-optimized for them and will fail for almost every other. */
+    call_ref(out_ref, src, 3, 32);
+    call_new(out_new, src, 3, 32);
+
+    /* the function only moves data around, so the outputs must match exactly */
+    if (memcmp(out_ref, out_new, out_bytes))
+        fail();
+
+    /* second i value, run on the buffers left by the first pass */
+    call_ref(out_ref, src, 5, 32);
+    call_new(out_new, src, 5, 32);
+
+    if (memcmp(out_ref, out_new, out_bytes))
+        fail();
+
+    bench_new(out_new, src, 3, 32);
+}
+
+static void test_hybrid_synthesis_deint(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, out_ref, [2], [38][64]);
+    LOCAL_ALIGNED_16(INTFLOAT, out_new, [2], [38][64]);
+    LOCAL_ALIGNED_16(INTFLOAT, src,    [91], [32][2]);
+    const size_t out_bytes = 2 * 38 * 64 * sizeof(INTFLOAT);
+
+    declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
+                       int i, int len);
+
+    randomize((INTFLOAT *)src,    91 * 32 * 2);
+    randomize((INTFLOAT *)out_ref, 2 * 38 * 64);
+    memcpy(out_new, out_ref, out_bytes);
+
+    /* Only len == 32 is exercised: libavcodec never passes any other
+       value, so asm versions are likely to hardcode it in their loops
+       and could fail with anything else.
+       Likewise i only takes the two values currently used by the aac
+       decoder (3 and 5), because the arm neon implementation is
+       micro-optimized for them and will fail for almost every other. */
+    call_ref(out_ref, src, 3, 32);
+    call_new(out_new, src, 3, 32);
+
+    /* the function only moves data around, so the outputs must match exactly */
+    if (memcmp(out_ref, out_new, out_bytes))
+        fail();
+
+    /* second i value, run on the buffers left by the first pass */
+    call_ref(out_ref, src, 5, 32);
+    call_new(out_new, src, 5, 32);
+
+    if (memcmp(out_ref, out_new, out_bytes))
+        fail();
+
+    bench_new(out_new, src, 3, 32);
+}
+
+static void test_stereo_interpolate(PSDSPContext *psdsp)
+{
+    int variant;
+    LOCAL_ALIGNED_16(INTFLOAT, l_init, [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, r_init, [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, l_ref,  [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, r_ref,  [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, l_new,  [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, r_new,  [BUF_SIZE], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
+    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
+    const size_t chan_bytes = BUF_SIZE * 2 * sizeof(INTFLOAT);
+
+    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
+                       INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
+
+    randomize((INTFLOAT *)l_init, BUF_SIZE * 2);
+    randomize((INTFLOAT *)r_init, BUF_SIZE * 2);
+
+    for (variant = 0; variant < 2; variant++) {
+        if (!check_func(psdsp->stereo_interpolate[variant],
+                        "ps_stereo_interpolate%s", variant ? "_ipdopd" : ""))
+            continue;
+        memcpy(l_ref, l_init, chan_bytes);
+        memcpy(l_new, l_init, chan_bytes);
+        memcpy(r_ref, r_init, chan_bytes);
+        memcpy(r_new, r_init, chan_bytes);
+
+        randomize((INTFLOAT *)h, 2 * 4);
+        randomize((INTFLOAT *)h_step, 2 * 4);
+        // h_step is accumulated BUF_SIZE times into a float that may or may
+        // not carry extra intermediate precision. Clearing its 14 least
+        // significant bits (roughly log2(BUF_SIZE)) gives the same result
+        // regardless of any extra precision in the accumulator.
+        clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
+
+        call_ref(l_ref, r_ref, h, h_step, BUF_SIZE);
+        call_new(l_new, r_new, h, h_step, BUF_SIZE);
+        if (!float_near_abs_eps_array((float *)l_ref, (float *)l_new, EPS, BUF_SIZE * 2) ||
+            !float_near_abs_eps_array((float *)r_ref, (float *)r_new, EPS, BUF_SIZE * 2))
+            fail();
+
+        memcpy(l_new, l_init, chan_bytes);
+        memcpy(r_new, r_init, chan_bytes);
+        bench_new(l_new, r_new, h, h_step, BUF_SIZE);
+    }
+}
+
+void checkasm_check_aacpsdsp(void)
+{
+    PSDSPContext dsp;
+
+    ff_psdsp_init(&dsp);
+    /* each entry: run its test only if check_func() selects it, then report */
+    if (check_func(dsp.add_squares, "ps_add_squares"))
+        test_add_squares();
+    report("add_squares");
+
+    if (check_func(dsp.mul_pair_single, "ps_mul_pair_single"))
+        test_mul_pair_single();
+    report("mul_pair_single");
+
+    if (check_func(dsp.hybrid_analysis, "ps_hybrid_analysis"))
+        test_hybrid_analysis();
+    report("hybrid_analysis");
+
+    if (check_func(dsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
+        test_hybrid_analysis_ileave();
+    report("hybrid_analysis_ileave");
+
+    if (check_func(dsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
+        test_hybrid_synthesis_deint();
+    report("hybrid_synthesis_deint");
+
+    test_stereo_interpolate(&dsp);
+    report("stereo_interpolate");
+}
--- a/externals/ffmpeg/tests/checkasm/aarch64/checkasm.S
+++ b/externals/ffmpeg/tests/checkasm/aarch64/checkasm.S
@@ -0,0 +1,198 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/aarch64/asm.S"
+
+const register_init, align=4
+        .quad 0x21f86d66c8ca00ce        // pattern loaded into d8 before the checked call
+        .quad 0x75b6ba21077c48ad        // d9
+        .quad 0xed56bb2dcb3c7736        // d10
+        .quad 0x8bda43d3fd1a7e06        // d11
+        .quad 0xb64a9c9e5d318408        // d12
+        .quad 0xdf9a54b303f1d3a3        // d13
+        .quad 0x4a75479abd64e097        // d14
+        .quad 0x249214109d5d1c88        // d15
+        .quad 0x1a1b2550a612b48c        // x19
+        .quad 0x79445c159ce79064        // x20
+        .quad 0x2eed899d5a28ddcd        // x21
+        .quad 0x86b2536fcd8cf636        // x22
+        .quad 0xb0856806085e7943        // x23
+        .quad 0x3f2bf84fc0fcca4e        // x24
+        .quad 0xacbd382dcf5b8de2        // x25
+        .quad 0xd229e1f5b281303f        // x26
+        .quad 0x71aeaff20b095fd9        // x27
+        .quad 0xab63e2e11fa38ed9        // x28; the table is also reused as garbage for x8-x17
+endconst
+
+
+const error_message_register
+        .asciz "failed to preserve register"    // used when d8-d15 / x19-x28 differ from register_init
+error_message_stack:
+        .asciz "stack clobbered"                // used when the stack canary no longer matches its reference
+endconst
+
+
+// max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
+
+function checkasm_stack_clobber, export=1
+        mov             x3,  sp                 // remember sp so it can be restored at the end
+        mov             x2,  #CLOBBER_STACK     // x2 = bytes still to overwrite
+1:
+        stp             x0,  x1,  [sp, #-16]!   // push the first two argument values below the stack pointer
+        subs            x2,  x2,  #16           // 16 bytes written per iteration
+        b.gt            1b
+        mov             sp,  x3                 // sp is back to its entry value
+        ret
+endfunc
+
+// Copied stack args rounded up to 16 bytes, + 16 for stack canary reference (grouping explicit: no reliance on &/+ precedence)
+#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)
+
+function checkasm_checked_call, export=1
+        stp             x29, x30, [sp, #-16]!   // save frame pointer and return address
+        mov             x29, sp                 // x29 = base for all [x29, #...] argument loads below
+        stp             x19, x20, [sp, #-16]!   // save x19-x28 and d8-d15; restored at label 0
+        stp             x21, x22, [sp, #-16]!
+        stp             x23, x24, [sp, #-16]!
+        stp             x25, x26, [sp, #-16]!
+        stp             x27, x28, [sp, #-16]!
+        stp             d8,  d9,  [sp, #-16]!
+        stp             d10, d11, [sp, #-16]!
+        stp             d12, d13, [sp, #-16]!
+        stp             d14, d15, [sp, #-16]!
+
+        movrel          x9, register_init
+        ldp             d8,  d9,  [x9], #16     // fill d8-d15 with the known patterns ...
+        ldp             d10, d11, [x9], #16
+        ldp             d12, d13, [x9], #16
+        ldp             d14, d15, [x9], #16
+        ldp             x19, x20, [x9], #16     // ... then x19-x28; both sets are compared after the call
+        ldp             x21, x22, [x9], #16
+        ldp             x23, x24, [x9], #16
+        ldp             x25, x26, [x9], #16
+        ldp             x27, x28, [x9], #16
+
+        sub             sp,  sp,  #ARG_STACK    // reserve the outgoing argument area (incl. canary reference slot)
+.equ pos, 0
+.rept MAX_ARGS-8
+        // Skip the first 8 args, that are loaded into registers
+        ldr             x9, [x29, #16 + 8*8 + pos]      // copy one stack argument ...
+        str             x9, [sp, #pos]                  // ... to the same offset in the new area
+.equ pos, pos + 8
+.endr
+
+        // Fill x8-x17 with garbage. This doesn't have to be preserved,
+        // but avoids relying on them having any particular value.
+        movrel          x9, register_init
+        ldp             x10, x11, [x9], #32
+        ldp             x12, x13, [x9], #32
+        ldp             x14, x15, [x9], #32
+        ldp             x16, x17, [x9], #32
+        ldp             x8,  x9,  [x9]          // x9 is the base pointer, so it is overwritten last
+
+        // For stack overflows, the callee is free to overwrite the parameters
+        // that were passed on the stack (if any), so we can only check after
+        // that point. First figure out how many parameters the function
+        // really took on the stack:
+        ldr             w2,  [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
+        // Load the first non-parameter value from the stack, that should be
+        // left untouched by the function. Store a copy of it inverted, so that
+        // e.g. overwriting everything with zero would be noticed.
+        ldr             x2,  [sp, x2, lsl #3]
+        mvn             x2,  x2
+        str             x2,  [sp, #ARG_STACK-8] // reference copy lives in the last 8 bytes of the area
+
+        // Load the in-register arguments
+        mov             x12, x0                 // x0 = function to call; keep it while x0-x7 are reloaded
+        ldp             x0,  x1,  [x29, #16]
+        ldp             x2,  x3,  [x29, #32]
+        ldp             x4,  x5,  [x29, #48]
+        ldp             x6,  x7,  [x29, #64]
+        // Call the target function
+        blr             x12
+
+        // Load the number of stack parameters, stack canary and its reference
+        ldr             w2,  [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
+        ldr             x2,  [sp, x2, lsl #3]
+        ldr             x3,  [sp, #ARG_STACK-8]
+
+        add             sp,  sp,  #ARG_STACK    // release the argument area
+        stp             x0,  x1,  [sp, #-16]!   // keep the callee's x0/x1 across the checks; reloaded at label 0
+
+        mvn             x3,  x3                 // undo the inversion applied when the reference was stored
+        cmp             x2,  x3
+        b.ne            2f                      // canary changed: report error_message_stack
+
+        movrel          x9, register_init
+        movi            v3.8h,  #0              // v3 accumulates the OR of all d8-d15 differences
+
+.macro check_reg_neon reg1, reg2
+        ldr             q1,  [x9], #16          // next two expected 64-bit values
+        uzp1            v2.2d,  v\reg1\().2d, v\reg2\().2d      // v2 = low 64 bits of both registers
+        eor             v1.16b, v1.16b, v2.16b  // nonzero bits mark a difference
+        orr             v3.16b, v3.16b, v1.16b
+.endm
+        check_reg_neon  8,  9
+        check_reg_neon  10, 11
+        check_reg_neon  12, 13
+        check_reg_neon  14, 15
+        uqxtn           v3.8b,  v3.8h           // saturating narrow: a nonzero halfword stays nonzero
+        umov            x3,  v3.d[0]            // x3 != 0 if any of d8-d15 changed
+
+.macro check_reg reg1, reg2
+        ldp             x0,  x1,  [x9], #16     // x0/x1 are free as scratch: their values were pushed above
+        eor             x0,  x0,  \reg1
+        eor             x1,  x1,  \reg2
+        orr             x3,  x3,  x0
+        orr             x3,  x3,  x1
+.endm
+        check_reg       x19, x20
+        check_reg       x21, x22
+        check_reg       x23, x24
+        check_reg       x25, x26
+        check_reg       x27, x28
+
+        cbz             x3,  0f                 // nothing differs: skip error reporting
+
+        movrel          x0, error_message_register
+        b               1f
+2:
+        movrel          x0, error_message_stack
+1:
+        bl              X(checkasm_fail_func)
+0:
+        ldp             x0,  x1,  [sp], #16     // callee's x0/x1, returned to our caller
+        ldp             d14, d15, [sp], #16     // restore in reverse order of the prologue
+        ldp             d12, d13, [sp], #16
+        ldp             d10, d11, [sp], #16
+        ldp             d8,  d9,  [sp], #16
+        ldp             x27, x28, [sp], #16
+        ldp             x25, x26, [sp], #16
+        ldp             x23, x24, [sp], #16
+        ldp             x21, x22, [sp], #16
+        ldp             x19, x20, [sp], #16
+        ldp             x29, x30, [sp], #16
+        ret
+endfunc
--- a/externals/ffmpeg/tests/checkasm/af_afir.c
+++ b/externals/ffmpeg/tests/checkasm/af_afir.c
@@ -0,0 +1,95 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#include <float.h>
+#include <stdint.h>
+
+#include "libavfilter/af_afir.h"
+#include "libavutil/internal.h"
+#include "checkasm.h"
+
+#define LEN 256
+
+#define randomize_buffer(buf)                   \
+do {                                            \
+    int i;                                      \
+    double bmg[2], stddev = 10.0, mean = 0.0;   \
+    /* one av_bmg_get() call fills a pair */    \
+    for (i = 0; i < LEN*2+8; i += 2) {          \
+        av_bmg_get(&checkasm_lfg, bmg);         \
+        (buf)[i]     = bmg[0] * stddev + mean;  \
+        (buf)[i + 1] = bmg[1] * stddev + mean;  \
+    }                                           \
+} while (0)
+
+static void test_fcmul_add(const float *src0, const float *src1, const float *src2)
+{
+    LOCAL_ALIGNED_32(float, sum_ref, [LEN*2+8]);
+    LOCAL_ALIGNED_32(float, sum_new, [LEN*2+8]);
+    int n;
+
+    declare_func(void, float *sum, const float *t, const float *c,
+                 ptrdiff_t len);
+
+    memcpy(sum_ref, src0, sizeof(float) * (LEN*2+8));
+    memcpy(sum_new, src0, sizeof(float) * (LEN*2+8));
+    call_ref(sum_ref, src1, src2, LEN);
+    call_new(sum_new, src1, src2, LEN);
+    for (n = 0; n <= LEN*2; n++) {
+        /* real/imaginary parts of the pair that element n belongs to */
+        const float t_re = src1[n & ~1], t_im = src1[(n & ~1) + 1];
+        const float c_re = src2[n & ~1], c_im = src2[(n & ~1) + 1];
+        /* the tolerance grows with the magnitude of every term involved */
+        double scale = fabs(src0[n]) +
+                       fabs(t_re) + fabs(t_im) + fabs(c_re) + fabs(c_im) +
+                       fabs(t_re * c_re) + fabs(t_im * c_im) +
+                       fabs(t_re * c_im) + fabs(t_im * c_re) +
+                       fabs(t_re * c_re - t_im * c_im) +
+                       fabs(t_re * c_im + t_im * c_re) +
+                       fabs(sum_ref[n]) + 1.0;
+        if (float_near_abs_eps(sum_ref[n], sum_new[n], scale * 2 * FLT_EPSILON))
+            continue;
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, sum_ref[n], sum_new[n], sum_ref[n] - sum_new[n]);
+        fail();
+        break;
+    }
+    /* reload the initial sum before benchmarking */
+    memcpy(sum_new, src0, sizeof(float) * (LEN*2+8));
+    bench_new(sum_new, src1, src2, LEN);
+}
+
+void checkasm_check_afir(void)
+{
+    LOCAL_ALIGNED_32(float, acc,  [LEN*2+8]);
+    LOCAL_ALIGNED_32(float, taps, [LEN*2+8]);
+    LOCAL_ALIGNED_32(float, coef, [LEN*2+8]);
+    AudioFIRDSPContext dsp = { 0 };
+
+    ff_afir_init(&dsp);
+
+    /* acc seeds the sum buffer; taps and coef are passed as t and c */
+    randomize_buffer(acc);
+    randomize_buffer(taps);
+    randomize_buffer(coef);
+    if (check_func(dsp.fcmul_add, "fcmul_add"))
+        test_fcmul_add(acc, taps, coef);
+    report("fcmul_add");
+}
--- a/externals/ffmpeg/tests/checkasm/alacdsp.c
+++ b/externals/ffmpeg/tests/checkasm/alacdsp.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/alacdsp.h"
+#include "libavcodec/mathops.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+
+#define BUF_SIZE 256
+#define MAX_CHANNELS 2
+
+#define randomize_buffers()                           \
+    do {                                              \
+        int k;                                        \
+        for (k = 0; k < BUF_SIZE*MAX_CHANNELS; k++) { \
+            /* same sign-extended 24-bit value */     \
+            ref_buf[k] = new_buf[k] =                 \
+                sign_extend(rnd(), 24);               \
+        }                                             \
+    } while (0)
+
+static void check_decorrelate_stereo(void)
+{
+    LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
+    int32_t *ref_ch[2] = { ref_buf, ref_buf + BUF_SIZE };
+    int32_t *new_ch[2] = { new_buf, new_buf + BUF_SIZE };
+    ALACDSPContext dsp;
+
+    ff_alacdsp_init(&dsp);
+    if (check_func(dsp.decorrelate_stereo, "alac_decorrelate_stereo")) {
+        int count  = (rnd() & 0xFF) + 1;    /* 1..256 samples are compared */
+        int rshift =  rnd() & 0x1F;
+        int mix    =  rnd() & 0xFF;
+        declare_func(void, int32_t *buf[2], int len, int shift, int weight);
+
+        randomize_buffers();
+        call_ref(ref_ch, count, rshift, mix);
+        call_new(new_ch, count, rshift, mix);
+        if (memcmp(ref_ch[0], new_ch[0], count * sizeof(int32_t)) ||
+            memcmp(ref_ch[1], new_ch[1], count * sizeof(int32_t)))
+            fail();
+        bench_new(new_ch, BUF_SIZE, rshift, mix);
+    }
+
+    report("decorrelate_stereo");
+}
+
+#undef randomize_buffers
+#define randomize_buffers()                           \
+    do {                                              \
+        int pos, chan;                                \
+        for (pos = 0; pos < BUF_SIZE; pos++) {        \
+            for (chan = 0; chan < ch; chan++) {       \
+                /* sign-extended 24-bit sample */     \
+                ref[chan][pos] = new[chan][pos] =     \
+                    sign_extend(rnd(), 24);           \
+                /* 8-bit extra-bits value */          \
+                ref_ebb[chan][pos] =                  \
+                    new_ebb[chan][pos] = rnd() & 0xFF;\
+            }                                         \
+        }                                             \
+    } while (0)
+
+static void check_append_extra_bits(void)
+{
+    LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS*2]);
+    LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS*2]);
+    /* each buffer: two sample channels followed by two extra-bits channels */
+    int32_t *ref[2]     = { ref_buf + BUF_SIZE*0, ref_buf + BUF_SIZE*1 };
+    int32_t *new[2]     = { new_buf + BUF_SIZE*0, new_buf + BUF_SIZE*1 };
+    int32_t *ref_ebb[2] = { ref_buf + BUF_SIZE*2, ref_buf + BUF_SIZE*3 };
+    int32_t *new_ebb[2] = { new_buf + BUF_SIZE*2, new_buf + BUF_SIZE*3 };
+    static const char * const layout[2] = { "mono", "stereo" };
+    ALACDSPContext dsp;
+    int ch;
+
+    ff_alacdsp_init(&dsp);
+    for (ch = 1; ch <= 2; ch++) {
+        if (check_func(dsp.append_extra_bits[ch-1], "alac_append_extra_bits_%s", layout[ch-1])) {
+            int count = (rnd() & 0xFF) + 1;
+            declare_func(void, int32_t *buf[2], int32_t *ebb[2], int ebits, int ch, int len);
+            randomize_buffers();
+            call_ref(ref, ref_ebb, 8, ch, count);
+            call_new(new, new_ebb, 8, ch, count);
+            if (            memcmp(ref[0], new[0], count * sizeof(int32_t)) ||
+                (ch == 2 && memcmp(ref[1], new[1], count * sizeof(int32_t))))
+                fail();
+            bench_new(new, new_ebb, 8, ch, BUF_SIZE);
+        }
+    }
+
+    report("append_extra_bits");
+}
+
+void checkasm_check_alacdsp(void)
+{
+    check_decorrelate_stereo();  /* ends with report("decorrelate_stereo") */
+    check_append_extra_bits();   /* mono and stereo variants, then report("append_extra_bits") */
+}
--- a/externals/ffmpeg/tests/checkasm/arm/checkasm.S
+++ b/externals/ffmpeg/tests/checkasm/arm/checkasm.S
@@ -0,0 +1,197 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/arm/asm.S"
+
+/* override fpu so that NEON instructions are rejected */
+/* NOTE(review): FPU and ELF are line-prefix macros that are not defined in
+ * this file; presumably they come from libavutil/arm/asm.S - confirm. */
+#if HAVE_VFP
+FPU     .fpu            vfp
+ELF     .eabi_attribute 10, 0           @ suppress Tag_FP_arch
+#endif
+
+@ Reference bit patterns for the registers that must survive a call.
+@ The wrapper loads all eight quads into d8-d15 and the first 32 bytes into
+@ r4-r11 before the call, then compares the registers against this table
+@ afterwards.
+const register_init, align=3
+        .quad 0x21f86d66c8ca00ce
+        .quad 0x75b6ba21077c48ad
+        .quad 0xed56bb2dcb3c7736
+        .quad 0x8bda43d3fd1a7e06
+        .quad 0xb64a9c9e5d318408
+        .quad 0xdf9a54b303f1d3a3
+        .quad 0x4a75479abd64e097
+        .quad 0x249214109d5d1c88
+endconst
+
+@ Format strings handed to checkasm_fail_func in r0. The wrapper leaves the
+@ value for the single conversion in r1: the changed FPSCR bits, or the
+@ number of the register that was being checked.
+const error_message_fpscr
+        .asciz "failed to preserve register FPSCR, changed bits: %x"
+error_message_gpr:
+        .asciz "failed to preserve register r%d"
+error_message_vfp:
+        .asciz "failed to preserve register d%d"
+error_message_stack:
+        .asciz "failed to preserve stack"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+@ Bytes needed for the arguments that do not fit in r0-r3.
+#define ARG_STACK 4*(MAX_ARGS - 4)
+
+@ Align the used stack space to 8 to preserve the stack alignment.
+@ +8 for stack canary reference.
+@ The symbol pushed is not a preprocessor macro: it is an assembler symbol
+@ that clobbercheck sets with .equ to the number of bytes it has pushed.
+#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8)
+
+@ clobbercheck variant
+@
+@ Emits the exported function checkasm_checked_call_<variant>. It calls the
+@ function passed in r0 and reports through checkasm_fail_func when the
+@ callee did not preserve r4-r11 (r9 is skipped on Apple targets), d8-d15
+@ and FPSCR (vfp variant only), or the first stack word above its own stack
+@ parameters.
+@
+@ In:   r0     = function to call
+@       r1     = not read (overwritten before the call)
+@       r2, r3 = become r0, r1 of the callee
+@       stack  = two words that become r2, r3 of the callee, then
+@                MAX_ARGS-4 words copied as its stack parameters, then one
+@                word holding the number of stack parameters really used
+@ Out:  r0, r1 = whatever the callee returned in r0, r1
+@ Uses: r2, r3, r12 and lr as scratch
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checkasm_checked_call_\variant, export=1
+        push            {r4-r11, lr}
+.ifc \variant, vfp
+        vpush           {d8-d15}
+        fmrx            r4,  FPSCR
+        push            {r4}
+        @ account for the eight double registers and the saved FPSCR word
+.equ pushed, pushed + 16*4 + 4
+.endif
+
+        @ Fill the registers under test with the reference patterns. r12 is
+        @ not written back, so both loads start at the top of the table.
+        movrel          r12, register_init
+.ifc \variant, vfp
+        vldm            r12, {d8-d15}
+.endif
+        ldm             r12, {r4-r11}
+
+        @ Build the stack frame of the callee: copy MAX_ARGS-4 words from the
+        @ incoming stack area, skipping the two words that go into r2 and r3.
+        sub             sp,  sp,  #ARG_STACK_A
+.equ pos, 0
+.rept MAX_ARGS-4
+        ldr             r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
+        str             r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+        @ For stack overflows, the callee is free to overwrite the parameters
+        @ that were passed on the stack (if any), so we can only check after
+        @ that point. First figure out how many parameters the function
+        @ really took on the stack:
+        ldr             r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
+        @ Load the first non-parameter value from the stack, that should be
+        @ left untouched by the function. Store a copy of it inverted, so that
+        @ e.g. overwriting everything with zero would be noticed.
+        ldr             r12, [sp, r12, lsl #2]
+        mvn             r12, r12
+        str             r12, [sp, #ARG_STACK_A - 4]
+
+        @ Shift the arguments down by two registers: the function pointer
+        @ moves to r12 and the first two incoming stack words fill r2 and r3.
+        mov             r12, r0
+        mov             r0,  r2
+        mov             r1,  r3
+        ldrd            r2,  r3,  [sp, #ARG_STACK_A + pushed]
+        @ Call the target function
+        blx             r12
+
+        @ Load the number of stack parameters, stack canary and its reference
+        ldr             r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
+        ldr             r2,  [sp, r12, lsl #2]
+        ldr             r3,  [sp, #ARG_STACK_A - 4]
+
+        add             sp,  sp,  #ARG_STACK_A
+        @ keep the return value safe while r0 and r1 are used for reporting
+        push            {r0, r1}
+
+        @ undo the inversion of the reference and compare with the canary
+        mvn             r3,  r3
+        cmp             r2,  r3
+        bne             5f
+
+        movrel          r12, register_init
+.ifc \variant, vfp
+        @ Compare one double register against table entry number offset.
+        @ Clobbers r0, r2, r3 and lr; branches to 4f on a mismatch.
+.macro check_reg_vfp, dreg, offset
+        ldrd            r2,  r3,  [r12, #8 * (\offset)]
+        vmov            r0,  lr,  \dreg
+        eor             r2,  r2,  r0
+        eor             r3,  r3,  lr
+        orrs            r2,  r2,  r3
+        bne             4f
+.endm
+
+.irp n, 8, 9, 10, 11, 12, 13, 14, 15
+        @ keep track of the checked double/SIMD register
+        mov             r1,  #\n
+        check_reg_vfp   d\n, \n-8
+.endr
+.purgem check_reg_vfp
+
+        @ The FPSCR value saved in the prologue now sits just above the
+        @ r0/r1 pair pushed after the call, hence the offset of 8.
+        fmrx            r1,  FPSCR
+        ldr             r3,  [sp, #8]
+        eor             r1,  r1,  r3
+        @ Ignore changes in bits 0-4 and 7
+        bic             r1,  r1,  #0x9f
+        @ Ignore changes in the topmost 5 bits
+        bics            r1,  r1,  #0xf8000000
+        bne             3f
+.endif
+
+        @ keep track of the checked GPR
+        mov             r1,  #4
+        @ Compare one or two core registers against the next table entry.
+        @ r12 is post-incremented by 8 and r1 always advances by two in
+        @ total, so it keeps matching the register number being checked.
+.macro check_reg reg1, reg2=
+        ldrd            r2,  r3,  [r12], #8
+        eors            r2,  r2,  \reg1
+        bne             2f
+        add             r1,  r1,  #1
+.ifnb \reg2
+        eors            r3,  r3,  \reg2
+        bne             2f
+.endif
+        add             r1,  r1,  #1
+.endm
+        check_reg       r4,  r5
+        check_reg       r6,  r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+        check_reg       r8
+#else
+        check_reg       r8,  r9
+#endif
+        check_reg       r10, r11
+.purgem check_reg
+
+        b               0f
+        @ Failure paths: select the message, then fall into the common call.
+        @ r1 still holds the register number or the changed FPSCR bits.
+5:
+        movrel          r0, error_message_stack
+        b               1f
+4:
+        movrel          r0, error_message_vfp
+        b               1f
+3:
+        movrel          r0, error_message_fpscr
+        b               1f
+2:
+        movrel          r0, error_message_gpr
+1:
+        bl              X(checkasm_fail_func)
+0:
+        @ Restore the return value of the callee and unwind the prologue in
+        @ reverse order.
+        pop             {r0, r1}
+.ifc \variant, vfp
+        pop             {r2}
+        fmxr            FPSCR, r2
+        vpop            {d8-d15}
+.endif
+        pop             {r4-r11, pc}
+endfunc
+.endm
+
+@ Instantiate the wrapper: the vfp variant only when VFP or NEON support is
+@ configured, the novfp variant always.
+#if HAVE_VFP || HAVE_NEON
+clobbercheck vfp
+#endif
+clobbercheck novfp
--- a/externals/ffmpeg/tests/checkasm/audiodsp.c
+++ b/externals/ffmpeg/tests/checkasm/audiodsp.c
@@ -0,0 +1,146 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libavcodec/audiodsp.h"
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "checkasm.h"
+
+#define MAX_SIZE (32 * 128)
+
+/*
+ * Fill buf[0..len-1] with random floats computed as
+ * rnd() / (UINT_MAX >> 5) - 16.0f.
+ * NOTE(review): assuming rnd() spans the full unsigned 32-bit range, the
+ * values land roughly in [-16, 16] - confirm against checkasm.h.
+ * The macro declares its own loop variable i, so neither argument may be an
+ * expression that refers to a caller variable named i.
+ */
+#define randomize_float(buf, len)                               \
+    do {                                                        \
+        int i;                                                  \
+        for (i = 0; i < len; i++) {                             \
+            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
+            buf[i] = f;                                         \
+        }                                                       \
+    } while (0)
+
+/*
+ * Fill len elements of buf with random integers.
+ *   size - element width in bits; selects uint<size>_t and AV_WN<size>A
+ *   bits - number of random bits kept per element
+ * Each element is r - 2^(bits-1) with r in [0, 2^bits), i.e. a value in the
+ * signed range [-2^(bits-1), 2^(bits-1) - 1] before it is stored.
+ */
+#define randomize_int(buf, len, size, bits)                         \
+    do {                                                            \
+        int i;                                                      \
+        for (i = 0; i < len; i++) {                                 \
+            uint ## size ## _t r = rnd() & ((1LL << bits) - 1);     \
+            AV_WN ## size ## A(buf + i, -(1LL << (bits - 1)) + r);  \
+        }                                                           \
+    } while (0)
+
+/*
+ * Test entry point for AudioDSPContext.
+ *
+ * For each of scalarproduct_int16, vector_clip_int32 and vector_clipf that
+ * check_func() selects, the reference and the version under test are run on
+ * the same random input with a random length, their results are compared,
+ * and the version under test is benchmarked on MAX_SIZE elements.
+ */
+void checkasm_check_audiodsp(void)
+{
+    AudioDSPContext adsp;
+
+    ff_audiodsp_init(&adsp);
+
+    if (check_func(adsp.scalarproduct_int16, "audiodsp.scalarproduct_int16")) {
+        LOCAL_ALIGNED(32, int16_t, v1, [MAX_SIZE]);
+        LOCAL_ALIGNED(32, int16_t, v2, [MAX_SIZE]);
+        unsigned int len_bits_minus4, v1_bits, v2_bits, len;
+        int32_t res0, res1;
+
+        declare_func_emms(AV_CPU_FLAG_MMX, int32_t, const int16_t *v1, const int16_t *v2, int len);
+
+        // generate random 5-12bit vector length
+        len_bits_minus4 = rnd() % 8;
+        len = rnd() & ((1 << len_bits_minus4) - 1);
+        len = 16 * FFMAX(len, 1);
+
+        // generate the bit counts for each of the vectors such that the result
+        // fits into int32
+        v1_bits = 1 + rnd() % 15;
+        v2_bits = FFMIN(32 - (len_bits_minus4 + 4) - v1_bits - 1, 15);
+
+        randomize_int(v1, MAX_SIZE, 16, v1_bits + 1);
+        randomize_int(v2, MAX_SIZE, 16, v2_bits + 1);
+
+        res0 = call_ref(v1, v2, len);
+        res1 = call_new(v1, v2, len);
+        if (res0 != res1)
+            fail();
+        bench_new(v1, v2, MAX_SIZE);
+    }
+
+    if (check_func(adsp.vector_clip_int32, "audiodsp.vector_clip_int32")) {
+        LOCAL_ALIGNED(32, int32_t, src,  [MAX_SIZE]);
+        LOCAL_ALIGNED(32, int32_t, dst0, [MAX_SIZE]);
+        LOCAL_ALIGNED(32, int32_t, dst1, [MAX_SIZE]);
+        int32_t val1, val2, min, max;
+        int len;
+
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int32_t *dst, const int32_t *src,
+                          int32_t min, int32_t max, unsigned int len);
+
+        /* Two random clip bounds whose magnitude is below 2^24. */
+        val1 = ((int32_t)rnd());
+        val1 = FFSIGN(val1) * (val1 & ((1 << 24) - 1));
+        val2 = ((int32_t)rnd());
+        val2 = FFSIGN(val2) * (val2 & ((1 << 24) - 1));
+
+        min = FFMIN(val1, val2);
+        max = FFMAX(val1, val2);
+
+        randomize_int(src, MAX_SIZE, 32, 32);
+
+        /* Random non-zero multiple of 32 that is smaller than MAX_SIZE. */
+        len = rnd() % 128;
+        len = 32 * FFMAX(len, 1);
+
+        call_ref(dst0, src, min, max, len);
+        call_new(dst1, src, min, max, len);
+        if (memcmp(dst0, dst1, len * sizeof(*dst0)))
+            fail();
+        bench_new(dst1, src, min, max, MAX_SIZE);
+    }
+
+    if (check_func(adsp.vector_clipf, "audiodsp.vector_clipf")) {
+        LOCAL_ALIGNED(32, float, src, [MAX_SIZE]);
+        LOCAL_ALIGNED(32, float, dst0, [MAX_SIZE]);
+        LOCAL_ALIGNED(32, float, dst1, [MAX_SIZE]);
+        float val1, val2, min, max;
+        int len;
+
+        declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
+                          int len, float min, float max);
+
+        val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
+        val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
+
+        min = FFMIN(val1, val2);
+        max = FFMAX(val1, val2);
+
+        randomize_float(src, MAX_SIZE);
+
+        /* Random non-zero multiple of 16 that is smaller than MAX_SIZE. */
+        len = rnd() % 128;
+        len = 16 * FFMAX(len, 1);
+
+        call_ref(dst0, src, len, min, max);
+        call_new(dst1, src, len, min, max);
+        /* One call already compares all len elements; repeating it once per
+         * element would give the same verdict at quadratic cost. */
+        if (!float_near_ulp_array(dst0, dst1, 3, len))
+            fail();
+        bench_new(dst1, src, MAX_SIZE, min, max);
+    }
+
+    report("audiodsp");
+}
--- a/externals/ffmpeg/tests/checkasm/blockdsp.c
+++ b/externals/ffmpeg/tests/checkasm/blockdsp.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+
+#include "libavcodec/blockdsp.h"
+
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+/*
+ * Write the same random 16-bit value to element i of buf0 and of buf1, for
+ * the first size elements. buf0 and buf1 are taken from the scope in which
+ * the macro is expanded.
+ */
+#define randomize_buffers(size)             \
+    do {                                    \
+        int i;                              \
+        for (i = 0; i < size; i++) {        \
+            uint16_t r = rnd();             \
+            AV_WN16A(buf0 + i, r);          \
+            AV_WN16A(buf1 + i, r);          \
+        }                                   \
+    } while (0)
+
+/*
+ * Check h.<func> on a block of size 16-bit elements: both buffers start with
+ * identical random content, the reference runs on buf0, the version under
+ * test on buf1, and the first size elements must match afterwards.
+ * Expects h, buf0 and buf1 to exist in the expanding scope.
+ */
+#define check_clear(func, size)                                     \
+do {                                                                \
+    if (check_func(h.func, "blockdsp." #func)) {                    \
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block);   \
+        randomize_buffers(size);                                    \
+        call_ref(buf0);                                             \
+        call_new(buf1);                                             \
+        if (memcmp(buf0, buf1, sizeof(*buf0) * size))               \
+            fail();                                                 \
+        bench_new(buf0);                                            \
+    }                                                               \
+} while (0)
+
+/*
+ * Test entry point for BlockDSPContext: checks clear_block on one 8x8 block
+ * and clear_blocks on six of them.
+ */
+void checkasm_check_blockdsp(void)
+{
+    /* Sized for the larger of the two checks (6 blocks of 8x8 elements).
+     * The names buf0, buf1 and h are used by the check_clear macro. */
+    LOCAL_ALIGNED_32(uint16_t, buf0, [6 * 8 * 8]);
+    LOCAL_ALIGNED_32(uint16_t, buf1, [6 * 8 * 8]);
+
+    AVCodecContext avctx = { 0 };
+    BlockDSPContext h;
+
+    ff_blockdsp_init(&h, &avctx);
+
+    check_clear(clear_block,  8 * 8);
+    check_clear(clear_blocks, 8 * 8 * 6);
+
+    report("blockdsp");
+}
--- a/externals/ffmpeg/tests/checkasm/bswapdsp.c
+++ b/externals/ffmpeg/tests/checkasm/bswapdsp.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/bswapdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 512
+
+/*
+ * Refill all four BUF_SIZE-byte buffers, four bytes at a time: src0 and src1
+ * receive one random 32-bit value, dst0 and dst1 a second one, so each pair
+ * starts out identical. The buffer names come from the expanding scope.
+ */
+#define randomize_buffers()                 \
+    do {                                    \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE; i += 4) { \
+            uint32_t r = rnd();             \
+            AV_WN32A(src0 + i, r);          \
+            AV_WN32A(src1 + i, r);          \
+            r = rnd();                      \
+            AV_WN32A(dst0 + i, r);          \
+            AV_WN32A(dst1 + i, r);          \
+        }                                   \
+    } while (0)
+
+/*
+ * Check a byte-swap function operating on elements of the given type.
+ *
+ * Every element count w from 0 up to one less than the buffer capacity is
+ * tried, starting at an element offset of (capacity - w) & 15, which never
+ * pushes offset + w past the end of the buffer. The whole source and
+ * destination buffers are compared afterwards, so writes outside the
+ * requested range and modifications of the source are detected as well.
+ * Must be expanded after a successful check_func() with src0, src1, dst0
+ * and dst1 in scope.
+ */
+#define check_bswap(type)                                                                  \
+    do {                                                                                   \
+        int w;                                                                             \
+        declare_func(void, type *dst, const type *src, int w);                             \
+                                                                                           \
+        for (w = 0; w < BUF_SIZE / sizeof(type); w++) {                                    \
+            int offset = (BUF_SIZE / sizeof(type) - w) & 15; /* Test various alignments */ \
+            randomize_buffers();                                                           \
+            call_ref((type *)dst0 + offset, (type *)src0 + offset, w);                     \
+            call_new((type *)dst1 + offset, (type *)src1 + offset, w);                     \
+            if (memcmp(src0, src1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))              \
+                fail();                                                                    \
+            bench_new((type *)dst1 + offset, (type *)src1 + offset, w);                    \
+        }                                                                                  \
+    } while (0)
+
+/*
+ * Test entry point for BswapDSPContext: checks bswap_buf with 32-bit
+ * elements and bswap16_buf with 16-bit elements.
+ */
+void checkasm_check_bswapdsp(void)
+{
+    /* These four names are referenced by randomize_buffers() and
+     * check_bswap(); they must not be renamed. */
+    LOCAL_ALIGNED_16(uint8_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src1, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
+    BswapDSPContext h;
+
+    ff_bswapdsp_init(&h);
+
+    if (check_func(h.bswap_buf, "bswap_buf"))
+        check_bswap(uint32_t);
+
+    if (check_func(h.bswap16_buf, "bswap16_buf"))
+        check_bswap(uint16_t);
+
+    report("bswap");
+}
--- a/externals/ffmpeg/tests/checkasm/checkasm.c
+++ b/externals/ffmpeg/tests/checkasm/checkasm.c
@@ -0,0 +1,881 @@
+/*
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Henrik Gramner
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#if CONFIG_LINUX_PERF
+# ifndef _GNU_SOURCE
+#  define _GNU_SOURCE // for syscall (performance monitoring API)
+# endif
+#endif
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "checkasm.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/intfloat.h"
+#include "libavutil/random_seed.h"
+
+#if HAVE_IO_H
+#include <io.h>
+#endif
+
+#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
+#include <windows.h>
+#define COLOR_RED    FOREGROUND_RED
+#define COLOR_GREEN  FOREGROUND_GREEN
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
+#else
+#define COLOR_RED    1
+#define COLOR_GREEN  2
+#define COLOR_YELLOW 3
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if !HAVE_ISATTY
+#define isatty(fd) 1
+#endif
+
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+#include "libavutil/arm/cpu.h"
+
+/* Pointer to the checked-call wrapper currently in use; it is initialised
+ * to the novfp variant. */
+void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
+#endif
+
+/*
+ * List of tests to invoke.
+ *
+ * Each entry pairs a test name with its entry point; the list ends with a
+ * NULL sentinel. The preprocessor guard around an entry must be the same
+ * configuration symbol that the checkasm Makefile uses to build the object
+ * defining the entry point, otherwise the build either fails to link or
+ * silently leaves the test out.
+ */
+static const struct {
+    const char *name;
+    void (*func)(void);
+} tests[] = {
+#if CONFIG_AVCODEC
+    #if CONFIG_AAC_DECODER
+        { "aacpsdsp", checkasm_check_aacpsdsp },
+        { "sbrdsp",   checkasm_check_sbrdsp },
+    #endif
+    #if CONFIG_ALAC_DECODER
+        { "alacdsp", checkasm_check_alacdsp },
+    #endif
+    #if CONFIG_AUDIODSP
+        { "audiodsp", checkasm_check_audiodsp },
+    #endif
+    #if CONFIG_BLOCKDSP
+        { "blockdsp", checkasm_check_blockdsp },
+    #endif
+    #if CONFIG_BSWAPDSP
+        { "bswapdsp", checkasm_check_bswapdsp },
+    #endif
+    #if CONFIG_DCA_DECODER
+        { "synth_filter", checkasm_check_synth_filter },
+    #endif
+    #if CONFIG_EXR_DECODER
+        { "exrdsp", checkasm_check_exrdsp },
+    #endif
+    #if CONFIG_FLACDSP
+        { "flacdsp", checkasm_check_flacdsp },
+    #endif
+    #if CONFIG_FMTCONVERT
+        { "fmtconvert", checkasm_check_fmtconvert },
+    #endif
+    #if CONFIG_G722DSP
+        { "g722dsp", checkasm_check_g722dsp },
+    #endif
+    #if CONFIG_H264DSP
+        { "h264dsp", checkasm_check_h264dsp },
+    #endif
+    #if CONFIG_H264PRED
+        { "h264pred", checkasm_check_h264pred },
+    #endif
+    #if CONFIG_H264QPEL
+        { "h264qpel", checkasm_check_h264qpel },
+    #endif
+    #if CONFIG_HEVC_DECODER
+        { "hevc_add_res", checkasm_check_hevc_add_res },
+        { "hevc_idct", checkasm_check_hevc_idct },
+        { "hevc_sao", checkasm_check_hevc_sao },
+    #endif
+    #if CONFIG_HUFFYUV_DECODER
+        { "huffyuvdsp", checkasm_check_huffyuvdsp },
+    #endif
+    #if CONFIG_JPEG2000_DECODER
+        { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
+    #endif
+    /* llviddsp.o is built under CONFIG_LLVIDDSP, so guard on that symbol */
+    #if CONFIG_LLVIDDSP
+        { "llviddsp", checkasm_check_llviddsp },
+    #endif
+    #if CONFIG_LLVIDENCDSP
+        { "llviddspenc", checkasm_check_llviddspenc },
+    #endif
+    #if CONFIG_OPUS_DECODER
+        { "opusdsp", checkasm_check_opusdsp },
+    #endif
+    #if CONFIG_PIXBLOCKDSP
+        { "pixblockdsp", checkasm_check_pixblockdsp },
+    #endif
+    #if CONFIG_UTVIDEO_DECODER
+        { "utvideodsp", checkasm_check_utvideodsp },
+    #endif
+    #if CONFIG_V210_DECODER
+        { "v210dec", checkasm_check_v210dec },
+    #endif
+    #if CONFIG_V210_ENCODER
+        { "v210enc", checkasm_check_v210enc },
+    #endif
+    #if CONFIG_VP8DSP
+        { "vp8dsp", checkasm_check_vp8dsp },
+    #endif
+    #if CONFIG_VP9_DECODER
+        { "vp9dsp", checkasm_check_vp9dsp },
+    #endif
+    #if CONFIG_VIDEODSP
+        { "videodsp", checkasm_check_videodsp },
+    #endif
+#endif
+#if CONFIG_AVFILTER
+    #if CONFIG_AFIR_FILTER
+        { "af_afir", checkasm_check_afir },
+    #endif
+    #if CONFIG_BLEND_FILTER
+        { "vf_blend", checkasm_check_blend },
+    #endif
+    #if CONFIG_COLORSPACE_FILTER
+        { "vf_colorspace", checkasm_check_colorspace },
+    #endif
+    #if CONFIG_EQ_FILTER
+        { "vf_eq", checkasm_check_vf_eq },
+    #endif
+    #if CONFIG_GBLUR_FILTER
+        { "vf_gblur", checkasm_check_vf_gblur },
+    #endif
+    #if CONFIG_HFLIP_FILTER
+        { "vf_hflip", checkasm_check_vf_hflip },
+    #endif
+    #if CONFIG_NLMEANS_FILTER
+        { "vf_nlmeans", checkasm_check_nlmeans },
+    #endif
+    #if CONFIG_THRESHOLD_FILTER
+        { "vf_threshold", checkasm_check_vf_threshold },
+    #endif
+#endif
+#if CONFIG_SWSCALE
+    { "sw_rgb", checkasm_check_sw_rgb },
+    { "sw_scale", checkasm_check_sw_scale },
+#endif
+#if CONFIG_AVUTIL
+        { "fixed_dsp", checkasm_check_fixed_dsp },
+        { "float_dsp", checkasm_check_float_dsp },
+#endif
+    { NULL }
+};
+
+/*
+ * List of cpu flags to check.
+ *
+ * name   - display name of the instruction set
+ * suffix - short tag; cpu_suffix() returns it for a matching flag mask
+ * flag   - AV_CPU_FLAG_* bit mask of the entry
+ *
+ * The order is significant: cpu_suffix() scans from the last entry towards
+ * the first, so for a mask matching several entries the later one wins.
+ * The list ends with a NULL sentinel whose flag is 0.
+ */
+static const struct {
+    const char *name;
+    const char *suffix;
+    int flag;
+} cpus[] = {
+#if   ARCH_AARCH64
+    { "ARMV8",    "armv8",    AV_CPU_FLAG_ARMV8 },
+    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
+#elif ARCH_ARM
+    { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
+    { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
+    { "ARMV6T2",  "armv6t2",  AV_CPU_FLAG_ARMV6T2 },
+    { "VFP",      "vfp",      AV_CPU_FLAG_VFP },
+    { "VFP_VM",   "vfp_vm",   AV_CPU_FLAG_VFP_VM },
+    { "VFPV3",    "vfp3",     AV_CPU_FLAG_VFPV3 },
+    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
+#elif ARCH_PPC
+    { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
+    { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
+    { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
+#elif ARCH_X86
+    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
+    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
+    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
+    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
+    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
+    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
+    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
+    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
+    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
+    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
+    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
+    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
+    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
+    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
+    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
+    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
+    { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
+#endif
+    { NULL }
+};
+
+/* One implementation of a tested function; versions of the same function
+ * form a singly linked list. */
+typedef struct CheckasmFuncVersion {
+    /* Next version of the same function, or NULL at the end of the list. */
+    struct CheckasmFuncVersion *next;
+    /* Still NULL on a freshly created node; get_func() uses that to decide
+     * whether to rebalance the tree. */
+    void *func;
+    /* NOTE(review): presumably cleared when this version fails a check -
+     * not visible in this part of the file, confirm. */
+    int ok;
+    /* Flag mask passed to cpu_suffix() when printing benchmark results. */
+    int cpu;
+    /* Benchmark counters (cycles, iterations) read by print_benchs(). */
+    CheckasmPerf perf;
+} CheckasmFuncVersion;
+
+/* Binary search tree node */
+typedef struct CheckasmFunc {
+    /* child[0] holds names that compare lower than this node according to
+     * cmp_func_names(), child[1] those that compare higher. */
+    struct CheckasmFunc *child[2];
+    /* Head of the version list, stored inside the node itself. */
+    CheckasmFuncVersion versions;
+    uint8_t color; /* 0 = red, 1 = black */
+    /* Start of the NUL-terminated name; get_func() allocates the node with
+     * strlen(name) extra bytes so that the whole string fits. */
+    char name[1];
+} CheckasmFunc;
+
+/* Internal state */
+static struct {
+    /* Root of the function tree; released by destroy_func_tree(). */
+    CheckasmFunc *funcs;
+    CheckasmFunc *current_func;
+    CheckasmFuncVersion *current_func_ver;
+    const char *current_test_name;
+    const char *bench_pattern;
+    int bench_pattern_len;
+    int num_checked;
+    int num_failed;
+
+    /* perf */
+    /* Timing overhead subtracted from every result in print_benchs(). */
+    int nop_time;
+    /* Copied into a local named sysfd by measure_nop_time().
+     * NOTE(review): presumably a perf event file descriptor - confirm. */
+    int sysfd;
+
+    int cpu_flag;
+    const char *cpu_flag_name;
+    const char *test_name;
+    int verbose;
+} state;
+
+/* PRNG state */
+AVLFG checkasm_lfg;
+
+/* float compare support code */
+/* Return 1 when the sign bit (bit 31) of the float representation is set,
+ * 0 otherwise. */
+static int is_negative(union av_intfloat32 u)
+{
+    return (u.i & 0x80000000u) != 0;
+}
+
+/*
+ * Return 1 when a and b are at most max_ulp representable floats apart,
+ * 0 otherwise. Values of opposite sign only match when they compare equal.
+ */
+int float_near_ulp(float a, float b, unsigned max_ulp)
+{
+    union av_intfloat32 bits_a, bits_b;
+    int64_t distance;
+
+    bits_a.f = a;
+    bits_b.f = b;
+
+    /* Opposite signs: the only acceptable pair is -0.0 and +0.0 */
+    if (is_negative(bits_a) != is_negative(bits_b))
+        return a == b;
+
+    /* Same sign: the distance between the bit patterns is the ULP count */
+    distance = (int64_t)bits_a.i - bits_b.i;
+
+    return llabs(distance) <= max_ulp;
+}
+
+/* Return 1 when float_near_ulp() holds for each of the first len element
+ * pairs of a and b, 0 as soon as one pair fails. */
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+                         unsigned len)
+{
+    unsigned idx = 0;
+
+    while (idx < len) {
+        if (!float_near_ulp(a[idx], b[idx], max_ulp))
+            return 0;
+        idx++;
+    }
+
+    return 1;
+}
+
+/*
+ * Return 1 when |a - b| is strictly smaller than eps. Otherwise print the
+ * two values, their distance and eps to stderr and return 0.
+ */
+int float_near_abs_eps(float a, float b, float eps)
+{
+    const float abs_diff = fabsf(a - b);
+
+    if (!(abs_diff < eps)) {
+        fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps);
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Return 1 when float_near_abs_eps() holds for each of the first len
+ * element pairs of a and b, 0 as soon as one pair fails. */
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+                         unsigned len)
+{
+    unsigned idx = 0;
+
+    while (idx < len) {
+        if (!float_near_abs_eps(a[idx], b[idx], eps))
+            return 0;
+        idx++;
+    }
+
+    return 1;
+}
+
+/* Return 1 when a and b are close by either criterion: within max_ulp ULPs,
+ * or, failing that, within an absolute distance of eps. */
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
+{
+    /* The absolute check (which reports to stderr on failure) only runs
+     * when the ULP check did not already succeed. */
+    if (float_near_ulp(a, b, max_ulp))
+        return 1;
+
+    return float_near_abs_eps(a, b, eps);
+}
+
+/* Return 1 when float_near_abs_eps_ulp() holds for each of the first len
+ * element pairs of a and b, 0 as soon as one pair fails. */
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+                         unsigned max_ulp, unsigned len)
+{
+    unsigned idx;
+    int all_near = 1;
+
+    /* The loop ends at the first pair that is not close enough. */
+    for (idx = 0; all_near && idx < len; idx++)
+        all_near = float_near_abs_eps_ulp(a[idx], b[idx], eps, max_ulp);
+
+    return all_near;
+}
+
+/* Return 1 when |a - b| is strictly smaller than eps, 0 otherwise. */
+int double_near_abs_eps(double a, double b, double eps)
+{
+    return fabs(a - b) < eps;
+}
+
+/* Return 1 when double_near_abs_eps() holds for each of the first len
+ * element pairs of a and b, 0 as soon as one pair fails. */
+int double_near_abs_eps_array(const double *a, const double *b, double eps,
+                              unsigned len)
+{
+    unsigned idx = 0;
+
+    while (idx < len) {
+        if (!double_near_abs_eps(a[idx], b[idx], eps))
+            return 0;
+        idx++;
+    }
+
+    return 1;
+}
+
+/*
+ * Print colored text to stderr if the terminal supports it.
+ *
+ * color - one of the COLOR_* values; the escape-sequence path uses the low
+ *         three bits as the color digit and bit 3 as the leading attribute
+ *         digit, the console path uses the low four bits as text attribute
+ * fmt   - printf-style format string, followed by its arguments
+ *
+ * Whether color can be used is determined on the first call and cached in a
+ * static variable. The text itself is printed in either case.
+ */
+static void color_printf(int color, const char *fmt, ...)
+{
+    /* -1 = not determined yet, 0 = plain output, 1 = colored output */
+    static int use_color = -1;
+    va_list arg;
+
+#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
+    static HANDLE con;
+    static WORD org_attributes;
+
+    if (use_color < 0) {
+        CONSOLE_SCREEN_BUFFER_INFO con_info;
+        con = GetStdHandle(STD_ERROR_HANDLE);
+        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
+            /* Remember the current attributes so they can be restored. */
+            org_attributes = con_info.wAttributes;
+            use_color = 1;
+        } else
+            use_color = 0;
+    }
+    if (use_color)
+        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
+#else
+    if (use_color < 0) {
+        /* Color requires TERM to be set to something other than "dumb" and
+         * file descriptor 2 to be a terminal. */
+        const char *term = getenv("TERM");
+        use_color = term && strcmp(term, "dumb") && isatty(2);
+    }
+    if (use_color)
+        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
+#endif
+
+    va_start(arg, fmt);
+    vfprintf(stderr, fmt, arg);
+    va_end(arg);
+
+    /* Switch back to the original attributes after the message. */
+    if (use_color) {
+#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
+        SetConsoleTextAttribute(con, org_attributes);
+#else
+        fprintf(stderr, "\x1b[0m");
+#endif
+    }
+}
+
+/* Free the tree rooted at f: every node, together with the versions chained
+ * behind the one embedded in it. A NULL root is accepted. */
+static void destroy_func_tree(CheckasmFunc *f)
+{
+    CheckasmFuncVersion *ver, *following;
+
+    if (!f)
+        return;
+
+    /* The first version lives inside the node; only the chained ones are
+     * freed individually. */
+    for (ver = f->versions.next; ver; ver = following) {
+        following = ver->next;
+        free(ver);
+    }
+
+    destroy_func_tree(f->child[0]);
+    destroy_func_tree(f->child[1]);
+    free(f);
+}
+
+/*
+ * Return a zero-filled block of size bytes. If the allocation fails, report
+ * it on stderr, free the function tree and terminate with exit status 1.
+ */
+static void *checkasm_malloc(size_t size)
+{
+    void *block = calloc(1, size);
+
+    if (block == NULL) {
+        fprintf(stderr, "checkasm: malloc failed\n");
+        destroy_func_tree(state.funcs);
+        exit(1);
+    }
+
+    return block;
+}
+
+/* Return the suffix of the last cpus[] entry whose flag mask shares a bit
+ * with cpu, or "c" when no entry matches. */
+static const char *cpu_suffix(int cpu)
+{
+    int idx;
+
+    /* Scan from the end of the table so that later entries take priority */
+    for (idx = FF_ARRAY_ELEMS(cpus) - 1; idx >= 0; idx--) {
+        if (cpus[idx].flag & cpu)
+            return cpus[idx].suffix;
+    }
+
+    return "c";
+}
+
+/* qsort() comparison callback ordering uint16_t values ascending. */
+static int cmp_nop(const void *a, const void *b)
+{
+    const uint16_t lhs = *(const uint16_t *)a;
+    const uint16_t rhs = *(const uint16_t *)b;
+
+    return lhs - rhs;
+}
+
+/*
+ * Measure the overhead of the timing code (in decicycles).
+ *
+ * Takes 10000 samples of an empty PERF_START/PERF_STOP pair, sorts them and
+ * averages the middle 5000, discarding the lowest and highest quarter.
+ */
+static int measure_nop_time(void)
+{
+    uint16_t nops[10000];
+    int i, nop_sum = 0;
+    /* NOTE(review): sysfd is not referenced directly below; presumably the
+     * PERF_START/PERF_STOP macros pick it up by name - confirm. */
+    av_unused const int sysfd = state.sysfd;
+
+    uint64_t t = 0;
+    for (i = 0; i < 10000; i++) {
+        PERF_START(t);
+        PERF_STOP(t);
+        /* only the low 16 bits of the measurement are kept */
+        nops[i] = t;
+    }
+
+    qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
+    for (i = 2500; i < 7500; i++)
+        nop_sum += nops[i];
+
+    /* 5000 samples divided by 500 gives ten times the mean, i.e. tenths */
+    return nop_sum / 500;
+}
+
+/*
+ * Print benchmark results for the tree rooted at f, visiting the nodes in
+ * order (lower names first). One line is printed per version that has
+ * recorded at least one iteration.
+ */
+static void print_benchs(CheckasmFunc *f)
+{
+    CheckasmFuncVersion *ver;
+
+    if (!f)
+        return;
+
+    print_benchs(f->child[0]);
+
+    /* Only print functions with at least one assembly version */
+    if (f->versions.cpu || f->versions.next) {
+        for (ver = &f->versions; ver; ver = ver->next) {
+            const CheckasmPerf *perf = &ver->perf;
+            int decicycles;
+
+            /* Versions that were never benchmarked are left out */
+            if (!perf->iterations)
+                continue;
+
+            decicycles = (10*perf->cycles/perf->iterations - state.nop_time) / 4;
+            printf("%s_%s: %d.%d\n", f->name, cpu_suffix(ver->cpu), decicycles/10, decicycles%10);
+        }
+    }
+
+    print_benchs(f->child[1]);
+}
+
+/*
+ * ASCIIbetical sort except preserving natural order for numbers.
+ *
+ * Returns a negative, zero or positive value like strcmp(). When the first
+ * difference falls inside or right after a run of digits and the two digit
+ * runs have different lengths, the name with the longer run sorts later
+ * (so a name ending in 8 sorts before the same name ending in 16).
+ */
+static int cmp_func_names(const char *a, const char *b)
+{
+    const char *start = a;
+    int ascii_diff, digit_diff;
+
+    /* Skip the common prefix; ascii_diff ends up as the difference of the
+     * first mismatching characters, or 0 when both strings end together. */
+    for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++);
+    /* Continue for as long as both strings still supply digits. */
+    for (; av_isdigit(*a) && av_isdigit(*b); a++, b++);
+
+    /* If a digit precedes this position and only one of the strings has a
+     * further digit here, that string holds the longer number. */
+    if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b)))
+        return digit_diff;
+
+    return ascii_diff;
+}
+
+/* Rotate the subtree rooted at f in direction dir (0 = left, 1 = right).
+ * The child on the opposite side becomes the new subtree root and is returned;
+ * it takes over f's color while f's color is set to 0. */
+static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir)
+{
+    const int opposite = dir ^ 1;
+    CheckasmFunc *pivot = f->child[opposite];
+
+    f->child[opposite] = pivot->child[dir];
+    pivot->child[dir]  = f;
+
+    pivot->color = f->color;
+    f->color     = 0;
+
+    return pivot;
+}
+
+/* A node is red when it exists and its color field is 0; a NULL child is not red */
+#define is_red(f) ((f) && !(f)->color)
+
+/* Balance a left-leaning red-black tree at the specified node.
+ * *root is dereferenced unconditionally, so it must be non-NULL; it is
+ * overwritten when a rotation changes the root of this subtree. */
+static void balance_tree(CheckasmFunc **root)
+{
+    CheckasmFunc *f = *root;
+
+    /* Both children red: toggle this node's color and set both children's color to 1 */
+    if (is_red(f->child[0]) && is_red(f->child[1])) {
+        f->color ^= 1;
+        f->child[0]->color = f->child[1]->color = 1;
+    }
+
+    /* Rotate when only the right child is red, or when the left child and
+     * its own left child are both red */
+    if (!is_red(f->child[0]) && is_red(f->child[1]))
+        *root = rotate_tree(f, 0); /* Rotate left */
+    else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
+        *root = rotate_tree(f, 1); /* Rotate right */
+}
+
+/* Find the node called name in the tree at *root, inserting a new
+ * zero-initialized node when there is none. Returns the found or new node. */
+static CheckasmFunc *get_func(CheckasmFunc **root, const char *name)
+{
+    CheckasmFunc *node = *root;
+    int order;
+
+    if (!node) {
+        /* Empty slot: allocate the node here. The size adds strlen(name) to
+         * sizeof(CheckasmFunc) and the copy includes the terminating NUL. */
+        int len = strlen(name);
+
+        node = *root = checkasm_malloc(sizeof(CheckasmFunc) + len);
+        memcpy(node->name, name, len + 1);
+        return node;
+    }
+
+    order = cmp_func_names(name, node->name);
+    if (!order)
+        return node;
+
+    /* Continue the search in child[0] (order < 0) or child[1] (order > 0) */
+    node = get_func(&node->child[order > 0], name);
+
+    /* A node without a registered function was just inserted: rebalance on the way up */
+    if (!node->versions.func)
+        balance_tree(root);
+
+    return node;
+}
+
+/* Run the selected tests for one cpu flag. The flag is added to the flags
+ * accumulated by earlier calls and masked by what av_get_cpu_flags() reports.
+ * Tests run when the accumulated flag set is empty or when the resulting
+ * supported set differs from the previous one. */
+static void check_cpu_flag(const char *name, int flag)
+{
+    const int prev_flags = state.cpu_flag;
+    int t;
+
+    flag |= prev_flags;
+    av_force_cpu_flags(-1);
+    state.cpu_flag = flag & av_get_cpu_flags();
+    av_force_cpu_flags(state.cpu_flag);
+
+    if (flag && state.cpu_flag == prev_flags)
+        return;
+
+    state.cpu_flag_name = name;
+    for (t = 0; tests[t].func; t++) {
+        /* When a test name filter is set, only the matching test runs */
+        if (!state.test_name || !strcmp(tests[t].name, state.test_name)) {
+            state.current_test_name = tests[t].name;
+            tests[t].func();
+        }
+    }
+}
+
+/* Print the pending CPU flag name as a heading, then clear it so that the
+ * same name is not printed a second time */
+static void print_cpu_name(void)
+{
+    const char *pending = state.cpu_flag_name;
+
+    if (!pending)
+        return;
+
+    color_printf(COLOR_YELLOW, "%s:\n", pending);
+    state.cpu_flag_name = NULL;
+}
+
+#if CONFIG_LINUX_PERF
+/* Open a hardware CPU-cycle perf event (created disabled, excluding kernel and
+ * hypervisor) and store its descriptor in state.sysfd.
+ * Returns 0 on success, -1 if the syscall fails. */
+static int bench_init_linux(void)
+{
+    struct perf_event_attr attr = {
+        .type           = PERF_TYPE_HARDWARE,
+        .size           = sizeof(struct perf_event_attr),
+        .config         = PERF_COUNT_HW_CPU_CYCLES,
+        .disabled       = 1, // start counting only on demand
+        .exclude_kernel = 1,
+        .exclude_hv     = 1,
+    };
+
+    printf("benchmarking with Linux Perf Monitoring API\n");
+
+    state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+    if (state.sysfd != -1)
+        return 0;
+
+    perror("syscall");
+    return -1;
+}
+#endif
+
+#if !CONFIG_LINUX_PERF
+/* Benchmark setup for builds without Linux perf support.
+ * Returns 0 when AV_READ_TIME is defined; otherwise reports that --bench is
+ * not supported and returns -1. */
+static int bench_init_ffmpeg(void)
+{
+#ifdef AV_READ_TIME
+    printf("benchmarking with native FFmpeg timers\n");
+    return 0;
+#else
+    fprintf(stderr, "checkasm: --bench is not supported on your system\n");
+    return -1;
+#endif
+}
+#endif
+
+/* Set up the timing backend selected at build time, then measure and print
+ * the overhead of the timing code. Returns 0 on success or the backend's
+ * negative return value on failure. */
+static int bench_init(void)
+{
+    int err;
+
+#if CONFIG_LINUX_PERF
+    err = bench_init_linux();
+#else
+    err = bench_init_ffmpeg();
+#endif
+
+    if (err >= 0) {
+        state.nop_time = measure_nop_time();
+        printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
+        err = 0;
+    }
+
+    return err;
+}
+
+/* Release benchmarking resources: on Linux perf builds, close the perf event
+ * descriptor if state.sysfd is positive. Does nothing on other builds. */
+static void bench_uninit(void)
+{
+#if CONFIG_LINUX_PERF
+    if (state.sysfd <= 0)
+        return;
+
+    close(state.sysfd);
+#endif
+}
+
+/* Entry point. Recognized arguments: --bench[=PATTERN], --test=NAME and
+ * --verbose / -v; any other argument is parsed as a decimal random seed.
+ * Returns 1 if a test failed or benchmarking could not be set up, else 0. */
+int main(int argc, char *argv[])
+{
+    unsigned int seed = av_get_random_seed();
+    int i, ret = 0;
+
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+    if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
+        checkasm_checked_call = checkasm_checked_call_vfp;
+#endif
+
+    if (!tests[0].func || !cpus[0].flag) {
+        fprintf(stderr, "checkasm: no tests to perform\n");
+        return 0;
+    }
+
+    for (i = 1; i < argc; i++) {
+        char *arg = argv[i];
+
+        if (!strncmp(arg, "--bench", 7)) {
+            if (bench_init() < 0)
+                return 1;
+            if (arg[7] == '=') {
+                /* Only benchmark functions whose name starts with the pattern */
+                state.bench_pattern = arg + 8;
+                state.bench_pattern_len = strlen(state.bench_pattern);
+            } else {
+                state.bench_pattern = "";
+            }
+        } else if (!strncmp(arg, "--test=", 7)) {
+            state.test_name = arg + 7;
+        } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
+            state.verbose = 1;
+        } else {
+            seed = strtoul(arg, NULL, 10);
+        }
+    }
+
+    fprintf(stderr, "checkasm: using random seed %u\n", seed);
+    av_lfg_init(&checkasm_lfg, seed);
+
+    /* First a pass without any cpu flag, then one pass per entry of cpus[] */
+    check_cpu_flag(NULL, 0);
+    for (i = 0; cpus[i].flag; i++)
+        check_cpu_flag(cpus[i].name, cpus[i].flag);
+
+    if (!state.num_failed) {
+        fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
+        if (state.bench_pattern)
+            print_benchs(state.funcs);
+    } else {
+        fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
+        ret = 1;
+    }
+
+    destroy_func_tree(state.funcs);
+    bench_uninit();
+    return ret;
+}
+
+/* Decide whether the given function needs to be tested and register it as a
+ * new version of the function named by the printf-style name arguments.
+ * Returns NULL when func is NULL, the formatted name is empty or does not fit
+ * the internal buffer, or func was already registered under that name.
+ * Otherwise returns the function to use as reference: the last previously
+ * registered version still marked ok, or func itself if there is none. */
+void *checkasm_check_func(void *func, const char *name, ...)
+{
+    char name_buf[256];
+    void *ref = func;
+    CheckasmFuncVersion *ver, *last = NULL;
+    int name_length;
+    va_list arg;
+
+    va_start(arg, name);
+    name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
+    va_end(arg);
+
+    if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
+        return NULL;
+
+    state.current_func = get_func(&state.funcs, name_buf);
+    state.funcs->color = 1;
+    ver = &state.current_func->versions;
+
+    if (ver->func) {
+        /* Walk the existing versions of this function */
+        for (; ver; ver = ver->next) {
+            /* Only test functions that haven't already been tested */
+            if (ver->func == func)
+                return NULL;
+
+            if (ver->ok)
+                ref = ver->func;
+
+            last = ver;
+        }
+
+        /* Append a fresh version entry at the end of the list */
+        ver = last->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
+    }
+
+    ver->func = func;
+    ver->ok = 1;
+    ver->cpu = state.cpu_flag;
+    state.current_func_ver = ver;
+
+    /* Versions registered while no cpu flag is active are not counted */
+    if (state.cpu_flag)
+        state.num_checked++;
+
+    return ref;
+}
+
+/* Decide whether the current function should be benchmarked: nothing may have
+ * failed so far, benchmarking must be enabled, and the function name must
+ * start with the benchmark pattern. Returns 1 to benchmark, 0 otherwise. */
+int checkasm_bench_func(void)
+{
+    if (state.num_failed || !state.bench_pattern)
+        return 0;
+
+    return !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
+}
+
+/* Mark the current function version as failed and print the printf-style
+ * message next to its name. Only the first failure of a version is reported,
+ * and versions registered without a cpu flag are ignored. */
+void checkasm_fail_func(const char *msg, ...)
+{
+    CheckasmFuncVersion *ver = state.current_func_ver;
+    va_list arg;
+
+    if (!ver->cpu || !ver->ok)
+        return;
+
+    print_cpu_name();
+    fprintf(stderr, "   %s_%s (", state.current_func->name, cpu_suffix(ver->cpu));
+    va_start(arg, msg);
+    vfprintf(stderr, msg, arg);
+    va_end(arg);
+    fprintf(stderr, ")\n");
+
+    ver->ok = 0;
+    state.num_failed++;
+}
+
+/* Return the benchmark context of the current function version after
+ * clearing it and storing the global perf file descriptor in it */
+CheckasmPerf *checkasm_get_perf_context(void)
+{
+    CheckasmPerf *ctx = &state.current_func_ver->perf;
+
+    memset(ctx, 0, sizeof(*ctx));
+    ctx->sysfd = state.sysfd;
+
+    return ctx;
+}
+
+/* Print the outcome of all tests performed since the last call. The
+ * printf-style arguments form the label printed after the test name. */
+void checkasm_report(const char *name, ...)
+{
+    static int prev_checked, prev_failed, max_length;
+    int pad_length;
+    va_list arg;
+
+    if (state.num_checked <= prev_checked) {
+        /* Nothing new was counted. While no cpu flag is active, record the
+         * longest label so that later output can be vertically aligned. */
+        if (!state.cpu_flag) {
+            int length = strlen(state.current_test_name);
+
+            va_start(arg, name);
+            length += vsnprintf(NULL, 0, name, arg);
+            va_end(arg);
+
+            max_length = FFMAX(max_length, length);
+        }
+        return;
+    }
+
+    pad_length = max_length + 4;
+
+    print_cpu_name();
+    pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
+    va_start(arg, name);
+    pad_length -= vfprintf(stderr, name, arg);
+    va_end(arg);
+    /* Pad with spaces up to the opening bracket of the status field */
+    fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
+
+    if (state.num_failed != prev_failed)
+        color_printf(COLOR_RED, "FAILED");
+    else
+        color_printf(COLOR_GREEN, "OK");
+    fprintf(stderr, "]\n");
+
+    prev_checked = state.num_checked;
+    prev_failed  = state.num_failed;
+}
+
+/* Define checkasm_check_<type>(): compare two 2D buffers of `type` row by row.
+ * Strides are passed in bytes and converted to elements; w is the number of
+ * elements per row and h the number of rows. Returns 0 when every row matches.
+ * On a mismatch the current function is marked as failed with file:line and 1
+ * is returned; in verbose mode both buffers are also dumped to stderr using
+ * fmt, followed by a map that marks differing elements with 'x'. */
+#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
+int checkasm_check_##type(const char *const file, const int line, \
+                          const type *buf1, ptrdiff_t stride1, \
+                          const type *buf2, ptrdiff_t stride2, \
+                          const int w, int h, const char *const name) \
+{ \
+    int y = 0; \
+    stride1 /= sizeof(*buf1); \
+    stride2 /= sizeof(*buf2); \
+    for (y = 0; y < h; y++) \
+        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
+            break; \
+    if (y == h) \
+        return 0; \
+    checkasm_fail_func("%s:%d", file, line); \
+    if (!state.verbose) \
+        return 1; \
+    fprintf(stderr, "%s:\n", name); \
+    while (h--) { \
+        for (int x = 0; x < w; x++) \
+            fprintf(stderr, " " fmt, buf1[x]); \
+        fprintf(stderr, "    "); \
+        for (int x = 0; x < w; x++) \
+            fprintf(stderr, " " fmt, buf2[x]); \
+        fprintf(stderr, "    "); \
+        for (int x = 0; x < w; x++) \
+            fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
+        buf1 += stride1; \
+        buf2 += stride2; \
+        fprintf(stderr, "\n"); \
+    } \
+    return 1; \
+}
+
+/* Instantiate the comparison function for each supported element type */
+DEF_CHECKASM_CHECK_FUNC(uint8_t,  "%02x")
+DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
+DEF_CHECKASM_CHECK_FUNC(int16_t,  "%6d")
+DEF_CHECKASM_CHECK_FUNC(int32_t,  "%9d")
--- a/externals/ffmpeg/tests/checkasm/checkasm.h
+++ b/externals/ffmpeg/tests/checkasm/checkasm.h
@@ -0,0 +1,282 @@
+/*
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Henrik Gramner
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef TESTS_CHECKASM_CHECKASM_H
+#define TESTS_CHECKASM_CHECKASM_H
+
+#include <stdint.h>
+#include "config.h"
+
+#if CONFIG_LINUX_PERF
+#include <unistd.h> // read(3)
+#include <sys/ioctl.h>
+#include <asm/unistd.h>
+#include <linux/perf_event.h>
+#endif
+
+#include "libavutil/avstring.h"
+#include "libavutil/cpu.h"
+#include "libavutil/internal.h"
+#include "libavutil/lfg.h"
+#include "libavutil/timer.h"
+
+void checkasm_check_aacpsdsp(void);
+void checkasm_check_afir(void);
+void checkasm_check_alacdsp(void);
+void checkasm_check_audiodsp(void);
+void checkasm_check_blend(void);
+void checkasm_check_blockdsp(void);
+void checkasm_check_bswapdsp(void);
+void checkasm_check_colorspace(void);
+void checkasm_check_exrdsp(void);
+void checkasm_check_fixed_dsp(void);
+void checkasm_check_flacdsp(void);
+void checkasm_check_float_dsp(void);
+void checkasm_check_fmtconvert(void);
+void checkasm_check_g722dsp(void);
+void checkasm_check_h264dsp(void);
+void checkasm_check_h264pred(void);
+void checkasm_check_h264qpel(void);
+void checkasm_check_hevc_add_res(void);
+void checkasm_check_hevc_idct(void);
+void checkasm_check_hevc_sao(void);
+void checkasm_check_huffyuvdsp(void);
+void checkasm_check_jpeg2000dsp(void);
+void checkasm_check_llviddsp(void);
+void checkasm_check_llviddspenc(void);
+void checkasm_check_nlmeans(void);
+void checkasm_check_opusdsp(void);
+void checkasm_check_pixblockdsp(void);
+void checkasm_check_sbrdsp(void);
+void checkasm_check_synth_filter(void);
+void checkasm_check_sw_rgb(void);
+void checkasm_check_sw_scale(void);
+void checkasm_check_utvideodsp(void);
+void checkasm_check_v210dec(void);
+void checkasm_check_v210enc(void);
+void checkasm_check_vf_eq(void);
+void checkasm_check_vf_gblur(void);
+void checkasm_check_vf_hflip(void);
+void checkasm_check_vf_threshold(void);
+void checkasm_check_vp8dsp(void);
+void checkasm_check_vp9dsp(void);
+void checkasm_check_videodsp(void);
+
+struct CheckasmPerf;
+
+void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3);
+int checkasm_bench_func(void);
+void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
+struct CheckasmPerf *checkasm_get_perf_context(void);
+void checkasm_report(const char *name, ...) av_printf_format(1, 2);
+
+/* float compare utilities */
+int float_near_ulp(float a, float b, unsigned max_ulp);
+int float_near_abs_eps(float a, float b, float eps);
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+                         unsigned len);
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+                             unsigned len);
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+                                 unsigned max_ulp, unsigned len);
+int double_near_abs_eps(double a, double b, double eps);
+int double_near_abs_eps_array(const double *a, const double *b, double eps,
+                              unsigned len);
+
+extern AVLFG checkasm_lfg;
+#define rnd() av_lfg_get(&checkasm_lfg)
+
+static av_unused void *func_ref, *func_new;
+
+#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
+
+/* Decide whether or not the specified function needs to be tested */
+#define check_func(func, ...) (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
+
+/* Declare the function prototype. The first argument is the return value, the remaining
+ * arguments are the function parameters. Naming parameters is optional. */
+#define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
+#define declare_func_float(ret, ...) declare_new_float(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
+#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
+
+/* Indicate that the current test has failed */
+#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
+
+/* Print the test outcome */
+#define report checkasm_report
+
+/* Call the reference function */
+#define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
+
+#if ARCH_X86 && HAVE_X86ASM
+/* Verifies that clobbered callee-saved registers are properly saved and restored
+ * and that either no MMX registers are touched or emms is issued */
+void checkasm_checked_call(void *func, ...);
+/* Verifies that clobbered callee-saved registers are properly saved and restored
+ * and issues emms for asm functions which are not required to do so */
+void checkasm_checked_call_emms(void *func, ...);
+/* Verifies that clobbered callee-saved registers are properly saved and restored
+ * but doesn't issue emms. Meant for dsp functions returning float or double */
+void checkasm_checked_call_float(void *func, ...);
+
+#if ARCH_X86_64
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
+ * assembly function through checked_call() with added dummy arguments which forces all
+ * real arguments to be passed on the stack and not in registers. For 32-bit arguments the
+ * upper half of the 64-bit register locations on the stack will now contain junk which will
+ * cause misbehaving functions to either produce incorrect output or segfault. Note that
+ * even though this works extremely well in practice, it's technically not guaranteed
+ * and false negatives are theoretically possible, but there can never be any false positives.
+ */
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
+                              = (void *)checkasm_checked_call;
+#define declare_new_float(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
+                                    = (void *)checkasm_checked_call_float;
+#define declare_new_emms(cpu_flags, ret, ...) \
+    ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \
+        ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \
+                                             (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
+                                              CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
+                      checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
+#elif ARCH_X86_32
+#define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define declare_new_float(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call_float;
+#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \
+        ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms :        \
+                                             (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, __VA_ARGS__)
+#endif
+#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+/* Use a dummy argument, to offset the real parameters by 2, not only 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept the same
+ * even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+void checkasm_checked_call_novfp(void *func, int dummy, ...);
+extern void (*checkasm_checked_call)(void *func, int dummy, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__, \
+                                                  int, int, int, int, int, int, int, int, \
+                                                  int, int, int, int, int, int, int) = (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0)
+#elif ARCH_AARCH64 && !defined(__APPLE__)
+void checkasm_stack_clobber(uint64_t clobber, ...);
+void checkasm_checked_call(void *func, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, int, int, __VA_ARGS__,\
+                                                  int, int, int, int, int, int, int, int,\
+                                                  int, int, int, int, int, int, int)\
+                              = (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
+                                              CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
+                      checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
+                                   7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0))
+#else
+#define declare_new(ret, ...)
+#define declare_new_float(ret, ...)
+#define declare_new_emms(cpu_flags, ret, ...)
+/* Call the function */
+#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
+#endif
+
+#ifndef declare_new_emms
+#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
+#endif
+#ifndef declare_new_float
+#define declare_new_float(ret, ...) declare_new(ret, __VA_ARGS__)
+#endif
+
+/* Per-function-version benchmark state, filled in by bench_new() */
+typedef struct CheckasmPerf {
+    int sysfd;       /* file descriptor used by the Linux perf PERF_START/PERF_STOP macros */
+    uint64_t cycles; /* accumulated timer readings */
+    int iterations;  /* number of measurements accumulated into cycles */
+} CheckasmPerf;
+
+#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
+
+#if CONFIG_LINUX_PERF
+/* Linux perf backend: both macros use a file descriptor variable named sysfd
+ * that must be in scope at the point of use. PERF_START resets and enables
+ * the counter; PERF_STOP disables it and reads its value into t. */
+#define PERF_START(t) do {                              \
+    ioctl(sysfd, PERF_EVENT_IOC_RESET, 0);              \
+    ioctl(sysfd, PERF_EVENT_IOC_ENABLE, 0);             \
+} while (0)
+#define PERF_STOP(t) do {                               \
+    ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);            \
+    read(sysfd, &t, sizeof(t));                         \
+} while (0)
+#else
+/* Native timer backend: PERF_START stores the current AV_READ_TIME() value
+ * in t and PERF_STOP replaces t with the time elapsed since then. */
+#define PERF_START(t) t = AV_READ_TIME()
+#define PERF_STOP(t)  t = AV_READ_TIME() - t
+#endif
+
+/* Benchmark the function.
+ * Takes BENCH_RUNS timed measurements, each covering four back-to-back calls
+ * of func_new with the given arguments. The first measurement is discarded,
+ * and a measurement larger than four times the running average of the
+ * accepted ones is rejected as an outlier. The sum and count of the accepted
+ * measurements are added to the perf context, so that cycles/iterations is
+ * the filtered average cost of four calls. */
+#define bench_new(...)\
+    do {\
+        if (checkasm_bench_func()) {\
+            struct CheckasmPerf *perf = checkasm_get_perf_context();\
+            av_unused const int sysfd = perf->sysfd;\
+            func_type *tfunc = func_new;\
+            uint64_t tsum = 0;\
+            int ti, tcount = 0;\
+            uint64_t t = 0; \
+            for (ti = 0; ti < BENCH_RUNS; ti++) {\
+                PERF_START(t);\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                PERF_STOP(t);\
+                if (t*tcount <= tsum*4 && ti > 0) {\
+                    tsum += t;\
+                    tcount++;\
+                }\
+            }\
+            emms_c();\
+            perf->cycles += tsum;\
+            perf->iterations += tcount;\
+        }\
+    } while (0)
+#else
+#define bench_new(...) while(0)
+#define PERF_START(t)  while(0)
+#define PERF_STOP(t)   while(0)
+#endif
+
+/* Declare checkasm_check_<type>(), the two-buffer comparison function for
+ * elements of the given type; normally invoked through checkasm_check() */
+#define DECL_CHECKASM_CHECK_FUNC(type) \
+int checkasm_check_##type(const char *const file, const int line, \
+                          const type *const buf1, const ptrdiff_t stride1, \
+                          const type *const buf2, const ptrdiff_t stride2, \
+                          const int w, const int h, const char *const name)
+
+DECL_CHECKASM_CHECK_FUNC(uint8_t);
+DECL_CHECKASM_CHECK_FUNC(uint16_t);
+DECL_CHECKASM_CHECK_FUNC(int16_t);
+DECL_CHECKASM_CHECK_FUNC(int32_t);
+
+#define PASTE(a,b) a ## b
+#define CONCAT(a,b) PASTE(a,b)
+
+#define checkasm_check(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
+
+#endif /* TESTS_CHECKASM_CHECKASM_H */
--- a/externals/ffmpeg/tests/checkasm/exrdsp.c
+++ b/externals/ffmpeg/tests/checkasm/exrdsp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/exrdsp.h"
+#include "libavutil/intreadwrite.h"
+
+/* Number of payload bytes processed by each test */
+#define BUF_SIZE 5120
+/* Payload plus two input paddings. Parenthesized so that the sum stays a
+ * single operand wherever the macro is expanded. */
+#define PADDED_BUF_SIZE (BUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE * 2)
+
+/* Fill the first BUF_SIZE bytes of the buffer named src (which must be in
+ * scope at the point of use) with random data, four bytes at a time */
+#define randomize_buffers()                 \
+    do {                                    \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE; i += 4) { \
+            uint32_t r = rnd();             \
+            AV_WN32A(src + i, r);           \
+        }                                   \
+    } while (0)
+
+/* Run the reference and the new reorder_pixels implementation on the same
+ * random input, compare the first BUF_SIZE output bytes, then benchmark */
+static void check_reorder_pixels(void) {
+    LOCAL_ALIGNED_32(uint8_t, src,     [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, out_ref, [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, out_new, [PADDED_BUF_SIZE]);
+
+    declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+
+    /* Start from all-zero buffers; only the first BUF_SIZE bytes of src are randomized */
+    memset(out_new, 0, PADDED_BUF_SIZE);
+    memset(out_ref, 0, PADDED_BUF_SIZE);
+    memset(src,     0, PADDED_BUF_SIZE);
+    randomize_buffers();
+
+    call_ref(out_ref, src, BUF_SIZE);
+    call_new(out_new, src, BUF_SIZE);
+    if (memcmp(out_ref, out_new, BUF_SIZE) != 0)
+        fail();
+
+    bench_new(out_new, src, BUF_SIZE);
+}
+
+/* Run the reference and the new in-place predictor implementation on copies
+ * of the same random data, compare the first BUF_SIZE bytes, then benchmark */
+static void check_predictor(void) {
+    LOCAL_ALIGNED_32(uint8_t, src,     [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, buf_ref, [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, buf_new, [PADDED_BUF_SIZE]);
+
+    declare_func(void, uint8_t *src, ptrdiff_t size);
+
+    memset(src, 0, PADDED_BUF_SIZE);
+    randomize_buffers();
+
+    /* Both implementations get their own copy of the input */
+    memcpy(buf_ref, src, PADDED_BUF_SIZE);
+    memcpy(buf_new, src, PADDED_BUF_SIZE);
+
+    call_ref(buf_ref, BUF_SIZE);
+    call_new(buf_new, BUF_SIZE);
+    if (memcmp(buf_ref, buf_new, BUF_SIZE) != 0)
+        fail();
+
+    bench_new(buf_new, BUF_SIZE);
+}
+
+/* Test entry point for the EXR DSP functions: checks and reports
+ * reorder_pixels and predictor from an initialized ExrDSPContext */
+void checkasm_check_exrdsp(void)
+{
+    ExrDSPContext dsp;
+
+    ff_exrdsp_init(&dsp);
+
+    if (check_func(dsp.reorder_pixels, "reorder_pixels"))
+        check_reorder_pixels();
+    report("reorder_pixels");
+
+    if (check_func(dsp.predictor, "predictor"))
+        check_predictor();
+    report("predictor");
+}
--- a/externals/ffmpeg/tests/checkasm/fixed_dsp.c
+++ b/externals/ffmpeg/tests/checkasm/fixed_dsp.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+
+#include "checkasm.h"
+#include "libavutil/common.h"
+#include "libavutil/fixed_dsp.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+
+#define BUF_SIZE 256
+
+/* Fill the first BUF_SIZE elements of the arrays named src0, src1 and src2
+ * (which must be in scope at the point of use) with random values passed
+ * through sign_extend(..., 24) */
+#define randomize_buffers()                   \
+    do {                                      \
+        int i;                                \
+        for (i = 0; i < BUF_SIZE; i++) {      \
+            src0[i] = sign_extend(rnd(), 24); \
+            src1[i] = sign_extend(rnd(), 24); \
+            src2[i] = sign_extend(rnd(), 24); \
+        }                                     \
+    } while (0)
+
+/* Compare the new (dst, src0, src1, len) implementation against the
+ * reference over BUF_SIZE elements, then benchmark it */
+static void check_vector_fmul(const int *src0, const int *src1)
+{
+    LOCAL_ALIGNED_32(int, out_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(int, out_new, [BUF_SIZE]);
+
+    declare_func(void, int *dst, const int *src0, const int *src1, int len);
+
+    call_ref(out_ref, src0, src1, BUF_SIZE);
+    call_new(out_new, src0, src1, BUF_SIZE);
+    if (memcmp(out_ref, out_new, sizeof(int) * BUF_SIZE) != 0)
+        fail();
+
+    bench_new(out_new, src0, src1, BUF_SIZE);
+}
+
+/* Compare the new (dst, src0, src1, src2, len) implementation against the
+ * reference over BUF_SIZE elements, then benchmark it */
+static void check_vector_fmul_add(const int *src0, const int *src1, const int *src2)
+{
+    LOCAL_ALIGNED_32(int, out_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(int, out_new, [BUF_SIZE]);
+
+    declare_func(void, int *dst, const int *src0, const int *src1, const int *src2, int len);
+
+    call_ref(out_ref, src0, src1, src2, BUF_SIZE);
+    call_new(out_new, src0, src1, src2, BUF_SIZE);
+    if (memcmp(out_ref, out_new, sizeof(int) * BUF_SIZE) != 0)
+        fail();
+
+    bench_new(out_new, src0, src1, src2, BUF_SIZE);
+}
+
+/* Compare the new windowed multiply against the reference. The functions are
+ * called with len = BUF_SIZE / 2 and BUF_SIZE output elements are compared. */
+static void check_vector_fmul_window(const int32_t *src0, const int32_t *src1, const int32_t *win)
+{
+    LOCAL_ALIGNED_32(int32_t, out_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(int32_t, out_new, [BUF_SIZE]);
+
+    declare_func(void, int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len);
+
+    call_ref(out_ref, src0, src1, win, BUF_SIZE / 2);
+    call_new(out_new, src0, src1, win, BUF_SIZE / 2);
+    if (memcmp(out_ref, out_new, sizeof(int32_t) * BUF_SIZE) != 0)
+        fail();
+
+    bench_new(out_new, src0, src1, win, BUF_SIZE / 2);
+}
+
+/* Compare the new scaled windowed multiply (16-bit output) against the
+ * reference. The functions are called with len = BUF_SIZE / 2 and bits = 2,
+ * and BUF_SIZE output elements are compared. */
+static void check_vector_fmul_window_scaled(const int32_t *src0, const int32_t *src1, const int32_t *win)
+{
+    LOCAL_ALIGNED_16(int16_t, out_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int16_t, out_new, [BUF_SIZE]);
+
+    declare_func(void, int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits);
+
+    call_ref(out_ref, src0, src1, win, BUF_SIZE / 2, 2);
+    call_new(out_new, src0, src1, win, BUF_SIZE / 2, 2);
+    if (memcmp(out_ref, out_new, sizeof(int16_t) * BUF_SIZE) != 0)
+        fail();
+
+    bench_new(out_new, src0, src1, win, BUF_SIZE / 2, 2);
+}
+
+/* Compare the new in-place butterflies implementation against the reference.
+ * Both run on private copies of src0/src1; the copies for the new version are
+ * restored from the inputs before benchmarking. */
+static void check_butterflies(const int *src0, const int *src1)
+{
+    LOCAL_ALIGNED_16(int, a_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int, b_ref, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int, a_new, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int, b_new, [BUF_SIZE]);
+
+    declare_func(void, int *av_restrict src0, int *av_restrict src1, int len);
+
+    memcpy(a_ref, src0, BUF_SIZE * sizeof(*src0));
+    memcpy(b_ref, src1, BUF_SIZE * sizeof(*src1));
+    memcpy(a_new, src0, BUF_SIZE * sizeof(*src0));
+    memcpy(b_new, src1, BUF_SIZE * sizeof(*src1));
+
+    call_ref(a_ref, b_ref, BUF_SIZE);
+    call_new(a_new, b_new, BUF_SIZE);
+
+    if (memcmp(a_ref, a_new, BUF_SIZE * sizeof(*a_ref)) != 0 ||
+        memcmp(b_ref, b_new, BUF_SIZE * sizeof(*b_ref)) != 0)
+        fail();
+
+    /* Restore the inputs overwritten by the call above */
+    memcpy(a_new, src0, BUF_SIZE * sizeof(*src0));
+    memcpy(b_new, src1, BUF_SIZE * sizeof(*src1));
+    bench_new(a_new, b_new, BUF_SIZE);
+}
+
+/* Compare the scalar product returned by the new implementation with the one
+ * returned by the reference over BUF_SIZE elements, then benchmark it */
+static void check_scalarproduct_fixed(const int *src0, const int *src1)
+{
+    int expected, actual;
+
+    declare_func(int, const int *src0, const int *src1, int len);
+
+    expected = call_ref(src0, src1, BUF_SIZE);
+    actual   = call_new(src0, src1, BUF_SIZE);
+    if (expected != actual)
+        fail();
+
+    bench_new(src0, src1, BUF_SIZE);
+}
+
+/* Test entry point for the fixed-point DSP functions. Allocates a DSP context,
+ * fills three source buffers with random data and checks every function
+ * pointer of the context against its reference. */
+void checkasm_check_fixed_dsp(void)
+{
+    LOCAL_ALIGNED_32(int32_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(int32_t, src1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(int32_t, src2, [BUF_SIZE]);
+    AVFixedDSPContext *fdsp = avpriv_alloc_fixed_dsp(1);
+
+    /* The context is heap-allocated (it is released with av_freep() below);
+     * bail out instead of dereferencing NULL if the allocation failed.
+     * NOTE(review): presumably NULL is the failure return - confirm in fixed_dsp.h */
+    if (!fdsp) {
+        fprintf(stderr, "checkasm: failed to allocate AVFixedDSPContext\n");
+        return;
+    }
+
+    randomize_buffers();
+    if (check_func(fdsp->vector_fmul, "vector_fmul_fixed"))
+        check_vector_fmul(src0, src1);
+    if (check_func(fdsp->vector_fmul_add, "vector_fmul_add_fixed"))
+        check_vector_fmul_add(src0, src1, src2);
+    /* vector_fmul_reverse is exercised through the same helper as vector_fmul */
+    if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse_fixed"))
+        check_vector_fmul(src0, src1);
+    if (check_func(fdsp->vector_fmul_window, "vector_fmul_window_fixed"))
+        check_vector_fmul_window(src0, src1, src2);
+    if (check_func(fdsp->vector_fmul_window_scaled, "vector_fmul_window_scaled_fixed"))
+        check_vector_fmul_window_scaled(src0, src1, src2);
+    report("vector_fmul");
+    if (check_func(fdsp->butterflies_fixed, "butterflies_fixed"))
+        check_butterflies(src0, src1);
+    report("butterflies_fixed");
+    if (check_func(fdsp->scalarproduct_fixed, "scalarproduct_fixed"))
+        check_scalarproduct_fixed(src0, src1);
+    report("scalarproduct_fixed");
+
+    av_freep(&fdsp);
+}
--- a/externals/ffmpeg/tests/checkasm/flacdsp.c
+++ b/externals/ffmpeg/tests/checkasm/flacdsp.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/flacdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 256
+#define MAX_CHANNELS 8
+
+/*
+ * Write identical random 32-bit words, masked to their low (bits - 2) bits,
+ * into the first BUF_SIZE bytes of each of the first `channels` planes of
+ * ref_src and new_src. `channels`, `bits`, `ref_src` and `new_src` are taken
+ * from the scope in which the macro is expanded.
+ */
+#define randomize_buffers()                                 \
+    do {                                                    \
+        const uint32_t mask = (1 << (bits - 2)) - 1;        \
+        int off, ch;                                        \
+        for (off = 0; off < BUF_SIZE; off += 4) {           \
+            for (ch = 0; ch < channels; ch++) {             \
+                uint32_t word = rnd() & mask;               \
+                AV_WN32A(ref_src[ch] + off, word);          \
+                AV_WN32A(new_src[ch] + off, word);          \
+            }                                               \
+        }                                                   \
+    } while (0)
+
+/*
+ * Run the reference and the tested decorrelate function on identical random
+ * input and fail if either the destination or the source buffers differ
+ * afterwards; then benchmark the tested function.
+ */
+static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **new_dst, uint8_t **new_src,
+                              int channels, int bits) {
+    /* Only half as many destination bytes are compared when bits == 16. */
+    const int dst_bytes = bits == 16 ? BUF_SIZE * (channels/2) : BUF_SIZE * channels;
+    const int src_bytes = BUF_SIZE * channels;
+
+    declare_func(void, uint8_t **out, int32_t **in, int channels, int len, int shift);
+
+    randomize_buffers();
+    call_ref(ref_dst, (int32_t **)ref_src, channels, BUF_SIZE / sizeof(int32_t), 8);
+    call_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
+
+    if (memcmp(*ref_dst, *new_dst, dst_bytes) != 0 ||
+        memcmp(*ref_src, *new_src, src_bytes) != 0)
+        fail();
+
+    bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
+}
+
+/*
+ * Entry point of the FLAC DSP tests. For each sample format it checks the
+ * three two-channel decorrelation modes ("ls", "rs", "ms") and then the
+ * decorrelate[0] function for 2, 4, 6 and 8 channels.
+ */
+void checkasm_check_flacdsp(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(uint8_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(uint8_t, new_dst, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(uint8_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
+    static const char * const names[3] = { "ls", "rs", "ms" };
+    static const struct {
+        enum AVSampleFormat fmt;
+        int bits;
+    } fmts[] = {
+        { AV_SAMPLE_FMT_S16, 16 },
+        { AV_SAMPLE_FMT_S32, 32 },
+    };
+    uint8_t *ref_src[MAX_CHANNELS];
+    uint8_t *new_src[MAX_CHANNELS];
+    FLACDSPContext h;
+    int f, mode, ch;
+
+    /* Plane k of each source buffer starts BUF_SIZE * k bytes into it. */
+    for (ch = 0; ch < MAX_CHANNELS; ch++) {
+        ref_src[ch] = ref_buf + BUF_SIZE * ch;
+        new_src[ch] = new_buf + BUF_SIZE * ch;
+    }
+
+    for (f = 0; f < 2; f++) {
+        const int bits = fmts[f].bits;
+
+        /* Two-channel decorrelation modes. */
+        ff_flacdsp_init(&h, fmts[f].fmt, 2, 0);
+        for (mode = 0; mode < 3; mode++) {
+            if (check_func(h.decorrelate[mode], "flac_decorrelate_%s_%d", names[mode], bits))
+                check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, 2, bits);
+        }
+
+        /* decorrelate[0] for every even channel count up to MAX_CHANNELS. */
+        for (ch = 2; ch <= MAX_CHANNELS; ch += 2) {
+            ff_flacdsp_init(&h, fmts[f].fmt, ch, 0);
+            if (check_func(h.decorrelate[0], "flac_decorrelate_indep%d_%d", ch, bits))
+                check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, ch, bits);
+        }
+    }
+
+    report("decorrelate");
+}
--- a/externals/ffmpeg/tests/checkasm/float_dsp.c
+++ b/externals/ffmpeg/tests/checkasm/float_dsp.c
@@ -0,0 +1,338 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#include <float.h>
+#include <stdint.h>
+
+#include "libavutil/float_dsp.h"
+#include "libavutil/internal.h"
+#include "checkasm.h"
+
+#define LEN 256
+
+/*
+ * Fill buf[0 .. LEN-1] two elements at a time with the pair of values
+ * produced by av_bmg_get(), scaled by stddev (10.0) and offset by mean (0.0).
+ * LEN must be even. The macro deliberately has no trailing semicolon so that
+ * "randomize_buffer(x);" expands to exactly one statement and stays safe
+ * inside unbraced if/else bodies.
+ */
+#define randomize_buffer(buf)                   \
+do {                                            \
+    int i;                                      \
+    double bmg[2], stddev = 10.0, mean = 0.0;   \
+                                                \
+    for (i = 0; i < LEN; i += 2) {              \
+        av_bmg_get(&checkasm_lfg, bmg);         \
+        (buf)[i]     = bmg[0] * stddev + mean;  \
+        (buf)[i + 1] = bmg[1] * stddev + mean;  \
+    }                                           \
+} while (0)
+
+/*
+ * Compare the reference and the tested implementation of a
+ * (dst, src0, src1, len) float function element by element, using a
+ * tolerance that scales with the magnitude of the operands and their
+ * product. Reports the first mismatch on stderr, then benchmarks the tested
+ * implementation.
+ */
+static void test_vector_fmul(const float *src0, const float *src1)
+{
+    LOCAL_ALIGNED_32(float, ref_out, [LEN]);
+    LOCAL_ALIGNED_32(float, new_out, [LEN]);
+    int n;
+
+    declare_func(void, float *dst, const float *src0, const float *src1,
+                 int len);
+
+    call_ref(ref_out, src0, src1, LEN);
+    call_new(new_out, src0, src1, LEN);
+
+    for (n = 0; n < LEN; n++) {
+        const double mag = fabs(src0[n]) + fabs(src1[n]) + fabs(src0[n] * src1[n]) + 1.0;
+
+        if (float_near_abs_eps(ref_out[n], new_out[n], mag * 2 * FLT_EPSILON))
+            continue;
+
+        /* Only the first mismatching element is reported. */
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    bench_new(new_out, src0, src1, LEN);
+}
+
+/*
+ * Double-precision counterpart of the vector multiply check: compares the
+ * reference and tested outputs element by element with a tolerance scaled
+ * by the operand magnitudes, reports the first mismatch, then benchmarks.
+ */
+static void test_vector_dmul(const double *src0, const double *src1)
+{
+    LOCAL_ALIGNED_32(double, ref_out, [LEN]);
+    LOCAL_ALIGNED_32(double, new_out, [LEN]);
+    int n;
+
+    declare_func(void, double *dst, const double *src0, const double *src1,
+                 int len);
+
+    call_ref(ref_out, src0, src1, LEN);
+    call_new(new_out, src0, src1, LEN);
+
+    for (n = 0; n < LEN; n++) {
+        const double mag = fabs(src0[n]) + fabs(src1[n]) + fabs(src0[n] * src1[n]) + 1.0;
+
+        if (double_near_abs_eps(ref_out[n], new_out[n], mag * 2 * DBL_EPSILON))
+            continue;
+
+        /* Only the first mismatching element is reported. */
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    bench_new(new_out, src0, src1, LEN);
+}
+
+/* Fixed absolute tolerance used when comparing vector_fmul_add outputs. */
+#define ARBITRARY_FMUL_ADD_CONST 0.005
+
+/*
+ * Run the reference and tested (dst, src0, src1, src2, len) function on the
+ * same inputs and require every output pair to agree within
+ * ARBITRARY_FMUL_ADD_CONST; report the first mismatch, then benchmark.
+ */
+static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
+{
+    LOCAL_ALIGNED_32(float, ref_out, [LEN]);
+    LOCAL_ALIGNED_32(float, new_out, [LEN]);
+    int n;
+
+    declare_func(void, float *dst, const float *src0, const float *src1,
+                     const float *src2, int len);
+
+    call_ref(ref_out, src0, src1, src2, LEN);
+    call_new(new_out, src0, src1, src2, LEN);
+
+    for (n = 0; n < LEN; n++) {
+        if (float_near_abs_eps(ref_out[n], new_out[n], ARBITRARY_FMUL_ADD_CONST))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    bench_new(new_out, src0, src1, src2, LEN);
+}
+
+/*
+ * Check a (dst, src, mul, len) float function, using src1[0] as the scalar
+ * multiplier. Outputs are compared with a tolerance scaled by the magnitude
+ * of the operands and their product; the first mismatch is reported.
+ */
+static void test_vector_fmul_scalar(const float *src0, const float *src1)
+{
+    LOCAL_ALIGNED_16(float, ref_out, [LEN]);
+    LOCAL_ALIGNED_16(float, new_out, [LEN]);
+    int n;
+
+    declare_func(void, float *dst, const float *src, float mul, int len);
+
+    call_ref(ref_out, src0, src1[0], LEN);
+    call_new(new_out, src0, src1[0], LEN);
+
+    for (n = 0; n < LEN; n++) {
+        const double mag = fabs(src0[n]) + fabs(src1[0]) + fabs(src0[n] * src1[0]) + 1.0;
+
+        if (float_near_abs_eps(ref_out[n], new_out[n], mag * 2 * FLT_EPSILON))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    bench_new(new_out, src0, src1[0], LEN);
+}
+
+/* Fixed absolute tolerance used when comparing vector_fmul_window outputs. */
+#define ARBITRARY_FMUL_WINDOW_CONST 0.008
+
+/*
+ * Run the reference and tested (dst, src0, src1, win, len) function with
+ * len = LEN / 2 and compare all LEN output elements within
+ * ARBITRARY_FMUL_WINDOW_CONST; report the first mismatch, then benchmark.
+ */
+static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
+{
+    LOCAL_ALIGNED_16(float, ref_out, [LEN]);
+    LOCAL_ALIGNED_16(float, new_out, [LEN]);
+    int n;
+
+    declare_func(void, float *dst, const float *src0, const float *src1,
+                 const float *win, int len);
+
+    call_ref(ref_out, src0, src1, win, LEN / 2);
+    call_new(new_out, src0, src1, win, LEN / 2);
+
+    for (n = 0; n < LEN; n++) {
+        if (float_near_abs_eps(ref_out[n], new_out[n], ARBITRARY_FMUL_WINDOW_CONST))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    bench_new(new_out, src0, src1, win, LEN / 2);
+}
+
+/* Fixed absolute tolerance used when comparing vector_fmac_scalar outputs. */
+#define ARBITRARY_FMAC_SCALAR_CONST 0.005
+
+/*
+ * Check a (dst, src, mul, len) float function whose destination is seeded
+ * with src2 before each call, using src1[0] as the scalar. Outputs must agree
+ * within ARBITRARY_FMAC_SCALAR_CONST. The destination is reseeded from src2
+ * before benchmarking.
+ */
+static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
+{
+    LOCAL_ALIGNED_32(float, ref_out, [LEN]);
+    LOCAL_ALIGNED_32(float, new_out, [LEN]);
+    int n;
+
+    declare_func(void, float *dst, const float *src, float mul, int len);
+
+    /* Both destinations start from the same contents. */
+    memcpy(ref_out, src2, LEN * sizeof(*src2));
+    memcpy(new_out, src2, LEN * sizeof(*src2));
+
+    call_ref(ref_out, src0, src1[0], LEN);
+    call_new(new_out, src0, src1[0], LEN);
+
+    for (n = 0; n < LEN; n++) {
+        if (float_near_abs_eps(ref_out[n], new_out[n], ARBITRARY_FMAC_SCALAR_CONST))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    memcpy(new_out, src2, LEN * sizeof(*src2));
+    bench_new(new_out, src0, src1[0], LEN);
+}
+
+/*
+ * Check a (dst, src, mul, len) double function, using src1[0] as the scalar
+ * multiplier. Outputs are compared with a tolerance scaled by the magnitude
+ * of the operands and their product; the first mismatch is reported.
+ */
+static void test_vector_dmul_scalar(const double *src0, const double *src1)
+{
+    LOCAL_ALIGNED_32(double, ref_out, [LEN]);
+    LOCAL_ALIGNED_32(double, new_out, [LEN]);
+    int n;
+
+    declare_func(void, double *dst, const double *src, double mul, int len);
+
+    call_ref(ref_out, src0, src1[0], LEN);
+    call_new(new_out, src0, src1[0], LEN);
+
+    for (n = 0; n < LEN; n++) {
+        const double mag = fabs(src1[0]) + fabs(src0[n]) + fabs(src1[0] * src0[n]) + 1.0;
+
+        if (double_near_abs_eps(ref_out[n], new_out[n], mag * 2 * DBL_EPSILON))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", n,
+                ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    bench_new(new_out, src0, src1[0], LEN);
+}
+
+/* Fixed absolute tolerance used when comparing vector_dmac_scalar outputs. */
+#define ARBITRARY_DMAC_SCALAR_CONST 0.005
+
+/*
+ * Check a (dst, src, mul, len) double function whose destination is seeded
+ * with src2 before each call, using src1[0] as the scalar. Outputs must agree
+ * within ARBITRARY_DMAC_SCALAR_CONST. The destination is reseeded from src2
+ * before benchmarking.
+ */
+static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
+{
+    LOCAL_ALIGNED_32(double, ref_out, [LEN]);
+    LOCAL_ALIGNED_32(double, new_out, [LEN]);
+    int n;
+
+    declare_func(void, double *dst, const double *src, double mul, int len);
+
+    /* Both destinations start from the same contents. */
+    memcpy(ref_out, src2, LEN * sizeof(*src2));
+    memcpy(new_out, src2, LEN * sizeof(*src2));
+
+    call_ref(ref_out, src0, src1[0], LEN);
+    call_new(new_out, src0, src1[0], LEN);
+
+    for (n = 0; n < LEN; n++) {
+        if (double_near_abs_eps(ref_out[n], new_out[n], ARBITRARY_DMAC_SCALAR_CONST))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_out[n], new_out[n], ref_out[n] - new_out[n]);
+        fail();
+        break;
+    }
+
+    memcpy(new_out, src2, LEN * sizeof(*src2));
+    bench_new(new_out, src0, src1[0], LEN);
+}
+
+/*
+ * Check an in-place (src0, src1, len) float function. Each implementation
+ * works on its own copies of the inputs; both resulting buffers must match
+ * within FLT_EPSILON. On the first mismatching index both buffer pairs are
+ * printed. The tested copies are restored before benchmarking.
+ */
+static void test_butterflies_float(const float *src0, const float *src1)
+{
+    LOCAL_ALIGNED_16(float,  ref_a, [LEN]);
+    LOCAL_ALIGNED_16(float,  new_a, [LEN]);
+    LOCAL_ALIGNED_16(float,  ref_b, [LEN]);
+    LOCAL_ALIGNED_16(float,  new_b, [LEN]);
+    int n;
+
+    declare_func(void, float *av_restrict src0, float *av_restrict src1,
+    int len);
+
+    /* The function modifies its arguments, so give each call private copies. */
+    memcpy(ref_a, src0, LEN * sizeof(*src0));
+    memcpy(ref_b, src1, LEN * sizeof(*src1));
+    memcpy(new_a, src0, LEN * sizeof(*src0));
+    memcpy(new_b, src1, LEN * sizeof(*src1));
+
+    call_ref(ref_a, ref_b, LEN);
+    call_new(new_a, new_b, LEN);
+
+    for (n = 0; n < LEN; n++) {
+        if (float_near_abs_eps(ref_a[n], new_a[n], FLT_EPSILON) &&
+            float_near_abs_eps(ref_b[n], new_b[n], FLT_EPSILON))
+            continue;
+
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_a[n], new_a[n], ref_a[n] - new_a[n]);
+        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                n, ref_b[n], new_b[n], ref_b[n] - new_b[n]);
+        fail();
+        break;
+    }
+
+    memcpy(new_a, src0, LEN * sizeof(*src0));
+    memcpy(new_b, src1, LEN * sizeof(*src1));
+    bench_new(new_a, new_b, LEN);
+}
+
+/* Fixed absolute tolerance used when comparing scalarproduct_float results. */
+#define ARBITRARY_SCALARPRODUCT_CONST 0.2
+
+/*
+ * Compare the float returned by the reference and the tested
+ * (src0, src1, len) function within ARBITRARY_SCALARPRODUCT_CONST, print
+ * both values on mismatch, then benchmark the tested implementation.
+ */
+static void test_scalarproduct_float(const float *src0, const float *src1)
+{
+    float expected, actual;
+
+    declare_func_float(float, const float *src0, const float *src1, int len);
+
+    expected = call_ref(src0, src1, LEN);
+    actual   = call_new(src0, src1, LEN);
+
+    if (!float_near_abs_eps(expected, actual, ARBITRARY_SCALARPRODUCT_CONST)) {
+        fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
+                expected, actual, expected - actual);
+        fail();
+    }
+
+    bench_new(src0, src1, LEN);
+}
+
+/*
+ * Entry point of the AVFloatDSPContext tests: allocates a context, fills the
+ * float and double source buffers with random data and runs the matching
+ * test for every function pointer that check_func() selects for testing.
+ */
+void checkasm_check_float_dsp(void)
+{
+    LOCAL_ALIGNED_32(float,  src0,     [LEN]);
+    LOCAL_ALIGNED_32(float,  src1,     [LEN]);
+    LOCAL_ALIGNED_32(float,  src2,     [LEN]);
+    LOCAL_ALIGNED_16(float,  src3,     [LEN]);
+    LOCAL_ALIGNED_16(float,  src4,     [LEN]);
+    LOCAL_ALIGNED_16(float,  src5,     [LEN]);
+    LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
+    LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
+    LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
+    AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(1);
+
+    if (fdsp == NULL) {
+        fprintf(stderr, "floatdsp: Out of memory error\n");
+        return;
+    }
+
+    /* Single-precision inputs. */
+    randomize_buffer(src0);
+    randomize_buffer(src1);
+    randomize_buffer(src2);
+    randomize_buffer(src3);
+    randomize_buffer(src4);
+    randomize_buffer(src5);
+    /* Double-precision inputs. */
+    randomize_buffer(dbl_src0);
+    randomize_buffer(dbl_src1);
+    randomize_buffer(dbl_src2);
+
+    if (check_func(fdsp->vector_fmul, "vector_fmul")) {
+        test_vector_fmul(src0, src1);
+    }
+    if (check_func(fdsp->vector_fmul_add, "vector_fmul_add")) {
+        test_vector_fmul_add(src0, src1, src2);
+    }
+    if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar")) {
+        test_vector_fmul_scalar(src3, src4);
+    }
+    /* vector_fmul_reverse is exercised through the vector_fmul test. */
+    if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse")) {
+        test_vector_fmul(src0, src1);
+    }
+    if (check_func(fdsp->vector_fmul_window, "vector_fmul_window")) {
+        test_vector_fmul_window(src3, src4, src5);
+    }
+    report("vector_fmul");
+
+    if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar")) {
+        test_vector_fmac_scalar(src0, src1, src2);
+    }
+    report("vector_fmac");
+
+    if (check_func(fdsp->vector_dmul, "vector_dmul")) {
+        test_vector_dmul(dbl_src0, dbl_src1);
+    }
+    if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar")) {
+        test_vector_dmul_scalar(dbl_src0, dbl_src1);
+    }
+    report("vector_dmul");
+
+    if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar")) {
+        test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
+    }
+    report("vector_dmac");
+
+    if (check_func(fdsp->butterflies_float, "butterflies_float")) {
+        test_butterflies_float(src3, src4);
+    }
+    report("butterflies_float");
+
+    if (check_func(fdsp->scalarproduct_float, "scalarproduct_float")) {
+        test_scalarproduct_float(src3, src4);
+    }
+    report("scalarproduct_float");
+
+    av_freep(&fdsp);
+}
--- a/externals/ffmpeg/tests/checkasm/fmtconvert.c
+++ b/externals/ffmpeg/tests/checkasm/fmtconvert.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/common.h"
+#include "libavcodec/fmtconvert.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 1024
+
+/*
+ * Fill in[0 .. len-1] with random values (rnd() - INT32_MAX) and the rest of
+ * the BUF_SIZE-element buffer with INT32_MAX. `in` is taken from the scope
+ * in which the macro is expanded. The len argument is parenthesized so that
+ * any expression can be passed safely.
+ */
+#define randomize_input(len)                                    \
+    do {                                                        \
+        int k;                                                  \
+        for (k = 0; k < (len); k++) {                           \
+            in[k] = rnd() - INT32_MAX;                          \
+        }                                                       \
+        for (     ; k < BUF_SIZE; k++) {                        \
+            in[k] = INT32_MAX;                                  \
+        }                                                       \
+    } while (0)
+
+/*
+ * Entry point of the FmtConvertContext tests. Checks
+ * int32_to_float_fmul_scalar for every scale in scale_arr and
+ * int32_to_float_fmul_array8 with the whole scale array, each over a set of
+ * buffer lengths, comparing reference and tested output within 3 ULP.
+ */
+void checkasm_check_fmtconvert(void)
+{
+    FmtConvertContext c;
+    LOCAL_ALIGNED(32, float,   dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED(32, float,   dst1, [BUF_SIZE]);
+    LOCAL_ALIGNED(32, int32_t, in,   [BUF_SIZE]);
+    float scale_arr[128];
+    int length[] = {8, 16, 24, 56, 72, 128, 512, 520, 656, 768, 992};
+    int i, j;
+
+    /*
+     * Spread the scales over negative, zero and positive fractional values.
+     * The index has to take part in the expression and the division has to
+     * be done in floating point; otherwise every entry ends up as the same
+     * integer constant and only a single scale is ever tested.
+     */
+    for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++)
+        scale_arr[i] = (i - (int)FF_ARRAY_ELEMS(scale_arr) / 2) / 13.0f;
+
+    ff_fmt_convert_init(&c, NULL);
+
+    memset(dst0, 0, sizeof(*dst0) * BUF_SIZE);
+    memset(dst1, 0, sizeof(*dst1) * BUF_SIZE);
+
+    if (check_func(c.int32_to_float_fmul_scalar, "int32_to_float_fmul_scalar")) {
+        declare_func(void, float *, const int32_t *, float, int);
+
+        for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) {
+            for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
+
+                randomize_input(length[j]);
+
+                call_ref(dst0, in, scale_arr[i], length[j]);
+                call_new(dst1, in, scale_arr[i], length[j]);
+
+                if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
+                    fail();
+                    break;
+                }
+
+                bench_new(dst1, in, scale_arr[i], length[j]);
+            }
+        }
+    }
+    if (check_func(c.int32_to_float_fmul_array8, "int32_to_float_fmul_array8")) {
+        declare_func(void, FmtConvertContext *, float *, const int32_t *,
+                     const float *, int);
+
+        /* Repeat the whole length sweep four times with fresh random input. */
+        for (i = 0; i < 4; i++) {
+            for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
+
+                randomize_input(length[j]);
+
+                call_ref(&c, dst0, in, scale_arr, length[j]);
+                call_new(&c, dst1, in, scale_arr, length[j]);
+
+                if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
+                    fail();
+                    fprintf(stderr, "int32_to_float_fmul_array8: len: %d\n", length[j]);
+                    break;
+                }
+
+                bench_new(&c, dst1, in, scale_arr, length[j]);
+            }
+        }
+    }
+    report("fmtconvert");
+}
--- a/externals/ffmpeg/tests/checkasm/g722dsp.c
+++ b/externals/ffmpeg/tests/checkasm/g722dsp.c
@@ -0,0 +1,63 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/g722.h"
+#include "libavcodec/g722dsp.h"
+#include "libavcodec/mathops.h"
+
+/*
+ * Fill src0 and src1 (taken from the scope in which the macro is expanded)
+ * with the same sequence of values, each obtained by passing rnd() through
+ * sign_extend(..., 16).
+ */
+#define randomize_buffers()                                   \
+    do {                                                      \
+        int idx;                                              \
+        for (idx = 0; idx < PREV_SAMPLES_BUF_SIZE; idx++) {   \
+            const int sample = sign_extend(rnd(), 16);        \
+            src1[idx] = sample;                               \
+            src0[idx] = sample;                               \
+        }                                                     \
+    } while (0)
+
+/*
+ * Slide over two identically filled sample buffers, calling the reference
+ * implementation on one and the tested implementation on the other at each
+ * position, and fail whenever the two-element outputs differ. Finally
+ * benchmark the tested implementation at the start of its buffer.
+ */
+static void check_qmf(void) {
+    int16_t src0[PREV_SAMPLES_BUF_SIZE];
+    int16_t src1[PREV_SAMPLES_BUF_SIZE];
+    int dst0[2], dst1[2];
+    int pos;
+
+    declare_func(void, const int16_t *prev_samples, int xout[2]);
+
+    randomize_buffers();
+
+    /* The last start position is 24 entries short of the buffer end
+     * (presumably the number of samples one call reads - confirm). */
+    for (pos = 0; pos < PREV_SAMPLES_BUF_SIZE - 24; pos++) {
+        call_ref(&src0[pos], dst0);
+        call_new(&src1[pos], dst1);
+        if (memcmp(dst0, dst1, sizeof(dst0)) != 0)
+            fail();
+    }
+
+    bench_new(src1, dst1);
+}
+
+/*
+ * Entry point of the G.722 DSP tests: initializes a G722DSPContext and runs
+ * the QMF check when check_func() selects apply_qmf for testing.
+ */
+void checkasm_check_g722dsp(void)
+{
+    G722DSPContext dsp;
+
+    ff_g722dsp_init(&dsp);
+
+    if (check_func(dsp.apply_qmf, "g722_apply_qmf")) {
+        check_qmf();
+    }
+
+    report("apply_qmf");
+}
--- a/externals/ffmpeg/tests/checkasm/h264dsp.c
+++ b/externals/ffmpeg/tests/checkasm/h264dsp.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2016 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264dsp.h"
+#include "libavcodec/h264data.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; /* AND-masks for random 32-bit words, indexed by bit_depth - 8 */
+static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f }; /* same indexing; masks the loop filter test input */
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8) /* bytes per pixel; reads bit_depth at the expansion site */
+#define SIZEOF_COEF  (2 * ((bit_depth + 7) / 8)) /* bytes per coefficient, twice SIZEOF_PIXEL */
+#define PIXEL_STRIDE 16 /* row pitch in bytes of the small src/dst blocks */
+
+#define randomize_buffers() /* uses src, dst, coef, sz and bit_depth from the expansion site */ \
+    do {                                                                     \
+        int x, y;                                                            \
+        uint32_t mask = pixel_mask[bit_depth - 8]; /* limits the random pixel values */ \
+        for (y = 0; y < sz; y++) {                                           \
+            for (x = 0; x < PIXEL_STRIDE; x += 4) { /* fill one row, 4 bytes per store */ \
+                AV_WN32A(src + y * PIXEL_STRIDE + x, rnd() & mask);          \
+                AV_WN32A(dst + y * PIXEL_STRIDE + x, rnd() & mask);          \
+            }                                                                \
+            for (x = 0; x < sz; x++) { /* coef = src - dst over the sz x sz block */ \
+                if (bit_depth == 8) {                                        \
+                    coef[y * sz + x] = src[y * PIXEL_STRIDE + x] -           \
+                                       dst[y * PIXEL_STRIDE + x];            \
+                } else { /* other depths: 16-bit pixels, 32-bit coefficients */ \
+                    ((int32_t *)coef)[y * sz + x] =                          \
+                        ((uint16_t *)src)[y * (PIXEL_STRIDE/2) + x] -        \
+                        ((uint16_t *)dst)[y * (PIXEL_STRIDE/2) + x];         \
+                }                                                            \
+            }                                                                \
+        }                                                                    \
+    } while (0)
+
+#define dct4x4_impl(size, dctcoef) /* defines dct4x4_<size>(): in-place transform of 16 dctcoef values */ \
+static void dct4x4_##size(dctcoef *coef)                                     \
+{                                                                            \
+    int i, y, x;                                                             \
+    dctcoef tmp[16];                                                         \
+    for (i = 0; i < 4; i++) { /* pass 1: row i of coef -> column i of tmp */ \
+        const int z0 = coef[i*4 + 0] + coef[i*4 + 3];                        \
+        const int z1 = coef[i*4 + 1] + coef[i*4 + 2];                        \
+        const int z2 = coef[i*4 + 0] - coef[i*4 + 3];                        \
+        const int z3 = coef[i*4 + 1] - coef[i*4 + 2];                        \
+        tmp[i + 4*0] =   z0 +   z1;                                          \
+        tmp[i + 4*1] = 2*z2 +   z3;                                          \
+        tmp[i + 4*2] =   z0 -   z1;                                          \
+        tmp[i + 4*3] =   z2 - 2*z3;                                          \
+    }                                                                        \
+    for (i = 0; i < 4; i++) { /* pass 2: row i of tmp -> row i of coef */    \
+        const int z0 = tmp[i*4 + 0] + tmp[i*4 + 3];                          \
+        const int z1 = tmp[i*4 + 1] + tmp[i*4 + 2];                          \
+        const int z2 = tmp[i*4 + 0] - tmp[i*4 + 3];                          \
+        const int z3 = tmp[i*4 + 1] - tmp[i*4 + 2];                          \
+        coef[i*4 + 0] =   z0 +   z1;                                         \
+        coef[i*4 + 1] = 2*z2 +   z3;                                         \
+        coef[i*4 + 2] =   z0 -   z1;                                         \
+        coef[i*4 + 3] =   z2 - 2*z3;                                         \
+    }                                                                        \
+    for (y = 0; y < 4; y++) { /* pass 3: scale every coefficient */          \
+        for (x = 0; x < 4; x++) {                                            \
+            static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 }; \
+            const int idx = (y & 1) + (x & 1); /* 0, 1 or 2 from the parity of x and y */ \
+            coef[y*4 + x] = (coef[y*4 + x] * scale[idx] + (1 << 14)) >> 15;  \
+        }                                                                    \
+    }                                                                        \
+}
+
+#define DCT8_1D(src, srcstride, dst, dststride) do { /* 8-point pass: 8 strided inputs -> 8 strided outputs */ \
+    const int a0 = (src)[srcstride * 0] + (src)[srcstride * 7];              \
+    const int a1 = (src)[srcstride * 0] - (src)[srcstride * 7];              \
+    const int a2 = (src)[srcstride * 1] + (src)[srcstride * 6];              \
+    const int a3 = (src)[srcstride * 1] - (src)[srcstride * 6];              \
+    const int a4 = (src)[srcstride * 2] + (src)[srcstride * 5];              \
+    const int a5 = (src)[srcstride * 2] - (src)[srcstride * 5];              \
+    const int a6 = (src)[srcstride * 3] + (src)[srcstride * 4];              \
+    const int a7 = (src)[srcstride * 3] - (src)[srcstride * 4];              \
+    const int b0 = a0 + a6; /* b0..b3 combine the sums, b4..b7 the differences */ \
+    const int b1 = a2 + a4;                                                  \
+    const int b2 = a0 - a6;                                                  \
+    const int b3 = a2 - a4;                                                  \
+    const int b4 = a3 + a5 + (a1 + (a1 >> 1));                               \
+    const int b5 = a1 - a7 - (a5 + (a5 >> 1));                               \
+    const int b6 = a1 + a7 - (a3 + (a3 >> 1));                               \
+    const int b7 = a3 - a5 + (a7 + (a7 >> 1));                               \
+    (dst)[dststride * 0] =  b0 +  b1;                                        \
+    (dst)[dststride * 1] =  b4 + (b7 >> 2);                                  \
+    (dst)[dststride * 2] =  b2 + (b3 >> 1);                                  \
+    (dst)[dststride * 3] =  b5 + (b6 >> 2);                                  \
+    (dst)[dststride * 4] =  b0  - b1;                                        \
+    (dst)[dststride * 5] =  b6 - (b5 >> 2);                                  \
+    (dst)[dststride * 6] = (b2 >> 1) - b3;                                   \
+    (dst)[dststride * 7] = (b4 >> 2) - b7;                                   \
+} while (0)
+
+#define dct8x8_impl(size, dctcoef) /* defines dct8x8_<size>(): in-place transform of 64 dctcoef values */ \
+static void dct8x8_##size(dctcoef *coef)                                     \
+{                                                                            \
+    int i, x, y;                                                             \
+    dctcoef tmp[64];                                                         \
+    for (i = 0; i < 8; i++) /* pass 1: column i of coef -> column i of tmp */ \
+        DCT8_1D(coef + i, 8, tmp + i, 8);                                    \
+                                                                             \
+    for (i = 0; i < 8; i++) /* pass 2: row i of tmp -> column i of coef */   \
+        DCT8_1D(tmp + 8*i, 1, coef + i, 8);                                  \
+                                                                             \
+    for (y = 0; y < 8; y++) { /* pass 3: scale every coefficient */          \
+        for (x = 0; x < 8; x++) {                                            \
+            static const int scale[] = {                                     \
+                13107 * 20, 11428 * 18, 20972 * 32,                          \
+                12222 * 19, 16777 * 25, 15481 * 24,                          \
+            };                                                               \
+            static const int idxmap[] = { /* scale index by (y & 3, x & 3) */ \
+                0, 3, 4, 3,                                                  \
+                3, 1, 5, 1,                                                  \
+                4, 5, 2, 5,                                                  \
+                3, 1, 5, 1,                                                  \
+            };                                                               \
+            const int idx = idxmap[(y & 3) * 4 + (x & 3)];                   \
+            coef[y*8 + x] = ((int64_t)coef[y*8 + x] * /* 64-bit product, then round and shift */ \
+                             scale[idx] + (1 << 17)) >> 18;                  \
+        }                                                                    \
+    }                                                                        \
+}
+
+dct4x4_impl(16, int16_t) /* defines dct4x4_16(), called for bit_depth 8 */
+dct4x4_impl(32, int32_t) /* defines dct4x4_32(), called for the other bit depths */
+
+dct8x8_impl(16, int16_t) /* defines dct8x8_16(), called for bit_depth 8 */
+dct8x8_impl(32, int32_t) /* defines dct8x8_32(), called for the other bit depths */
+
+/*
+ * In-place 4x4 forward transform of coef.  For bit_depth 8 the buffer holds
+ * int16_t coefficients; for any other depth it is reinterpreted as int32_t.
+ */
+static void dct4x4(int16_t *coef, int bit_depth)
+{
+    if (bit_depth != 8) {
+        dct4x4_32((int32_t *) coef);
+        return;
+    }
+
+    dct4x4_16(coef);
+}
+
+/*
+ * In-place 8x8 forward transform of coef.  For bit_depth 8 the buffer holds
+ * int16_t coefficients; for any other depth it is reinterpreted as int32_t.
+ */
+static void dct8x8(int16_t *coef, int bit_depth)
+{
+    if (bit_depth != 8) {
+        dct8x8_32((int32_t *) coef);
+        return;
+    }
+
+    dct8x8_16(coef);
+}
+
+
+/*
+ * Test the single-block IDCT-and-add functions (4x4 and 8x8, full and
+ * DC-only) for bit depths 8 to 10.
+ *
+ * For each size, random src/dst blocks are generated and their difference is
+ * passed through the forward transform above to obtain the coefficients.
+ * Every function is then run at several destination offsets; the pixels and
+ * the coefficient buffer left behind by the tested function must both equal
+ * what the reference function produced.
+ */
+static void check_idct(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, src,  [8 * 8 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst,  [8 * 8 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]); /* slack for the align offset */
+    LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
+    LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
+    LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
+    H264DSPContext h;
+    int bit_depth, sz, align, dc;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        ff_h264dsp_init(&h, bit_depth, 1);
+        for (sz = 4; sz <= 8; sz += 4) {
+            randomize_buffers();
+
+            if (sz == 4)
+                dct4x4(coef, bit_depth);
+            else
+                dct8x8(coef, bit_depth);
+
+            for (dc = 0; dc <= 1; dc++) {
+                void (*idct)(uint8_t *, int16_t *, int) = NULL;
+                /* Select the function under test from block size and DC flag. */
+                switch ((sz << 1) | dc) {
+                case (4 << 1) | 0: idct = h.h264_idct_add; break;
+                case (4 << 1) | 1: idct = h.h264_idct_dc_add; break;
+                case (8 << 1) | 0: idct = h.h264_idct8_add; break;
+                case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break;
+                }
+                if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) {
+                    /* Step the destination by one block width per iteration. */
+                    for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
+                        uint8_t *dst1 = dst1_base + align;
+                        if (dc) {
+                            /* DC-only variant: keep just the first coefficient. */
+                            memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
+                            memcpy(subcoef0, coef, SIZEOF_COEF);
+                        } else {
+                            memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
+                        }
+                        memcpy(dst0, dst, sz * PIXEL_STRIDE);
+                        memcpy(dst1, dst, sz * PIXEL_STRIDE);
+                        memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
+                        call_ref(dst0, subcoef0, PIXEL_STRIDE);
+                        call_new(dst1, subcoef1, PIXEL_STRIDE);
+                        if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
+                            memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
+                            fail();
+                        /* Benchmark with the same stride that was just verified,
+                         * instead of an untested, tightly packed one. */
+                        bench_new(dst1, subcoef1, PIXEL_STRIDE);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Test the multi-block IDCT functions (idct_add16, idct_add16intra and
+ * idct8_add4) for bit depths 8 to 10.
+ *
+ * A 16x16 destination and its coefficient buffer are assembled block by
+ * block.  Each block randomly receives no coefficients, only the first one,
+ * or the full set, and the count is recorded in nnzc at the scan8 position
+ * of the block.  The tested function must leave the same pixels and the same
+ * coefficient buffer behind as the reference.
+ */
+static void check_idct_multiple(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, dst_full,  [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0,  [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1,  [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, nnzc,  [15 * 8]);
+    H264DSPContext h;
+    int bit_depth, i, y, func;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        ff_h264dsp_init(&h, bit_depth, 1);
+        for (func = 0; func < 3; func++) {
+            void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
+            const char *name;
+            int sz = 4, intra = 0;
+            int block_offset[16] = { 0 };
+            switch (func) {
+            case 0:
+                idct = h.h264_idct_add16;
+                name = "h264_idct_add16";
+                break;
+            case 1:
+                idct = h.h264_idct_add16intra;
+                name = "h264_idct_add16intra";
+                intra = 1;
+                break;
+            case 2:
+                idct = h.h264_idct8_add4;
+                name = "h264_idct8_add4";
+                sz = 8;
+                break;
+            }
+            memset(nnzc, 0, 15 * 8);
+            memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
+            for (i = 0; i < 16 * 16; i += sz * sz) {
+                /* randomize_buffers() stores into src/dst with AV_WN32A and
+                 * accesses src/dst/coef through uint16_t and int32_t
+                 * pointers, so the buffers must be suitably aligned; plain
+                 * uint8_t/int16_t stack arrays do not guarantee that. */
+                LOCAL_ALIGNED_16(uint8_t, src,  [8 * 8 * 2]);
+                LOCAL_ALIGNED_16(uint8_t, dst,  [8 * 8 * 2]);
+                LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
+                int index = i / sz;
+                int block_y = (index / 16) * sz;
+                int block_x = index % 16;
+                int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
+                int nnz = rnd() % 3; /* 0: empty, 1: first coefficient only, 2: full block */
+
+                randomize_buffers();
+                if (sz == 4)
+                    dct4x4(coef, bit_depth);
+                else
+                    dct8x8(coef, bit_depth);
+
+                /* Place this block's pixels into the 16-pixel-wide picture. */
+                for (y = 0; y < sz; y++)
+                    memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
+                           &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
+
+                if (nnz > 1)
+                    nnz = sz * sz;
+                memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
+                       coef, nnz * SIZEOF_COEF);
+
+                /* For the intra variant a lone first coefficient is flagged
+                 * as zero in nnzc; the coefficient itself stays in place. */
+                if (intra && nnz == 1)
+                    nnz = 0;
+
+                nnzc[scan8[i / 16]] = nnz;
+                block_offset[i / 16] = offset;
+            }
+
+            if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
+                memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
+                memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
+                memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
+                memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
+                call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
+                call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
+                if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
+                    memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
+                    fail();
+                bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
+            }
+        }
+    }
+}
+
+
+/*
+ * Test the tc0-based loop filters for bit depths 8 to 10.
+ *
+ * 36 alpha/beta/tc0 parameter sets are generated per bit depth.  For every
+ * set, each filter runs on a copy of the same random 32-byte-stride picture,
+ * starting 8 rows into the buffer, and the tested output must equal the
+ * reference output.  The two 4:2:2 chroma filters are checked after
+ * re-initialising the context with chroma format 2.
+ */
+static void check_loop_filter(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
+    H264DSPContext h;
+    int bit_depth;
+    int alphas[36], betas[36];
+    int8_t tc0[36][4];
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
+                      int alpha, int beta, int8_t *tc0);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        int i, j, a, c;
+        uint32_t mask = pixel_mask_lf[bit_depth - 8];
+        ff_h264dsp_init(&h, bit_depth, 1);
+        /* Fill the tables from the last entry down; a and c shrink by 9/10
+         * per step, and alpha/beta are scaled up with the bit depth. */
+        for (i = 35, a = 255, c = 250; i >= 0; i--) {
+            alphas[i] = a << (bit_depth - 8);
+            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+            tc0[i][0] = tc0[i][3] = (c + 6) / 10;
+            tc0[i][1] = (c + 7) / 15;
+            tc0[i][2] = (c + 9) / 20;
+            a = a*9/10;
+            c = c*9/10;
+        }
+
+#define CHECK_LOOP_FILTER(name, align, idc)                             \
+        do {                                                            \
+            if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
+                for (j = 0; j < 36; j++) {                              \
+                    /* row 8, plus a column offset when align is 0 */   \
+                    intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
+                    for (i = 0; i < 1024; i+=4) {                       \
+                        AV_WN32A(dst + i, rnd() & mask);                \
+                    }                                                   \
+                    memcpy(dst0, dst, 32 * 16 * 2);                     \
+                    memcpy(dst1, dst, 32 * 16 * 2);                     \
+                                                                        \
+                    call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
+                    call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
+                    /* compare everything that was copied, so that no */ \
+                    /* filtered row escapes the check at 8 bits       */ \
+                    if (memcmp(dst0, dst1, 32 * 16 * 2)) {              \
+                        fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
+                                "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
+                                tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
+                        fail();                                         \
+                    }                                                   \
+                    /* same offset as above: the filter also accesses */ \
+                    /* pixels in front of the pointer it is given     */ \
+                    bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
+                }                                                       \
+            }                                                           \
+        } while (0)
+
+        CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);
+        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);
+
+        ff_h264dsp_init(&h, bit_depth, 2);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);
+#undef CHECK_LOOP_FILTER
+    }
+}
+
+/*
+ * Test the intra loop filters (alpha/beta only, no tc0) for bit depths 8 to
+ * 10.
+ *
+ * 36 alpha/beta pairs are generated per bit depth.  For every pair, each
+ * filter runs on a copy of the same random 32-byte-stride picture, starting
+ * 8 rows into the buffer, and the tested output must equal the reference
+ * output.  The two 4:2:2 chroma filters are checked after re-initialising
+ * the context with chroma format 2.
+ */
+static void check_loop_filter_intra(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
+    H264DSPContext h;
+    int bit_depth;
+    int alphas[36], betas[36];
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
+                      int alpha, int beta);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        int i, j, a;
+        uint32_t mask = pixel_mask_lf[bit_depth - 8];
+        ff_h264dsp_init(&h, bit_depth, 1);
+        /* Fill the tables from the last entry down; a shrinks by 9/10 per
+         * step, and both values are scaled up with the bit depth. */
+        for (i = 35, a = 255; i >= 0; i--) {
+            alphas[i] = a << (bit_depth - 8);
+            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+            a = a*9/10;
+        }
+
+#define CHECK_LOOP_FILTER(name, align, idc)                             \
+        do {                                                            \
+            if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
+                for (j = 0; j < 36; j++) {                              \
+                    /* row 8, plus a column offset when align is 0 */   \
+                    intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
+                    for (i = 0; i < 1024; i+=4) {                       \
+                        AV_WN32A(dst + i, rnd() & mask);                \
+                    }                                                   \
+                    memcpy(dst0, dst, 32 * 16 * 2);                     \
+                    memcpy(dst1, dst, 32 * 16 * 2);                     \
+                                                                        \
+                    call_ref(dst0 + off, 32, alphas[j], betas[j]);      \
+                    call_new(dst1 + off, 32, alphas[j], betas[j]);      \
+                    /* compare everything that was copied, so that no */ \
+                    /* filtered row escapes the check at 8 bits       */ \
+                    if (memcmp(dst0, dst1, 32 * 16 * 2)) {              \
+                        fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
+                                j, alphas[j], betas[j]);                \
+                        fail();                                         \
+                    }                                                   \
+                    /* same offset as above: the filter also accesses */ \
+                    /* pixels in front of the pointer it is given     */ \
+                    bench_new(dst1 + off, 32, alphas[j], betas[j]);     \
+                }                                                       \
+            }                                                           \
+        } while (0)
+
+        CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,);
+        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,);
+
+        ff_h264dsp_init(&h, bit_depth, 2);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422);
+#undef CHECK_LOOP_FILTER
+    }
+}
+
+void checkasm_check_h264dsp(void) /* entry point: runs every h264dsp test group of this file */
+{
+    check_idct();
+    check_idct_multiple();
+    report("idct"); /* issued once, after both idct groups have run */
+
+    check_loop_filter();
+    report("loop_filter");
+
+    check_loop_filter_intra();
+    report("loop_filter_intra");
+}
--- a/externals/ffmpeg/tests/checkasm/h264pred.c
+++ b/externals/ffmpeg/tests/checkasm/h264pred.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 }; /* the position here is the 'codec' index used by the name tables and tests below */
+
+static const char * const pred4x4_modes[4][15] = { /* [codec index][prediction mode] -> test name; unnamed modes stay NULL */
+    { /* H.264 */
+        [VERT_PRED           ] = "vertical",
+        [HOR_PRED            ] = "horizontal",
+        [DC_PRED             ] = "dc",
+        [DIAG_DOWN_LEFT_PRED ] = "down_left",
+        [DIAG_DOWN_RIGHT_PRED] = "down_right",
+        [VERT_RIGHT_PRED     ] = "vertical_right",
+        [HOR_DOWN_PRED       ] = "horizontal_right",
+        [VERT_LEFT_PRED      ] = "vertical_left",
+        [HOR_UP_PRED         ] = "horizontal_up",
+        [LEFT_DC_PRED        ] = "left_dc",
+        [TOP_DC_PRED         ] = "top_dc",
+        [DC_128_PRED         ] = "dc_128",
+    },
+    { /* VP8 */
+        [VERT_PRED     ] = "vertical_vp8",
+        [HOR_PRED      ] = "horizontal_vp8",
+        [VERT_LEFT_PRED] = "vertical_left_vp8",
+        [TM_VP8_PRED   ] = "tm_vp8",
+        [DC_127_PRED   ] = "dc_127_vp8",
+        [DC_129_PRED   ] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [DIAG_DOWN_LEFT_PRED            ] = "down_left_rv40",
+        [VERT_LEFT_PRED                 ] = "vertical_left_rv40",
+        [HOR_UP_PRED                    ] = "horizontal_up_rv40",
+        [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
+        [HOR_UP_PRED_RV40_NODOWN        ] = "horizontal_up_nodown_rv40",
+        [VERT_LEFT_PRED_RV40_NODOWN     ] = "vertical_left_nodown_rv40",
+    },
+    { /* SVQ3 */
+        [DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
+    },
+};
+
+static const char * const pred8x8_modes[4][11] = { /* [codec index][prediction mode] -> test name; unnamed modes stay NULL */
+    { /* H.264 */
+        [DC_PRED8x8              ] = "dc",
+        [HOR_PRED8x8             ] = "horizontal",
+        [VERT_PRED8x8            ] = "vertical",
+        [PLANE_PRED8x8           ] = "plane",
+        [LEFT_DC_PRED8x8         ] = "left_dc",
+        [TOP_DC_PRED8x8          ] = "top_dc",
+        [DC_128_PRED8x8          ] = "dc_128",
+        [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
+        [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
+        [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
+        [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
+    },
+    { /* VP8 */
+        [PLANE_PRED8x8 ] = "tm_vp8",
+        [DC_127_PRED8x8] = "dc_127_vp8",
+        [DC_129_PRED8x8] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [DC_PRED8x8     ] = "dc_rv40",
+        [LEFT_DC_PRED8x8] = "left_dc_rv40",
+        [TOP_DC_PRED8x8 ] = "top_dc_rv40",
+    },
+    /* nothing for SVQ3: the fourth row is left without names */
+};
+
+static const char * const pred16x16_modes[4][9] = { /* [codec index][prediction mode] -> test name; indexed with the *_PRED8x8 constants */
+    { /* H.264 */
+        [DC_PRED8x8     ] = "dc",
+        [HOR_PRED8x8    ] = "horizontal",
+        [VERT_PRED8x8   ] = "vertical",
+        [PLANE_PRED8x8  ] = "plane",
+        [LEFT_DC_PRED8x8] = "left_dc",
+        [TOP_DC_PRED8x8 ] = "top_dc",
+        [DC_128_PRED8x8 ] = "dc_128",
+    },
+    { /* VP8 */
+        [PLANE_PRED8x8 ] = "tm_vp8",
+        [DC_127_PRED8x8] = "dc_127_vp8",
+        [DC_129_PRED8x8] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [PLANE_PRED8x8] = "plane_rv40",
+    },
+    { /* SVQ3 */
+        [PLANE_PRED8x8] = "plane_svq3",
+    },
+};
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; /* AND-masks for random 32-bit words, indexed by bit_depth - 8 */
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8) /* bytes per pixel; reads bit_depth at the expansion site */
+#define BUF_SIZE (3 * 16 * 17) /* size in bytes of buf0 and buf1 */
+
+#define check_pred_func(func, name, mode_name) /* false for unnamed modes; H.264 names get a bit depth suffix */ \
+    (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?                       \
+                   check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) : \
+                   check_func(func, "pred%s_%s", name, mode_name)))
+
+#define randomize_buffers() /* uses buf0, buf1 and bit_depth from the expansion site */ \
+    do {                                           \
+        uint32_t mask = pixel_mask[bit_depth - 8]; \
+        int i;                                     \
+        for (i = 0; i < BUF_SIZE; i += 4) {        \
+            uint32_t r = rnd() & mask; /* one random word per 4 bytes */ \
+            AV_WN32A(buf0 + i, r); /* both buffers get identical contents */ \
+            AV_WN32A(buf1 + i, r);                 \
+        }                                          \
+    } while (0)
+
+#define src0 (buf0 + 4 * 16) /* Offset to allow room for top and left */
+#define src1 (buf1 + 4 * 16) /* same offset into the second buffer */
+
+/*
+ * Compare every named 4x4 predictor of the given codec with its reference.
+ * Only runs for chroma_format 1.  After both calls the two complete buffers
+ * must be identical.
+ */
+static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                          int codec, int chroma_format, int bit_depth)
+{
+    if (chroma_format == 1) {
+        /* Both calls are handed the same top-right pointer, inside buf0. */
+        uint8_t *const above_right = buf0 + 2*16;
+        int mode;
+        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride);
+
+        for (mode = 0; mode < 15; mode++) {
+            const ptrdiff_t row_bytes = 12*SIZEOF_PIXEL;
+
+            if (!check_pred_func(h->pred4x4[mode], "4x4", pred4x4_modes[codec][mode]))
+                continue;
+
+            randomize_buffers();
+            call_ref(src0, above_right, row_bytes);
+            call_new(src1, above_right, row_bytes);
+            if (memcmp(buf0, buf1, BUF_SIZE) != 0)
+                fail();
+            bench_new(src1, above_right, row_bytes);
+        }
+    }
+}
+
+/*
+ * Compare every named 8x8 predictor of the given codec with its reference.
+ * With chroma_format 2 the functions are reported under the "8x16" name.
+ * After both calls the two complete buffers must be identical.
+ */
+static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                          int codec, int chroma_format, int bit_depth)
+{
+    const char *const size_name = (chroma_format == 2) ? "8x16" : "8x8";
+    int mode;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);
+
+    for (mode = 0; mode < 11; mode++) {
+        const ptrdiff_t row_bytes = 24*SIZEOF_PIXEL;
+
+        if (!check_pred_func(h->pred8x8[mode], size_name, pred8x8_modes[codec][mode]))
+            continue;
+
+        randomize_buffers();
+        call_ref(src0, row_bytes);
+        call_new(src1, row_bytes);
+        if (memcmp(buf0, buf1, BUF_SIZE) != 0)
+            fail();
+        bench_new(src1, row_bytes);
+    }
+}
+
+/*
+ * Compare every named 16x16 predictor of the given codec with its reference.
+ * Only runs for chroma_format 1; the stride is 48 bytes at every bit depth.
+ * After both calls the two complete buffers must be identical.
+ */
+static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                            int codec, int chroma_format, int bit_depth)
+{
+    if (chroma_format == 1) {
+        int mode;
+        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);
+
+        for (mode = 0; mode < 9; mode++) {
+            const ptrdiff_t row_bytes = 48;
+
+            if (!check_pred_func(h->pred16x16[mode], "16x16", pred16x16_modes[codec][mode]))
+                continue;
+
+            randomize_buffers();
+            call_ref(src0, row_bytes);
+            call_new(src1, row_bytes);
+            if (memcmp(buf0, buf1, BUF_SIZE) != 0)
+                fail();
+            bench_new(src1, row_bytes);
+        }
+    }
+}
+
+/*
+ * Compare the H.264 8x8l predictors with their references.  Only runs for
+ * H.264 with chroma_format 1 and reuses the 4x4 mode names.  Every predictor
+ * is exercised with all four top-left/top-right availability combinations,
+ * except the ones the spec does not allow.
+ */
+static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                           int codec, int chroma_format, int bit_depth)
+{
+    if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) {
+        int mode;
+        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride);
+
+        for (mode = 0; mode < 12; mode++) {
+            const int needs_topleft = mode == DIAG_DOWN_RIGHT_PRED ||
+                                      mode == VERT_RIGHT_PRED;
+            int combo;
+
+            if (!check_pred_func(h->pred8x8l[mode], "8x8l", pred4x4_modes[codec][mode]))
+                continue;
+
+            /* combo << 14 takes the values 0x0000, 0x4000, 0x8000, 0xc000 */
+            for (combo = 0; combo < 4; combo++) {
+                const int avail        = combo << 14;
+                const int have_left    = avail & 0x8000; /* top-left flag  */
+                const int have_right   = avail & 0x4000; /* top-right flag */
+                const ptrdiff_t row_bytes = 24*SIZEOF_PIXEL;
+
+                /* Those aren't allowed according to the spec */
+                if (needs_topleft && !have_left)
+                    continue;
+
+                randomize_buffers();
+                call_ref(src0, have_left, have_right, row_bytes);
+                call_new(src1, have_left, have_right, row_bytes);
+                if (memcmp(buf0, buf1, BUF_SIZE) != 0)
+                    fail();
+                bench_new(src1, have_left, have_right, row_bytes);
+            }
+        }
+    }
+}
+
+/* TODO: Add tests for H.264 lossless H/V prediction */
+
+/*
+ * Entry point of the h264pred tests.  Each test group is run for all four
+ * codecs; H.264 is additionally swept over bit depths 8 to 10 and chroma
+ * formats 1 and 2, the other codecs use bit depth 8 and chroma format 1
+ * only.  One report is issued per test group.
+ */
+void checkasm_check_h264pred(void)
+{
+    static const struct {
+        void (*run)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
+        const char *label;
+    } suites[] = {
+        { check_pred4x4,   "pred4x4"   },
+        { check_pred8x8,   "pred8x8"   },
+        { check_pred16x16, "pred16x16" },
+        { check_pred8x8l,  "pred8x8l"  },
+    };
+
+    LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
+    H264PredContext ctx;
+    int suite, codec, chroma, depth;
+
+    for (suite = 0; suite < FF_ARRAY_ELEMS(suites); suite++) {
+        for (codec = 0; codec < 4; codec++) {
+            const int id         = codec_ids[codec];
+            const int is_h264    = id == AV_CODEC_ID_H264;
+            const int max_depth  = is_h264 ? 10 : 8;
+            const int max_chroma = is_h264 ? 2 : 1;
+
+            for (depth = 8; depth <= max_depth; depth++) {
+                for (chroma = 1; chroma <= max_chroma; chroma++) {
+                    ff_h264_pred_init(&ctx, id, depth, chroma);
+                    suites[suite].run(&ctx, buf0, buf1, codec, chroma, depth);
+                }
+            }
+        }
+        report("%s", suites[suite].label);
+    }
+}
--- a/externals/ffmpeg/tests/checkasm/h264qpel.c
+++ b/externals/ffmpeg/tests/checkasm/h264qpel.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/h264qpel.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+/* Masks indexed by bit_depth - 8; randomize_buffers() ANDs them with each random word stored in buf0/buf1. */
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
+/* Bytes per pixel (1 at 8 bits, 2 at 9..16 bits) and the byte size of each test buffer. */
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
+#define BUF_SIZE (2 * 16 * (16 + 3 + 4))
+/* Fills buf0/buf1 with the same masked random words and dst0/dst1 with the same unmasked random words. */
+#define randomize_buffers()                        \
+    do {                                           \
+        uint32_t mask = pixel_mask[bit_depth - 8]; \
+        int k;                                     \
+        for (k = 0; k < BUF_SIZE; k += 4) {        \
+            uint32_t r = rnd() & mask;             \
+            AV_WN32A(buf0 + k, r);                 \
+            AV_WN32A(buf1 + k, r);                 \
+            r = rnd();                             \
+            AV_WN32A(dst0 + k, r);                 \
+            AV_WN32A(dst1 + k, r);                 \
+        }                                          \
+    } while (0)
+
+#define src0 (buf0 + 3 * 2 * 16) /* h264qpel functions read data from negative src pointer offsets */
+#define src1 (buf1 + 3 * 2 * 16)
+/* Entry point: compares every put/avg qpel function with its reference at 8, 9 and 10 bits. */
+void checkasm_check_h264qpel(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
+    H264QpelContext h;
+    int op, bit_depth, i, j;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+    /* tab only points into h, so it may be set before ff_h264qpel_init(); "put" covers sizes 16..2, "avg" 16..4 */
+    for (op = 0; op < 2; op++) {
+        qpel_mc_func (*tab)[16] = op ? h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab;
+        const char *op_name = op ? "avg" : "put";
+
+        for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+            ff_h264qpel_init(&h, bit_depth);
+            for (i = 0; i < (op ? 3 : 4); i++) {
+                int size = 16 >> i;
+                for (j = 0; j < 16; j++)
+                    if (check_func(tab[i][j], "%s_h264_qpel_%d_mc%d%d_%d", op_name, size, j & 3, j >> 2, bit_depth)) {
+                        randomize_buffers();
+                        call_ref(dst0, src0, size * SIZEOF_PIXEL);
+                        call_new(dst1, src1, size * SIZEOF_PIXEL);
+                        if (memcmp(buf0, buf1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))
+                            fail();
+                        bench_new(dst1, src1, size * SIZEOF_PIXEL);
+                    }
+            }
+        }
+        report("%s", op_name);
+    }
+}
--- a/externals/ffmpeg/tests/checkasm/hevc_add_res.c
+++ b/externals/ffmpeg/tests/checkasm/hevc_add_res.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+/* Stores size random 16-bit values at buf + j (j = 0..size-1), each an int16_t shifted right by 3. */
+#define randomize_buffers(buf, size)            \
+    do {                                        \
+        int j;                                  \
+        for (j = 0; j < size; j++) {            \
+            int16_t r = rnd();                  \
+            AV_WN16A(buf + j, r >> 3);          \
+        }                                       \
+    } while (0)
+/* Stores size random values masked to 10 bits (0x3FF) as 16-bit words at buf + j * 2. */
+#define randomize_buffers2(buf, size)             \
+    do {                                          \
+        int j;                                    \
+        for (j = 0; j < size; j++)                \
+            AV_WN16A(buf + j * 2, rnd() & 0x3FF); \
+    } while (0)
+/* Runs the current add_residual pair on identical random data and fails if their outputs differ. */
+static void compare_add_res(int size, ptrdiff_t stride, int overflow_test)
+{
+    LOCAL_ALIGNED_32(int16_t, res0, [32 * 32]);
+    LOCAL_ALIGNED_32(int16_t, res1, [32 * 32]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
+    /* size: samples per block; stride: forwarded unchanged; overflow_test: force res0[0] to 0x8000 */
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride);
+
+    randomize_buffers(res0, size);
+    randomize_buffers2(dst0, size);
+    if (overflow_test)
+        res0[0] = 0x8000;
+    memcpy(res1, res0, sizeof(*res0) * size);
+    memcpy(dst1, dst0, sizeof(int16_t) * size);
+    /* dst holds size 16-bit words; comparing only size bytes would skip half of a >8-bit block */
+    call_ref(dst0, res0, stride);
+    call_new(dst1, res1, stride);
+    if (memcmp(dst0, dst1, sizeof(int16_t) * size))
+        fail();
+    bench_new(dst1, res1, stride);
+}
+/* Tests h.add_residual[0..3], i.e. square blocks from 4x4 to 32x32, for one bit depth. */
+static void check_add_res(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    /* i is log2 of the block edge; stride equals block_size at 8 bits and twice that above */
+    for (i = 2; i <= 5; i++) {
+        int block_size = 1 << i;
+        int size = block_size * block_size;
+        ptrdiff_t stride = block_size << (bit_depth > 8);
+
+        if (check_func(h.add_residual[i - 2], "hevc_add_res_%dx%d_%d", block_size, block_size, bit_depth)) {
+            compare_add_res(size, stride, 0);
+            // overflow test for res = -32768
+            compare_add_res(size, stride, 1);
+        }
+    }
+}
+/* Entry point: checks the add_residual functions of 8-, 9- and 10-bit HEVC DSP contexts. */
+void checkasm_check_hevc_add_res(void)
+{
+    HEVCDSPContext dsp;
+    int depth = 8;
+
+    while (depth <= 10) {
+        ff_hevc_dsp_init(&dsp, depth);
+        check_add_res(dsp, depth);
+        depth++;
+    }
+    report("add_residual");
+}
--- a/externals/ffmpeg/tests/checkasm/hevc_idct.c
+++ b/externals/ffmpeg/tests/checkasm/hevc_idct.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+/* Stores size random int16_t values at buf + j for j = 0..size-1. */
+#define randomize_buffers(buf, size)            \
+    do {                                        \
+        int j;                                  \
+        for (j = 0; j < size; j++) {            \
+            int16_t r = rnd();                  \
+            AV_WN16A(buf + j, r);               \
+        }                                       \
+    } while (0)
+/* Compares h.idct[0..3] (4x4 up to 32x32) with the reference on identical random coefficients. */
+static void check_idct(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
+    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);
+    /* i is log2 of the block edge; col_limit is always passed as the full block size */
+    for (i = 2; i <= 5; i++) {
+        int block_size = 1 << i;
+        int size = block_size * block_size;
+        int col_limit = block_size;
+        declare_func(void, int16_t *coeffs, int col_limit);
+        /* results are read back from the coefficient buffers, so each call gets its own copy of the input */
+        randomize_buffers(coeffs0, size);
+        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
+        if (check_func(h.idct[i - 2], "hevc_idct_%dx%d_%d", block_size, block_size, bit_depth)) {
+            call_ref(coeffs0, col_limit);
+            call_new(coeffs1, col_limit);
+            if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
+                fail();
+            bench_new(coeffs1, col_limit);
+        }
+    }
+}
+/* Compares h.idct_dc[0..3] (4x4 up to 32x32) with the reference on identical random coefficients. */
+static void check_idct_dc(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
+    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);
+    /* i is log2 of the block edge; size is the number of coefficients in the block */
+    for (i = 2; i <= 5; i++) {
+        int block_size = 1 << i;
+        int size = block_size * block_size;
+        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, int16_t *coeffs);
+        /* results are read back from the coefficient buffers, so each call gets its own copy of the input */
+        randomize_buffers(coeffs0, size);
+        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
+
+        if (check_func(h.idct_dc[i - 2], "hevc_idct_%dx%d_dc_%d", block_size, block_size, bit_depth)) {
+            call_ref(coeffs0);
+            call_new(coeffs1);
+            if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
+                fail();
+            bench_new(coeffs1);
+        }
+    }
+}
+/* Entry point: checks the DC-only and the full inverse transforms at 8, 10 and 12 bits. */
+void checkasm_check_hevc_idct(void)
+{
+    static const int depths[] = { 8, 10, 12 };
+    HEVCDSPContext dsp;
+    int n;
+
+    /* DC-only transforms first; they are reported as their own group. */
+    for (n = 0; n < 3; n++) {
+        ff_hevc_dsp_init(&dsp, depths[n]);
+        check_idct_dc(dsp, depths[n]);
+    }
+    report("idct_dc");
+
+    /* Full transforms, with the context rebuilt for every depth. */
+    for (n = 0; n < 3; n++) {
+        ff_hevc_dsp_init(&dsp, depths[n]);
+        check_idct(dsp, depths[n]);
+    }
+    report("idct");
+}
--- a/externals/ffmpeg/tests/checkasm/hevc_sao.c
+++ b/externals/ffmpeg/tests/checkasm/hevc_sao.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/avcodec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+/* pixel_mask is indexed by (bit_depth - 8) >> 1 (8/10/12 bits); sao_size lists the tested block edges. */
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
+static const uint32_t sao_size[5] = {8, 16, 32, 48, 64};
+/* SIZEOF_PIXEL: bytes per pixel; OFFSET_THRESH: exclusive upper bound for the random offsets below. */
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
+#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) //same with sao_edge src_stride
+#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) //+2 for top and bottom row, *2 for high bit depth
+#define OFFSET_THRESH (1 << (bit_depth - 5))
+#define OFFSET_LENGTH 5
+/* Writes the same masked random 32-bit words to buf0 and buf1 over the first size bytes. */
+#define randomize_buffers(buf0, buf1, size)                 \
+    do {                                                    \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
+        int k;                                              \
+        for (k = 0; k < size; k += 4) {                     \
+            uint32_t r = rnd() & mask;                      \
+            AV_WN32A(buf0 + k, r);                          \
+            AV_WN32A(buf1 + k, r);                          \
+        }                                                   \
+    } while (0)
+/* Fills buf[0..size) with rnd() % OFFSET_THRESH, passed through uint8_t at 8 bits and uint16_t otherwise. */
+#define randomize_buffers2(buf, size)                       \
+    do {                                                    \
+        uint32_t max_offset = OFFSET_THRESH;                \
+        int k;                                              \
+        if (bit_depth == 8) {                               \
+            for (k = 0; k < size; k++) {                    \
+                uint8_t r = rnd() % max_offset;             \
+                buf[k] = r;                                 \
+            }                                               \
+        } else {                                            \
+            for (k = 0; k < size; k++) {                    \
+                uint16_t r = rnd() % max_offset;            \
+                buf[k] = r;                                 \
+            }                                               \
+        }                                                   \
+    } while (0)
+/* Compares h.sao_band_filter[0..4] (block edges taken from sao_size[]) with the reference. */
+static void check_sao_band(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
+    int16_t offset_val[OFFSET_LENGTH];
+    int left_class = rnd()%32;
+    /* left_class is drawn once and reused for every block size; the same stride is used for dst and src */
+    for (i = 0; i <= 4; i++) {
+        int block_size = sao_size[i];
+        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
+        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
+                          int16_t *sao_offset_val, int sao_left_class, int width, int height);
+        /* identical sources, fresh random offsets and zeroed destinations for each block size */
+        randomize_buffers(src0, src1, BUF_SIZE);
+        randomize_buffers2(offset_val, OFFSET_LENGTH);
+        memset(dst0, 0, BUF_SIZE);
+        memset(dst1, 0, BUF_SIZE);
+        /* the whole destination buffers are compared, not only the filtered block */
+        if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth)) {
+            call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size);
+            call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
+            if (memcmp(dst0, dst1, BUF_SIZE))
+                fail();
+            bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
+        }
+    }
+}
+/* Compares h.sao_edge_filter[0..4] (block edges taken from sao_size[]) with the reference. */
+static void check_sao_edge(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
+    int16_t offset_val[OFFSET_LENGTH];
+    int eo = rnd()%4;
+    /* eo is drawn once and reused; offset moves src forward by one row plus AV_INPUT_BUFFER_PADDING_SIZE pixels */
+    for (i = 0; i <= 4; i++) {
+        int block_size = sao_size[i];
+        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
+        int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL;
+        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
+                          int16_t *sao_offset_val, int eo, int width, int height);
+        /* identical sources, fresh random offsets and zeroed destinations for each block size */
+        randomize_buffers(src0, src1, BUF_SIZE);
+        randomize_buffers2(offset_val, OFFSET_LENGTH);
+        memset(dst0, 0, BUF_SIZE);
+        memset(dst1, 0, BUF_SIZE);
+        /* the whole destination buffers are compared, not only the filtered block */
+        if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth)) {
+            call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size);
+            call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
+            if (memcmp(dst0, dst1, BUF_SIZE))
+                fail();
+            bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
+        }
+    }
+}
+/* Entry point: checks the SAO band and SAO edge filters at 8, 10 and 12 bits. */
+void checkasm_check_hevc_sao(void)
+{
+    static const int depths[] = { 8, 10, 12 };
+    HEVCDSPContext dsp;
+    int n;
+
+    /* Band filters first; they are reported as their own group. */
+    for (n = 0; n < 3; n++) {
+        ff_hevc_dsp_init(&dsp, depths[n]);
+        check_sao_band(dsp, depths[n]);
+    }
+    report("sao_band");
+
+    /* Edge filters, with the context rebuilt for every depth. */
+    for (n = 0; n < 3; n++) {
+        ff_hevc_dsp_init(&dsp, depths[n]);
+        check_sao_edge(dsp, depths[n]);
+    }
+    report("sao_edge");
+}
--- a/externals/ffmpeg/tests/checkasm/huffyuvdsp.c
+++ b/externals/ffmpeg/tests/checkasm/huffyuvdsp.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/huffyuvdsp.h"
+
+#include "checkasm.h"
+/* Fills buf[0..size) with random values limited to 16 bits. */
+#define randomize_buffers(buf, size)     \
+    do {                                 \
+        int j;                           \
+        for (j = 0; j < size; j++)       \
+            buf[j] = rnd() & 0xFFFF;       \
+    } while (0)
+/* Compares c.add_int16 with its reference on width random 16-bit samples; name labels the test. */
+static void check_add_int16(HuffYUVDSPContext c, unsigned mask, int width, const char * name)
+{
+    uint16_t *src0 = av_mallocz(width * sizeof(uint16_t));
+    uint16_t *src1 = av_mallocz(width * sizeof(uint16_t));
+    uint16_t *dst0 = av_mallocz(width * sizeof(uint16_t));
+    uint16_t *dst1 = av_mallocz(width * sizeof(uint16_t));
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint16_t *dst, uint16_t *src, unsigned mask, int w);
+    /* NOTE(review): nothing returns after fail(); confirm a failed allocation cannot reach the accesses below */
+    if (!src0 || !src1 || !dst0 || !dst1)
+        fail();
+
+    randomize_buffers(src0, width);
+    memcpy(src1, src0, width * sizeof(uint16_t));
+    /* dst0/dst1 start zeroed (av_mallocz); only the sources are randomized */
+    if (check_func(c.add_int16, "%s", name)) {
+        call_ref(dst0, src0, mask, width);
+        call_new(dst1, src1, mask, width);
+        if (memcmp(dst0, dst1, width * sizeof(uint16_t)))
+            fail();
+        bench_new(dst1, src1, mask, width);
+    }
+
+    av_free(src0);
+    av_free(src1);
+    av_free(dst0);
+    av_free(dst1);
+}
+/* Entry point: checks add_int16 once with a random width and once with the fixed width 16*128. */
+void checkasm_check_huffyuvdsp(void)
+{
+    HuffYUVDSPContext c;
+    int width = 16 * av_clip(rnd(), 16, 128);
+    /* width is 16 * k with k clipped to the range 16..128 */
+    ff_huffyuvdsp_init(&c, AV_PIX_FMT_YUV422P);
+
+    /*! test width not multiple of mmsize */
+    check_add_int16(c, 65535, width, "add_int16_rnd_width");
+    report("add_int16_rnd_width");
+
+    /*! test always with the same size (for perf test) */
+    check_add_int16(c, 65535, 16*128, "add_int16_128");
+    report("add_int16_128");
+}
--- a/externals/ffmpeg/tests/checkasm/jpeg2000dsp.c
+++ b/externals/ffmpeg/tests/checkasm/jpeg2000dsp.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "checkasm.h"
+#include "libavcodec/jpeg2000dsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+/* Elements per plane; every test buffer below holds three planes (BUF_SIZE*3 elements). */
+#define BUF_SIZE 512
+/* Fills src[0 .. BUF_SIZE*3) with raw rnd() values. */
+#define randomize_buffers()                 \
+    do {                                    \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE*3; i++)    \
+            src[i] = rnd();                 \
+    } while (0)
+/* Fills src[0 .. BUF_SIZE*3) with rnd() converted to float and divided by (UINT_MAX >> 5). */
+#define randomize_buffers_float()                    \
+    do {                                             \
+        int i;                                       \
+        for (i = 0; i < BUF_SIZE*3; i++)             \
+            src[i] = (float)rnd() / (UINT_MAX >> 5); \
+    } while (0)
+/* Runs the function pair selected by the caller's check_func() on three int32_t planes and compares the results. */
+static void check_rct_int(void)
+{
+    LOCAL_ALIGNED_32(int32_t, src, [BUF_SIZE*3]);
+    LOCAL_ALIGNED_32(int32_t, ref, [BUF_SIZE*3]);
+    LOCAL_ALIGNED_32(int32_t, new, [BUF_SIZE*3]);
+    int32_t *ref0 = &ref[BUF_SIZE*0], *new0 = &new[BUF_SIZE*0];
+    int32_t *ref1 = &ref[BUF_SIZE*1], *new1 = &new[BUF_SIZE*1];
+    int32_t *ref2 = &ref[BUF_SIZE*2], *new2 = &new[BUF_SIZE*2];
+    /* results are read back from the planes passed in, so ref and new each get a copy of src */
+    declare_func(void, void *src0, void *src1, void *src2, int csize);
+
+    randomize_buffers();
+    memcpy(ref, src, BUF_SIZE * 3 * sizeof(*src));
+    memcpy(new, src, BUF_SIZE * 3 * sizeof(*src));
+    call_ref(ref0, ref1, ref2, BUF_SIZE);
+    call_new(new0, new1, new2, BUF_SIZE);
+    if (memcmp(ref0, new0, BUF_SIZE * sizeof(*src)) ||
+        memcmp(ref1, new1, BUF_SIZE * sizeof(*src)) ||
+        memcmp(ref2, new2, BUF_SIZE * sizeof(*src)))
+        fail();
+    memcpy(new, src, BUF_SIZE * 3 * sizeof(*src)); /* restore the original input before benchmarking */
+    bench_new(new0, new1, new2, BUF_SIZE);
+}
+/* Runs the function pair selected by the caller's check_func() on three float planes; results may differ by 1.0e-5. */
+static void check_ict_float(void)
+{
+    LOCAL_ALIGNED_32(float, src, [BUF_SIZE*3]);
+    LOCAL_ALIGNED_32(float, ref, [BUF_SIZE*3]);
+    LOCAL_ALIGNED_32(float, new, [BUF_SIZE*3]);
+    float *ref0 = &ref[BUF_SIZE*0], *new0 = &new[BUF_SIZE*0];
+    float *ref1 = &ref[BUF_SIZE*1], *new1 = &new[BUF_SIZE*1];
+    float *ref2 = &ref[BUF_SIZE*2], *new2 = &new[BUF_SIZE*2];
+    /* results are read back from the planes passed in, so ref and new each get a copy of src */
+    declare_func(void, void *src0, void *src1, void *src2, int csize);
+
+    randomize_buffers_float();
+    memcpy(ref, src, BUF_SIZE * 3 * sizeof(*src));
+    memcpy(new, src, BUF_SIZE * 3 * sizeof(*src));
+    call_ref(ref0, ref1, ref2, BUF_SIZE);
+    call_new(new0, new1, new2, BUF_SIZE);
+    if (!float_near_abs_eps_array(ref0, new0, 1.0e-5, BUF_SIZE) ||
+        !float_near_abs_eps_array(ref1, new1, 1.0e-5, BUF_SIZE) ||
+        !float_near_abs_eps_array(ref2, new2, 1.0e-5, BUF_SIZE))
+        fail();
+    memcpy(new, src, BUF_SIZE * 3 * sizeof(*src)); /* restore the original input before benchmarking */
+    bench_new(new0, new1, new2, BUF_SIZE);
+}
+/* Entry point: checks the two mct_decode functions (FF_DWT53 integer, FF_DWT97 float). */
+void checkasm_check_jpeg2000dsp(void)
+{
+    Jpeg2000DSPContext h;
+
+    ff_jpeg2000dsp_init(&h);
+    /* check_func() selects the function under test; the helper that follows then runs it */
+    if (check_func(h.mct_decode[FF_DWT53], "jpeg2000_rct_int"))
+        check_rct_int();
+    if (check_func(h.mct_decode[FF_DWT97], "jpeg2000_ict_float"))
+        check_ict_float();
+
+    report("mct_decode");
+}
--- a/externals/ffmpeg/tests/checkasm/llviddsp.c
+++ b/externals/ffmpeg/tests/checkasm/llviddsp.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/lossless_videodsp.h"
+
+#include "checkasm.h"
+/* Fills the first size bytes of buf with random byte values. */
+#define randomize_buffers(buf, size)     \
+    do {                                 \
+        int j;                           \
+        uint8_t *tmp_buf = (uint8_t *)buf;\
+        for (j = 0; j < size; j++)       \
+            tmp_buf[j] = rnd() & 0xFF;       \
+    } while (0)
+/* Calls fail() if a0 or a1 is NULL, then randomizes width elements of a0 and copies them to a1 (multi-statement). */
+#define init_buffer(a0, a1, type, width)\
+    if (!a0 || !a1)\
+        fail();\
+    randomize_buffers(a0, width * sizeof(type));\
+    memcpy(a1, a0, width*sizeof(type));\
+
+static void check_add_bytes(LLVidDSPContext c, int width)
+{   /* Compares c.add_bytes with its reference on width random source bytes. */
+    uint8_t *dst0 = av_mallocz(width);
+    uint8_t *dst1 = av_mallocz(width);
+    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
+    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t w);
+
+    init_buffer(src0, src1, uint8_t, width);
+    /* NOTE(review): nothing returns after fail(); confirm a failed allocation cannot reach the calls below */
+    if (!dst0 || !dst1)
+        fail();
+    /* dst0/dst1 start zeroed (av_mallocz); src0/src1 hold the same random bytes */
+
+    if (check_func(c.add_bytes, "add_bytes")) {
+        call_ref(dst0, src0, width);
+        call_new(dst1, src1, width);
+        if (memcmp(dst0, dst1, width))
+            fail();
+        bench_new(dst1, src1, width);
+    }
+
+    av_free(src0);
+    av_free(src1);
+    av_free(dst0);
+    av_free(dst1);
+}
+/* Compares c.add_median_pred with its reference: both the output bytes and the updated left/left_top values. */
+static void check_add_median_pred(LLVidDSPContext c, int width) {
+    int A0, A1, B0, B1;
+    uint8_t *dst0 = av_mallocz(width);
+    uint8_t *dst1 = av_mallocz(width);
+    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
+    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
+    uint8_t *diff0 = av_mallocz_array(width, sizeof(uint8_t));
+    uint8_t *diff1 = av_mallocz_array(width, sizeof(uint8_t));
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
+                      const uint8_t *diff, ptrdiff_t w,
+                      int *left, int *left_top);
+    /* src and diff pairs receive identical random bytes; dst0/dst1 are not NULL-checked here */
+    init_buffer(src0, src1, uint8_t, width);
+    init_buffer(diff0, diff1, uint8_t, width);
+
+    A0 = rnd() & 0xFF;
+    B0 = rnd() & 0xFF;
+    A1 = A0;
+    B1 = B0;
+    /* A* is passed as left and B* as left_top; each call gets its own copy because they are passed by pointer */
+
+    if (check_func(c.add_median_pred, "add_median_pred")) {
+        call_ref(dst0, src0, diff0, width, &A0, &B0);
+        call_new(dst1, src1, diff1, width, &A1, &B1);
+        if (memcmp(dst0, dst1, width) || (A0 != A1) || (B0 != B1))
+            fail();
+        bench_new(dst1, src1, diff1, width, &A1, &B1);
+    }
+
+    av_free(src0);
+    av_free(src1);
+    av_free(diff0);
+    av_free(diff1);
+    av_free(dst0);
+    av_free(dst1);
+}
+/* Compares c.add_left_pred with its reference on width random bytes, starting from accumulator acc. */
+static void check_add_left_pred(LLVidDSPContext c, int width, int acc, const char * report)
+{
+    int res0, res1;
+    uint8_t *dst0 = av_mallocz(width);
+    uint8_t *dst1 = av_mallocz(width);
+    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
+    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
+    declare_func_emms(AV_CPU_FLAG_MMX, int, uint8_t *dst, uint8_t *src, ptrdiff_t w, int acc);
+
+    init_buffer(src0, src1, uint8_t, width);
+
+    if (!dst0 || !dst1)
+        fail();
+    /* only the low 8 bits of the two return values are compared, together with the width output bytes */
+    if (check_func(c.add_left_pred, "%s", report)) {
+        res0 = call_ref(dst0, src0, width, acc);
+        res1 = call_new(dst1, src1, width, acc);
+        if ((res0 & 0xFF) != (res1 & 0xFF)||\
+            memcmp(dst0, dst1, width))
+            fail();
+        bench_new(dst1, src1, width, acc);
+    }
+
+    av_free(src0);
+    av_free(src1);
+    av_free(dst0);
+    av_free(dst1);
+}
+/* Compares c.add_left_pred_int16 with its reference on width random 16-bit samples, starting from acc. */
+static void check_add_left_pred_16(LLVidDSPContext c, unsigned mask, int width, unsigned acc, const char * report)
+{
+    int res0, res1;
+    uint16_t *dst0 = av_mallocz_array(width, sizeof(uint16_t));
+    uint16_t *dst1 = av_mallocz_array(width, sizeof(uint16_t));
+    uint16_t *src0 = av_mallocz_array(width, sizeof(uint16_t));
+    uint16_t *src1 = av_mallocz_array(width, sizeof(uint16_t));
+    declare_func_emms(AV_CPU_FLAG_MMX, int, uint16_t *dst, uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
+    /* mask is forwarded to both functions; report is the test name given to check_func() */
+    init_buffer(src0, src1, uint16_t, width);
+
+    if (!dst0 || !dst1)
+        fail();
+    /* width counts uint16_t elements, so the output comparison must span width * sizeof(uint16_t) bytes */
+    if (check_func(c.add_left_pred_int16, "%s", report)) {
+        res0 = call_ref(dst0, src0, mask, width, acc);
+        res1 = call_new(dst1, src1, mask, width, acc);
+        if ((res0 &0xFFFF) != (res1 &0xFFFF)||\
+            memcmp(dst0, dst1, width * sizeof(uint16_t)))
+            fail();
+        bench_new(dst1, src1, mask, width, acc);
+    }
+
+    av_free(src0);
+    av_free(src1);
+    av_free(dst0);
+    av_free(dst1);
+}
+/* Compares c.add_gradient_pred with its reference; both work in place on the second row of a two-row buffer. */
+static void check_add_gradient_pred(LLVidDSPContext c, int w) {
+    int src_size, stride;
+    uint8_t *src0, *src1;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *src, const ptrdiff_t stride,
+                      const ptrdiff_t width);
+    /* each row is w bytes plus 32 bytes of lead-in; the functions are handed row 1 at byte offset 32 */
+    stride = w + 32;
+    src_size = (stride + 32) * 2; /* dsp need previous line, and ignore the start of the line */
+    src0 = av_mallocz(src_size);
+    src1 = av_mallocz(src_size);
+
+    init_buffer(src0, src1, uint8_t, src_size);
+    /* the first comparison covers row 0, the second the 32 lead-in bytes and w data bytes of row 1 */
+    if (check_func(c.add_gradient_pred, "add_gradient_pred")) {
+        call_ref(src0 + stride + 32, stride, w);
+        call_new(src1 + stride + 32, stride, w);
+        if (memcmp(src0, src1, stride)||/* previous line doesn't change */
+            memcmp(src0+stride, src1 + stride, w + 32)) {
+            fail();
+        }
+        bench_new(src1 + stride + 32, stride, w);
+    }
+
+    av_free(src0);
+    av_free(src1);
+}
+/* Entry point: runs every lossless video DSP check with one shared random width and accumulator. */
+void checkasm_check_llviddsp(void)
+{
+    LLVidDSPContext c;
+    int width = 16 * av_clip(rnd(), 16, 128);
+    int accRnd = rnd() & 0xFF;
+    /* width is 16 * k with k clipped to 16..128; accRnd is a random 8-bit start accumulator */
+    ff_llviddsp_init(&c);
+
+    check_add_bytes(c, width);
+    report("add_bytes");
+
+    check_add_median_pred(c, width);
+    report("add_median_pred");
+
+    check_add_left_pred(c, width, 0, "add_left_pred_zero");
+    report("add_left_pred_zero");
+
+    check_add_left_pred(c, width, accRnd, "add_left_pred_rnd_acc");
+    report("add_left_pred_rnd_acc");
+
+    check_add_left_pred_16(c, 255, width, accRnd, "add_left_pred_int16");
+    report("add_left_pred_int16");
+
+    check_add_gradient_pred(c, width);
+    report("add_gradient_pred");
+}
--- a/externals/ffmpeg/tests/checkasm/llviddspenc.c
+++ b/externals/ffmpeg/tests/checkasm/llviddspenc.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/lossless_videoencdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        int j;                            \
+        for (j = 0; j < size; j+=4)       /* one 32-bit store of rnd() per step */ \
+            AV_WN32(buf + j, rnd());      \
+    } while (0)
+
+static const struct {uint8_t w, h, s;} planes[] = { /* passed to the tested functions as width, height, stride */
+    {16,16,16}, {21,23,25}, {32,17,48}, {15,128,16}, {128,127,128}
+};
+
+#define MAX_STRIDE 128 /* MAX_STRIDE and MAX_HEIGHT size the local test buffers */
+#define MAX_HEIGHT 127
+
+static void check_diff_bytes(LLVidEncDSPContext *c)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src2, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src3, [MAX_STRIDE]);
+    /* Compares c->diff_bytes with its reference for every width listed in planes[]. */
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
+                      const uint8_t *src2, intptr_t w);
+
+    memset(dst0, 0, MAX_STRIDE);
+    memset(dst1, 0, MAX_STRIDE);
+    randomize_buffers(src0, MAX_STRIDE);
+    memcpy(src1, src0, MAX_STRIDE); /* src1 is a copy of src0 */
+    randomize_buffers(src2, MAX_STRIDE);
+    memcpy(src3, src2, MAX_STRIDE); /* src3 is a copy of src2 */
+
+    if (check_func(c->diff_bytes, "diff_bytes")) {
+        for (i = 0; i < 5; i ++) {
+            call_ref(dst0, src0, src2, planes[i].w);
+            call_new(dst1, src1, src3, planes[i].w);
+            if (memcmp(dst0, dst1, planes[i].w)) /* only the first w output bytes are compared */
+                fail();
+        }
+        bench_new(dst1, src0, src2, planes[4].w); /* benchmark with the last planes[] width (128) */
+    }
+}
+
+static void check_sub_left_pred(LLVidEncDSPContext *c)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT]);
+    /* Compares c->sub_left_predict with its reference for every geometry in planes[]. */
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src,
+                      ptrdiff_t stride, ptrdiff_t width, int height);
+
+    memset(dst0, 0, MAX_STRIDE * MAX_HEIGHT);
+    memset(dst1, 0, MAX_STRIDE * MAX_HEIGHT);
+    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
+    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT); /* src1 is a copy of src0 */
+
+    if (check_func(c->sub_left_predict, "sub_left_predict")) {
+        /* Verify all five planes[] entries; an unconditional break used to stop after planes[0]. */
+        for (i = 0; i < 5; i ++) {
+            call_ref(dst0, src0, planes[i].s, planes[i].w, planes[i].h);
+            call_new(dst1, src1, planes[i].s, planes[i].w, planes[i].h);
+            if (memcmp(dst0, dst1, planes[i].w * planes[i].h)) /* the first w * h output bytes are compared */
+                fail();
+        }
+        bench_new(dst1, src0, planes[4].s, planes[4].w, planes[4].h); /* benchmark with the largest plane */
+    }
+}
+
+void checkasm_check_llviddspenc(void)
+{
+    LLVidEncDSPContext c;
+    ff_llvidencdsp_init(&c); /* initialises c; its diff_bytes and sub_left_predict members are read by the checks below */
+
+    check_diff_bytes(&c);
+    report("diff_bytes");
+
+    check_sub_left_pred(&c);
+    report("sub_left_predict");
+}
--- a/externals/ffmpeg/tests/checkasm/opusdsp.c
+++ b/externals/ffmpeg/tests/checkasm/opusdsp.c
@@ -0,0 +1,103 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavcodec/opusdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_float(buf, len)                               \
+    do {                                                        \
+        for (int i = 0; i < len; i++) {                         \
+            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   /* about [-16, 16] if rnd() spans the full unsigned range - TODO confirm */ \
+            buf[i] = f;                                         \
+        }                                                       \
+    } while (0)
+
+#define EPS 0.005       /* absolute tolerance given to the float_near_abs_eps* checks */
+#define MAX_SIZE (960)  /* length argument passed to the tested functions */
+
+/* period is between 15 and 1022, inclusive */
+static void test_postfilter(int period)
+{
+    LOCAL_ALIGNED(16, float, data0, [MAX_SIZE + 1024]);
+    LOCAL_ALIGNED(16, float, data1, [MAX_SIZE + 1024]);
+
+    /* This filter can explode very easily, so use a tapset from the codec.
+     * In the codec these are usually multiplied by at least 0.09375f,
+     * so it's outside the largest filter value, but the filter is still stable
+     * so use it. */
+    float gains[3] = { 0.3066406250f, 0.2170410156f, 0.1296386719f };
+
+    /* The codec will always call with an offset which is aligned once
+     * (period + 2) is subtracted, but here we have to align it ourselves. */
+    int offset = FFALIGN(period + 2, 4);
+
+    declare_func(void, float *data, int period, float *gains, int len);
+
+    randomize_float(data0, MAX_SIZE + 1024);
+    memcpy(data1, data0, (MAX_SIZE + 1024)*sizeof(float)); /* both runs start from identical data */
+
+    call_ref(data0 + offset, period, gains, MAX_SIZE);
+    call_new(data1 + offset, period, gains, MAX_SIZE);
+
+    if (!float_near_abs_eps_array(data0 + offset, data1 + offset, EPS, MAX_SIZE)) /* compare the MAX_SIZE samples from offset on */
+        fail();
+    bench_new(data1 + offset, period, gains, MAX_SIZE);
+}
+
+static void test_deemphasis(void)
+{
+    LOCAL_ALIGNED(16, float, src, [FFALIGN(MAX_SIZE, 4)]);
+    LOCAL_ALIGNED(16, float, dst0, [FFALIGN(MAX_SIZE, 4)]);
+    LOCAL_ALIGNED(16, float, dst1, [FFALIGN(MAX_SIZE, 4)]);
+    float coeff0 = (float)rnd() / (UINT_MAX >> 5) - 16.0f, coeff1 = coeff0; /* same random starting coefficient for both runs */
+
+    declare_func_float(float, float *out, float *in, float coeff, int len);
+
+    randomize_float(src, MAX_SIZE);
+
+    coeff0 = call_ref(dst0, src, coeff0, MAX_SIZE);
+    coeff1 = call_new(dst1, src, coeff1, MAX_SIZE);
+    /* Both the returned value and the output samples must agree within EPS. */
+    if (!float_near_abs_eps(coeff0, coeff1, EPS) ||
+        !float_near_abs_eps_array(dst0, dst1, EPS, MAX_SIZE))
+        fail();
+    bench_new(dst1, src, coeff1, MAX_SIZE);
+}
+
+void checkasm_check_opusdsp(void)
+{
+    OpusDSP ctx;
+    ff_opus_dsp_init(&ctx);
+    /* ctx.postfilter is exercised with three periods (15, 512, 1022), each under its own test name. */
+    if (check_func(ctx.postfilter, "postfilter_15"))
+        test_postfilter(15);
+    report("postfilter_15");
+
+    if (check_func(ctx.postfilter, "postfilter_512"))
+        test_postfilter(512);
+    report("postfilter_512");
+
+    if (check_func(ctx.postfilter, "postfilter_1022"))
+        test_postfilter(1022);
+    report("postfilter_1022");
+
+    if (check_func(ctx.deemphasis, "deemphasis"))
+        test_deemphasis();
+    report("deemphasis");
+}
--- a/externals/ffmpeg/tests/checkasm/pixblockdsp.c
+++ b/externals/ffmpeg/tests/checkasm/pixblockdsp.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2015 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/pixblockdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_UNITS 8 /* number of rounds per check, each at a different buffer offset */
+#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS) /* size in bytes of every test buffer */
+
+#define randomize_buffers()                 \
+    do {                                    \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE; i += 4) { \
+            uint32_t r = rnd();             \
+            AV_WN32A(src10 + i, r);         /* src10 and src11 receive the same word */ \
+            AV_WN32A(src11 + i, r);         \
+            r = rnd();                      \
+            AV_WN32A(src20 + i, r);         /* src20 and src21 receive the same word */ \
+            AV_WN32A(src21 + i, r);         \
+            r = rnd();                      \
+            AV_WN32A(dst0_ + i, r);         /* dst0_ and dst1_ receive the same word */ \
+            AV_WN32A(dst1_ + i, r);         \
+        }                                   \
+    } while (0)
+
+#define check_get_pixels(type, aligned)                                                    \
+    do {                                                                                   \
+        int i;                                                                             \
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);    \
+        /* BUF_UNITS rounds; after each, the whole source and destination buffers must match. */ \
+        for (i = 0; i < BUF_UNITS; i++) {                                              \
+            int src_offset = i * 64 * sizeof(type) + (aligned ? 8 : 1) * i;                \
+            int dst_offset = i * 64; /* dst must be aligned */                             \
+            randomize_buffers();                                                           \
+            call_ref(dst0 + dst_offset, src10 + src_offset, 8);                            \
+            call_new(dst1 + dst_offset, src11 + src_offset, 8);                            \
+            if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) \
+                fail();                                                                    \
+            bench_new(dst1 + dst_offset, src11 + src_offset, 8);                           \
+        }                                                                                  \
+    } while (0)
+
+#define check_diff_pixels(type, aligned)                                                   \
+    do {                                                                                   \
+        int i;                                                                             \
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
+        /* BUF_UNITS rounds; after each, both source pairs and the destination pair must match. */ \
+        for (i = 0; i < BUF_UNITS; i++) {                                              \
+            int src_offset = i * 64 * sizeof(type) + (aligned ? 8 : 1) * i;                \
+            int dst_offset = i * 64; /* dst must be aligned */                             \
+            randomize_buffers();                                                           \
+            call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8);        \
+            call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);        \
+            if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
+                fail();                                                                    \
+            bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);       \
+        }                                                                                  \
+    } while (0)
+
+void checkasm_check_pixblockdsp(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
+    uint16_t *dst0 = (uint16_t *)dst0_; /* 16-bit views of the two byte buffers above */
+    uint16_t *dst1 = (uint16_t *)dst1_;
+    PixblockDSPContext h;
+    AVCodecContext avctx = {
+        .bits_per_raw_sample = 8,
+    };
+
+    ff_pixblockdsp_init(&h, &avctx);
+    /* The buffer names above are fixed: the check_* and randomize_buffers macros use them directly. */
+    if (check_func(h.get_pixels, "get_pixels"))
+        check_get_pixels(uint8_t, 1);
+    if (check_func(h.get_pixels_unaligned, "get_pixels_unaligned"))
+        check_get_pixels(uint8_t, 0);
+
+    report("get_pixels");
+
+    if (check_func(h.diff_pixels, "diff_pixels"))
+        check_diff_pixels(uint8_t, 1);
+    if (check_func(h.diff_pixels_unaligned, "diff_pixels_unaligned"))
+        check_diff_pixels(uint8_t, 0);
+
+    report("diff_pixels");
+}
--- a/externals/ffmpeg/tests/checkasm/sbrdsp.c
+++ b/externals/ffmpeg/tests/checkasm/sbrdsp.c
@@ -0,0 +1,300 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavcodec/sbrdsp.h"
+#include <float.h>
+
+#include "checkasm.h"
+
+#define randomize(buf, len) do {                                \
+    int i;                                                      \
+    for (i = 0; i < len; i++) {                                 \
+        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          /* about [0, 1] if rnd() spans the full unsigned range - TODO confirm */ \
+        (buf)[i] = f;                                           \
+    }                                                           \
+} while (0)
+
+#define EPS 0.0001 /* absolute tolerance given to float_near_abs_eps_array() */
+
+static void test_sum64x5(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
+    /* The tested function works in place on a 64 + 256 element buffer. */
+    declare_func(void, INTFLOAT *z);
+
+    randomize((INTFLOAT *)dst0, 64 + 256);
+    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT)); /* identical input for both implementations */
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_sum_square(void)
+{
+    INTFLOAT res0;
+    INTFLOAT res1;
+    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
+    double t = 4 * 256; /* scale factor for the comparison tolerance below */
+
+    declare_func_float(INTFLOAT, INTFLOAT (*x)[2], int n);
+
+    randomize((INTFLOAT *)src, 256 * 2);
+    res0 = call_ref(src, 256);
+    res1 = call_new(src, 256);
+    if (!float_near_abs_eps(res0, res1, t * 2 * FLT_EPSILON)) /* absolute tolerance of 2048 * FLT_EPSILON */
+        fail();
+    bench_new(src, 256);
+}
+
+static void test_neg_odd_64(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
+    /* The tested function works in place on a 64-element buffer. */
+    declare_func(void, INTFLOAT *x);
+
+    randomize((INTFLOAT *)dst0, 64);
+    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT)); /* identical input for both implementations */
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_qmf_pre_shuffle(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
+    /* The tested function works in place on a 128-element buffer. */
+    declare_func(void, INTFLOAT *z);
+
+    randomize((INTFLOAT *)dst0, 128);
+    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT)); /* identical input for both implementations */
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_qmf_post_shuffle(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
+    /* Both implementations read the same 64-element src and write a 32x2 output. */
+    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
+
+    randomize((INTFLOAT *)src, 64);
+    call_ref(dst0, src);
+    call_new(dst1, src);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64)) /* all 32 * 2 outputs */
+        fail();
+    bench_new(dst1, src);
+}
+
+static void test_qmf_deint_neg(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
+    /* Both implementations read the same 64-element src; 64 outputs are compared. */
+    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
+
+    randomize((INTFLOAT *)src, 64);
+    call_ref(dst0, src);
+    call_new(dst1, src);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
+        fail();
+    bench_new(dst1, src);
+}
+
+static void test_qmf_deint_bfly(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
+    /* Two 64-element inputs are shared by both implementations; 128 outputs are compared. */
+    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
+
+    memset(dst0, 0, 128 * sizeof(INTFLOAT)); /* both outputs start zeroed */
+    memset(dst1, 0, 128 * sizeof(INTFLOAT));
+
+    randomize((INTFLOAT *)src0, 64);
+    randomize((INTFLOAT *)src1, 64);
+    call_ref(dst0, src0, src1);
+    call_new(dst1, src0, src1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
+        fail();
+    bench_new(dst1, src0, src1);
+}
+
+static void test_autocorrelate(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
+    /* Both implementations read the same 40x2 input; the 3x2x2 outputs are compared. */
+    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
+
+    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT)); /* both outputs start zeroed */
+    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
+
+    randomize((INTFLOAT *)src, 80); /* 40 * 2 values */
+    call_ref(src, dst0);
+    call_new(src, dst1);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
+        fail();
+    bench_new(src, dst1);
+}
+
+static void test_hf_gen(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX; /* random bw argument shared by every call below */
+    int i;
+    /* Compares hf_gen with its reference for start = 2, 4, ..., 62 and end = 128. */
+    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
+                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
+                       INTFLOAT bw, int start, int end);
+
+    randomize((INTFLOAT *)low, 128 * 2);
+    randomize((INTFLOAT *)alpha0, 2);
+    randomize((INTFLOAT *)alpha1, 2);
+    for (i = 2; i < 64; i += 2) {
+        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT)); /* clear both outputs before each round */
+        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
+        call_ref(dst0, low, alpha0, alpha1, bw, i, 128); /* verify with the random bw, not a constant 0.0 */
+        call_new(dst1, low, alpha0, alpha1, bw, i, 128);
+        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+            fail();
+        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
+    }
+}
+
+static void test_hf_g_filt(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    /* Both implementations share the random high and g_filt inputs; 128 x 2 outputs are compared. */
+    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
+                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
+
+    randomize((INTFLOAT *)high, 128 * 40 * 2);
+    randomize((INTFLOAT *)g_filt, 128);
+
+    call_ref(dst0, high, g_filt, 128, 20); /* m_max = 128, ixh = 20 */
+    call_new(dst1, high, g_filt, 128, 20);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+        fail();
+    bench_new(dst1, high, g_filt, 128, 20);
+}
+
+static void test_hf_apply_noise(const SBRDSPContext *sbrdsp)
+{
+    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
+    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    int noise = 0x2a; /* fixed noise argument used for every call */
+    int i, j;
+    /* Checks each of the four sbrdsp->hf_apply_noise[] entries with kx = 0 and kx = 1. */
+    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                       const AAC_FLOAT *q_filt, int noise,
+                       int kx, int m_max);
+
+    randomize((INTFLOAT *)ref, 128 * 2);
+    randomize((INTFLOAT *)s_m, 128);    /* AAC_FLOAT buffers are filled through an INTFLOAT cast */
+    randomize((INTFLOAT *)q_filt, 128);
+
+    for (i = 0; i < 4; i++) {
+        if (check_func(sbrdsp->hf_apply_noise[i], "hf_apply_noise_%d", i)) {
+            for (j = 0; j < 2; j++) {
+                memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT)); /* both outputs restart from the same ref data */
+                memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
+                call_ref(dst0, s_m, q_filt, noise, j, 128);
+                call_new(dst1, s_m, q_filt, noise, j, 128);
+                if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+                    fail();
+                bench_new(dst1, s_m, q_filt, noise, j, 128);
+            }
+        }
+    }
+}
+
+void checkasm_check_sbrdsp(void)
+{
+    SBRDSPContext sbrdsp;
+
+    ff_sbrdsp_init(&sbrdsp);
+    /* Each member is tested only when check_func() accepts it; report() is called either way. */
+    if (check_func(sbrdsp.sum64x5, "sum64x5"))
+        test_sum64x5();
+    report("sum64x5");
+
+    if (check_func(sbrdsp.sum_square, "sum_square"))
+        test_sum_square();
+    report("sum_square");
+
+    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
+        test_neg_odd_64();
+    report("neg_odd_64");
+
+    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
+        test_qmf_pre_shuffle();
+    report("qmf_pre_shuffle");
+
+    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
+        test_qmf_post_shuffle();
+    report("qmf_post_shuffle");
+
+    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
+        test_qmf_deint_neg();
+    report("qmf_deint_neg");
+
+    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
+        test_qmf_deint_bfly();
+    report("qmf_deint_bfly");
+
+    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
+        test_autocorrelate();
+    report("autocorrelate");
+
+    if (check_func(sbrdsp.hf_gen, "hf_gen"))
+        test_hf_gen();
+    report("hf_gen");
+
+    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
+        test_hf_g_filt();
+    report("hf_g_filt");
+
+    test_hf_apply_noise(&sbrdsp); /* performs its own check_func() per array entry */
+    report("hf_apply_noise");
+}
--- a/externals/ffmpeg/tests/checkasm/sw_rgb.c
+++ b/externals/ffmpeg/tests/checkasm/sw_rgb.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libswscale/rgb2rgb.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        int j;                            \
+        for (j = 0; j < size; j+=4)       /* one 32-bit store of rnd() per step */ \
+            AV_WN32(buf + j, rnd());      \
+    } while (0)
+
+static const uint8_t width[] = {12, 16, 20, 32, 36, 128}; /* src_size values used by check_shuffle_bytes */
+static const struct {uint8_t w, h, s;} planes[] = { /* width, height and source stride used by check_uyvy_to_422p */
+    {12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128}
+};
+
+#define MAX_STRIDE 128 /* MAX_STRIDE and MAX_HEIGHT size the local test buffers */
+#define MAX_HEIGHT 128
+
+static void check_shuffle_bytes(void * func, const char * report)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
+    /* func is the function under test; report is the name handed to check_func(). */
+    declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *src, uint8_t *dst, int src_size);
+
+    memset(dst0, 0, MAX_STRIDE);
+    memset(dst1, 0, MAX_STRIDE);
+    randomize_buffers(src0, MAX_STRIDE);
+    memcpy(src1, src0, MAX_STRIDE); /* src1 is a copy of src0 */
+
+    if (check_func(func, "%s", report)) {
+        for (i = 0; i < 6; i ++) {
+            call_ref(src0, dst0, width[i]);
+            call_new(src1, dst1, width[i]);
+            if (memcmp(dst0, dst1, MAX_STRIDE)) /* the whole buffer is compared, not just width[i] bytes */
+                fail();
+        }
+        bench_new(src0, dst0, width[5]); /* benchmark with the largest size (128) */
+    }
+}
+
+static void check_uyvy_to_422p(void)
+{
+    int i;
+
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    /* Compares uyvytoyuv422 with its reference for every geometry in planes[]. */
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                      const uint8_t *src, int width, int height,
+                      int lumStride, int chromStride, int srcStride);
+
+    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT * 2);
+    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2); /* src1 is a copy of src0 */
+
+    if (check_func(uyvytoyuv422, "uyvytoyuv422")) {
+        for (i = 0; i < 6; i ++) {
+            memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT); /* clear all six output planes before each round */
+            memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT);
+            memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+            memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+            memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+            memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+
+            call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h,
+                     MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
+            call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h,
+                     MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
+            if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) || /* whole planes are compared */
+                memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) ||
+                memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT))
+                fail();
+        }
+        bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h, /* benchmark with the last planes[] entry */
+                  MAX_STRIDE, MAX_STRIDE / 2, planes[5].s);
+    }
+}
+
+static void check_interleave_bytes(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
+    LOCAL_ALIGNED_16(uint8_t, src1_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
+    LOCAL_ALIGNED_16(uint8_t, dst0_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
+    // Intentionally using unaligned buffers, as this function doesn't have
+    // any alignment requirements.
+    uint8_t *src0 = src0_buf + 1; /* sources are offset by 1 byte, destinations by 2 */
+    uint8_t *src1 = src1_buf + 1;
+    uint8_t *dst0 = dst0_buf + 2;
+    uint8_t *dst1 = dst1_buf + 2;
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *, const uint8_t *,
+                                       uint8_t *, int, int, int, int, int);
+
+    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
+    randomize_buffers(src1, MAX_STRIDE * MAX_HEIGHT);
+
+    if (check_func(interleaveBytes, "interleave_bytes")) {
+        for (int i = 0; i <= 16; i++) {
+            // Try all widths [1,16], and try one random width.
+
+            int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2))); /* i == 0 selects the random width */
+            int h = 1 + (rnd() % (MAX_HEIGHT-2));               /* random height for every round */
+
+            int src0_offset = 0, src0_stride = MAX_STRIDE;
+            int src1_offset = 0, src1_stride = MAX_STRIDE;
+            int dst_offset  = 0, dst_stride  = 2 * MAX_STRIDE;
+
+            memset(dst0, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
+            memset(dst1, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
+
+            // Try different combinations of negative strides
+            if (i & 1) { /* start at the last row and walk backwards through src0 */
+                src0_offset = (h-1)*src0_stride;
+                src0_stride = -src0_stride;
+            }
+            if (i & 2) { /* same for src1 */
+                src1_offset = (h-1)*src1_stride;
+                src1_stride = -src1_stride;
+            }
+            if (i & 4) { /* same for the destination */
+                dst_offset = (h-1)*dst_stride;
+                dst_stride = -dst_stride;
+            }
+
+            call_ref(src0 + src0_offset, src1 + src1_offset, dst0 + dst_offset,
+                     w, h, src0_stride, src1_stride, dst_stride);
+            call_new(src0 + src0_offset, src1 + src1_offset, dst1 + dst_offset,
+                     w, h, src0_stride, src1_stride, dst_stride);
+            // Check a one pixel-pair edge around the destination area,
+            // to catch overwrites past the end.
+            checkasm_check(uint8_t, dst0, 2*MAX_STRIDE, dst1, 2*MAX_STRIDE,
+                           2 * w + 2, h + 1, "dst");
+        }
+
+        bench_new(src0, src1, dst1, 127, MAX_HEIGHT, /* unaligned pointers, width 127 */
+                  MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE);
+    }
+    if (check_func(interleaveBytes, "interleave_bytes_aligned")) {
+        // Bench the function in a more typical case, with aligned
+        // buffers and widths.
+        bench_new(src0_buf, src1_buf, dst1_buf, 128, MAX_HEIGHT,
+                  MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE);
+    }
+}
+
+void checkasm_check_sw_rgb(void)
+{
+    ff_sws_rgb2rgb_init();
+    /* The same helper is run once per shuffle_bytes_* function, each under its own test name. */
+    check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103");
+    report("shuffle_bytes_2103");
+
+    check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321");
+    report("shuffle_bytes_0321");
+
+    check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230");
+    report("shuffle_bytes_1230");
+
+    check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012");
+    report("shuffle_bytes_3012");
+
+    check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210");
+    report("shuffle_bytes_3210");
+
+    check_uyvy_to_422p();
+    report("uyvytoyuv422");
+
+    check_interleave_bytes(); /* covers both "interleave_bytes" and "interleave_bytes_aligned" */
+    report("interleave_bytes");
+}
--- a/externals/ffmpeg/tests/checkasm/sw_scale.c
+++ b/externals/ffmpeg/tests/checkasm/sw_scale.c
@@ -0,0 +1,134 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#include "checkasm.h"
+
+/*
+ * Fill `buf` with values from rnd(), one 32-bit store (AV_WN32) per four
+ * bytes, for byte offsets 0, 4, 8, ... below `size`. When `size` is not a
+ * multiple of 4 the last store runs past `size`, so the buffer has to be
+ * padded up to the next multiple of 4.
+ * Both arguments are parenthesised so arbitrary expressions can be passed.
+ */
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        int j;                            \
+        for (j = 0; j < (size); j += 4)   \
+            AV_WN32((buf) + j, rnd());    \
+    } while (0)
+
+#define SRC_PIXELS 128
+
+/*
+ * Test the horizontal scaler stored in ctx->hcScale after ff_getSwsFunc().
+ *
+ * For every (srcBpc, dstBpc) pair in hscale_pairs and every filter length in
+ * filter_sizes, the filter table is rebuilt, the reference and the new
+ * implementation are run on the same random source, and the two destination
+ * buffers are compared byte for byte before benchmarking.
+ */
+static void check_hscale(void)
+{
+#define MAX_FILTER_WIDTH 40
+#define FILTER_SIZES 5
+    static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 };
+
+#define HSCALE_PAIRS 2
+    // { srcBpc, dstBpc } combinations written into the context below
+    static const int hscale_pairs[HSCALE_PAIRS][2] = {
+        { 8, 14 },
+        { 8, 18 },
+    };
+
+    int i, j, fsi, hpi, width;
+    struct SwsContext *ctx;
+
+    // padded: MAX_FILTER_WIDTH - 1 extra bytes after the last source pixel,
+    // rounded up to a multiple of 4 because randomize_buffers() stores whole
+    // 32-bit words
+    LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
+    LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
+    LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
+
+    // padded: MAX_FILTER_WIDTH extra coefficients follow the last filter row;
+    // they are filled with random values further down
+    LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
+    LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
+
+    // The dst parameter here is either int16_t or int32_t but we use void* to
+    // just cover both cases.
+    declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
+                      const uint8_t *src, const int16_t *filter,
+                      const int32_t *filterPos, int filterSize);
+
+    // NOTE(review): the result of sws_alloc_context() is not checked for NULL
+    // before it is used - confirm whether that is acceptable here.
+    ctx = sws_alloc_context();
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    // The source is randomised once and shared by every configuration.
+    randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1);
+
+    for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
+        for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
+            width = filter_sizes[fsi];
+
+            ctx->srcBpc = hscale_pairs[hpi][0];
+            ctx->dstBpc = hscale_pairs[hpi][1];
+            ctx->hLumFilterSize = ctx->hChrFilterSize = width;
+
+            for (i = 0; i < SRC_PIXELS; i++) {
+                filterPos[i] = i;
+
+                // These filter coefficients are chosen to try to break two
+                // corner cases, namely:
+                //
+                // - Negative filter coefficients. The filters output signed
+                //   values, and it should be possible to end up with negative
+                //   output values.
+                //
+                // - Positive clipping. The hscale filter function has clipping
+                //   at (1<<15) - 1
+                //
+                // The coefficients sum to the 1.0 point for the hscale
+                // functions (1 << 14).
+
+                // Every tap starts out negative ...
+                for (j = 0; j < width; j++) {
+                    filter[i * width + j] = -((1 << 14) / (width - 1));
+                }
+                // ... then one randomly chosen tap is set to the maximum.
+                filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
+            }
+
+            for (i = 0; i < MAX_FILTER_WIDTH; i++) {
+                // These values should be unused in SIMD implementations but
+                // may still be read, random coefficients here should help show
+                // issues where they are used in error.
+
+                filter[SRC_PIXELS * width + i] = rnd();
+            }
+            ff_getSwsFunc(ctx);
+
+            // The test name is built with dstBpc + 1 (14 -> 15, 18 -> 19).
+            if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
+                memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
+                memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
+
+                call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
+                call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
+                if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
+                    fail();
+                bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
+            }
+        }
+    }
+    sws_freeContext(ctx);
+}
+
+/* checkasm entry point for this file: runs check_hscale() and reports the
+ * outcome under the name "hscale". */
+void checkasm_check_sw_scale(void)
+{
+    check_hscale();
+    report("hscale");
+}
--- a/externals/ffmpeg/tests/checkasm/synth_filter.c
+++ b/externals/ffmpeg/tests/checkasm/synth_filter.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/intfloat.h"
+#include "libavcodec/dcadata.h"
+#include "libavcodec/synth_filter.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 32
+
+/*
+ * Overwrite in[0 .. BUF_SIZE - 1] with floats derived from rnd(): each raw
+ * value is divided by (UINT_MAX >> 5) and then lowered by 16. A float array
+ * named `in` must be in scope where the macro is expanded.
+ */
+#define randomize_input()                                           \
+    do {                                                            \
+        int k = 0;                                                  \
+        while (k < BUF_SIZE) {                                      \
+            in[k] = (float)rnd() / (UINT_MAX >> 5) - 16.0f;         \
+            k++;                                                    \
+        }                                                           \
+    } while (0)
+
+/*
+ * checkasm entry point for SynthFilterContext.synth_filter_float.
+ *
+ * Runs the reference and the new implementation 20 times on identical random
+ * input, alternating between the two DCA 32-band FIR windows, and keeps a
+ * separate history buffer / offset / buf2 set for each implementation (and a
+ * third set for benchmarking). After every call the returned offsets must be
+ * equal, and the output and buf2 contents must agree within the tolerance
+ * given to float_near_abs_eps_ulp().
+ */
+void checkasm_check_synth_filter(void)
+{
+    FFTContext imdct;
+    SynthFilterContext synth;
+
+    ff_mdct_init(&imdct, 6, 1, 1.0);
+    ff_synth_filter_init(&synth);
+
+    if (check_func(synth.synth_filter_float, "synth_filter_float")) {
+        LOCAL_ALIGNED(32, float,   out0,   [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   out1,   [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   out_b,  [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   in,     [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf2_0, [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf2_1, [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf2_b, [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf0,   [512]);
+        LOCAL_ALIGNED(32, float,   buf1,   [512]);
+        LOCAL_ALIGNED(32, float,   buf_b,  [512]);
+        float scale = 1.0f;
+        int i, offset0 = 0, offset1 = 0, offset_b = 0;
+
+        declare_func(void, FFTContext *, float *, int *, float[32], const float[512],
+                     float[32], float[32], float);
+
+        /* All state carried from one call to the next starts out zeroed.
+         * Each size is computed from the buffer that is actually cleared. */
+        memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE);
+        memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE);
+        memset(buf2_b, 0, sizeof(*buf2_b) * BUF_SIZE);
+        memset(buf0,   0, sizeof(*buf0)   * 512);
+        memset(buf1,   0, sizeof(*buf1)   * 512);
+        memset(buf_b,  0, sizeof(*buf_b)  * 512);
+
+        /* more than 1 synth_buf_offset wrap-around */
+        for (i = 0; i < 20; i++) {
+            int j;
+            /* odd iterations use the "perfect" window, even ones the other */
+            const float * window = (i & 1) ? ff_dca_fir_32bands_perfect : ff_dca_fir_32bands_nonperfect;
+
+            memset(out0, 0, sizeof(*out0) * BUF_SIZE);
+            memset(out1, 0, sizeof(*out1) * BUF_SIZE);
+            memset(out_b, 0, sizeof(*out_b) * BUF_SIZE);
+
+            randomize_input();
+
+            call_ref(&imdct, buf0, &offset0, buf2_0, window,
+                     out0, in, scale);
+            call_new(&imdct, buf1, &offset1, buf2_1, window,
+                     out1, in, scale);
+
+            if (offset0 != offset1) {
+                fail();
+                /* newline-terminated so later output starts on its own line */
+                fprintf(stderr, "offsets do not match: %d, %d\n", offset0, offset1);
+                break;
+            }
+
+            for (j = 0; j < BUF_SIZE; j++) {
+                if (!float_near_abs_eps_ulp(out0[j],   out1[j],   7.0e-7, 16) ||
+                    !float_near_abs_eps_ulp(buf2_0[j], buf2_1[j], 7.0e-7, 16)) {
+                    union av_intfloat32 o0, o1, b0, b1;
+
+                    fail();
+                    /* the unions let the raw bit patterns be printed too */
+                    o0.f = out0[j];   o1.f = out1[j];
+                    b0.f = buf2_0[j]; b1.f = buf2_1[j];
+                    fprintf(stderr, "out:  %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
+                            o0.f, o0.i, o1.f, o1.i, fabsf(o0.f - o1.f));
+                    fprintf(stderr, "buf2: %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
+                            b0.f, b0.i, b1.f, b1.i, fabsf(b0.f - b1.f));
+                    /* only the first mismatching sample is reported */
+                    break;
+                }
+            }
+
+            bench_new(&imdct, buf_b, &offset_b, buf2_b, window,
+                      out_b, in, scale);
+        }
+    }
+    ff_mdct_end(&imdct);
+
+    report("synth_filter");
+}
--- a/externals/ffmpeg/tests/checkasm/utvideodsp.c
+++ b/externals/ffmpeg/tests/checkasm/utvideodsp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 Jokyo Images
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/utvideodsp.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 240
+#define HEIGHT 120
+#define WIDTH_PADDED (WIDTH + 16) /* padded to 32 */
+#define BUFFER_SIZE (WIDTH_PADDED * HEIGHT)
+
+
+/*
+ * Store rnd() & 0xFF into the first WIDTH elements of each of the HEIGHT
+ * rows of `buf`. Rows are WIDTH_PADDED elements of `type` apart; the
+ * elements between WIDTH and WIDTH_PADDED are left untouched.
+ */
+#define randomize_plane(buf, type)                  \
+    do {                                            \
+        type *line = buf;                           \
+        int col, row = 0;                           \
+        while (row < HEIGHT) {                      \
+            for (col = 0; col < WIDTH; col++)       \
+                line[col] = rnd() & 0xFF;           \
+            line += WIDTH_PADDED;                   \
+            row++;                                  \
+        }                                           \
+    } while (0)
+
+/*
+ * Compare two planes row by row and call fail() for every row that differs.
+ * Only the first WIDTH elements of each row are compared; `s` is the size of
+ * one element in bytes and rows are WIDTH_PADDED elements apart.
+ */
+#define cmp_plane(buf0, buf1, s)                                    \
+    do {                                                            \
+        int line;                                                   \
+        for (line = 0; line < HEIGHT; line++) {                     \
+            const int first = line * WIDTH_PADDED;                  \
+            if (memcmp(buf0 + first, buf1 + first, WIDTH *s) != 0)  \
+                fail();                                             \
+        }                                                           \
+    } while (0)
+
+
+/*
+ * Body of a restore_rgb_planes test for samples of `type`.
+ *
+ * Declares two sets of r/g/b planes (suffix 0 for the reference, 1 for the
+ * new implementation), zeroes and randomises set 0, copies it to set 1, runs
+ * call_ref / call_new on them in place, compares the planes with cmp_plane
+ * and finally benchmarks the new implementation.
+ *
+ * The macro declares locals and is not wrapped in do/while, so it is meant
+ * to be the whole body of a function. The final statement has no semicolon;
+ * the caller supplies it.
+ */
+#define CHECK_RESTORE(type)\
+LOCAL_ALIGNED_32(type, src_r0, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_g0, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_b0, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_r1, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_g1, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_b1, [BUFFER_SIZE]);  \
+declare_func(void, type *src_r, type *src_g, type *src_b,   \
+             ptrdiff_t linesize_r, ptrdiff_t linesize_g,    \
+             ptrdiff_t linesize_b, int width, int height);  \
+memset(src_r0, 0, BUFFER_SIZE * sizeof(type));  \
+memset(src_g0, 0, BUFFER_SIZE * sizeof(type));  \
+memset(src_b0, 0, BUFFER_SIZE * sizeof(type));  \
+randomize_plane(src_r0, type);                  \
+randomize_plane(src_g0, type);                  \
+randomize_plane(src_b0, type);                  \
+memcpy(src_r1, src_r0, BUFFER_SIZE * sizeof(type));         \
+memcpy(src_g1, src_g0, BUFFER_SIZE * sizeof(type));         \
+memcpy(src_b1, src_b0, BUFFER_SIZE * sizeof(type));         \
+call_ref(src_r0, src_g0, src_b0, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT);\
+call_new(src_r1, src_g1, src_b1, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT);\
+cmp_plane(src_r0, src_r1, sizeof(type));    \
+cmp_plane(src_g0, src_g1, sizeof(type));    \
+cmp_plane(src_b0, src_b1, sizeof(type));    \
+bench_new(src_r1, src_g1, src_b1, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT)
+
+/* Runs the CHECK_RESTORE test body on planes of uint8_t samples. */
+static void check_restore_rgb_planes(void) {
+    CHECK_RESTORE(uint8_t);
+}
+
+/* Runs the CHECK_RESTORE test body on planes of uint16_t samples. */
+static void check_restore_rgb_planes10(void) {
+    CHECK_RESTORE(uint16_t);
+}
+
+/*
+ * checkasm entry point for UTVideoDSPContext: tests and reports
+ * restore_rgb_planes (uint8_t planes) and restore_rgb_planes10 (uint16_t
+ * planes) as two separate groups.
+ */
+void checkasm_check_utvideodsp(void)
+{
+    UTVideoDSPContext dsp;
+
+    ff_utvideodsp_init(&dsp);
+
+    if (check_func(dsp.restore_rgb_planes, "restore_rgb_planes")) {
+        check_restore_rgb_planes();
+    }
+    report("restore_rgb_planes");
+
+    if (check_func(dsp.restore_rgb_planes10, "restore_rgb_planes10")) {
+        check_restore_rgb_planes10();
+    }
+    report("restore_rgb_planes10");
+}
--- a/externals/ffmpeg/tests/checkasm/v210dec.c
+++ b/externals/ffmpeg/tests/checkasm/v210dec.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 James Darnley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/v210dec.h"
+
+/*
+ * Build one 32-bit word from three independent random 10-bit values placed
+ * in bits 0-9, 10-19 and 20-29. Bits 30 and 31 are always zero.
+ */
+static uint32_t get_v210(void)
+{
+    uint32_t packed;
+
+    packed  =  rnd() & 0x3ff;
+    packed |= (rnd() & 0x3ff) << 10;
+    packed |= (rnd() & 0x3ff) << 20;
+
+    return packed;
+}
+
+#define NUM_SAMPLES 2048
+
+/*
+ * Write the same sequence of `len` words from get_v210() into both src0 and
+ * src1, so the two buffers end up identical.
+ */
+static void randomize_buffers(uint32_t *src0, uint32_t *src1, int len)
+{
+    int idx = 0;
+
+    while (idx < len) {
+        src0[idx] = src1[idx] = get_v210();
+        idx++;
+    }
+}
+
+/*
+ * checkasm entry point for V210DecContext.unpack_frame.
+ *
+ * Unpacks the same random v210 words with the reference and the new
+ * implementation and fails if the source buffers or any of the y/u/v
+ * outputs differ afterwards. The context is set up with aligned_input = 0.
+ */
+void checkasm_check_v210dec(void)
+{
+    V210DecContext dec;
+
+    dec.aligned_input = 0;
+    ff_v210dec_init(&dec);
+
+    if (check_func(dec.unpack_frame, "v210_unpack")) {
+        /* pixel count: NUM_SAMPLES / 2 rounded down to a multiple of 6 */
+        const int pixels = NUM_SAMPLES / 2 / 6 * 6;
+        uint32_t src0[NUM_SAMPLES/3];
+        uint32_t src1[NUM_SAMPLES/3];
+        uint16_t y0[NUM_SAMPLES/2];
+        uint16_t y1[NUM_SAMPLES/2];
+        uint16_t u0[NUM_SAMPLES/4];
+        uint16_t u1[NUM_SAMPLES/4];
+        uint16_t v0[NUM_SAMPLES/4];
+        uint16_t v1[NUM_SAMPLES/4];
+        declare_func(void, const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+        int mismatch;
+
+        randomize_buffers(src0, src1, NUM_SAMPLES/3);
+        call_ref(src0, y0, u0, v0, pixels);
+        call_new(src1, y1, u1, v1, pixels);
+
+        /* the inputs must be untouched and all three outputs must agree */
+        mismatch = memcmp(src0, src1, sizeof(src0)) != 0 ||
+                   memcmp(y0, y1, pixels * sizeof y0[0]) != 0 ||
+                   memcmp(u0, u1, pixels/2 * sizeof u0[0]) != 0 ||
+                   memcmp(v0, v1, pixels/2 * sizeof v0[0]) != 0;
+        if (mismatch)
+            fail();
+
+        bench_new(src1, y1, u1, v1, pixels);
+    }
+    report("v210_unpack");
+}
--- a/externals/ffmpeg/tests/checkasm/v210enc.c
+++ b/externals/ffmpeg/tests/checkasm/v210enc.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/v210enc.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 512
+
+/*
+ * Fill the y/u/v input pairs and the dst pair with random data, writing the
+ * same value to the "0" and "1" buffer of each pair.
+ *
+ * Expects y0/y1, u0/u1, v0/v1, dst0/dst1 and `width` to be in scope where
+ * the macro is expanded. `mask` is ANDed onto every 32-bit word stored to
+ * the y/u/v buffers; the index advances by 4 / sizeof(*y0) elements so each
+ * step covers exactly one 32-bit store. The dst buffers receive unmasked
+ * words for byte offsets below width * 8 / 3.
+ */
+#define randomize_buffers(mask)                        \
+    do {                                               \
+        int i, size = sizeof(*y0);                     \
+        for (i = 0; i < BUF_SIZE; i += 4 / size) {     \
+            uint32_t r = rnd() & mask;                 \
+            AV_WN32A(y0 + i, r);                       \
+            AV_WN32A(y1 + i, r);                       \
+        }                                              \
+        for (i = 0; i < BUF_SIZE / 2; i += 4 / size) { \
+            uint32_t r = rnd() & mask;                 \
+            AV_WN32A(u0 + i, r);                       \
+            AV_WN32A(u1 + i, r);                       \
+            r = rnd() & mask;                          \
+            AV_WN32A(v0 + i, r);                       \
+            AV_WN32A(v1 + i, r);                       \
+        }                                              \
+        for (i = 0; i < width * 8 / 3; i += 4) {       \
+            uint32_t r = rnd();                        \
+            AV_WN32A(dst0 + i, r);                     \
+            AV_WN32A(dst1 + i, r);                     \
+        }                                              \
+    } while (0)
+
+/*
+ * Test a v210 pack_line function whose planar inputs hold samples of `type`.
+ *
+ * For every width that is a multiple of 12 / sizeof(type) and smaller than
+ * BUF_SIZE - 15, the buffers are re-randomised (input words ANDed with
+ * `mask`), the reference and new implementations are run from a random
+ * input offset, and the test fails if the inputs were modified differently
+ * or the first width * 8 / 3 output bytes differ. The new implementation is
+ * benchmarked at every width.
+ *
+ * The input comparisons cover the whole y/u/v buffers in bytes, i.e. the
+ * element count times sizeof(type), so the check is complete for 16-bit
+ * samples as well as 8-bit ones.
+ */
+#define check_pack_line(type, mask)                                                \
+    do {                                                                           \
+        LOCAL_ALIGNED_16(type, y0, [BUF_SIZE]);                                    \
+        LOCAL_ALIGNED_16(type, y1, [BUF_SIZE]);                                    \
+        LOCAL_ALIGNED_16(type, u0, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(type, u1, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(type, v0, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(type, v1, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE * 8 / 3]);                       \
+        LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE * 8 / 3]);                       \
+                                                                                   \
+        declare_func(void, const type * y, const type * u, const type * v,         \
+                     uint8_t * dst, ptrdiff_t width);                              \
+        ptrdiff_t width, step = 12 / sizeof(type);                                 \
+                                                                                   \
+        for (width = step; width < BUF_SIZE - 15; width += step) {                 \
+            int y_offset  = rnd() & 15;                                            \
+            int uv_offset = y_offset / 2;                                          \
+            randomize_buffers(mask);                                               \
+            call_ref(y0 + y_offset, u0 + uv_offset, v0 + uv_offset, dst0, width);  \
+            call_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width);  \
+            if (memcmp(y0, y1, BUF_SIZE * sizeof(type)) ||                         \
+                memcmp(u0, u1, BUF_SIZE / 2 * sizeof(type)) ||                     \
+                memcmp(v0, v1, BUF_SIZE / 2 * sizeof(type)) ||                     \
+                memcmp(dst0, dst1, width * 8 / 3))                                 \
+                fail();                                                            \
+            bench_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width); \
+        }                                                                          \
+    } while (0)
+
+/*
+ * checkasm entry point for V210EncContext: tests pack_line_8 on uint8_t
+ * input and pack_line_10 on uint16_t input, then reports both under
+ * "planar_pack".
+ */
+void checkasm_check_v210enc(void)
+{
+    V210EncContext enc;
+
+    ff_v210enc_init(&enc);
+
+    if (check_func(enc.pack_line_8, "v210_planar_pack_8")) {
+        check_pack_line(uint8_t, 0xffffffff);
+    }
+    if (check_func(enc.pack_line_10, "v210_planar_pack_10")) {
+        check_pack_line(uint16_t, 0x03ff03ff);
+    }
+
+    report("planar_pack");
+}
--- a/externals/ffmpeg/tests/checkasm/vf_blend.c
+++ b/externals/ffmpeg/tests/checkasm/vf_blend.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/blend.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define BUF_UNITS 3
+#define SIZE_PER_UNIT (WIDTH * HEIGHT)
+#define BUF_SIZE (BUF_UNITS * SIZE_PER_UNIT)
+
+/*
+ * Initialise the six test buffers; top1/top2, bot1/bot2 and dst1/dst2 must
+ * be in scope where the macro is expanded. The "1" and "2" buffer of each
+ * pair always receive the same data.
+ *
+ *  - First SIZE_PER_UNIT bytes of top: the row index i.
+ *  - First SIZE_PER_UNIT bytes of bot: the column index j.
+ *  - First SIZE_PER_UNIT bytes of dst: random words.
+ *  - Remaining bytes up to BUF_SIZE of all three: random words.
+ *
+ * The third loop deliberately continues from the value of i left by the
+ * second one.
+ */
+#define randomize_buffers()                   \
+    do {                                      \
+        int i, j;                             \
+        for (i = 0; i < HEIGHT; i++) {        \
+            for (j = 0; j < WIDTH; j++) {     \
+                top1[i * WIDTH + j] =         \
+                top2[i * WIDTH + j] = i;      \
+                bot1[i * WIDTH + j] =         \
+                bot2[i * WIDTH + j] = j;      \
+            }                                 \
+        }                                     \
+        for (i = 0; i < SIZE_PER_UNIT; i += 4) { \
+            uint32_t r = rnd();               \
+            AV_WN32A(dst1 + i, r);            \
+            AV_WN32A(dst2 + i, r);            \
+        }                                     \
+        for (; i < BUF_SIZE; i += 4) {        \
+            uint32_t r = rnd();               \
+            AV_WN32A(top1 + i, r);            \
+            AV_WN32A(top2 + i, r);            \
+            r = rnd();                        \
+            AV_WN32A(bot1 + i, r);            \
+            AV_WN32A(bot2 + i, r);            \
+            r = rnd();                        \
+            AV_WN32A(dst1 + i, r);            \
+            AV_WN32A(dst2 + i, r);            \
+        }                                     \
+    } while (0)
+
+/*
+ * Compare the reference and new blend function for BUF_UNITS - 1 different
+ * buffer offsets, then benchmark the new one on a quarter-size area.
+ *
+ * Expects top1/top2, bot1/bot2, dst1/dst2 and `param` in the expanding
+ * scope. The buffers are re-randomised before each pair of calls, and the
+ * test fails if any of the three buffer pairs differs over BUF_SIZE bytes
+ * afterwards. The value w = WIDTH / depth is passed both as every linesize
+ * and as the width.
+ * NOTE(review): `depth` is presumably the sample size in bytes (callers in
+ * this file pass 1 or 2) - confirm against ff_blend_init().
+ */
+#define check_blend_func(depth)                                                            \
+    do {                                                                                   \
+        int i, w;                                                                          \
+        declare_func(void, const uint8_t *top, ptrdiff_t top_linesize,                     \
+                     const uint8_t *bottom, ptrdiff_t bottom_linesize,                     \
+                     uint8_t *dst, ptrdiff_t dst_linesize,                                 \
+                     ptrdiff_t width, ptrdiff_t height,                                    \
+                     struct FilterParams *param, double *values);                          \
+        w = WIDTH / depth;                                                                 \
+                                                                                           \
+        for (i = 0; i < BUF_UNITS - 1; i++) {                                              \
+            int src_offset = i * SIZE_PER_UNIT + (BUF_UNITS - 1 - i) * depth; /* Test various alignments */  \
+            int dst_offset = i * SIZE_PER_UNIT; /* dst must be aligned */                  \
+            randomize_buffers();                                                           \
+            call_ref(top1 + src_offset, w, bot1 + src_offset, w,                           \
+                     dst1 + dst_offset, w, w, HEIGHT, &param, NULL);                       \
+            call_new(top2 + src_offset, w, bot2 + src_offset, w,                           \
+                     dst2 + dst_offset, w, w, HEIGHT, &param, NULL);                       \
+            if (memcmp(top1, top2, BUF_SIZE) || memcmp(bot1, bot2, BUF_SIZE) || memcmp(dst1, dst2, BUF_SIZE)) \
+                fail();                                                                    \
+        }                                                                                  \
+        bench_new(top2, w / 4, bot2, w / 4, dst2, w / 4,                                   \
+                  w / 4, HEIGHT / 4, &param, NULL);                                        \
+    } while (0)
+
+/*
+ * checkasm entry point for the blend filter.
+ *
+ * Allocates three pairs of BUF_SIZE-byte buffers (top, bottom, dst), then
+ * for each listed blend mode selects it in `param`, calls ff_blend_init()
+ * with depth - 1 and runs check_blend_func() if checkasm has a function to
+ * test. Modes are tested with depth 1 first (reported as "8bit") and then
+ * with depth 2 (reported as "16bit").
+ *
+ * NOTE(review): the av_malloc() results are used without a NULL check.
+ */
+void checkasm_check_blend(void)
+{
+    uint8_t *top1 = av_malloc(BUF_SIZE);
+    uint8_t *top2 = av_malloc(BUF_SIZE);
+    uint8_t *bot1 = av_malloc(BUF_SIZE);
+    uint8_t *bot2 = av_malloc(BUF_SIZE);
+    uint8_t *dst1 = av_malloc(BUF_SIZE);
+    uint8_t *dst2 = av_malloc(BUF_SIZE);
+    FilterParams param = {
+        .opacity = 1.0,
+    };
+
+/* Select mode `val`, initialise for `depth`, and test it under `name`. */
+#define check_and_report(name, val, depth)        \
+    param.mode = val;                             \
+    ff_blend_init(&param, depth - 1);             \
+    if (check_func(param.blend, #name))           \
+        check_blend_func(depth);
+
+    check_and_report(addition, BLEND_ADDITION, 1)
+    check_and_report(grainmerge, BLEND_GRAINMERGE, 1)
+    check_and_report(and, BLEND_AND, 1)
+    check_and_report(average, BLEND_AVERAGE, 1)
+    check_and_report(darken, BLEND_DARKEN, 1)
+    check_and_report(grainextract, BLEND_GRAINEXTRACT, 1)
+    check_and_report(hardmix, BLEND_HARDMIX, 1)
+    check_and_report(lighten, BLEND_LIGHTEN, 1)
+    check_and_report(multiply, BLEND_MULTIPLY, 1)
+    check_and_report(or, BLEND_OR, 1)
+    check_and_report(phoenix, BLEND_PHOENIX, 1)
+    check_and_report(screen, BLEND_SCREEN, 1)
+    check_and_report(subtract, BLEND_SUBTRACT, 1)
+    check_and_report(xor, BLEND_XOR, 1)
+    check_and_report(difference, BLEND_DIFFERENCE, 1)
+    check_and_report(extremity, BLEND_EXTREMITY, 1)
+    check_and_report(negation, BLEND_NEGATION, 1)
+
+    report("8bit");
+
+    check_and_report(addition_16, BLEND_ADDITION, 2)
+    check_and_report(grainmerge_16, BLEND_GRAINMERGE, 2)
+    check_and_report(and_16, BLEND_AND, 2)
+    check_and_report(average_16, BLEND_AVERAGE, 2)
+    check_and_report(darken_16, BLEND_DARKEN, 2)
+    check_and_report(grainextract_16, BLEND_GRAINEXTRACT, 2)
+    check_and_report(difference_16, BLEND_DIFFERENCE, 2)
+    check_and_report(extremity_16, BLEND_EXTREMITY, 2)
+    check_and_report(negation_16, BLEND_NEGATION, 2)
+    check_and_report(lighten_16, BLEND_LIGHTEN, 2)
+    check_and_report(or_16, BLEND_OR, 2)
+    check_and_report(phoenix_16, BLEND_PHOENIX, 2)
+    check_and_report(subtract_16, BLEND_SUBTRACT, 2)
+    /* xor_16 must select the XOR mode, like the 8-bit "xor" entry above */
+    check_and_report(xor_16, BLEND_XOR, 2)
+
+    report("16bit");
+
+    av_freep(&top1);
+    av_freep(&top2);
+    av_freep(&bot1);
+    av_freep(&bot2);
+    av_freep(&dst1);
+    av_freep(&dst2);
+}
--- a/externals/ffmpeg/tests/checkasm/vf_colorspace.c
+++ b/externals/ffmpeg/tests/checkasm/vf_colorspace.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/colorspacedsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define W 64
+#define H 64
+/* Fill the three src[] planes with random data; uses idepth, ss_w, ss_h and src from the expansion site. */
+#define randomize_buffers()                     \
+    do {                                        \
+        unsigned mask = bpp_mask[idepth];       /* applied to every 32-bit random word */ \
+        int n, m;                               \
+        int bpp = 1 + (!!idepth);               /* bytes per sample: 1 or 2 */ \
+        int buf_size = W * H * bpp;             \
+        for (m = 0; m < 3; m++) {               \
+            int ss = m ? ss_w + ss_h : 0;       /* planes 1 and 2 shrink by the subsampling shifts */ \
+            int plane_sz = buf_size >> ss;      \
+            for (n = 0; n < plane_sz; n += 4) { \
+                unsigned r = rnd() & mask;      \
+                AV_WN32A(&src[m][n], r);        \
+            }                                   \
+        }                                       \
+    } while (0)
+
+static const char *format_string[] = { /* indexed by fmt; inserted into the check_func() names */
+    "444", "422", "420"
+};
+
+static const unsigned bpp_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; /* indexed by idepth: full, 10 and 12 bits per 16-bit half */
+/* For every dsp.yuv2yuv[idepth][odepth][fmt], run call_ref and call_new on random input and compare output. */
+static void check_yuv2yuv(void)
+{
+    declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
+                 uint8_t *src[3], ptrdiff_t src_stride[3],
+                 int w, int h, const int16_t coeff[3][3][8],
+                 const int16_t off[2][8]);
+    ColorSpaceDSPContext dsp;
+    int idepth, odepth, fmt, n;
+    LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]); /* W * H samples at up to 2 bytes each */
+    LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
+    uint8_t *src[3] = { src_y, src_u, src_v };
+    LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
+    uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
+    LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
+    LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
+    int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf; /* flat buffer viewed as [2][8] */
+    int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf; /* flat buffer viewed as [3][3][8] */
+
+    ff_colorspacedsp_init(&dsp);
+    for (n = 0; n < 8; n++) { /* each coefficient/offset is replicated into all 8 entries */
+        offset[0][n] = offset[1][n] = 16;
+
+        coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
+        coeff[0][1][n] = (1 << 7) - 1;
+        coeff[0][2][n] = -(1 << 8);
+        coeff[1][0][n] = coeff[2][0][n] = 0;
+        coeff[1][1][n] = (1 << 14) + (1 << 7);
+        coeff[1][2][n] = -(1 << 7);
+        coeff[2][2][n] = (1 << 14) - (1 << 6);
+        coeff[2][1][n] = 1 << 6;
+    }
+    for (idepth = 0; idepth < 3; idepth++) {
+        for (odepth = 0; odepth < 3; odepth++) {
+            for (fmt = 0; fmt < 3; fmt++) {
+                if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
+                               "ff_colorspacedsp_yuv2yuv_%sp%dto%d",
+                               format_string[fmt],
+                               idepth * 2 + 8, odepth * 2 + 8)) { /* depth index 0/1/2 is named 8/10/12 */
+                    int ss_w = !!fmt, ss_h = fmt == 2; /* (ss_w, ss_h): 444 -> (0,0), 422 -> (1,0), 420 -> (1,1) */
+                    int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth; /* doubled for depth index > 0 */
+                    int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
+
+                    randomize_buffers();
+                    call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
+                             src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
+                             W, H, coeff, offset);
+                    call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
+                             src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
+                             W, H, coeff, offset);
+                    if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
+                        memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) || /* fewer chroma rows when ss_h is set */
+                        memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
+                        fail();
+                    }
+                }
+            }
+        }
+    }
+
+    report("yuv2yuv");
+}
+/* For every dsp.yuv2rgb[idepth][fmt], run call_ref and call_new on random input and compare the int16_t planes. */
+static void check_yuv2rgb(void)
+{
+    declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
+                 uint8_t *src[3], ptrdiff_t src_stride[3],
+                 int w, int h, const int16_t coeff[3][3][8],
+                 const int16_t off[8]);
+    ColorSpaceDSPContext dsp;
+    int idepth, fmt, n;
+    LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]); /* W * H samples at up to 2 bytes each */
+    LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
+    uint8_t *src[3] = { src_y, src_u, src_v };
+    LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
+    int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
+    LOCAL_ALIGNED_32(int16_t, offset, [8]);
+    LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
+    int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf; /* flat buffer viewed as [3][3][8] */
+
+    ff_colorspacedsp_init(&dsp);
+    for (n = 0; n < 8; n++) { /* each coefficient/offset is replicated into all 8 entries */
+        offset[n] = 16;
+
+        coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
+        coeff[0][1][n] = coeff[2][2][n] = 0;
+        coeff[0][2][n] = 1 << 13;
+        coeff[1][1][n] = -(1 << 12);
+        coeff[1][2][n] = 1 << 12;
+        coeff[2][1][n] = 1 << 11;
+    }
+    for (idepth = 0; idepth < 3; idepth++) {
+        for (fmt = 0; fmt < 3; fmt++) {
+            if (check_func(dsp.yuv2rgb[idepth][fmt],
+                           "ff_colorspacedsp_yuv2rgb_%sp%d",
+                           format_string[fmt], idepth * 2 + 8)) { /* depth index 0/1/2 is named 8/10/12 */
+                int ss_w = !!fmt, ss_h = fmt == 2; /* ss_h is consumed by randomize_buffers() */
+                int y_src_stride = W << !!idepth;
+                int uv_src_stride = y_src_stride >> ss_w;
+
+                randomize_buffers();
+                call_ref(dst0, W, src, /* dst_stride = W */
+                         (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
+                         W, H, coeff, offset);
+                call_new(dst1, W, src,
+                         (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
+                         W, H, coeff, offset);
+                if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
+                    memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
+                    memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
+                    fail();
+                }
+            }
+        }
+    }
+
+    report("yuv2rgb");
+}
+/* Redefined for the checks below: store one random value in each of the W x H entries of src[0..2]. */
+#undef randomize_buffers
+#define randomize_buffers()                     \
+    do {                                        \
+        int y, x, p;                            \
+        for (p = 0; p < 3; p++) {               \
+            for (y = 0; y < H; y++) {           \
+                for (x = 0; x < W; x++) {       \
+                    int r = rnd() & 0x7fff;     /* 0 .. 32767 */ \
+                    r -= (32768 - 28672) >> 1;  /* minus 2048: -2048 .. 30719 */ \
+                    src[p][y * W + x] = r;      \
+                }                               \
+            }                                   \
+        }                                       \
+    } while (0)
+/* For every dsp.rgb2yuv[odepth][fmt], run call_ref and call_new on random int16_t input and compare output. */
+static void check_rgb2yuv(void)
+{
+    declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
+                 int16_t *src[3], ptrdiff_t src_stride,
+                 int w, int h, const int16_t coeff[3][3][8],
+                 const int16_t off[8]);
+    ColorSpaceDSPContext dsp;
+    int odepth, fmt, n;
+    LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]); /* randomize_buffers() only writes the first W * H entries */
+    LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
+    int16_t *src[3] = { src_y, src_u, src_v };
+    LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
+    uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
+    LOCAL_ALIGNED_32(int16_t, offset, [8]);
+    LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
+    int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf; /* flat buffer viewed as [3][3][8] */
+
+    ff_colorspacedsp_init(&dsp);
+    for (n = 0; n < 8; n++) { /* each coefficient/offset is replicated into all 8 entries */
+        offset[n] = 16;
+
+        // these somewhat resemble bt601/smpte170m coefficients
+        coeff[0][0][n] = lrint(0.3 * (1 << 14));
+        coeff[0][1][n] = lrint(0.6 * (1 << 14));
+        coeff[0][2][n] = lrint(0.1 * (1 << 14));
+        coeff[1][0][n] = lrint(-0.15 * (1 << 14));
+        coeff[1][1][n] = lrint(-0.35 * (1 << 14));
+        coeff[1][2][n] = lrint(0.5 * (1 << 14));
+        coeff[2][0][n] = lrint(0.5 * (1 << 14));
+        coeff[2][1][n] = lrint(-0.42 * (1 << 14));
+        coeff[2][2][n] = lrint(-0.08 * (1 << 14));
+    }
+    for (odepth = 0; odepth < 3; odepth++) {
+        for (fmt = 0; fmt < 3; fmt++) {
+            if (check_func(dsp.rgb2yuv[odepth][fmt],
+                           "ff_colorspacedsp_rgb2yuv_%sp%d",
+                           format_string[fmt], odepth * 2 + 8)) { /* depth index 0/1/2 is named 8/10/12 */
+                int ss_w = !!fmt, ss_h = fmt == 2; /* (ss_w, ss_h): 444 -> (0,0), 422 -> (1,0), 420 -> (1,1) */
+                int y_dst_stride = W << !!odepth;
+                int uv_dst_stride = y_dst_stride >> ss_w;
+
+                randomize_buffers();
+                call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
+                         src, W, W, H, coeff, offset); /* src_stride = W, w = W, h = H */
+                call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
+                         src, W, W, H, coeff, offset);
+                if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
+                    memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) || /* fewer chroma rows when ss_h is set */
+                    memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
+                    fail();
+                }
+            }
+        }
+    }
+
+    report("rgb2yuv");
+}
+/* Run dsp.multiply3x3 via call_ref and call_new on two identical random copies and compare the results. */
+static void check_multiply3x3(void)
+{
+    declare_func(void, int16_t *data[3], ptrdiff_t stride,
+                 int w, int h, const int16_t coeff[3][3][8]);
+    ColorSpaceDSPContext dsp;
+    LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
+    LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
+    int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
+    int16_t **src = dst0; /* randomize_buffers() writes through src, i.e. into the dst0 planes */
+    LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
+    int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf; /* flat buffer viewed as [3][3][8] */
+    int n;
+
+    ff_colorspacedsp_init(&dsp);
+    for (n = 0; n < 8; n++) { /* each coefficient is replicated into all 8 entries */
+        coeff[0][0][n] = lrint(0.85 * (1 << 14));
+        coeff[0][1][n] = lrint(0.10 * (1 << 14));
+        coeff[0][2][n] = lrint(0.05 * (1 << 14));
+        coeff[1][0][n] = lrint(-0.1 * (1 << 14));
+        coeff[1][1][n] = lrint(0.95 * (1 << 14));
+        coeff[1][2][n] = lrint(0.15 * (1 << 14));
+        coeff[2][0][n] = lrint(-0.2 * (1 << 14));
+        coeff[2][1][n] = lrint(0.30 * (1 << 14));
+        coeff[2][2][n] = lrint(0.90 * (1 << 14));
+    }
+    if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
+        randomize_buffers();
+        memcpy(dst1_y, dst0_y, W * H * sizeof(*dst1_y)); /* give the second run the same starting data */
+        memcpy(dst1_u, dst0_u, W * H * sizeof(*dst1_u));
+        memcpy(dst1_v, dst0_v, W * H * sizeof(*dst1_v));
+        call_ref(dst0, W, W, H, coeff); /* stride = W, w = W, h = H */
+        call_new(dst1, W, W, H, coeff);
+        if (memcmp(dst0[0], dst1[0], H * W * sizeof(*dst0_y)) ||
+            memcmp(dst0[1], dst1[1], H * W * sizeof(*dst0_u)) ||
+            memcmp(dst0[2], dst1[2], H * W * sizeof(*dst0_v))) {
+            fail();
+        }
+    }
+
+    report("multiply3x3");
+}
+/* Run the four colorspace DSP checks in order; each check calls report() itself. */
+void checkasm_check_colorspace(void)
+{
+    check_yuv2yuv();
+    check_yuv2rgb();
+    check_rgb2yuv();
+    check_multiply3x3();
+}
--- a/externals/ffmpeg/tests/checkasm/vf_eq.c
+++ b/externals/ffmpeg/tests/checkasm/vf_eq.c
@@ -0,0 +1,79 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/avfilter.h"
+#include "libavfilter/vf_eq.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define SRC_STRIDE 256
+#define PIXELS (WIDTH * HEIGHT)
+#define RANDOM_RANGE 80000
+#define SCALE 10000
+/* Fill the first `size` bytes of `buf` with random values; arguments are parenthesized so expressions are safe. */
+#define randomize_buffers(buf, size)        \
+    do {                                    \
+        int j;                              \
+        uint8_t *tmp_buf = (uint8_t *)(buf);\
+        for (j = 0; j < (size); j++)        \
+            tmp_buf[j] = rnd() & 0xFF;      \
+    } while (0)
+/* Compare eq.process via call_ref and call_new on one random 8-bit buffer with random contrast/brightness. */
+static void check_eq(void)
+{
+    LOCAL_ALIGNED_32(uint8_t, src,     [PIXELS]);
+    LOCAL_ALIGNED_32(uint8_t, dst_ref, [PIXELS]);
+    LOCAL_ALIGNED_32(uint8_t, dst_new, [PIXELS]);
+    int w = WIDTH;
+    int h = HEIGHT;
+    int src_stride = SRC_STRIDE;
+    int dst_stride = SRC_STRIDE;
+    EQParameters pa; /* only contrast and brightness are assigned below */
+    EQContext eq;
+    declare_func(void, EQParameters *param, uint8_t *dst, int dst_stride,
+                 const uint8_t *src, int src_stride, int w, int h);
+
+    double rand_contrast = (int)(rnd() % (RANDOM_RANGE * 2) - RANDOM_RANGE) /
+                           (SCALE * 1.0); /* [-8.0, 8.0) provided rnd() is non-negative */
+    double rand_brightness = (int)(rnd() % (SCALE * 2) - SCALE) /
+                             (SCALE * 1.0); /* [-1.0, 1.0) provided rnd() is non-negative */
+    pa.contrast = rand_contrast;
+    pa.brightness = rand_brightness;
+
+    memset(dst_ref, 0, PIXELS);
+    memset(dst_new, 0, PIXELS);
+    randomize_buffers(src, PIXELS);
+    ff_eq_init(&eq);
+
+    if (check_func(eq.process, "process")) {
+        call_ref(&pa, dst_ref, dst_stride, src, src_stride, w, h);
+        call_new(&pa, dst_new, dst_stride, src, src_stride, w, h);
+        if (memcmp(dst_ref, dst_new, PIXELS))
+            fail();
+        bench_new(&pa, dst_new, dst_stride, src, src_stride, w, h);
+    }
+}
+/* Non-static entry for the vf_eq check: run check_eq(), then report() under the name "eq". */
+void checkasm_check_vf_eq(void)
+{
+    check_eq();
+    report("eq");
+}
--- a/externals/ffmpeg/tests/checkasm/vf_gblur.c
+++ b/externals/ffmpeg/tests/checkasm/vf_gblur.c
@@ -0,0 +1,66 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/gblur.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define PIXELS (WIDTH * HEIGHT)
+#define BUF_SIZE (PIXELS * 4)
+/* Fill the first `size` floats of `buf` with random whole values 0..255; arguments are parenthesized. */
+#define randomize_buffers(buf, size)             \
+    do {                                         \
+        int j;                                   \
+        float *tmp_buf = (float *)(buf);         \
+        for (j = 0; j < (size); j++)             \
+            tmp_buf[j] = (float)(rnd() & 0xFF);  \
+    } while (0)
+/* Compare s.horiz_slice via call_ref and call_new on identical random float buffers, within 0.01 absolute. */
+void checkasm_check_vf_gblur(void)
+{
+    float *dst_ref = av_malloc(BUF_SIZE); /* NOTE(review): neither allocation is checked for NULL before use */
+    float *dst_new = av_malloc(BUF_SIZE);
+    int w = WIDTH;
+    int h = HEIGHT;
+    int steps = 2;
+    float nu = 0.101f;
+    float bscale = 1.112f;
+    GBlurContext s;
+
+    declare_func(void, float *dst, int w, int h, int steps, float nu, float bscale);
+
+    randomize_buffers(dst_ref, PIXELS);
+    memcpy(dst_new, dst_ref, BUF_SIZE); /* both runs start from the same data */
+
+    ff_gblur_init(&s);
+
+    if (check_func(s.horiz_slice, "horiz_slice")) {
+        call_ref(dst_ref, w, h, steps, nu, bscale);
+        call_new(dst_new, w, h, steps, nu, bscale);
+
+        if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) {
+            fail();
+        }
+        bench_new(dst_new, w, h, 1, nu, bscale); /* the benchmark passes 1 step, not `steps` */
+    }
+    report("horiz_slice");
+    av_freep(&dst_ref);
+    av_freep(&dst_new);
+}
--- a/externals/ffmpeg/tests/checkasm/vf_hflip.c
+++ b/externals/ffmpeg/tests/checkasm/vf_hflip.c
@@ -0,0 +1,76 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/hflip.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define WIDTH_PADDED (256 + 32)
+/* Fill the first `size` bytes of `buf` with random values; arguments are parenthesized so expressions are safe. */
+#define randomize_buffers(buf, size)        \
+    do {                                    \
+        int j;                              \
+        uint8_t *tmp_buf = (uint8_t *)(buf);\
+        for (j = 0; j < (size); j++)        \
+            tmp_buf[j] = rnd() & 0xFF;      \
+    } while (0)
+/* Compare s.flip_line[0] via call_ref and call_new for every width 1..w-1, using `step` bytes per element. */
+static void check_hflip(int step, const char * report_name){
+    LOCAL_ALIGNED_32(uint8_t, src,     [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, dst_ref, [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, dst_new, [WIDTH_PADDED]);
+    int w = WIDTH;
+    int i;
+    int step_array[4] = {1, 1, 1, 1};
+    FlipContext s;
+
+    declare_func(void, const uint8_t *src, uint8_t *dst, int w);
+
+    memset(src,     0, WIDTH_PADDED);
+    memset(dst_ref, 0, WIDTH_PADDED);
+    memset(dst_new, 0, WIDTH_PADDED);
+    randomize_buffers(src, WIDTH_PADDED);
+
+    if (step == 2) { /* halve the element count and pass step 2 for all four entries */
+        w /= 2;
+        for (i = 0; i < 4; i++)
+            step_array[i] = step;
+    }
+
+    ff_hflip_init(&s, step_array, 4);
+
+    if (check_func(s.flip_line[0], "hflip_%s", report_name)) {
+        for (i = 1; i < w; i++) { /* width w itself is only benchmarked, not compared */
+            call_ref(src + (w - 1) * step, dst_ref, i); /* src argument points at element w - 1 */
+            call_new(src + (w - 1) * step, dst_new, i);
+            if (memcmp(dst_ref, dst_new, i * step))
+                fail();
+        }
+        bench_new(src + (w - 1) * step, dst_new, w);
+    }
+}
+void checkasm_check_vf_hflip(void) /* runs check_hflip() for step 1 and step 2, reporting each separately */
+{
+    check_hflip(1, "byte");  /* checked under the name "hflip_byte" */
+    report("hflip_byte");
+
+    check_hflip(2, "short"); /* checked under the name "hflip_short" */
+    report("hflip_short");
+}
--- a/externals/ffmpeg/tests/checkasm/vf_nlmeans.c
+++ b/externals/ffmpeg/tests/checkasm/vf_nlmeans.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "checkasm.h"
+#include "libavfilter/vf_nlmeans.h"
+#include "libavutil/avassert.h"
+/* Write (size) / 4 random 32-bit words to `buf`; any remainder of size modulo 4 bytes is left untouched. */
+#define randomize_buffer(buf, size) do {    \
+    int i;                                  \
+    for (i = 0; i < (size) / 4; i++)        \
+        ((uint32_t *)(buf))[i] = rnd();     \
+} while (0)
+/* Compare dsp.compute_safe_ssd_integral_image (ref vs. new) for every non-zero offset in the research window. */
+void checkasm_check_nlmeans(void)
+{
+    NLMeansDSPContext dsp = {0};
+
+    const int w = 123;  // source width
+    const int h = 45;   // source height
+    const int p = 3;    // patch half size
+    const int r = 2;    // research window half size
+
+    ff_nlmeans_init(&dsp);
+
+    /* See the filter's code for the explanations on the variables */
+    if (check_func(dsp.compute_safe_ssd_integral_image, "ssd_integral_image")) {
+        int offx, offy;
+        const int e = p + r;
+        const int ii_w = w + e*2;
+        const int ii_h = h + e*2;
+        const int ii_lz_32 = FFALIGN(ii_w + 1, 4);
+        uint32_t *ii_orig_ref = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_ref));
+        uint32_t *ii_ref = ii_orig_ref + ii_lz_32 + 1; /* skip the first row and the first column */
+        uint32_t *ii_orig_new = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_new));
+        uint32_t *ii_new = ii_orig_new + ii_lz_32 + 1;
+        const int src_lz = FFALIGN(w, 16);
+        uint8_t *src = av_mallocz_array(h, src_lz);
+
+        declare_func(void, uint32_t *dst, ptrdiff_t dst_linesize_32,
+                     const uint8_t *s1, ptrdiff_t linesize1,
+                     const uint8_t *s2, ptrdiff_t linesize2,
+                     int w, int h);
+        av_assert0(ii_orig_ref && ii_orig_new && src); /* abort on allocation failure instead of writing through NULL */
+        randomize_buffer(src, h * src_lz);
+
+        for (offy = -r; offy <= r; offy++) {
+            for (offx = -r; offx <= r; offx++) {
+                if (offx || offy) {
+                    const int s1x = e;
+                    const int s1y = e;
+                    const int s2x = e + offx;
+                    const int s2y = e + offy;
+                    const int startx_safe = FFMAX(s1x, s2x);
+                    const int starty_safe = FFMAX(s1y, s2y);
+                    const int u_endx_safe = FFMIN(s1x + w, s2x + w);
+                    const int endy_safe   = FFMIN(s1y + h, s2y + h);
+                    const int safe_pw = (u_endx_safe - startx_safe) & ~0xf; /* width rounded down to a multiple of 16 */
+                    const int safe_ph = endy_safe - starty_safe;
+
+                    av_assert0(safe_pw && safe_ph);
+                    av_assert0(startx_safe - s1x >= 0); av_assert0(startx_safe - s1x < w);
+                    av_assert0(starty_safe - s1y >= 0); av_assert0(starty_safe - s1y < h);
+                    av_assert0(startx_safe - s2x >= 0); av_assert0(startx_safe - s2x < w);
+                    av_assert0(starty_safe - s2y >= 0); av_assert0(starty_safe - s2y < h);
+
+                    memset(ii_ref, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref));
+                    memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
+
+                    call_ref(ii_ref + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
+                             src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
+                             src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
+                             safe_pw, safe_ph);
+                    call_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
+                             src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
+                             src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
+                             safe_pw, safe_ph);
+
+                    if (memcmp(ii_ref, ii_new, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref)))
+                        fail();
+
+                    memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
+                    bench_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
+                             src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
+                             src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
+                             safe_pw, safe_ph);
+                }
+            }
+        }
+
+        av_freep(&ii_orig_ref);
+        av_freep(&ii_orig_new);
+        av_freep(&src);
+    }
+
+    report("dsp");
+}
--- a/externals/ffmpeg/tests/checkasm/vf_threshold.c
+++ b/externals/ffmpeg/tests/checkasm/vf_threshold.c
@@ -0,0 +1,85 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/threshold.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define WIDTH_PADDED (256 + 32)
+/* Fill the first `size` bytes of `buf` with random values; arguments are parenthesized so expressions are safe. */
+#define randomize_buffers(buf, size)        \
+    do {                                    \
+        int j;                              \
+        uint8_t *tmp_buf = (uint8_t *)(buf);\
+        for (j = 0; j < (size); j++)        \
+            tmp_buf[j] = rnd() & 0xFF;      \
+    } while (0)
+/* Compare s.threshold via call_ref and call_new on a single row of random input for the given depth. */
+static void check_threshold(int depth){
+    LOCAL_ALIGNED_32(uint8_t, in       , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, threshold, [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, min      , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, max      , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, out_ref  , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, out_new  , [WIDTH_PADDED]);
+    ptrdiff_t line_size = WIDTH_PADDED;
+    int w = WIDTH;
+
+    declare_func(void, const uint8_t *in, const uint8_t *threshold,
+                 const uint8_t *min, const uint8_t *max, uint8_t *out,
+                 ptrdiff_t ilinesize, ptrdiff_t tlinesize,
+                 ptrdiff_t flinesize, ptrdiff_t slinesize,
+                 ptrdiff_t olinesize, int w, int h);
+
+    ThresholdContext s; /* only depth is assigned before ff_threshold_init() */
+    s.depth = depth;
+    ff_threshold_init(&s);
+
+    memset(in,     0, WIDTH_PADDED);
+    memset(threshold, 0, WIDTH_PADDED);
+    memset(min, 0, WIDTH_PADDED);
+    memset(max, 0, WIDTH_PADDED);
+    memset(out_ref, 0, WIDTH_PADDED);
+    memset(out_new, 0, WIDTH_PADDED);
+    randomize_buffers(in, WIDTH); /* only the first WIDTH bytes are random; the padding stays zero */
+    randomize_buffers(threshold, WIDTH);
+    randomize_buffers(min, WIDTH);
+    randomize_buffers(max, WIDTH);
+
+    if (depth == 16) /* presumably w counts samples, so 16-bit data fits half as many in WIDTH bytes */
+        w /= 2;
+
+    if (check_func(s.threshold, "threshold%d", depth)) {
+        call_ref(in, threshold, min, max, out_ref, line_size, line_size, line_size, line_size, line_size, w, 1);
+        call_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
+        if (memcmp(out_ref, out_new, WIDTH)) /* WIDTH bytes are compared for both depths */
+            fail();
+        bench_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
+    }
+}
+/* Non-static entry for the vf_threshold check: run depth 8 and depth 16, reporting each separately. */
+void checkasm_check_vf_threshold(void)
+{
+    check_threshold(8);
+    report("threshold8");
+
+    check_threshold(16);
+    report("threshold16");
+}
--- a/externals/ffmpeg/tests/checkasm/videodsp.c
+++ b/externals/ffmpeg/tests/checkasm/videodsp.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/videodsp.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+/* Fill src0 (taken from the expansion site) with random 32-bit words covering (w) * (h) elements. */
+#define randomize_buffers(w, h)                             \
+    do {                                                    \
+        int i;                                              \
+        for (i = 0; i < (w) * (h) * sizeof(*src0); i += 4)  \
+            AV_WN32A(((uint8_t *) src0) + i, rnd());        \
+    } while (0)
+/* Set `fix` to fix_val and step `var` over [var_start, var_end]; compare ref/new output and benchmark each position. */
+#define iter_1d(type, fix, fix_val, var, var_start, var_end)        \
+    for (fix = fix_val, var = var_start; var <= var_end; var++) {   \
+        call_ref((type *) dst0, (const type *) (src0 + y * pw + x), \
+                 bw * sizeof(type), pw * sizeof(type),              \
+                 bw, bh, x, y, pw, ph);                             \
+        call_new((type *) dst1, (const type *) (src1 + y * pw + x), \
+                 bw * sizeof(type), pw * sizeof(type),              \
+                 bw, bh, x, y, pw, ph);                             \
+        if (memcmp(dst0, dst1, bw * bh * sizeof(type)))             \
+            fail();                                                 \
+        bench_new((type *) dst1, (const type *) (src1 + y * pw + x),\
+                  bw * sizeof(type), pw * sizeof(type),             \
+                  bw, bh, x, y, pw, ph);                            \
+    }
+/* Randomize a src_w x src_h source, then sweep (x, y) along the four sides of [-src_w, src_w] x [-src_h, src_h]. */
+#define check_emu_edge_size(type, src_w, src_h, dst_w, dst_h)   \
+    do {                                                        \
+        LOCAL_ALIGNED_16(type, src0, [src_w * src_h]);          \
+        LOCAL_ALIGNED_16(type, src1, [src_w * src_h]);          \
+        int bw = dst_w, bh = dst_h;                             \
+        int pw = src_w, ph = src_h;                             \
+        int y, x;                                               \
+        randomize_buffers(src_w, src_h);                        \
+        memcpy(src1, src0, pw * ph * sizeof(type));             \
+        iter_1d(type, y, 0 - src_h, x, 0 - src_w, src_w - 0);   \
+        iter_1d(type, x, src_w - 0, y, 0 - src_h, src_h - 0);   \
+        iter_1d(type, y, src_h - 0, x, 0 - src_w, src_w - 0);   \
+        iter_1d(type, x, 0 - src_w, y, 0 - src_h, src_h - 0);   \
+    } while (0)
+/* Declare two 64x64 outputs and the tested signature, then check source sizes 16x1, 16x16 and 64x64. */
+#define check_emu_edge(type)                                    \
+    do {                                                        \
+        LOCAL_ALIGNED_16(type, dst0, [64 * 64]);                \
+        LOCAL_ALIGNED_16(type, dst1, [64 * 64]);                \
+        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, \
+                          void, type *dst, const type *src,     \
+                          ptrdiff_t dst_linesize,               \
+                          ptrdiff_t src_linesize,               \
+                          int block_w, int block_h,             \
+                          int src_x, int src_y,                 \
+                          int src_w, int src_h);                \
+        check_emu_edge_size(type, 16,  1, 64, 64);              \
+        check_emu_edge_size(type, 16, 16, 64, 64);              \
+        check_emu_edge_size(type, 64, 64, 64, 64);              \
+    } while (0)
+/* Non-static entry for the videodsp check: tests vdsp.emulated_edge_mc with uint8_t data, then reports. */
+void checkasm_check_videodsp(void)
+{
+    VideoDSPContext vdsp;
+
+    ff_videodsp_init(&vdsp, 8); /* presumably 8 is the bit depth, matching the "_8" suffix below - confirm */
+    if (check_func(vdsp.emulated_edge_mc, "emulated_edge_mc_8"))
+        check_emu_edge(uint8_t);
+
+    report("emulated_edge_mc");
+}
--- a/externals/ffmpeg/tests/checkasm/vp8dsp.c
+++ b/externals/ffmpeg/tests/checkasm/vp8dsp.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2016 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/vp8dsp.h"
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "checkasm.h"
+
+#define PIXEL_STRIDE 16
+
+#define randomize_buffers(src, dst, stride, coef)                            \
+    do { /* random 4x4 src/dst rows; coef = src - dst per pixel */           \
+        int x, y;                                                            \
+        for (y = 0; y < 4; y++) {                                            \
+            AV_WN32A((src) + y * (stride), rnd());                           \
+            AV_WN32A((dst) + y * (stride), rnd());                           \
+            for (x = 0; x < 4; x++)                                          \
+                (coef)[y * 4 + x] = (src)[y * (stride) + x] -                \
+                                    (dst)[y * (stride) + x];                 \
+        }                                                                    \
+    } while (0)
+
+/* In-place 4x4 forward transform of coef: a pass over rows, then columns. */
+static void dct4x4(int16_t *coef)
+{
+    int16_t *p;
+
+    /* Row pass: sums and differences are scaled by 8 before use. */
+    for (p = coef; p != coef + 16; p += 4) {
+        const int outer_sum  = (p[0] + p[3]) * 8, inner_sum  = (p[1] + p[2]) * 8;
+        const int inner_diff = (p[1] - p[2]) * 8, outer_diff = (p[0] - p[3]) * 8;
+        p[0] =  outer_sum + inner_sum;
+        p[1] = (inner_diff * 2217 + outer_diff * 5352 + 14500) >> 12;
+        p[2] =  outer_sum - inner_sum;
+        p[3] = (outer_diff * 2217 - inner_diff * 5352 +  7500) >> 12;
+    }
+    /* Column pass: p walks the first row, p[4 * r] is row r of that column. */
+    for (p = coef; p != coef + 4; p++) {
+        const int outer_sum  = p[0] + p[12], inner_sum  = p[4] + p[8];
+        const int inner_diff = p[4] - p[8],  outer_diff = p[0] - p[12];
+        p[0]  =  (outer_sum + inner_sum + 7) >> 4;
+        p[4]  = ((inner_diff * 2217 + outer_diff * 5352 + 12000) >> 16) + !!outer_diff;
+        p[8]  =  (outer_sum - inner_sum + 7) >> 4;
+        p[12] =  (outer_diff * 2217 - inner_diff * 5352 + 51000) >> 16;
+    }
+}
+
+/*
+ * In-place 4x4 Walsh-Hadamard transform of the 16 values in coef.
+ * The first pass runs down each column; the second runs along each row and
+ * stores its results multiplied by two.
+ */
+static void wht4x4(int16_t *coef)
+{
+    int16_t *v;
+
+    /* Column pass: v is the top of a column, its rows are 4 entries apart. */
+    for (v = coef; v < coef + 4; v++) {
+        const int in1  = v[4];
+        const int in2  = v[8];
+        const int sum  = v[0] + in1;
+        const int diff = v[12] - in2;
+        const int half = (sum - diff) >> 1;
+        const int lo   = half - in1;
+        const int hi   = half - in2;
+
+        v[0]  = sum - hi;
+        v[4]  = hi;
+        v[8]  = diff + lo;
+        v[12] = lo;
+    }
+    /* Row pass: same butterfly on four consecutive entries, doubled. */
+    for (v = coef; v < coef + 16; v += 4) {
+        const int in1  = v[1];
+        const int in2  = v[2];
+        const int sum  = v[0] + in1;
+        const int diff = v[3] - in2;
+        const int half = (sum - diff) >> 1;
+        const int lo   = half - in1;
+        const int hi   = half - in2;
+
+        v[0] = (sum - hi) * 2;
+        v[1] = hi * 2;
+        v[2] = (diff + lo) * 2;
+        v[3] = lo * 2;
+    }
+}
+
+static void check_idct(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, src,  [4 * 4]);
+    LOCAL_ALIGNED_16(uint8_t, dst,  [4 * 4]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]); // pixels passed to call_ref
+    LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]); // pixels passed to call_new
+    LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
+    VP8DSPContext d;
+    int dc;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);
+    // Compares vp8_idct_add and vp8_idct_dc_add against their reference on one random 4x4 block.
+    ff_vp8dsp_init(&d);
+    randomize_buffers(src, dst, 4, coef); // coef = src - dst of two random 4x4 blocks
+
+    dct4x4(coef);
+
+    for (dc = 0; dc <= 1; dc++) { // 0: vp8_idct_add, 1: vp8_idct_dc_add
+        void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d.vp8_idct_dc_add : d.vp8_idct_add;
+
+        if (check_func(idct, "vp8_idct_%sadd", dc ? "dc_" : "")) {
+            if (dc) {
+                memset(subcoef0, 0, 4 * 4 * sizeof(int16_t)); // dc variant: keep only coef[0]
+                subcoef0[0] = coef[0];
+            } else {
+                memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
+            }
+            memcpy(dst0, dst, 4 * 4);
+            memcpy(dst1, dst, 4 * 4);
+            memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
+            // Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a
+            // multiple of 16. If optimizations want to take advantage of that, this test needs to be
+            // updated to make it more like the h264dsp tests.
+            call_ref(dst0, subcoef0, 4);
+            call_new(dst1, subcoef1, 4);
+            if (memcmp(dst0, dst1, 4 * 4) ||
+                memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t))) // coefficient buffers must match too
+                fail();
+
+            bench_new(dst1, subcoef1, 4);
+        }
+    }
+}
+
+static void check_idct_dc4(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, src,  [4 * 4 * 4]);
+    LOCAL_ALIGNED_16(uint8_t, dst,  [4 * 4 * 4]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
+    VP8DSPContext d;
+    int i, chroma;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+    // Compares vp8_idct_dc_add4y / vp8_idct_dc_add4uv against their reference on four 4x4 blocks.
+    ff_vp8dsp_init(&d);
+
+    for (chroma = 0; chroma <= 1; chroma++) {
+        void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d.vp8_idct_dc_add4uv : d.vp8_idct_dc_add4y;
+        if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
+            ptrdiff_t stride = chroma ? 8 : 16;
+            int w      = chroma ? 2 : 4; // blocks per row: 2x2 layout for uv, 4x1 for y
+            for (i = 0; i < 4; i++) {
+                int blockx = 4 * (i % w);
+                int blocky = 4 * (i / w);
+                randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
+                dct4x4(coef[i]);
+                memset(&coef[i][1], 0, 15 * sizeof(int16_t)); // keep only the first coefficient
+            }
+
+            memcpy(dst0, dst, 4 * 4 * 4);
+            memcpy(dst1, dst, 4 * 4 * 4);
+            memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
+            memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
+            call_ref(dst0, subcoef0, stride);
+            call_new(dst1, subcoef1, stride);
+            if (memcmp(dst0, dst1, 4 * 4 * 4) ||
+                memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t))) // coefficient buffers must match too
+                fail();
+            bench_new(dst1, subcoef1, stride);
+        }
+    }
+
+}
+
+static void check_luma_dc_wht(void)
+{
+    LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
+    LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
+    int16_t block[4][4][16];
+    LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
+    LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
+    VP8DSPContext d;
+    int dc_only;
+    int blockx, blocky;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);
+    // Compares vp8_luma_dc_wht and vp8_luma_dc_wht_dc against their reference.
+    ff_vp8dsp_init(&d);
+
+    for (blocky = 0; blocky < 4; blocky++) {
+        for (blockx = 0; blockx < 4; blockx++) {
+            // NOTE(review): AV_WN32A looks like the aligned-store variant, yet these are plain arrays -- confirm
+            uint8_t src[16], dst[16];
+            randomize_buffers(src, dst, 4, block[blocky][blockx]);
+
+            dct4x4(block[blocky][blockx]);
+            dc[blocky * 4 + blockx] = block[blocky][blockx][0]; // collect the 16 first coefficients in dc[]
+            block[blocky][blockx][0] = rnd(); // leave a random value where the coefficient was taken
+        }
+    }
+    wht4x4(dc);
+
+    for (dc_only = 0; dc_only <= 1; dc_only++) { // 0: vp8_luma_dc_wht, 1: vp8_luma_dc_wht_dc
+        void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d.vp8_luma_dc_wht_dc : d.vp8_luma_dc_wht;
+
+        if (check_func(idct, "vp8_luma_dc_wht%s", dc_only ? "_dc" : "")) {
+            if (dc_only) {
+                memset(dc0, 0, 16 * sizeof(int16_t)); // dc-only variant: keep just dc[0]
+                dc0[0] = dc[0];
+            } else {
+                memcpy(dc0, dc, 16 * sizeof(int16_t));
+            }
+            memcpy(dc1, dc0, 16 * sizeof(int16_t));
+            memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
+            memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
+            call_ref(block0, dc0);
+            call_new(block1, dc1);
+            if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
+                memcmp(dc0, dc1, 16 * sizeof(int16_t))) // both the blocks and the dc array must match
+                fail();
+            bench_new(block1, dc1);
+        }
+    }
+}
+
+#define SRC_BUF_STRIDE 32
+#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE) // reads the local "size" where it is used
+// The mc subpixel interpolation filter needs the 2 previous pixels in either
+// direction; the extra +1 makes sure the actual load addresses are always
+// unaligned.
+#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)
+
+#undef randomize_buffers
+#define randomize_buffers()                               \
+    do { /* fill SRC_BUF_SIZE bytes of buf with rnd() */  \
+        int k;                                            \
+        for (k = 0; k < SRC_BUF_SIZE; k += 4) {           \
+            AV_WN32A(buf + k, rnd());                     \
+        }                                                 \
+    } while (0)
+
+static void check_mc(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
+    VP8DSPContext d;
+    int type, k, dx, dy;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int);
+    // Compares every put_vp8_epel / put_vp8_bilinear table entry against its reference.
+    ff_vp78dsp_init(&d);
+
+    for (type = 0; type < 2; type++) { // 0: epel table, 1: bilinear table
+        vp8_mc_func (*tab)[3][3] = type ? d.put_vp8_bilinear_pixels_tab : d.put_vp8_epel_pixels_tab;
+        for (k = 1; k < 8; k++) { // k picks the width (16 >> k / 3) and the height below
+            int hsize  = k / 3;
+            int size   = 16 >> hsize;
+            int height = (size << 1) >> (k % 3);
+            for (dy = 0; dy < 3; dy++) {
+                for (dx = 0; dx < 3; dx++) {
+                    char str[100];
+                    if (dx || dy) {
+                        if (type == 0) {
+                            static const char *dx_names[] = { "", "h4", "h6" };
+                            static const char *dy_names[] = { "", "v4", "v6" };
+                            snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
+                        } else {
+                            snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
+                        }
+                    } else {
+                        snprintf(str, sizeof(str), "pixels%d", size);
+                    }
+                    if (check_func(tab[hsize][dy][dx], "vp8_put_%s", str)) {
+                        int mx, my;
+                        int i;
+                        if (type == 0) { // epel: index 2 -> random even value 2..6, index 1 -> random odd value 1..7
+                            mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
+                            my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
+                        } else { // bilinear: any value 1..7 when that direction is filtered
+                            mx = dx ? 1 + (rnd() % 7) : 0;
+                            my = dy ? 1 + (rnd() % 7) : 0;
+                        }
+                        randomize_buffers();
+                        for (i = -2; i <= 3; i++) {
+                            int val = (i == -1 || i == 2) ? 0 : 0xff;
+                            // Set pixels in the first row and column to the maximum pattern,
+                            // to test for potential overflows in the filter.
+                            src[i                 ] = val;
+                            src[i * SRC_BUF_STRIDE] = val;
+                        }
+                        call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
+                        call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
+                        if (memcmp(dst0, dst1, size * height)) // dst stride is size, so the output is contiguous
+                            fail();
+                        bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
+                    }
+                }
+            }
+        }
+    }
+}
+
+#undef randomize_buffers
+
+#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c) // uses the caller's buf and jstride
+// Set the pixel to c +/- [0,d]
+#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
+// Set the pixel to c +/- [d,d+e) (making sure it won't be clipped); the value before clipping is also stored in o
+#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))
+
+static void randomize_loopfilter_buffers(int lineoff, int str,
+                                         int dir, int flim_E, int flim_I,
+                                         int hev_thresh, uint8_t *buf,
+                                         int force_hev)
+{
+    uint32_t mask = 0xff;
+    int off = dir ? lineoff : lineoff * str; // start position along the edge
+    int istride = dir ? 1 : str;             // step from one position along the edge to the next
+    int jstride = dir ? str : 1;             // step across the edge, used by setpx()
+    int i;
+    for (i = 0; i < 8; i += 2) { // only every second position along the edge is written
+        // Row 0 will trigger hev for q0/q1, row 2 will trigger hev for p0/p1,
+        // rows 4 and 6 will not trigger hev.
+        // force_hev 1 will make sure all rows trigger hev, while force_hev -1
+        // makes none of them trigger it.
+        int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
+        setpx(idx,  0, q0 = rnd() & mask);
+        if (i == 0 && force_hev >= 0 || force_hev > 0) // && binds tighter: (i == 0 && force_hev >= 0) || force_hev > 0
+            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
+        else
+            setdx(idx,  1, q1 = q0, hev_thresh);
+        setdx(idx,  2, q2 = q1, flim_I);
+        setdx(idx,  3, q2,      flim_I);
+        setdx(idx, -1, p0 = q0, flim_E >> 2);
+        if (i == 2 && force_hev >= 0 || force_hev > 0) // same grouping as above, for the p side
+            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
+        else
+            setdx(idx, -2, p1 = p0, hev_thresh);
+        setdx(idx, -3, p2 = p1, flim_I);
+        setdx(idx, -4, p2,      flim_I);
+    }
+}
+
+// Store a random byte in every pixel of a w x h area whose rows are stride bytes apart
+static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
+{
+    int row, col;
+    for (row = 0; row < h; row++)
+        for (col = 0; col < w; col++)
+            (buf + row * stride)[col] = rnd() & 0xff;
+}
+
+#define randomize_buffers(buf, lineoff, str, force_hev) /* dir, flim_E, flim_I, hev_thresh come from the caller */ \
+    randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)
+
+static void check_loopfilter_16y(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
+    LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
+    VP8DSPContext d;
+    int dir, edge, force_hev;
+    int flim_E = 20, flim_I = 10, hev_thresh = 7;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int, int, int);
+    // Compares the four vp8_{h,v}_loop_filter16y[_inner] functions against their reference.
+    ff_vp8dsp_init(&d);
+
+    for (dir = 0; dir < 2; dir++) { // 0: "h" functions, 1: "v" functions
+        int midoff = dir ? 4 * 16 : 4; // distance from the start of the 16x16 test area to buf0/buf1
+        int midoff_aligned = dir ? 4 * 16 : 16; // a multiple of 16, so buf0/buf1 keep the base alignment
+        uint8_t *buf0 = base0 + midoff_aligned;
+        uint8_t *buf1 = base1 + midoff_aligned;
+        for (edge = 0; edge < 2; edge++) { // 0: plain function, 1: "_inner" function
+            void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
+            switch (dir << 1 | edge) {
+            case (0 << 1) | 0: func = d.vp8_h_loop_filter16y; break;
+            case (1 << 1) | 0: func = d.vp8_v_loop_filter16y; break;
+            case (0 << 1) | 1: func = d.vp8_h_loop_filter16y_inner; break;
+            case (1 << 1) | 1: func = d.vp8_v_loop_filter16y_inner; break;
+            }
+            if (check_func(func, "vp8_loop_filter16y%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
+                for (force_hev = -1; force_hev <= 1; force_hev++) { // see randomize_loopfilter_buffers() for the meaning
+                    fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
+                    randomize_buffers(buf0, 0, 16, force_hev);
+                    randomize_buffers(buf0, 8, 16, force_hev);
+                    memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
+                    call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
+                    call_new(buf1, 16, flim_E, flim_I, hev_thresh);
+                    if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16)) // compare the whole 16x16 area
+                        fail();
+                }
+                fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16); // new input data for the benchmark
+                randomize_buffers(buf0, 0, 16, 0);
+                randomize_buffers(buf0, 8, 16, 0);
+                bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
+            }
+        }
+    }
+}
+
+static void check_loopfilter_8uv(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
+    LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
+    LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
+    LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
+    VP8DSPContext d;
+    int dir, edge, force_hev;
+    int flim_E = 20, flim_I = 10, hev_thresh = 7;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);
+    // Compares the four vp8_{h,v}_loop_filter8uv[_inner] functions against their reference.
+    ff_vp8dsp_init(&d);
+
+    for (dir = 0; dir < 2; dir++) { // 0: "h" functions, 1: "v" functions
+        int midoff = dir ? 4 * 16 : 4; // distance from the start of each 16x16 test area to its buf pointer
+        int midoff_aligned = dir ? 4 * 16 : 16; // a multiple of 16, so the buf pointers keep the base alignment
+        uint8_t *buf0u = base0u + midoff_aligned;
+        uint8_t *buf0v = base0v + midoff_aligned;
+        uint8_t *buf1u = base1u + midoff_aligned;
+        uint8_t *buf1v = base1v + midoff_aligned;
+        for (edge = 0; edge < 2; edge++) { // 0: plain function, 1: "_inner" function
+            void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
+            switch (dir << 1 | edge) {
+            case (0 << 1) | 0: func = d.vp8_h_loop_filter8uv; break;
+            case (1 << 1) | 0: func = d.vp8_v_loop_filter8uv; break;
+            case (0 << 1) | 1: func = d.vp8_h_loop_filter8uv_inner; break;
+            case (1 << 1) | 1: func = d.vp8_v_loop_filter8uv_inner; break;
+            }
+            if (check_func(func, "vp8_loop_filter8uv%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
+                for (force_hev = -1; force_hev <= 1; force_hev++) { // see randomize_loopfilter_buffers() for the meaning
+                    fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
+                    fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
+                    randomize_buffers(buf0u, 0, 16, force_hev);
+                    randomize_buffers(buf0v, 0, 16, force_hev);
+                    memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
+                    memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
+
+                    call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
+                    call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
+                    if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) || // both planes must match
+                        memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
+                        fail();
+                }
+                fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16); // new input data for the benchmark
+                fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
+                randomize_buffers(buf0u, 0, 16, 0);
+                randomize_buffers(buf0v, 0, 16, 0);
+                bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
+            }
+        }
+    }
+}
+
+static void check_loopfilter_simple(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
+    LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
+    VP8DSPContext d;
+    int dir;
+    int flim_E = 20, flim_I = 30, hev_thresh = 0; // flim_I and hev_thresh are only read by randomize_buffers()
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
+    // Compares vp8_h_loop_filter_simple and vp8_v_loop_filter_simple against their reference.
+    ff_vp8dsp_init(&d);
+
+    for (dir = 0; dir < 2; dir++) { // 0: "h" function, 1: "v" function
+        int midoff = dir ? 4 * 16 : 4; // distance from the start of the 16x16 test area to buf0/buf1
+        int midoff_aligned = dir ? 4 * 16 : 16; // a multiple of 16, so buf0/buf1 keep the base alignment
+        uint8_t *buf0 = base0 + midoff_aligned;
+        uint8_t *buf1 = base1 + midoff_aligned;
+        void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d.vp8_v_loop_filter_simple : d.vp8_h_loop_filter_simple;
+        if (check_func(func, "vp8_loop_filter_simple_%s", dir ? "v" : "h")) {
+            fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
+            randomize_buffers(buf0, 0, 16, -1); // force_hev = -1 only here
+            randomize_buffers(buf0, 8, 16, -1);
+            memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
+            call_ref(buf0, 16, flim_E);
+            call_new(buf1, 16, flim_E);
+            if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16)) // compare the whole 16x16 area
+                fail();
+            bench_new(buf0, 16, flim_E);
+        }
+    }
+}
+
+void checkasm_check_vp8dsp(void)
+{
+    check_idct();               // the three checks below are reported together as "idct"
+    check_idct_dc4();
+    check_luma_dc_wht();
+    report("idct");
+    check_mc();
+    report("mc");
+    check_loopfilter_16y();     // the three loop filter checks are reported together as "loopfilter"
+    check_loopfilter_8uv();
+    check_loopfilter_simple();
+    report("loopfilter");
+}
--- a/externals/ffmpeg/tests/checkasm/vp9dsp.c
+++ b/externals/ffmpeg/tests/checkasm/vp9dsp.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/vp9data.h"
+#include "libavcodec/vp9.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; // index (bit_depth - 8) >> 1: 8, 10, 12 bits
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8) // bytes per pixel: 1 at 8 bits, 2 at 10 and 12 bits
+
+#define randomize_buffers()                                        \
+    do { /* random top (a, from a[-4]) and left (l) pixels */      \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];          \
+        int k;                                                     \
+        for (k = -4;  k < SIZEOF_PIXEL * FFMAX(8, size); k += 4) { \
+            uint32_t r = rnd() & mask;                             \
+            AV_WN32A(a + k, r);                                    \
+        }                                                          \
+        for (k = 0; k < size * SIZEOF_PIXEL; k += 4) {             \
+            uint32_t r = rnd() & mask;                             \
+            AV_WN32A(l + k, r);                                    \
+        }                                                          \
+    } while (0)
+
+static void check_ipred(void)
+{
+    LOCAL_ALIGNED_32(uint8_t, a_buf, [64 * 2]);
+    uint8_t *a = &a_buf[32 * 2]; // middle of a_buf: randomize_buffers() also writes a[-4..-1]
+    LOCAL_ALIGNED_32(uint8_t, l, [32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
+    VP9DSPContext dsp;
+    int tx, mode, bit_depth;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride,
+                      const uint8_t *left, const uint8_t *top);
+    static const char *const mode_names[N_INTRA_PRED_MODES] = { // used to build the reported test names
+        [VERT_PRED] = "vert",
+        [HOR_PRED] = "hor",
+        [DC_PRED] = "dc",
+        [DIAG_DOWN_LEFT_PRED] = "diag_downleft",
+        [DIAG_DOWN_RIGHT_PRED] = "diag_downright",
+        [VERT_RIGHT_PRED] = "vert_right",
+        [HOR_DOWN_PRED] = "hor_down",
+        [VERT_LEFT_PRED] = "vert_left",
+        [HOR_UP_PRED] = "hor_up",
+        [TM_VP8_PRED] = "tm",
+        [LEFT_DC_PRED] = "dc_left",
+        [TOP_DC_PRED] = "dc_top",
+        [DC_128_PRED] = "dc_128",
+        [DC_127_PRED] = "dc_127",
+        [DC_129_PRED] = "dc_129",
+    };
+    // Compares every intra_pred[tx][mode] entry against its reference at 8, 10 and 12 bits.
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+        ff_vp9dsp_init(&dsp, bit_depth, 0);
+        for (tx = 0; tx < 4; tx++) {
+            int size = 4 << tx; // block width and height: 4, 8, 16, 32
+
+            for (mode = 0; mode < N_INTRA_PRED_MODES; mode++) {
+                if (check_func(dsp.intra_pred[tx][mode], "vp9_%s_%dx%d_%dbpp",
+                               mode_names[mode], size, size, bit_depth)) {
+                    randomize_buffers();
+                    call_ref(dst0, size * SIZEOF_PIXEL, l, a); // stride = one row of the block, in bytes
+                    call_new(dst1, size * SIZEOF_PIXEL, l, a);
+                    if (memcmp(dst0, dst1, size * size * SIZEOF_PIXEL))
+                        fail();
+                    bench_new(dst1, size * SIZEOF_PIXEL,l, a);
+                }
+            }
+        }
+    }
+    report("ipred");
+}
+
+#undef randomize_buffers
+
+#define randomize_buffers() \
+    do { /* random sz x sz src/dst; coef = src - dst (int32_t entries unless bit_depth is 8) */ \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];                  \
+        for (y = 0; y < sz; y++) {                                         \
+            for (x = 0; x < sz * SIZEOF_PIXEL; x += 4) {                   \
+                uint32_t r = rnd() & mask;                                 \
+                AV_WN32A(dst + y * sz * SIZEOF_PIXEL + x, r);              \
+                AV_WN32A(src + y * sz * SIZEOF_PIXEL + x, rnd() & mask);   \
+            }                                                              \
+            for (x = 0; x < sz; x++) {                                     \
+                if (bit_depth == 8) {                                      \
+                    coef[y * sz + x] = src[y * sz + x] - dst[y * sz + x];  \
+                } else {                                                   \
+                    ((int32_t *) coef)[y * sz + x] =                       \
+                        ((uint16_t *) src)[y * sz + x] -                   \
+                        ((uint16_t *) dst)[y * sz + x];                    \
+                }                                                          \
+            }                                                              \
+        }                                                                  \
+    } while(0)
+
+// 4-point forward WHT (copied from libvpx); sz is not used, 4 values are always read and written
+static void fwht_1d(double *out, const double *in, int sz)
+{
+    const double sum  = in[0] + in[1];
+    const double diff = in[3] - in[2];
+    const double half = trunc(0.5 * (sum - diff));
+    const double lo   = half - in[1];
+    const double hi   = half - in[2];
+
+    out[0] = sum - hi;
+    out[1] = hi;
+    out[2] = diff + lo;
+    out[3] = lo;
+}
+
+// standard DCT-II over sz inputs; only output 0 is rescaled (by M_SQRT1_2)
+static void fdct_1d(double *out, const double *in, int sz)
+{
+    int freq, pos;
+
+    for (freq = 0; freq < sz; freq++) {
+        double *acc = &out[freq];
+        for (*acc = 0.0, pos = 0; pos < sz; pos++)
+            *acc += in[pos] * cos(M_PI * (2 * pos + 1) * freq / (sz * 2.0));
+    }
+    out[0] *= M_SQRT1_2;
+}
+
+// see "Towards jointly optimal spatial prediction and adaptive transform in
+// video/image coding", by J. Han, A. Saxena, and K. Rose
+// IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
+static void fadst4_1d(double *out, const double *in, int sz)
+{
+    int freq, pos;
+
+    for (freq = 0; freq < sz; freq++) {
+        double *acc = &out[freq];
+        for (*acc = 0.0, pos = 0; pos < sz; pos++)
+            *acc += in[pos] * sin(M_PI * (pos + 1) * (2 * freq + 1) / (sz * 2.0 + 1.0));
+    }
+}
+
+// see "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
+// by Jingning Han, Yaowu Xu, and Debargha Mukherjee
+// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41418.pdf
+static void fadst_1d(double *out, const double *in, int sz)
+{
+    int freq, pos;
+
+    for (freq = 0; freq < sz; freq++) {
+        double *acc = &out[freq];
+        for (*acc = 0.0, pos = 0; pos < sz; pos++)
+            *acc += in[pos] * sin(M_PI * (2 * pos + 1) * (2 * freq + 1) / (sz * 4.0));
+    }
+}
+
+typedef void (*ftx1d_fn)(double *out, const double *in, int sz);
+static void ftx_2d(double *out, const double *in, enum TxfmMode tx,
+                   enum TxfmType txtp, int sz)
+{
+    static const double scaling_factors[5][4] = { // indexed [tx][txtp], applied after the first pass
+        { 4.0, 16.0 * M_SQRT1_2 / 3.0, 16.0 * M_SQRT1_2 / 3.0, 32.0 / 9.0 },
+        { 2.0, 2.0, 2.0, 2.0 },
+        { 1.0, 1.0, 1.0, 1.0 },
+        { 0.25 },
+        { 4.0 }
+    };
+    static const ftx1d_fn ftx1d_tbl[5][4][2] = { // indexed [tx][txtp][pass]: 0 = first pass, 1 = second pass
+        {
+            { fdct_1d, fdct_1d },
+            { fadst4_1d, fdct_1d },
+            { fdct_1d, fadst4_1d },
+            { fadst4_1d, fadst4_1d },
+        }, {
+            { fdct_1d, fdct_1d },
+            { fadst_1d, fdct_1d },
+            { fdct_1d, fadst_1d },
+            { fadst_1d, fadst_1d },
+        }, {
+            { fdct_1d, fdct_1d },
+            { fadst_1d, fdct_1d },
+            { fdct_1d, fadst_1d },
+            { fadst_1d, fadst_1d },
+        }, {
+            { fdct_1d, fdct_1d },
+        }, {
+            { fwht_1d, fwht_1d },
+        },
+    };
+    double temp[1024]; // scaled and transposed result of the first pass
+    double scaling_factor = scaling_factors[tx][txtp];
+    int i, j;
+
+    // cols
+    for (i = 0; i < sz; ++i) {
+        double temp_out[32];
+
+        ftx1d_tbl[tx][txtp][0](temp_out, &in[i * sz], sz); // transform sz consecutive inputs
+        // scale and transpose
+        for (j = 0; j < sz; ++j)
+            temp[j * sz + i] = temp_out[j] * scaling_factor;
+    }
+
+    // rows
+    for (i = 0; i < sz; i++)
+        ftx1d_tbl[tx][txtp][1](&out[i * sz], &temp[i * sz], sz);
+}
+
+/* Forward-transform the sz x sz block in buf in place, going through doubles
+ * and ftx_2d(); entries are int16_t when bit_depth is 8, int32_t otherwise. */
+static void ftx(int16_t *buf, enum TxfmMode tx,
+                enum TxfmType txtp, int sz, int bit_depth)
+{
+    double ind[1024], outd[1024];
+    int32_t *const buf32 = (int32_t *) buf;
+    const int wide = bit_depth != 8, count = sz * sz;
+    int n;
+
+    emms_c();
+    for (n = 0; n < count; n++)
+        ind[n] = wide ? buf32[n] : buf[n];
+    ftx_2d(outd, ind, tx, txtp, sz);
+    for (n = 0; n < count; n++) {
+        if (wide)
+            buf32[n] = lrint(outd[n]);
+        else
+            buf[n] = lrint(outd[n]);
+    }
+}
+
+static int copy_subcoefs(int16_t *out, const int16_t *in, enum TxfmMode tx,
+                         enum TxfmType txtp, int sz, int sub, int bit_depth)
+{
+    // Copy coefficients from in to out in scan order for as long as they lie
+    // inside the top-left sub x sub corner of the sz x sz block (sz >= sub),
+    // then clear every coefficient from that point to the end of the scan.
+    // The return value is the scan index where copying stopped, i.e. the eob
+    // that goes with out. Limiting the non-zero area this way leads to
+    // branching into specific optimized simd versions (e.g. dc-only), so that
+    // we get full asm coverage in this test.
+    // Entries are int16_t when bit_depth is 8 and 32 bits wide otherwise.
+
+    const int16_t *const scan = ff_vp9_scans[tx][txtp];
+    const int total = sz * sz;
+    const int wide = bit_depth != 8;
+    int eob = 0, n;
+
+    // copy phase: stop at the first scan position outside the corner
+    while (eob < total) {
+        const int rc = scan[eob];
+
+        if (rc % sz >= sub || rc / sz >= sub)
+            break;
+        if (wide) {
+            AV_COPY32(&out[rc * 2], &in[rc * 2]);
+        } else {
+            out[rc] = in[rc];
+        }
+        eob++;
+    }
+
+    // zero phase: everything from the eob position onwards
+    for (n = eob; n < total; n++) {
+        const int rc = scan[n];
+
+        if (wide) {
+            AV_ZERO32(&out[rc * 2]);
+        } else {
+            out[rc] = 0;
+        }
+    }
+
+    return eob;
+}
+
+/* Return 1 if the sz bytes at c hold only zeros (read 32 bits at a time), else 0. */
+static int iszero(const int16_t *c, int sz)
+{
+    const size_t words = sz / sizeof(int16_t);
+    int n = 0;
+
+    while (n < words && !AV_RN32A(&c[n]))
+        n += 2;
+    return n >= words;
+}
+
+#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8)) // bytes per coefficient: 2 at 8 bits, 4 at 10 and 12 bits
+
+// Exercise every dsp.itxfm_add[tx][txtp] entry for bit depths 8, 10 and 12.
+// For each tested combination the results of call_ref() and call_new() are
+// compared pixel by pixel, and both coefficient buffers are required to be
+// all-zero after the call. Results are summarized under the name "itxfm".
+static void check_itxfm(void)
+{
+    LOCAL_ALIGNED_32(uint8_t, src, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]);
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+    VP9DSPContext dsp;
+    int y, x, tx, txtp, bit_depth, sub;
+    // names used in the reported function name, indexed by transform type
+    static const char *const txtp_types[N_TXFM_TYPES] = {
+        [DCT_DCT] = "dct_dct", [DCT_ADST] = "adst_dct",
+        [ADST_DCT] = "dct_adst", [ADST_ADST] = "adst_adst"
+    };
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+        ff_vp9dsp_init(&dsp, bit_depth, 0);
+
+        for (tx = TX_4X4; tx <= N_TXFM_SIZES /* 4 = lossless */; tx++) {
+            // block edge length: 4, 8, 16, 32, and 4 again for tx == 4
+            int sz = 4 << (tx & 3);
+            // only one transform type is tested from TX_32X32 upwards
+            int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
+
+            for (txtp = 0; txtp < n_txtps; txtp++) {
+                // skip testing sub-IDCTs for WHT or ADST since they don't
+                // implement it in any of the SIMD functions. If they do,
+                // consider changing this to ensure we have complete test
+                // coverage. Test sub=1 for dc-only, then 2, 4, 8, 12, etc,
+                // since the arm version can distinguish them at that level.
+                for (sub = (txtp == 0 && tx < 4) ? 1 : sz; sub <= sz;
+                     sub < 4 ? (sub <<= 1) : (sub += 4)) {
+                    if (check_func(dsp.itxfm_add[tx][txtp],
+                                   "vp9_inv_%s_%dx%d_sub%d_add_%d",
+                                   tx == 4 ? "wht_wht" : txtp_types[txtp],
+                                   sz, sz, sub, bit_depth)) {
+                        int eob;
+
+                        // randomize_buffers() is a macro defined earlier in
+                        // this file; ftx() then transforms coef in place
+                        randomize_buffers();
+                        ftx(coef, tx, txtp, sz, bit_depth);
+
+                        // restrict the input to the top-left sub x sub
+                        // corner, or use the full block when sub == sz
+                        if (sub < sz) {
+                            eob = copy_subcoefs(subcoef0, coef, tx, txtp,
+                                                sz, sub, bit_depth);
+                        } else {
+                            eob = sz * sz;
+                            memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
+                        }
+
+                        // give both calls identical destination pixels and
+                        // identical coefficients
+                        memcpy(dst0, dst, sz * sz * SIZEOF_PIXEL);
+                        memcpy(dst1, dst, sz * sz * SIZEOF_PIXEL);
+                        memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
+                        call_ref(dst0, sz * SIZEOF_PIXEL, subcoef0, eob);
+                        call_new(dst1, sz * SIZEOF_PIXEL, subcoef1, eob);
+                        // fail on a pixel mismatch, or if either call left
+                        // a non-zero value in its coefficient buffer
+                        if (memcmp(dst0, dst1, sz * sz * SIZEOF_PIXEL) ||
+                            !iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
+                            !iszero(subcoef1, sz * sz * SIZEOF_COEF))
+                            fail();
+
+                        bench_new(dst, sz * SIZEOF_PIXEL, coef, eob);
+                    }
+                }
+            }
+        }
+    }
+    report("itxfm");
+}
+
+#undef randomize_buffers
+
+// Helpers for randomize_loopfilter_buffers(). They rely on buf0, jstride and
+// bit_depth being in scope where they are expanded.
+//
+// setpx(a,b,c): store the value c, clipped to the valid pixel range, at
+// element index (a) + (b) * jstride of buf0. The element is a uint8_t when
+// SIZEOF_PIXEL == 1 and a uint16_t (clipped to bit_depth bits) otherwise.
+#define setpx(a,b,c) \
+    do { \
+        if (SIZEOF_PIXEL == 1) { \
+            buf0[(a) + (b) * jstride] = av_clip_uint8(c); \
+        } else { \
+            ((uint16_t *)buf0)[(a) + (b) * jstride] = av_clip_uintp2(c, bit_depth); \
+        } \
+    } while (0)
+
+// setdx(a,b,c,d): like setpx(), but the stored value is
+// c - d + rnd() % (2 * d + 1).
+// setsx(a,b,c,d): like setdx(), with d scaled up by (bit_depth - 8) bits.
+// c can be an assignment and must not be put under ()
+#define setdx(a,b,c,d) setpx(a,b,c-(d)+(rnd()%((d)*2+1)))
+#define setsx(a,b,c,d) setdx(a,b,c,(d) << (bit_depth - 8))
+// Fill eight lines of pixels around a filter edge in buf0 with test data.
+// The lines come in four pairs (flat16, flat8, regular, off) that differ in
+// how closely neighbouring pixels follow the two pixels next to the edge.
+// E, F and I are threshold tables indexed by bidx; lineoff is the index of
+// the first line to fill. For dir != 0, lines are 1 element apart and the
+// pixels of one line are str elements apart; otherwise lines are 16 elements
+// apart and the pixels of one line are adjacent. H and buf1 are not used.
+static void randomize_loopfilter_buffers(int bidx, int lineoff, int str,
+                                         int bit_depth, int dir, const int *E,
+                                         const int *F, const int *H, const int *I,
+                                         uint8_t *buf0, uint8_t *buf1)
+{
+    const uint32_t pixmask = (1 << bit_depth) - 1;
+    int first, line_step;
+    int jstride; // this name is read by the setpx() macro
+    int line, tap;
+
+    if (dir) {
+        first     = lineoff;
+        line_step = 1;
+        jstride   = str;
+    } else {
+        first     = lineoff * 16;
+        line_step = 16;
+        jstride   = 1;
+    }
+
+    /* flat16: all 8 pixels on either side stay within F of p0/q0 */
+    for (line = 0; line < 2; line++) {
+        int pos = first + line * line_step;
+        int p0, q0;
+
+        setpx(pos, 0, q0 = rnd() & pixmask);
+        setsx(pos, -1, p0 = q0, E[bidx] >> 2);
+        for (tap = 1; tap < 8; tap++) {
+            setsx(pos, -1 - tap, p0, F[bidx]);
+            setsx(pos, tap, q0, F[bidx]);
+        }
+    }
+
+    /* flat8: the inner 4 pixels stay within F, the outer 4 are random */
+    for (line = 2; line < 4; line++) {
+        int pos = first + line * line_step;
+        int p0, q0;
+
+        setpx(pos, 0, q0 = rnd() & pixmask);
+        setsx(pos, -1, p0 = q0, E[bidx] >> 2);
+        for (tap = 1; tap < 4; tap++) {
+            setsx(pos, -1 - tap, p0, F[bidx]);
+            setsx(pos, tap, q0, F[bidx]);
+        }
+        for (tap = 4; tap < 8; tap++) {
+            setpx(pos, -1 - tap, rnd() & pixmask);
+            setpx(pos, tap, rnd() & pixmask);
+        }
+    }
+
+    /* regular: inner pixels each stay within I of their neighbour */
+    for (line = 4; line < 6; line++) {
+        int pos = first + line * line_step;
+        int p0, p1, p2, q0, q1, q2;
+
+        setpx(pos, 0, q0 = rnd() & pixmask);
+        setsx(pos, 1, q1 = q0, I[bidx]);
+        setsx(pos, 2, q2 = q1, I[bidx]);
+        setsx(pos, 3, q2, I[bidx]);
+        setsx(pos, -1, p0 = q0, E[bidx] >> 2);
+        setsx(pos, -2, p1 = p0, I[bidx]);
+        setsx(pos, -3, p2 = p1, I[bidx]);
+        setsx(pos, -4, p2, I[bidx]);
+        for (tap = 4; tap < 8; tap++) {
+            setpx(pos, -1 - tap, rnd() & pixmask);
+            setpx(pos, tap, rnd() & pixmask);
+        }
+    }
+
+    /* off: every pixel is random */
+    for (line = 6; line < 8; line++) {
+        int pos = first + line * line_step;
+
+        for (tap = 0; tap < 8; tap++) {
+            setpx(pos, -1 - tap, rnd() & pixmask);
+            setpx(pos, tap, rnd() & pixmask);
+        }
+    }
+}
+// Shorthand used by check_loopfilter(): forwards to
+// randomize_loopfilter_buffers(), taking bit_depth, dir, E, F, H, I, buf0 and
+// buf1 from the scope in which the macro is expanded.
+#define randomize_buffers(bidx, lineoff, str) \
+        randomize_loopfilter_buffers(bidx, lineoff, str, bit_depth, dir, \
+                                     E, F, H, I, buf0, buf1)
+
+// Exercise the loop filter entries of VP9DSPContext (loop_filter_8,
+// loop_filter_16 and loop_filter_mix2) for bit depths 8, 10 and 12 and both
+// directions, comparing the buffers produced by call_ref() and call_new().
+// Results are summarized under the name "loopfilter".
+static void check_loopfilter(void)
+{
+    LOCAL_ALIGNED_32(uint8_t, base0, [32 + 16 * 16 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, base1, [32 + 16 * 16 * 2]);
+    VP9DSPContext dsp;
+    int dir, wd, wd2, bit_depth;
+    static const char *const dir_name[2] = { "h", "v" };
+    // threshold tables; index 0 is used everywhere, index 1 only for the
+    // second half of the mix2 tests
+    static const int E[2] = { 20, 28 }, I[2] = { 10, 16 };
+    static const int H[2] = { 7, 11 }, F[2] = { 1, 1 };
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+        ff_vp9dsp_init(&dsp, bit_depth, 0);
+
+        for (dir = 0; dir < 2; dir++) {
+            // midoff: distance in bytes from the start of the copied and
+            // compared area to the pointer handed to the filter.
+            // midoff_aligned: offset in bytes of that pointer inside base0
+            // and base1.
+            int midoff = (dir ? 8 * 8 : 8) * SIZEOF_PIXEL;
+            int midoff_aligned = (dir ? 8 * 8 : 16) * SIZEOF_PIXEL;
+            uint8_t *buf0 = base0 + midoff_aligned;
+            uint8_t *buf1 = base1 + midoff_aligned;
+
+            for (wd = 0; wd < 3; wd++) {
+                // 4/8/16wd_8px
+                if (check_func(dsp.loop_filter_8[wd][dir],
+                               "vp9_loop_filter_%s_%d_8_%dbpp",
+                               dir_name[dir], 4 << wd, bit_depth)) {
+                    // only buf0 is randomized; buf1 gets a copy of it
+                    randomize_buffers(0, 0, 8);
+                    memcpy(buf1 - midoff, buf0 - midoff,
+                           16 * 8 * SIZEOF_PIXEL);
+                    call_ref(buf0, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
+                    call_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
+                    if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 8 * SIZEOF_PIXEL))
+                        fail();
+                    bench_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
+                }
+            }
+
+            // the remaining tests cover 16 lines, so the offsets are
+            // recomputed for a 16x16 area
+            midoff = (dir ? 16 * 8 : 8) * SIZEOF_PIXEL;
+            midoff_aligned = (dir ? 16 * 8 : 16) * SIZEOF_PIXEL;
+
+            buf0 = base0 + midoff_aligned;
+            buf1 = base1 + midoff_aligned;
+
+            // 16wd_16px loopfilter
+            if (check_func(dsp.loop_filter_16[dir],
+                           "vp9_loop_filter_%s_16_16_%dbpp",
+                           dir_name[dir], bit_depth)) {
+                // lines 0-7 and lines 8-15 are both filled using index 0 of
+                // the threshold tables
+                randomize_buffers(0, 0, 16);
+                randomize_buffers(0, 8, 16);
+                memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
+                call_ref(buf0, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
+                call_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
+                if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
+                    fail();
+                bench_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
+            }
+
+            for (wd = 0; wd < 2; wd++) {
+                for (wd2 = 0; wd2 < 2; wd2++) {
+                    // mix2 loopfilter
+                    if (check_func(dsp.loop_filter_mix2[wd][wd2][dir],
+                                   "vp9_loop_filter_mix2_%s_%d%d_16_%dbpp",
+                                   dir_name[dir], 4 << wd, 4 << wd2, bit_depth)) {
+                        // lines 0-7 use index 0 of the threshold tables,
+                        // lines 8-15 use index 1
+                        randomize_buffers(0, 0, 16);
+                        randomize_buffers(1, 8, 16);
+                        memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
+// M(a) packs both table entries into one int: a[0] in bits 0-7, a[1] in
+// bits 8 and up
+#define M(a) (((a)[1] << 8) | (a)[0])
+                        call_ref(buf0, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
+                        call_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
+                        if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
+                            fail();
+                        bench_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
+#undef M
+                    }
+                }
+            }
+        }
+    }
+    report("loopfilter");
+}
+
+#undef setsx
+#undef setpx
+#undef setdx
+#undef randomize_buffers
+
+// Buffer geometry for check_mc(). These macros rely on size, buf, bit_depth,
+// op, dst0 and dst1 being in scope where they are expanded.
+//
+// DST_BUF_SIZE: bytes in a size x size destination block.
+// SRC_BUF_STRIDE: source row length in pixels.
+// SRC_BUF_SIZE: bytes in size + 7 source rows.
+// src: pointer into buf, 3 rows down and 3 pixels in from its start
+// (presumably the margin needed by the 8-tap filters; confirm against the
+// filter implementations).
+#define DST_BUF_SIZE (size * size * SIZEOF_PIXEL)
+#define SRC_BUF_STRIDE 72
+#define SRC_BUF_SIZE ((size + 7) * SRC_BUF_STRIDE * SIZEOF_PIXEL)
+#define src (buf + 3 * SIZEOF_PIXEL * (SRC_BUF_STRIDE + 1))
+
+// Fill the first SRC_BUF_SIZE bytes of buf with random 32-bit words masked by
+// pixel_mask[] for the current bit depth. When op == 1, dst0 and dst1 are
+// additionally filled with the same masked random data as each other, so
+// that both calls start from identical destination contents.
+#define randomize_buffers()                               \
+    do {                                                  \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
+        int k;                                            \
+        for (k = 0; k < SRC_BUF_SIZE; k += 4) {           \
+            uint32_t r = rnd() & mask;                    \
+            AV_WN32A(buf + k, r);                         \
+        }                                                 \
+        if (op == 1) {                                    \
+            for (k = 0; k < DST_BUF_SIZE; k += 4) {       \
+                uint32_t r = rnd() & mask;                \
+                AV_WN32A(dst0 + k, r);                    \
+                AV_WN32A(dst1 + k, r);                    \
+            }                                             \
+        }                                                 \
+    } while (0)
+
+// Exercise every dsp.mc[hsize][filter][op][dx][dy] entry for both ops
+// (put/avg), bit depths 8, 10 and 12, block sizes 64 down to 4, all four
+// filters and all four combinations of horizontal/vertical subpel use,
+// comparing the destination blocks written by call_ref() and call_new().
+// Results are summarized under the name "mc".
+static void check_mc(void)
+{
+    LOCAL_ALIGNED_32(uint8_t, buf, [72 * 72 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [64 * 64 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2]);
+    VP9DSPContext dsp;
+    int op, hsize, bit_depth, filter, dx, dy;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dst_stride,
+                      const uint8_t *ref, ptrdiff_t ref_stride,
+                 int h, int mx, int my);
+    // name fragments used to build the reported function name
+    static const char *const filter_names[4] = {
+        "8tap_smooth", "8tap_regular", "8tap_sharp", "bilin"
+    };
+    static const char *const subpel_names[2][2] = { { "", "h" }, { "v", "hv" } };
+    static const char *const op_names[2] = { "put", "avg" };
+    char str[256];
+
+    for (op = 0; op < 2; op++) {
+        for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+            ff_vp9dsp_init(&dsp, bit_depth, 0);
+            for (hsize = 0; hsize < 5; hsize++) {
+                // block edge length: 64, 32, 16, 8, 4
+                int size = 64 >> hsize;
+
+                for (filter = 0; filter < 4; filter++) {
+                    for (dx = 0; dx < 2; dx++) {
+                        for (dy = 0; dy < 2; dy++) {
+                            // subpel variants are named after op, filter,
+                            // size and direction; the fullpel variant only
+                            // after op and size
+                            if (dx || dy) {
+                                snprintf(str, sizeof(str),
+                                         "%s_%s_%d%s", op_names[op],
+                                         filter_names[filter], size,
+                                         subpel_names[dy][dx]);
+                            } else {
+                                snprintf(str, sizeof(str),
+                                         "%s%d", op_names[op], size);
+                            }
+                            if (check_func(dsp.mc[hsize][filter][op][dx][dy],
+                                           "vp9_%s_%dbpp", str, bit_depth)) {
+                                // random subpel position in 1..14 for each
+                                // direction in use, 0 otherwise
+                                int mx = dx ? 1 + (rnd() % 14) : 0;
+                                int my = dy ? 1 + (rnd() % 14) : 0;
+                                randomize_buffers();
+                                call_ref(dst0, size * SIZEOF_PIXEL,
+                                         src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                                         size, mx, my);
+                                call_new(dst1, size * SIZEOF_PIXEL,
+                                         src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                                         size, mx, my);
+                                if (memcmp(dst0, dst1, DST_BUF_SIZE))
+                                    fail();
+
+                                // the two continue statements below skip
+                                // only the benchmark; the comparison above
+                                // has already run
+                                // simd implementations for each filter of subpel
+                                // functions are identical
+                                if (filter >= 1 && filter <= 2) continue;
+                                // 10/12 bpp for bilin are identical
+                                if (bit_depth == 12 && filter == 3) continue;
+
+                                bench_new(dst1, size * SIZEOF_PIXEL,
+                                          src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                                          size, mx, my);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    report("mc");
+}
+
+void checkasm_check_vp9dsp(void)
+{
+    // the VP9 DSP test groups, run one after another in this order
+    static void (*const groups[])(void) = {
+        check_ipred,
+        check_itxfm,
+        check_loopfilter,
+        check_mc,
+    };
+    int g;
+
+    for (g = 0; g < (int) (sizeof(groups) / sizeof(groups[0])); g++)
+        groups[g]();
+}
--- a/externals/ffmpeg/tests/checkasm/x86/checkasm.asm
+++ b/externals/ffmpeg/tests/checkasm/x86/checkasm.asm
@@ -0,0 +1,244 @@
+;*****************************************************************************
+;* Assembly testing and benchmarking tool
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2012 Henrik Gramner
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+;*****************************************************************************
+
+%define private_prefix checkasm
+%include "libavutil/x86/x86inc.asm"
+
+SECTION_RODATA
+
+; Messages passed to fail_func by the report_fail invocations below.
+error_message: db "failed to preserve register", 0
+error_message_emms: db "failed to issue emms", 0
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+; x6..x15: 128-bit values. On WIN64 they are loaded into m6..m15 before the
+;          tested function is called and compared against afterwards.
+; n7..n14: 64-bit values, loaded into the general-purpose registers
+;          r<free_regs>..r14 and compared in the same way.
+ALIGN 16
+x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
+x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7:  dq 0x21f86d66c8ca00ce
+n8:  dq 0x75b6ba21077c48ad
+n9:  dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
+%endif
+
+SECTION .text
+
+cextern fail_func
+
+; max number of args used by any asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; int checkasm_stack_clobber(uint64_t clobber, ...)
+;
+; Fills the (max_args+6)*8 bytes below the stack pointer with the clobber
+; value passed in r0, then restores rsp and returns.
+; Scratch: r1, flags.
+;-----------------------------------------------------------------------------
+cglobal stack_clobber, 1,2
+    %define argsize (max_args+6)*8
+    SUB  rsp, argsize
+    ; r1 = number of 8-byte slots still to be written. Slot r1-1 is written
+    ; next, so the area is filled from its highest address down to [rsp].
+    mov  r1d, (argsize)/8
+.fill:
+    mov [rsp+r1*8-8], r0
+    dec  r1d
+    jnz .fill
+    ADD  rsp, argsize
+    RET
+
+; free_regs: number of the lowest general-purpose register that is checked
+; for preservation; CHECKED_CALL loads r<free_regs>..r14 with the n* values.
+; DECLARE_REG_TMP selects which register t0 refers to; CHECKED_CALL keeps the
+; pointer of the function under test in t0.
+%if WIN64
+    %assign free_regs 7
+    DECLARE_REG_TMP 4
+%else
+    %assign free_regs 9
+    DECLARE_REG_TMP 7
+%endif
+
+; report_fail msg
+; Calls fail_func with the address of msg as its first argument while keeping
+; the tested function's return value in rdx:rax intact.
+; Clobbers: r0, r9, r10, and whatever fail_func itself clobbers.
+%macro report_fail 1
+    ; park the return value in r9/r10 across the call (rbx/rbp in x86inc's
+    ; register numbering)
+    mov  r9, rax
+    mov r10, rdx
+    lea  r0, [%1]
+    ; eax = 0 before the call: for a variadic callee under the System V ABI,
+    ; al holds the number of vector registers used for arguments
+    xor eax, eax
+    call fail_func
+    mov rdx, r10
+    mov rax, r9
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;
+; CHECKED_CALL [suffix] defines checked_call<suffix>: a wrapper that calls
+; func with the arguments it was given, then verifies that the registers
+; pre-loaded with the n*/x* constants still hold them. With no suffix it also
+; verifies that the x87 tag word reports all registers empty. The _emms
+; variant issues emms itself instead of checking; the _float variant does
+; neither.
+;-----------------------------------------------------------------------------
+INIT_XMM
+%macro CHECKED_CALL 0-1
+cglobal checked_call%1, 2,15,16,max_args*8+8
+    ; keep the function pointer in t0; r0 is reused below for an argument
+    mov  t0, r0
+
+    ; All arguments have been pushed on the stack instead of registers in order to
+    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+    mov  r0, r6mp
+    mov  r1, r7mp
+    mov  r2, r8mp
+    mov  r3, r9mp
+%if UNIX64
+    mov  r4, r10mp
+    mov  r5, r11mp
+    ; copy the remaining max_args-6 arguments from the caller's stack into
+    ; the local stack area, starting at [rsp]; r9 is used as a temporary
+    %assign i 6
+    %rep max_args-6
+        mov  r9, [rsp+stack_offset+(i+1)*8]
+        mov  [rsp+(i-6)*8], r9
+        %assign i i+1
+    %endrep
+%else ; WIN64
+    ; copy the remaining max_args-4 arguments into the local stack area,
+    ; starting at [rsp+32]; the first four 8-byte slots are left unwritten
+    %assign i 4
+    %rep max_args-4
+        mov  r9, [rsp+stack_offset+(i+7)*8]
+        mov  [rsp+i*8], r9
+        %assign i i+1
+    %endrep
+
+    ; Move possible floating-point arguments to the correct registers
+    movq m0, r0
+    movq m1, r1
+    movq m2, r2
+    movq m3, r3
+
+    ; load m6..m15 with the x6..x15 constants
+    %assign i 6
+    %rep 16-6
+        mova m %+ i, [x %+ i]
+        %assign i i+1
+    %endrep
+%endif
+
+; load r14 down to r<free_regs> with the matching n* constants
+%assign i 14
+%rep 15-free_regs
+    mov r %+ i, [n %+ i]
+    %assign i i-1
+%endrep
+    call t0
+; xor each checked register with its constant (giving zero if it is
+; unchanged) and accumulate all the results in r14
+%assign i 14
+%rep 15-free_regs
+    xor r %+ i, [n %+ i]
+    or  r14, r %+ i
+    %assign i i-1
+%endrep
+
+%if WIN64
+    ; same test for m6..m15, accumulated in m6
+    %assign i 6
+    %rep 16-6
+        pxor m %+ i, [x %+ i]
+        por  m6, m %+ i
+        %assign i i+1
+    %endrep
+    ; narrow the result to 64 bits with signed saturation, so that a
+    ; non-zero word stays non-zero, and merge it into r14
+    packsswb m6, m6
+    movq r5, m6
+    or  r14, r5
+%endif
+
+    ; Call fail_func() with a descriptive message to mark it as a failure
+    ; if the called function didn't preserve all callee-saved registers.
+    ; Save the return value located in rdx:rax first to prevent clobbering.
+    ; (ZF tested here is the one left by the last "or r14, ..." above.)
+    jz .clobber_ok
+    report_fail error_message
+.clobber_ok:
+%ifidn %1, _emms
+    emms
+%elifnidn %1, _float
+    ; store the x87 environment at [rsp]; the word at offset 8 is the tag
+    ; word, and 0xffff means that all x87/MMX registers are marked empty
+    fstenv [rsp]
+    cmp  word [rsp + 8], 0xffff
+    je   .emms_ok
+    report_fail error_message_emms
+    emms
+.emms_ok:
+%endif
+    RET
+%endmacro
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+; (32-bit immediates, loaded into r3..r6 by CHECKED_CALL)
+%define n3 dword 0x6549315c
+%define n4 dword 0xe02f3e23
+%define n5 dword 0xb78d0d1d
+%define n6 dword 0x33627ba7
+
+; report_fail msg
+; Calls fail_func with the address of msg stored at [esp] as its stack
+; argument, keeping the tested function's return value in edx:eax intact.
+; Clobbers: r0, r3, r4, the dword at [esp], and whatever fail_func clobbers.
+%macro report_fail 1
+    ; park the return value in r3/r4 across the call
+    mov  r3, eax
+    mov  r4, edx
+    lea  r0, [%1]
+    mov [esp], r0
+    call fail_func
+    mov  edx, r4
+    mov  eax, r3
+%endmacro
+
+%macro CHECKED_CALL 0-1
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;
+; CHECKED_CALL [suffix] defines checked_call<suffix>: a wrapper that calls
+; func with a copy of max_args stack arguments, then verifies that r3..r6
+; still hold the n3..n6 constants. With no suffix it also verifies that the
+; x87 tag word reports all registers empty. The _emms variant issues emms
+; itself instead of checking; the _float variant does neither.
+;-----------------------------------------------------------------------------
+cglobal checked_call%1, 1,7
+    ; load the registers to be checked with their constants
+    mov  r3, n3
+    mov  r4, n4
+    mov  r5, n5
+    mov  r6, n6
+    ; Re-push max_args dwords of arguments. esp drops by 4 with every push,
+    ; so the same offset walks down through the caller's argument list, from
+    ; the highest address to the lowest, and the copy keeps the original
+    ; order.
+%rep max_args
+    PUSH dword [esp+20+max_args*4]
+%endrep
+    call r0
+    ; xor each checked register with its constant (giving zero if it is
+    ; unchanged) and combine the results; ZF for the jz below comes from the
+    ; final "or"
+    xor  r3, n3
+    xor  r4, n4
+    xor  r5, n5
+    xor  r6, n6
+    or   r3, r4
+    or   r5, r6
+    or   r3, r5
+    jz .clobber_ok
+    report_fail error_message
+.clobber_ok:
+%ifidn %1, _emms
+    emms
+%elifnidn %1, _float
+    ; store the x87 environment at [esp]; the word at offset 8 is the tag
+    ; word, and 0xffff means that all x87/MMX registers are marked empty
+    fstenv [esp]
+    cmp  word [esp + 8], 0xffff
+    je   .emms_ok
+    report_fail error_message_emms
+    emms
+.emms_ok:
+%endif
+    ; drop the argument copies pushed above
+    add  esp, max_args*4
+    REP_RET
+%endmacro
+
+%endif ; ARCH_X86_64
+
+; Instantiate the three wrappers: checked_call (register check plus x87 tag
+; word check), checked_call_emms (register check, then emms) and
+; checked_call_float (register check only).
+CHECKED_CALL
+CHECKED_CALL _emms
+CHECKED_CALL _float