early-access version 1432

This commit is contained in:
pineappleEA
2021-02-09 04:25:58 +01:00
parent de64eab4b4
commit 3d5a9d908a
7336 changed files with 1773492 additions and 111 deletions

1
externals/ffmpeg/tests/checkasm/.gitignore vendored Executable file
View File

@@ -0,0 +1 @@
/checkasm

85
externals/ffmpeg/tests/checkasm/Makefile vendored Executable file
View File

@@ -0,0 +1,85 @@
# Build rules for the checkasm test program.
# Test objects are appended to per-library lists keyed by the value of a
# CONFIG_* variable; only the "-yes" lists are folded into CHECKASMOBJS, which
# is then linked into tests/checkasm/checkasm$(EXESUF).
# libavcodec tests
# subsystems
AVCODECOBJS-$(CONFIG_AUDIODSP) += audiodsp.o
AVCODECOBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
AVCODECOBJS-$(CONFIG_FLACDSP) += flacdsp.o
AVCODECOBJS-$(CONFIG_FMTCONVERT) += fmtconvert.o
AVCODECOBJS-$(CONFIG_G722DSP) += g722dsp.o
AVCODECOBJS-$(CONFIG_H264DSP) += h264dsp.o
AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o
AVCODECOBJS-$(CONFIG_LLVIDDSP) += llviddsp.o
AVCODECOBJS-$(CONFIG_LLVIDENCDSP) += llviddspenc.o
AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o
AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o
# decoders/encoders
AVCODECOBJS-$(CONFIG_AAC_DECODER) += aacpsdsp.o \
sbrdsp.o
AVCODECOBJS-$(CONFIG_ALAC_DECODER) += alacdsp.o
AVCODECOBJS-$(CONFIG_DCA_DECODER) += synth_filter.o
AVCODECOBJS-$(CONFIG_EXR_DECODER) += exrdsp.o
AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuvdsp.o
AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o
AVCODECOBJS-$(CONFIG_OPUS_DECODER) += opusdsp.o
AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o hevc_sao.o
AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o
AVCODECOBJS-$(CONFIG_V210_DECODER) += v210dec.o
AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o
AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o
CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
# libavfilter tests
AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o
AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o
AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o
CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
# swscale tests
SWSCALEOBJS += sw_rgb.o sw_scale.o
CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS)
# libavutil tests
AVUTILOBJS += fixed_dsp.o
AVUTILOBJS += float_dsp.o
CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS)
# architecture-specific helper objects
CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o
CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
CHECKASMOBJS-$(HAVE_X86ASM) += x86/checkasm.o
# checkasm.o is always built; every object gets the tests/checkasm/ prefix and
# $(sort) removes duplicates
CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
# pull in the matching .d dependency files; the leading '-' ignores missing ones
-include $(CHECKASMOBJS:.o=.d)
# the object directories are order-only prerequisites and are added to OUTDIRS
CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
$(CHECKASMOBJS): | $(CHECKASMDIRS)
OUTDIRS += $(CHECKASMDIRS)
# for checkasm.o only: undefine any "main" macro
tests/checkasm/checkasm.o: CFLAGS += -Umain
# link the test program from all selected objects and the static dependency libs
CHECKASM := tests/checkasm/checkasm$(EXESUF)
$(CHECKASM): $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS)
$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS-avcodec) $(EXTRALIBS-avfilter) $(EXTRALIBS-avformat) $(EXTRALIBS-avutil) $(EXTRALIBS-swresample) $(EXTRALIBS)
checkasm: $(CHECKASM)
# "testclean" additionally runs checkasmclean, which removes the program and
# the CLEANSUFFIXES files in tests/checkasm/ and tests/checkasm/$(ARCH)/
testclean:: checkasmclean
checkasmclean:
$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)
.PHONY: checkasm

261
externals/ffmpeg/tests/checkasm/aacpsdsp.c vendored Executable file
View File

@@ -0,0 +1,261 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavcodec/aacpsdsp.h"
#include "libavutil/intfloat.h"
#include "checkasm.h"
#define N 32
#define STRIDE 128
#define BUF_SIZE (N * STRIDE)
/*
 * Fill the first len elements of buf with values computed as
 * (INTFLOAT)rnd() / UINT_MAX.
 * Both arguments are parenthesized inside the expansion so that any
 * expression (for example a conditional) can be passed as len.
 */
#define randomize(buf, len) do {                            \
    int i;                                                  \
    for (i = 0; i < (len); i++) {                           \
        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;      \
        (buf)[i] = f;                                       \
    }                                                       \
} while (0)
#define EPS 0.005
/*
 * Zero the `bits` lowest bits in the 32-bit representation of each of the
 * first `len` elements of buf, in place.
 */
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
    const unsigned int keep_mask = 0xffffffff << bits;
    int idx = 0;

    while (idx < len) {
        /* view the element as an integer, mask it, and write it back */
        union av_intfloat32 pun = { .f = buf[idx] };

        pun.i   &= keep_mask;
        buf[idx] = pun.f;
        idx++;
    }
}
/*
 * Exercise the function selected by the caller's check_func()
 * (ps_add_squares in checkasm_check_aacpsdsp).
 * call_ref and call_new (checkasm.h macros; presumably the reference and the
 * implementation under test) are run on identical random inputs and their
 * BUF_SIZE outputs must agree within EPS; bench_new then times the new one.
 */
static void test_add_squares(void)
{
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, INTFLOAT *dst,
const INTFLOAT (*src)[2], int n);
randomize((INTFLOAT *)src, BUF_SIZE * 2);
randomize(dst0, BUF_SIZE);
/* dst1 starts as an exact copy of dst0 so both calls see the same dst */
memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
call_ref(dst0, src, BUF_SIZE);
call_new(dst1, src, BUF_SIZE);
if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
fail();
bench_new(dst1, src, BUF_SIZE);
}
/*
 * Exercise the function selected by the caller's check_func()
 * (ps_mul_pair_single in checkasm_check_aacpsdsp).
 * src0 (BUF_SIZE pairs) and src1 (BUF_SIZE values) are randomized, call_ref
 * and call_new write to separate destinations, and all BUF_SIZE * 2 output
 * values must agree within EPS.
 */
static void test_mul_pair_single(void)
{
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, INTFLOAT (*dst)[2],
INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
randomize((INTFLOAT *)src0, BUF_SIZE * 2);
randomize(src1, BUF_SIZE);
call_ref(dst0, src0, src1, BUF_SIZE);
call_new(dst1, src0, src1, BUF_SIZE);
/* the pair arrays are compared as flat arrays of BUF_SIZE * 2 values */
if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
fail();
bench_new(dst1, src0, src1, BUF_SIZE);
}
/*
 * Exercise the function selected by the caller's check_func()
 * (ps_hybrid_analysis in checkasm_check_aacpsdsp).
 * The input is 13 random pairs and the filter N x 8 random pairs; the
 * function is called with stride STRIDE and n = N. Both destinations start
 * from the same random contents and are compared in full (BUF_SIZE * 2
 * values) within EPS.
 */
static void test_hybrid_analysis(void)
{
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
const INTFLOAT (*filter)[8][2],
ptrdiff_t stride, int n);
randomize((INTFLOAT *)in, 13 * 2);
randomize((INTFLOAT *)filter, N * 8 * 2);
randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
/* identical starting contents for both destinations */
memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
call_ref(dst0, in, filter, STRIDE, N);
call_new(dst1, in, filter, STRIDE, N);
if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
fail();
bench_new(dst1, in, filter, STRIDE, N);
}
/*
 * Exercise the function selected by the caller's check_func()
 * (ps_hybrid_analysis_ileave in checkasm_check_aacpsdsp).
 * The [2][38][64] input and the [91][32][2] outputs are randomized, both
 * outputs start identical, and after each pair of calls (i = 3, then i = 5,
 * len = 32) the complete output buffers must be bit-identical.
 */
static void test_hybrid_analysis_ileave(void)
{
LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
int i, int len);
randomize((INTFLOAT *)out0, 91 * 32 * 2);
randomize((INTFLOAT *)in, 2 * 38 * 64);
memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
/* len is hardcoded to 32 as that's the only value used in
libavcodec. asm functions are likely to be optimized
hardcoding this value in their loops and could fail with
anything else.
i is hardcoded to the two values currently used by the
aac decoder because the arm neon implementation is
micro-optimized for them and will fail for almost every
other value. */
call_ref(out0, in, 3, 32);
call_new(out1, in, 3, 32);
/* the function just moves data around, so memcmp is enough */
if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
fail();
/* second pass with i = 5, run on top of the results of the first pass */
call_ref(out0, in, 5, 32);
call_new(out1, in, 5, 32);
if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
fail();
bench_new(out1, in, 3, 32);
}
/*
 * Exercise the function selected by the caller's check_func()
 * (ps_hybrid_synthesis_deint in checkasm_check_aacpsdsp).
 * The [91][32][2] input and the [2][38][64] outputs are randomized, both
 * outputs start identical, and after each pair of calls (i = 3, then i = 5,
 * len = 32) the complete output buffers must be bit-identical.
 */
static void test_hybrid_synthesis_deint(void)
{
LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
int i, int len);
randomize((INTFLOAT *)in, 91 * 32 * 2);
randomize((INTFLOAT *)out0, 2 * 38 * 64);
memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
/* len is hardcoded to 32 as that's the only value used in
libavcodec. asm functions are likely to be optimized
hardcoding this value in their loops and could fail with
anything else.
i is hardcoded to the two values currently used by the
aac decoder because the arm neon implementation is
micro-optimized for them and will fail for almost every
other value. */
call_ref(out0, in, 3, 32);
call_new(out1, in, 3, 32);
/* the function just moves data around, so memcmp is enough */
if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
fail();
/* second pass with i = 5, run on top of the results of the first pass */
call_ref(out0, in, 5, 32);
call_new(out1, in, 5, 32);
if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
fail();
bench_new(out1, in, 3, 32);
}
/*
 * Check both entries of psdsp->stereo_interpolate ("ps_stereo_interpolate"
 * and "ps_stereo_interpolate_ipdopd").
 * For each entry accepted by check_func(), call_ref and call_new operate on
 * separate copies (l0/r0 and l1/r1) of the same random l and r buffers with
 * the same random h and h_step, and the resulting l and r buffers must agree
 * within EPS over BUF_SIZE * 2 values each.
 */
static void test_stereo_interpolate(PSDSPContext *psdsp)
{
int i;
LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
/* l and r are the pristine inputs; they are only ever copied from */
randomize((INTFLOAT *)l, BUF_SIZE * 2);
randomize((INTFLOAT *)r, BUF_SIZE * 2);
for (i = 0; i < 2; i++) {
if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
randomize((INTFLOAT *)h, 2 * 4);
randomize((INTFLOAT *)h_step, 2 * 4);
// Clear the least significant 14 bits of h_step, to avoid
// divergence when accumulating h_step BUF_SIZE times into
// a float variable which may or may not have extra intermediate
// precision. Therefore clear roughly log2(BUF_SIZE) less
// significant bits, to get the same result regardless of any
// extra precision in the accumulator.
clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
call_ref(l0, r0, h, h_step, BUF_SIZE);
call_new(l1, r1, h, h_step, BUF_SIZE);
if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
!float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
fail();
/* reload the pristine inputs into l1/r1 before benchmarking */
memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
bench_new(l1, r1, h, h_step, BUF_SIZE);
}
}
}
/*
 * Entry point for the AAC PS DSP tests: initialise a PSDSPContext with
 * ff_psdsp_init() and, for each function pointer that check_func() accepts,
 * run the matching test_* routine, followed by a report() for that group.
 * test_stereo_interpolate() performs its own check_func() calls.
 */
void checkasm_check_aacpsdsp(void)
{
PSDSPContext psdsp;
ff_psdsp_init(&psdsp);
if (check_func(psdsp.add_squares, "ps_add_squares"))
test_add_squares();
report("add_squares");
if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
test_mul_pair_single();
report("mul_pair_single");
if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
test_hybrid_analysis();
report("hybrid_analysis");
if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
test_hybrid_analysis_ileave();
report("hybrid_analysis_ileave");
if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
test_hybrid_synthesis_deint();
report("hybrid_synthesis_deint");
test_stereo_interpolate(&psdsp);
report("stereo_interpolate");
}

View File

@@ -0,0 +1,198 @@
/****************************************************************************
* Assembly testing and benchmarking tool
* Copyright (c) 2015 Martin Storsjo
* Copyright (c) 2015 Janne Grunau
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*****************************************************************************/
#include "libavutil/aarch64/asm.S"
// Table of 18 known 64-bit patterns. checkasm_checked_call loads the first 8
// into d8-d15 and the remaining 10 into x19-x28 before calling the function
// under test, and compares those registers against the table afterwards.
// The same table is also used to fill x8-x17 with arbitrary values.
const register_init, align=4
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
.quad 0x1a1b2550a612b48c
.quad 0x79445c159ce79064
.quad 0x2eed899d5a28ddcd
.quad 0x86b2536fcd8cf636
.quad 0xb0856806085e7943
.quad 0x3f2bf84fc0fcca4e
.quad 0xacbd382dcf5b8de2
.quad 0xd229e1f5b281303f
.quad 0x71aeaff20b095fd9
.quad 0xab63e2e11fa38ed9
endconst
// Messages handed to checkasm_fail_func by checkasm_checked_call: one for a
// callee-saved register mismatch, one for a stack canary mismatch.
const error_message_register
.asciz "failed to preserve register"
error_message_stack:
.asciz "stack clobbered"
endconst
// max number of args used by any asm function.
#define MAX_ARGS 15
// Bytes overwritten by checkasm_stack_clobber: 8 bytes per argument, rounded
// up to a multiple of 16.
#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
// checkasm_stack_clobber: overwrite CLOBBER_STACK bytes directly below the
// current stack pointer with copies of x0 and x1, then restore sp.
// Uses x2 as the byte counter and x3 to hold the original sp.
function checkasm_stack_clobber, export=1
mov x3, sp
mov x2, #CLOBBER_STACK
1:
// push one x0/x1 pair (16 bytes) per iteration until the counter reaches 0
stp x0, x1, [sp, #-16]!
subs x2, x2, #16
b.gt 1b
mov sp, x3
ret
endfunc
// Stack space reserved for the stack-passed arguments (8 bytes each for the
// arguments beyond the first 8), rounded up to a multiple of 16,
// + 16 for stack canary reference.
// The rounding mask is parenthesized explicitly: GNU as gives '&' a higher
// precedence than '+', while C-style expression evaluation does the opposite
// and would turn "& ~15 + 16" into "& 0". With the parentheses the value is
// 80 regardless of the precedence rules of the assembler in use.
#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)
// checkasm_checked_call: call the function whose address is passed in x0 and
// verify afterwards that it preserved x19-x28, the low 64 bits of v8-v15
// (d8-d15) and the stack canary.
// The callee's arguments are read from this function's own stack arguments:
// the first 8 words are loaded into x0-x7, the next MAX_ARGS-8 words are
// copied to a freshly reserved stack area, and the word after those is the
// number of stack parameters the callee really takes.
// On a mismatch checkasm_fail_func is called with an error message; the
// callee's x0/x1 are returned to the caller in either case.
function checkasm_checked_call, export=1
// Save the frame record and all callee-saved registers that get overwritten
stp x29, x30, [sp, #-16]!
mov x29, sp
stp x19, x20, [sp, #-16]!
stp x21, x22, [sp, #-16]!
stp x23, x24, [sp, #-16]!
stp x25, x26, [sp, #-16]!
stp x27, x28, [sp, #-16]!
stp d8, d9, [sp, #-16]!
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
stp d14, d15, [sp, #-16]!
// Load the known patterns into d8-d15 and x19-x28
movrel x9, register_init
ldp d8, d9, [x9], #16
ldp d10, d11, [x9], #16
ldp d12, d13, [x9], #16
ldp d14, d15, [x9], #16
ldp x19, x20, [x9], #16
ldp x21, x22, [x9], #16
ldp x23, x24, [x9], #16
ldp x25, x26, [x9], #16
ldp x27, x28, [x9], #16
// Reserve the callee's stack argument area and copy the arguments into it
sub sp, sp, #ARG_STACK
.equ pos, 0
.rept MAX_ARGS-8
// Skip the first 8 args, that are loaded into registers
ldr x9, [x29, #16 + 8*8 + pos]
str x9, [sp, #pos]
.equ pos, pos + 8
.endr
// Fill x8-x17 with garbage. This doesn't have to be preserved,
// but avoids relying on them having any particular value.
movrel x9, register_init
ldp x10, x11, [x9], #32
ldp x12, x13, [x9], #32
ldp x14, x15, [x9], #32
ldp x16, x17, [x9], #32
ldp x8, x9, [x9]
// For stack overflows, the callee is free to overwrite the parameters
// that were passed on the stack (if any), so we can only check after
// that point. First figure out how many parameters the function
// really took on the stack:
ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
// Load the first non-parameter value from the stack, that should be
// left untouched by the function. Store a copy of it inverted, so that
// e.g. overwriting everything with zero would be noticed.
ldr x2, [sp, x2, lsl #3]
mvn x2, x2
str x2, [sp, #ARG_STACK-8]
// Load the in-register arguments
// (the target address is moved out of x0 first, since x0 becomes argument 0)
mov x12, x0
ldp x0, x1, [x29, #16]
ldp x2, x3, [x29, #32]
ldp x4, x5, [x29, #48]
ldp x6, x7, [x29, #64]
// Call the target function
blr x12
// Load the number of stack parameters, stack canary and its reference
ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
ldr x2, [sp, x2, lsl #3]
ldr x3, [sp, #ARG_STACK-8]
add sp, sp, #ARG_STACK
// Keep the callee's return values safe while x0/x1 are used for checking
stp x0, x1, [sp, #-16]!
// Undo the inversion of the stored reference and compare with the canary
mvn x3, x3
cmp x2, x3
b.ne 2f
// Accumulate in v3 the XOR differences between d8-d15 and the table
movrel x9, register_init
movi v3.8h, #0
.macro check_reg_neon reg1, reg2
ldr q1, [x9], #16
uzp1 v2.2d, v\reg1\().2d, v\reg2\().2d
eor v1.16b, v1.16b, v2.16b
orr v3.16b, v3.16b, v1.16b
.endm
check_reg_neon 8, 9
check_reg_neon 10, 11
check_reg_neon 12, 13
check_reg_neon 14, 15
// Narrow the 128-bit difference mask to 64 bits with saturation, so that a
// nonzero halfword stays nonzero, and continue accumulating in x3
uqxtn v3.8b, v3.8h
umov x3, v3.d[0]
.macro check_reg reg1, reg2
ldp x0, x1, [x9], #16
eor x0, x0, \reg1
eor x1, x1, \reg2
orr x3, x3, x0
orr x3, x3, x1
.endm
check_reg x19, x20
check_reg x21, x22
check_reg x23, x24
check_reg x25, x26
check_reg x27, x28
// x3 == 0 means every checked register still holds its pattern
cbz x3, 0f
movrel x0, error_message_register
b 1f
2:
movrel x0, error_message_stack
1:
bl X(checkasm_fail_func)
0:
// Restore the callee's return values and the caller's saved registers
ldp x0, x1, [sp], #16
ldp d14, d15, [sp], #16
ldp d12, d13, [sp], #16
ldp d10, d11, [sp], #16
ldp d8, d9, [sp], #16
ldp x27, x28, [sp], #16
ldp x25, x26, [sp], #16
ldp x23, x24, [sp], #16
ldp x21, x22, [sp], #16
ldp x19, x20, [sp], #16
ldp x29, x30, [sp], #16
ret
endfunc

95
externals/ffmpeg/tests/checkasm/af_afir.c vendored Executable file
View File

@@ -0,0 +1,95 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "config.h"
#include <float.h>
#include <stdint.h>
#include "libavfilter/af_afir.h"
#include "libavutil/internal.h"
#include "checkasm.h"
#define LEN 256
/*
 * Fill buf[0 .. LEN*2+7] with values obtained two at a time from
 * av_bmg_get() on checkasm_lfg, each scaled as value * stddev + mean with
 * stddev = 10.0 and mean = 0.0.
 * The expansion is a single do { } while (0) statement without a trailing
 * semicolon, so "randomize_buffer(x);" is safe in if/else bodies; buf is
 * parenthesized so that any pointer expression can be passed.
 */
#define randomize_buffer(buf)                                 \
    do {                                                      \
        int i;                                                \
        double bmg[2], stddev = 10.0, mean = 0.0;             \
                                                              \
        for (i = 0; i < LEN*2+8; i += 2) {                    \
            av_bmg_get(&checkasm_lfg, bmg);                   \
            (buf)[i]     = bmg[0] * stddev + mean;            \
            (buf)[i + 1] = bmg[1] * stddev + mean;            \
        }                                                     \
    } while (0)
/*
 * Exercise the function selected by the caller's check_func() ("fcmul_add").
 * cdst and odst both start as copies of src0; call_ref and call_new
 * (checkasm.h macros; presumably the reference and the implementation under
 * test) are run with src1, src2 and LEN. The first LEN*2 + 1 outputs are then
 * compared with a per-element tolerance derived from the magnitudes of the
 * operands involved; the first mismatch is printed to stderr and fails the
 * test.
 */
static void test_fcmul_add(const float *src0, const float *src1, const float *src2)
{
LOCAL_ALIGNED_32(float, cdst, [LEN*2+8]);
LOCAL_ALIGNED_32(float, odst, [LEN*2+8]);
int i;
/* signature of the function driven by call_ref/call_new/bench_new below */
declare_func(void, float *sum, const float *t, const float *c,
ptrdiff_t len);
memcpy(cdst, src0, (LEN*2+8) * sizeof(float));
memcpy(odst, src0, (LEN*2+8) * sizeof(float));
call_ref(cdst, src1, src2, LEN);
call_new(odst, src1, src2, LEN);
for (i = 0; i <= LEN*2; i++) {
/* idx is the even index of the pair that element i belongs to */
int idx = i & ~1;
float cre = src2[idx];
float cim = src2[idx + 1];
float tre = src1[idx];
float tim = src1[idx + 1];
/* t sums the magnitudes of the inputs, the partial products and the
reference result (plus 1.0); it scales the allowed absolute error */
double t = fabs(src0[i]) +
fabs(tre) + fabs(tim) + fabs(cre) + fabs(cim) +
fabs(tre * cre) + fabs(tim * cim) +
fabs(tre * cim) + fabs(tim * cre) +
fabs(tre * cre - tim * cim) +
fabs(tre * cim + tim * cre) +
fabs(cdst[i]) + 1.0;
if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
i, cdst[i], odst[i], cdst[i] - odst[i]);
fail();
break;
}
}
/* reset the destination to the original src0 contents before benchmarking */
memcpy(odst, src0, (LEN*2+8) * sizeof(float));
bench_new(odst, src1, src2, LEN);
}
/*
 * Entry point for the afir filter tests: initialise a zeroed
 * AudioFIRDSPContext with ff_afir_init(), fill three LEN*2+8 element buffers
 * with random data and, if check_func() accepts fir.fcmul_add, run
 * test_fcmul_add() on them; report() follows under the name "fcmul_add".
 */
void checkasm_check_afir(void)
{
LOCAL_ALIGNED_32(float, src0, [LEN*2+8]);
LOCAL_ALIGNED_32(float, src1, [LEN*2+8]);
LOCAL_ALIGNED_32(float, src2, [LEN*2+8]);
AudioFIRDSPContext fir = { 0 };
ff_afir_init(&fir);
randomize_buffer(src0);
randomize_buffer(src1);
randomize_buffer(src2);
if (check_func(fir.fcmul_add, "fcmul_add"))
test_fcmul_add(src0, src1, src2);
report("fcmul_add");
}

119
externals/ffmpeg/tests/checkasm/alacdsp.c vendored Executable file
View File

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2015 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/alacdsp.h"
#include "libavcodec/mathops.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#define BUF_SIZE 256
#define MAX_CHANNELS 2
/*
 * Fill ref_buf and new_buf (both taken from the enclosing scope) with the
 * same BUF_SIZE*MAX_CHANNELS values, each sign_extend(rnd(), 24).
 */
#define randomize_buffers() \
do { \
int i; \
for (i = 0; i < BUF_SIZE*MAX_CHANNELS; i++) { \
int32_t r = sign_extend(rnd(), 24); \
ref_buf[i] = r; \
new_buf[i] = r; \
} \
} while (0)
/*
 * Check ALACDSPContext.decorrelate_stereo: run call_ref and call_new
 * (checkasm.h macros; presumably the reference and the implementation under
 * test) on identical two-channel buffers with a random length in 1..256, a
 * random shift in 0..31 and a random weight in 0..255, and require the first
 * len samples of both channels to be bit-identical.
 */
static void check_decorrelate_stereo(void)
{
LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
/* per-channel pointers into the two halves of each buffer */
int32_t *ref[2] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1] };
int32_t *new[2] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1] };
ALACDSPContext c;
ff_alacdsp_init(&c);
if (check_func(c.decorrelate_stereo, "alac_decorrelate_stereo")) {
int len = (rnd() & 0xFF) + 1;
int shift = rnd() & 0x1F;
int weight = rnd() & 0xFF;
declare_func(void, int32_t *buf[2], int len, int shift, int weight);
randomize_buffers();
call_ref(ref, len, shift, weight);
call_new(new, len, shift, weight);
if (memcmp(ref[0], new[0], len * sizeof(int32_t)) ||
memcmp(ref[1], new[1], len * sizeof(int32_t)))
fail();
/* the benchmark always uses the full BUF_SIZE length */
bench_new(new, BUF_SIZE, shift, weight);
}
report("decorrelate_stereo");
}
#undef randomize_buffers
/*
 * For the first `ch` channels (ch, ref, new, ref_ebb and new_ebb come from
 * the enclosing scope) fill BUF_SIZE samples: ref/new get the same
 * sign_extend(rnd(), 24) value and ref_ebb/new_ebb the same 8-bit value
 * rnd() & 0xFF.
 */
#define randomize_buffers() \
do { \
int i, j; \
for (i = 0; i < BUF_SIZE; i++) { \
for (j = 0; j < ch; j++) { \
int32_t r = sign_extend(rnd(), 24); \
ref[j][i] = r; \
new[j][i] = r; \
r = rnd() & 0xFF; \
ref_ebb[j][i] = r; \
new_ebb[j][i] = r; \
} \
} \
} while (0)
/*
 * Check both entries of ALACDSPContext.append_extra_bits (index 0 "mono",
 * index 1 "stereo"): run call_ref and call_new on identical sample and
 * extra-bits buffers with ebits = 8 and a random length in 1..256, and
 * require the first len samples of channel 0 - and of channel 1 when
 * ch == 2 - to be bit-identical.
 */
static void check_append_extra_bits(void)
{
LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS*2]);
LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS*2]);
/* each buffer is split into four BUF_SIZE regions: two sample channels
followed by two extra-bits channels */
int32_t *ref[2] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1] };
int32_t *new[2] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1] };
int32_t *ref_ebb[2] = { &ref_buf[BUF_SIZE*2], &ref_buf[BUF_SIZE*3] };
int32_t *new_ebb[2] = { &new_buf[BUF_SIZE*2], &new_buf[BUF_SIZE*3] };
ALACDSPContext c;
static const char * const channels[2] = { "mono", "stereo" };
int ch;
ff_alacdsp_init(&c);
for (ch = 1; ch <= 2; ch++) {
if (check_func(c.append_extra_bits[ch-1], "alac_append_extra_bits_%s", channels[ch-1])) {
int len = (rnd() & 0xFF) + 1;
declare_func(void, int32_t *buf[2], int32_t *ebb[2], int ebits, int ch, int len);
randomize_buffers();
call_ref(ref, ref_ebb, 8, ch, len);
call_new(new, new_ebb, 8, ch, len);
if ( memcmp(ref[0], new[0], len * sizeof(int32_t)) ||
(ch == 2 && memcmp(ref[1], new[1], len * sizeof(int32_t))))
fail();
/* the benchmark always uses the full BUF_SIZE length */
bench_new(new, new_ebb, 8, ch, BUF_SIZE);
}
}
report("append_extra_bits");
}
/*
 * Entry point for the ALAC DSP tests: runs the decorrelate_stereo check
 * followed by the append_extra_bits check.
 */
void checkasm_check_alacdsp(void)
{
check_decorrelate_stereo();
check_append_extra_bits();
}

197
externals/ffmpeg/tests/checkasm/arm/checkasm.S vendored Executable file
View File

@@ -0,0 +1,197 @@
/****************************************************************************
* Assembly testing and benchmarking tool
* Copyright (c) 2015 Martin Storsjo
* Copyright (c) 2015 Janne Grunau
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*****************************************************************************/
#include "libavutil/arm/asm.S"
/* override fpu so that NEON instructions are rejected */
#if HAVE_VFP
FPU .fpu vfp
ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch
#endif
@ Table of 8 known 64-bit patterns. The checked-call functions load all 64
@ bytes into d8-d15 (vfp variant) and the first 32 bytes into r4-r11 before
@ calling the function under test, and compare the registers against the
@ table afterwards.
const register_init, align=3
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
endconst
@ Format strings handed to checkasm_fail_func; the %x / %d argument is the
@ value left in r1 by the failing check.
const error_message_fpscr
.asciz "failed to preserve register FPSCR, changed bits: %x"
error_message_gpr:
.asciz "failed to preserve register r%d"
error_message_vfp:
.asciz "failed to preserve register d%d"
error_message_stack:
.asciz "failed to preserve stack"
endconst
@ max number of args used by any asm function.
#define MAX_ARGS 15
@ Bytes of stack-passed arguments: 4 bytes for each argument beyond the
@ first 4.
#define ARG_STACK 4*(MAX_ARGS - 4)
@ Align the used stack space to 8 to preserve the stack alignment.
@ +8 for stack canary reference.
@ "pushed" is the assembler symbol set inside clobbercheck to the number of
@ bytes already pushed by the function prologue.
#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8)
@ clobbercheck: emit checkasm_checked_call_<variant>.
@ The generated function calls the function whose address is in r0, passing
@ r2 and r3 as the first two arguments and taking the remaining ones from the
@ caller's stack. Afterwards it checks that the stack canary and r4-r11
@ (r9 is skipped when __APPLE__ is defined) were preserved; the vfp variant
@ also checks d8-d15 and FPSCR. On a mismatch checkasm_fail_func is called;
@ the callee's r0/r1 are returned to the caller either way.
.macro clobbercheck variant
@ bytes pushed by the prologue: 9 core registers
.equ pushed, 4*9
function checkasm_checked_call_\variant, export=1
push {r4-r11, lr}
.ifc \variant, vfp
vpush {d8-d15}
fmrx r4, FPSCR
push {r4}
@ plus d8-d15 (16 words) and the saved FPSCR
.equ pushed, pushed + 16*4 + 4
.endif
@ Load the known patterns into d8-d15 (vfp only) and r4-r11
movrel r12, register_init
.ifc \variant, vfp
vldm r12, {d8-d15}
.endif
ldm r12, {r4-r11}
@ Reserve the callee's stack argument area and copy the arguments into it.
@ The first 8 bytes of the caller's stack arguments are skipped here; they
@ are loaded into r2/r3 further down.
sub sp, sp, #ARG_STACK_A
.equ pos, 0
.rept MAX_ARGS-4
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
str r12, [sp, #pos]
.equ pos, pos + 4
.endr
@ For stack overflows, the callee is free to overwrite the parameters
@ that were passed on the stack (if any), so we can only check after
@ that point. First figure out how many parameters the function
@ really took on the stack:
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
@ Load the first non-parameter value from the stack, that should be
@ left untouched by the function. Store a copy of it inverted, so that
@ e.g. overwriting everything with zero would be noticed.
ldr r12, [sp, r12, lsl #2]
mvn r12, r12
str r12, [sp, #ARG_STACK_A - 4]
@ Move the target address out of r0 and set up the register arguments
mov r12, r0
mov r0, r2
mov r1, r3
ldrd r2, r3, [sp, #ARG_STACK_A + pushed]
@ Call the target function
blx r12
@ Load the number of stack parameters, stack canary and its reference
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
ldr r2, [sp, r12, lsl #2]
ldr r3, [sp, #ARG_STACK_A - 4]
add sp, sp, #ARG_STACK_A
@ Keep the callee's return values safe while r0/r1 are used for checking
push {r0, r1}
@ Undo the inversion of the stored reference and compare with the canary
mvn r3, r3
cmp r2, r3
bne 5f
movrel r12, register_init
.ifc \variant, vfp
@ Compare one d register with the table entry at the given index
.macro check_reg_vfp, dreg, offset
ldrd r2, r3, [r12, #8 * (\offset)]
vmov r0, lr, \dreg
eor r2, r2, r0
eor r3, r3, lr
orrs r2, r2, r3
bne 4f
.endm
.irp n, 8, 9, 10, 11, 12, 13, 14, 15
@ keep track of the checked double/SIMD register
mov r1, #\n
check_reg_vfp d\n, \n-8
.endr
.purgem check_reg_vfp
@ Compare FPSCR with the value saved in the prologue, which now sits just
@ above the two words pushed for r0/r1
fmrx r1, FPSCR
ldr r3, [sp, #8]
eor r1, r1, r3
@ Ignore changes in bits 0-4 and 7
bic r1, r1, #0x9f
@ Ignore changes in the topmost 5 bits
bics r1, r1, #0xf8000000
bne 3f
.endif
@ keep track of the checked GPR
mov r1, #4
@ Compare one or two core registers with the next 8 table bytes; r1 is
@ advanced past each register that matched
.macro check_reg reg1, reg2=
ldrd r2, r3, [r12], #8
eors r2, r2, \reg1
bne 2f
add r1, r1, #1
.ifnb \reg2
eors r3, r3, \reg2
bne 2f
.endif
add r1, r1, #1
.endm
check_reg r4, r5
check_reg r6, r7
@ r9 is a volatile register in the ios ABI
#ifdef __APPLE__
check_reg r8
#else
check_reg r8, r9
#endif
check_reg r10, r11
.purgem check_reg
b 0f
@ Error exits: pick the message for the failed check, then report it
5:
movrel r0, error_message_stack
b 1f
4:
movrel r0, error_message_vfp
b 1f
3:
movrel r0, error_message_fpscr
b 1f
2:
movrel r0, error_message_gpr
1:
bl X(checkasm_fail_func)
0:
@ Restore the callee's return values and everything saved in the prologue
pop {r0, r1}
.ifc \variant, vfp
pop {r2}
fmxr FPSCR, r2
vpop {d8-d15}
.endif
pop {r4-r11, pc}
endfunc
.endm
@ Instantiate the vfp variant only when VFP or NEON is available; the novfp
@ variant is always emitted.
#if HAVE_VFP || HAVE_NEON
clobbercheck vfp
#endif
clobbercheck novfp

146
externals/ffmpeg/tests/checkasm/audiodsp.c vendored Executable file
View File

@@ -0,0 +1,146 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "libavcodec/audiodsp.h"
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "checkasm.h"
#define MAX_SIZE (32 * 128)
/*
 * Fill buf[0 .. len-1] with floats computed as
 * (float)rnd() / (UINT_MAX >> 5) - 16.0f.
 * Arguments are parenthesized so that arbitrary expressions can be passed.
 */
#define randomize_float(buf, len)                               \
    do {                                                        \
        int i;                                                  \
        for (i = 0; i < (len); i++) {                           \
            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
            (buf)[i] = f;                                       \
        }                                                       \
    } while (0)
/*
 * Store len values into buf using AV_WN<size>A(): each is a random value r
 * masked to `bits` bits, offset by -(1 << (bits - 1)).
 * `bits` and the other arguments are parenthesized so the shifts do not
 * depend on the operator precedence of the expression passed in.
 */
#define randomize_int(buf, len, size, bits)                             \
    do {                                                                \
        int i;                                                          \
        for (i = 0; i < (len); i++) {                                   \
            uint ## size ## _t r = rnd() & ((1LL << (bits)) - 1);       \
            AV_WN ## size ## A((buf) + i, -(1LL << ((bits) - 1)) + r);  \
        }                                                               \
    } while (0)
/*
 * Entry point for the AudioDSPContext tests. After ff_audiodsp_init(), each
 * of scalarproduct_int16, vector_clip_int32 and vector_clipf that
 * check_func() accepts is run through call_ref and call_new (checkasm.h
 * macros; presumably the reference and the implementation under test) on
 * random data of random length, the results are compared, and bench_new is
 * run on MAX_SIZE elements. A single report("audiodsp") covers all three.
 */
void checkasm_check_audiodsp(void)
{
    AudioDSPContext adsp;

    ff_audiodsp_init(&adsp);

    if (check_func(adsp.scalarproduct_int16, "audiodsp.scalarproduct_int16")) {
        LOCAL_ALIGNED(32, int16_t, v1, [MAX_SIZE]);
        LOCAL_ALIGNED(32, int16_t, v2, [MAX_SIZE]);
        unsigned int len_bits_minus4, v1_bits, v2_bits, len;
        int32_t res0, res1;

        declare_func_emms(AV_CPU_FLAG_MMX, int32_t, const int16_t *v1, const int16_t *v2, int len);

        // generate random 5-12bit vector length
        len_bits_minus4 = rnd() % 8;
        len = rnd() & ((1 << len_bits_minus4) - 1);
        len = 16 * FFMAX(len, 1);

        // generate the bit counts for each of the vectors such that the result
        // fits into int32
        v1_bits = 1 + rnd() % 15;
        v2_bits = FFMIN(32 - (len_bits_minus4 + 4) - v1_bits - 1, 15);

        randomize_int(v1, MAX_SIZE, 16, v1_bits + 1);
        randomize_int(v2, MAX_SIZE, 16, v2_bits + 1);

        res0 = call_ref(v1, v2, len);
        res1 = call_new(v1, v2, len);
        if (res0 != res1)
            fail();
        bench_new(v1, v2, MAX_SIZE);
    }

    if (check_func(adsp.vector_clip_int32, "audiodsp.vector_clip_int32")) {
        LOCAL_ALIGNED(32, int32_t, src, [MAX_SIZE]);
        LOCAL_ALIGNED(32, int32_t, dst0, [MAX_SIZE]);
        LOCAL_ALIGNED(32, int32_t, dst1, [MAX_SIZE]);
        int32_t val1, val2, min, max;
        int len;

        declare_func_emms(AV_CPU_FLAG_MMX, void, int32_t *dst, const int32_t *src,
                          int32_t min, int32_t max, unsigned int len);

        /* two random signed bounds with 24-bit magnitude, ordered as min/max */
        val1 = ((int32_t)rnd());
        val1 = FFSIGN(val1) * (val1 & ((1 << 24) - 1));
        val2 = ((int32_t)rnd());
        val2 = FFSIGN(val2) * (val2 & ((1 << 24) - 1));

        min = FFMIN(val1, val2);
        max = FFMAX(val1, val2);

        randomize_int(src, MAX_SIZE, 32, 32);

        /* random length: a multiple of 32 between 32 and 127 * 32 */
        len = rnd() % 128;
        len = 32 * FFMAX(len, 1);

        call_ref(dst0, src, min, max, len);
        call_new(dst1, src, min, max, len);
        if (memcmp(dst0, dst1, len * sizeof(*dst0)))
            fail();
        bench_new(dst1, src, min, max, MAX_SIZE);
    }

    if (check_func(adsp.vector_clipf, "audiodsp.vector_clipf")) {
        LOCAL_ALIGNED(32, float, src, [MAX_SIZE]);
        LOCAL_ALIGNED(32, float, dst0, [MAX_SIZE]);
        LOCAL_ALIGNED(32, float, dst1, [MAX_SIZE]);
        float val1, val2, min, max;
        int len;

        declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
                          int len, float min, float max);

        val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
        val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;

        min = FFMIN(val1, val2);
        max = FFMAX(val1, val2);

        randomize_float(src, MAX_SIZE);

        /* random length: a multiple of 16 between 16 and 127 * 16 */
        len = rnd() % 128;
        len = 16 * FFMAX(len, 1);

        call_ref(dst0, src, len, min, max);
        call_new(dst1, src, len, min, max);
        /* float_near_ulp_array() already checks all len elements, so a single
         * call is enough; repeating it per element only costs time. */
        if (!float_near_ulp_array(dst0, dst1, 3, len))
            fail();
        bench_new(dst1, src, MAX_SIZE, min, max);
    }

    report("audiodsp");
}

68
externals/ffmpeg/tests/checkasm/blockdsp.c vendored Executable file
View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2015 Henrik Gramner
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/blockdsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Write the same random 16-bit value into slot i of both buf0 and buf1,
 * for every i below `size`, so both buffers start out identical. */
#define randomize_buffers(size)         \
    do {                                \
        int i = 0;                      \
        while (i < size) {              \
            uint16_t r = rnd();         \
            AV_WN16A(buf0 + i, r);      \
            AV_WN16A(buf1 + i, r);      \
            i++;                        \
        }                               \
    } while (0)
/* Verify one clear function of the BlockDSPContext `h`.
 * buf0 and buf1 are filled with identical random data, the reference version
 * runs on buf0 and the version under test on buf1, and the first `size`
 * elements of both buffers must then compare equal. The version under test
 * is benchmarked afterwards. Nothing happens when check_func() decides the
 * function does not need testing. */
#define check_clear(func, size) \
do { \
if (check_func(h.func, "blockdsp." #func)) { \
declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block); \
randomize_buffers(size); \
call_ref(buf0); \
call_new(buf1); \
if (memcmp(buf0, buf1, sizeof(*buf0) * size)) \
fail(); \
bench_new(buf0); \
} \
} while (0)
/* Test entry point for blockdsp: checks clear_block on 8 * 8 coefficients and
 * clear_blocks on 6 * 8 * 8 coefficients, then reports the outcome. */
void checkasm_check_blockdsp(void)
{
/* Both buffers are sized for the larger clear_blocks case */
LOCAL_ALIGNED_32(uint16_t, buf0, [6 * 8 * 8]);
LOCAL_ALIGNED_32(uint16_t, buf1, [6 * 8 * 8]);
AVCodecContext avctx = { 0 };
BlockDSPContext h;
ff_blockdsp_init(&h, &avctx);
check_clear(clear_block, 8 * 8);
check_clear(clear_blocks, 8 * 8 * 6);
report("blockdsp");
}

76
externals/ffmpeg/tests/checkasm/bswapdsp.c vendored Executable file
View File

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2015 Henrik Gramner
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/bswapdsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Size in bytes of each source and destination test buffer */
#define BUF_SIZE 512

/* Refill all four buffers four bytes at a time: src0/src1 receive one random
 * word, dst0/dst1 receive another, so each pair starts out identical. */
#define randomize_buffers()                     \
    do {                                        \
        int i = 0;                              \
        while (i < BUF_SIZE) {                  \
            uint32_t r = rnd();                 \
            AV_WN32A(src0 + i, r);              \
            AV_WN32A(src1 + i, r);              \
            r = rnd();                          \
            AV_WN32A(dst0 + i, r);              \
            AV_WN32A(dst1 + i, r);              \
            i += 4;                             \
        }                                       \
    } while (0)
/* Exercise a byte-swap function operating on elements of `type`.
 * Every width w from 0 to BUF_SIZE / sizeof(type) - 1 is tried, each with a
 * start offset between 0 and 15 elements derived from w. The buffers are
 * re-randomized per iteration and compared in full afterwards, so any
 * difference from the reference fails the test, including a difference in
 * the source buffers or outside the w elements that were requested. */
#define check_bswap(type) \
do { \
int w; \
declare_func(void, type *dst, const type *src, int w); \
\
for (w = 0; w < BUF_SIZE / sizeof(type); w++) { \
int offset = (BUF_SIZE / sizeof(type) - w) & 15; /* Test various alignments */ \
randomize_buffers(); \
call_ref((type *)dst0 + offset, (type *)src0 + offset, w); \
call_new((type *)dst1 + offset, (type *)src1 + offset, w); \
if (memcmp(src0, src1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
fail(); \
bench_new((type *)dst1 + offset, (type *)src1 + offset, w); \
} \
} while (0)
/* Test entry point for bswapdsp: checks the 32-bit and the 16-bit buffer
 * swap functions of BswapDSPContext and reports the outcome. */
void checkasm_check_bswapdsp(void)
{
/* The "0" buffers are handed to the reference, the "1" buffers to the
 * function under test (see check_bswap) */
LOCAL_ALIGNED_16(uint8_t, src0, [BUF_SIZE]);
LOCAL_ALIGNED_16(uint8_t, src1, [BUF_SIZE]);
LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
BswapDSPContext h;
ff_bswapdsp_init(&h);
if (check_func(h.bswap_buf, "bswap_buf"))
check_bswap(uint32_t);
if (check_func(h.bswap16_buf, "bswap16_buf"))
check_bswap(uint16_t);
report("bswap");
}

881
externals/ffmpeg/tests/checkasm/checkasm.c vendored Executable file
View File

@@ -0,0 +1,881 @@
/*
* Assembly testing and benchmarking tool
* Copyright (c) 2015 Henrik Gramner
* Copyright (c) 2008 Loren Merritt
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "config.h"
#if CONFIG_LINUX_PERF
# ifndef _GNU_SOURCE
# define _GNU_SOURCE // for syscall (performance monitoring API)
# endif
#endif
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "checkasm.h"
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/intfloat.h"
#include "libavutil/random_seed.h"
#if HAVE_IO_H
#include <io.h>
#endif
/* Color codes passed to color_printf(): Windows console attribute bits when
 * the console API is available, otherwise small integers that color_printf()
 * turns into an escape sequence. */
#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
#include <windows.h>
#define COLOR_RED FOREGROUND_RED
#define COLOR_GREEN FOREGROUND_GREEN
#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
#else
#define COLOR_RED 1
#define COLOR_GREEN 2
#define COLOR_YELLOW 3
#endif
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
/* Without isatty() every descriptor is treated as a terminal */
#if !HAVE_ISATTY
#define isatty(fd) 1
#endif
/* On ARM the checked-call wrapper is selected at run time: it defaults to the
 * non-VFP variant and main() switches it when VFP or NEON is detected. */
#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
#include "libavutil/arm/cpu.h"
void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
#endif
/* List of tests to invoke.
 *
 * Each entry pairs the name accepted by --test= (and printed in reports) with
 * the test's entry point. The list ends with an all-zero entry.
 *
 * An entry is compiled in only when its component is enabled. Every guard
 * must use the same CONFIG_* symbol that tests/checkasm/Makefile uses to
 * build the corresponding object file; otherwise the table either references
 * a function that was never compiled or silently omits a test that was. */
static const struct {
    const char *name;
    void (*func)(void);
} tests[] = {
#if CONFIG_AVCODEC
    #if CONFIG_AAC_DECODER
        { "aacpsdsp", checkasm_check_aacpsdsp },
        { "sbrdsp",   checkasm_check_sbrdsp },
    #endif
    #if CONFIG_ALAC_DECODER
        { "alacdsp", checkasm_check_alacdsp },
    #endif
    #if CONFIG_AUDIODSP
        { "audiodsp", checkasm_check_audiodsp },
    #endif
    #if CONFIG_BLOCKDSP
        { "blockdsp", checkasm_check_blockdsp },
    #endif
    #if CONFIG_BSWAPDSP
        { "bswapdsp", checkasm_check_bswapdsp },
    #endif
    #if CONFIG_DCA_DECODER
        { "synth_filter", checkasm_check_synth_filter },
    #endif
    #if CONFIG_EXR_DECODER
        { "exrdsp", checkasm_check_exrdsp },
    #endif
    #if CONFIG_FLACDSP
        { "flacdsp", checkasm_check_flacdsp },
    #endif
    #if CONFIG_FMTCONVERT
        { "fmtconvert", checkasm_check_fmtconvert },
    #endif
    #if CONFIG_G722DSP
        { "g722dsp", checkasm_check_g722dsp },
    #endif
    #if CONFIG_H264DSP
        { "h264dsp", checkasm_check_h264dsp },
    #endif
    #if CONFIG_H264PRED
        { "h264pred", checkasm_check_h264pred },
    #endif
    #if CONFIG_H264QPEL
        { "h264qpel", checkasm_check_h264qpel },
    #endif
    #if CONFIG_HEVC_DECODER
        { "hevc_add_res", checkasm_check_hevc_add_res },
        { "hevc_idct",    checkasm_check_hevc_idct },
        { "hevc_sao",     checkasm_check_hevc_sao },
    #endif
    #if CONFIG_HUFFYUV_DECODER
        { "huffyuvdsp", checkasm_check_huffyuvdsp },
    #endif
    #if CONFIG_JPEG2000_DECODER
        { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
    #endif
    /* llviddsp.o is built under CONFIG_LLVIDDSP, so that is the guard here */
    #if CONFIG_LLVIDDSP
        { "llviddsp", checkasm_check_llviddsp },
    #endif
    #if CONFIG_LLVIDENCDSP
        { "llviddspenc", checkasm_check_llviddspenc },
    #endif
    #if CONFIG_OPUS_DECODER
        { "opusdsp", checkasm_check_opusdsp },
    #endif
    #if CONFIG_PIXBLOCKDSP
        { "pixblockdsp", checkasm_check_pixblockdsp },
    #endif
    #if CONFIG_UTVIDEO_DECODER
        { "utvideodsp", checkasm_check_utvideodsp },
    #endif
    #if CONFIG_V210_DECODER
        { "v210dec", checkasm_check_v210dec },
    #endif
    #if CONFIG_V210_ENCODER
        { "v210enc", checkasm_check_v210enc },
    #endif
    #if CONFIG_VP8DSP
        { "vp8dsp", checkasm_check_vp8dsp },
    #endif
    #if CONFIG_VP9_DECODER
        { "vp9dsp", checkasm_check_vp9dsp },
    #endif
    #if CONFIG_VIDEODSP
        { "videodsp", checkasm_check_videodsp },
    #endif
#endif
#if CONFIG_AVFILTER
    #if CONFIG_AFIR_FILTER
        { "af_afir", checkasm_check_afir },
    #endif
    #if CONFIG_BLEND_FILTER
        { "vf_blend", checkasm_check_blend },
    #endif
    #if CONFIG_COLORSPACE_FILTER
        { "vf_colorspace", checkasm_check_colorspace },
    #endif
    #if CONFIG_EQ_FILTER
        { "vf_eq", checkasm_check_vf_eq },
    #endif
    #if CONFIG_GBLUR_FILTER
        { "vf_gblur", checkasm_check_vf_gblur },
    #endif
    #if CONFIG_HFLIP_FILTER
        { "vf_hflip", checkasm_check_vf_hflip },
    #endif
    #if CONFIG_NLMEANS_FILTER
        { "vf_nlmeans", checkasm_check_nlmeans },
    #endif
    #if CONFIG_THRESHOLD_FILTER
        { "vf_threshold", checkasm_check_vf_threshold },
    #endif
#endif
#if CONFIG_SWSCALE
    { "sw_rgb",   checkasm_check_sw_rgb },
    { "sw_scale", checkasm_check_sw_scale },
#endif
#if CONFIG_AVUTIL
    { "fixed_dsp", checkasm_check_fixed_dsp },
    { "float_dsp", checkasm_check_float_dsp },
#endif
    { NULL }
};
/* List of cpu flags to check.
 *
 * name   - heading printed before results for that flag (print_cpu_name)
 * suffix - appended to function names in failure and benchmark output
 * flag   - AV_CPU_FLAG_* mask enabled on top of the previously tested flags
 *
 * main() walks the list front to back until it reaches the zero terminator.
 * cpu_suffix() scans it back to front, so a later entry wins when several
 * flags are set. */
static const struct {
const char *name;
const char *suffix;
int flag;
} cpus[] = {
#if ARCH_AARCH64
{ "ARMV8", "armv8", AV_CPU_FLAG_ARMV8 },
{ "NEON", "neon", AV_CPU_FLAG_NEON },
#elif ARCH_ARM
{ "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE },
{ "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 },
{ "ARMV6T2", "armv6t2", AV_CPU_FLAG_ARMV6T2 },
{ "VFP", "vfp", AV_CPU_FLAG_VFP },
{ "VFP_VM", "vfp_vm", AV_CPU_FLAG_VFP_VM },
{ "VFPV3", "vfp3", AV_CPU_FLAG_VFPV3 },
{ "NEON", "neon", AV_CPU_FLAG_NEON },
#elif ARCH_PPC
{ "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC },
{ "VSX", "vsx", AV_CPU_FLAG_VSX },
{ "POWER8", "power8", AV_CPU_FLAG_POWER8 },
#elif ARCH_X86
{ "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
{ "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT },
{ "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW },
{ "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
{ "SSE", "sse", AV_CPU_FLAG_SSE },
{ "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
{ "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
{ "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
{ "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 },
{ "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 },
{ "AES-NI", "aesni", AV_CPU_FLAG_AESNI },
{ "AVX", "avx", AV_CPU_FLAG_AVX },
{ "XOP", "xop", AV_CPU_FLAG_XOP },
{ "FMA3", "fma3", AV_CPU_FLAG_FMA3 },
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
#endif
{ NULL }
};
/* One implementation of a tested function, kept in a singly linked list */
typedef struct CheckasmFuncVersion {
struct CheckasmFuncVersion *next; /* next version, NULL at the end of the list */
void *func;                       /* address of this implementation */
int ok;                           /* 1 until a failure is reported for it */
int cpu;                          /* state.cpu_flag at the time it was registered */
CheckasmPerf perf;                /* benchmark counters for this version */
} CheckasmFuncVersion;
/* Binary search tree node, one per function name */
typedef struct CheckasmFunc {
struct CheckasmFunc *child[2]; /* [0] and [1] subtrees, chosen by name comparison in get_func() */
CheckasmFuncVersion versions;  /* first version, stored inline; later ones hang off .next */
uint8_t color; /* 0 = red, 1 = black */
/* Function name; get_func() allocates the node with strlen(name) extra
 * bytes so the whole string, terminator included, fits here */
char name[1];
} CheckasmFunc;
/* Internal state shared by the test driver and the helpers called by tests */
static struct {
CheckasmFunc *funcs;                   /* root of the function tree */
CheckasmFunc *current_func;            /* node selected by the last checkasm_check_func() */
CheckasmFuncVersion *current_func_ver; /* version selected by the last checkasm_check_func() */
const char *current_test_name;         /* name of the test being run (from tests[]) */
const char *bench_pattern;             /* non-NULL when --bench was given; text after "--bench=" or "" */
int bench_pattern_len;                 /* length of bench_pattern used for prefix matching */
int num_checked;                       /* functions registered while a cpu flag was active */
int num_failed;                        /* function versions that reported a failure */
/* perf */
int nop_time;                          /* timing overhead from measure_nop_time(), in decicycles */
int sysfd;                             /* descriptor returned by perf_event_open (Linux perf only) */
int cpu_flag;                          /* cpu flags currently forced; 0 during the first pass */
const char *cpu_flag_name;             /* heading still to be printed, NULL once printed */
const char *test_name;                 /* test selected with --test=, NULL to run all */
int verbose;                           /* set by --verbose / -v */
} state;
/* PRNG state: seeded once in main() and read through the rnd() macro */
AVLFG checkasm_lfg;
/* float compare support code */
/* Extract bit 31 of the 32-bit pattern, i.e. the sign bit of the float */
static int is_negative(union av_intfloat32 u)
{
    return (int)(u.i >> 31);
}
int float_near_ulp(float a, float b, unsigned max_ulp)
{
union av_intfloat32 x, y;
x.f = a;
y.f = b;
if (is_negative(x) != is_negative(y)) {
// handle -0.0 == +0.0
return a == b;
}
if (llabs((int64_t)x.i - y.i) <= max_ulp)
return 1;
return 0;
}
/* Apply float_near_ulp() to the first len element pairs of a and b.
 * Returns 0 at the first pair that is too far apart, 1 if all pairs pass. */
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
                         unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_ulp(a[idx], b[idx], max_ulp))
            return 0;
        idx++;
    }
    return 1;
}
/* Return 1 when |a - b| is strictly below eps. Otherwise print a diagnostic
 * with both values to stderr and return 0. */
int float_near_abs_eps(float a, float b, float eps)
{
    const float delta = fabsf(a - b);

    if (!(delta < eps)) {
        fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, delta, eps);
        return 0;
    }
    return 1;
}
/* Apply float_near_abs_eps() to the first len element pairs of a and b.
 * Returns 0 at the first failing pair, 1 if all pairs pass. */
int float_near_abs_eps_array(const float *a, const float *b, float eps,
                             unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_abs_eps(a[idx], b[idx], eps))
            return 0;
        idx++;
    }
    return 1;
}
/* Accept the pair when either the ULP check or the absolute-epsilon check
 * passes. The epsilon check only runs if the ULP check failed. */
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
{
    if (float_near_ulp(a, b, max_ulp))
        return 1;
    return float_near_abs_eps(a, b, eps) != 0;
}
/* Apply float_near_abs_eps_ulp() to the first len element pairs of a and b.
 * Returns 0 at the first failing pair, 1 if all pairs pass. */
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
                                 unsigned max_ulp, unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_abs_eps_ulp(a[idx], b[idx], eps, max_ulp))
            return 0;
        idx++;
    }
    return 1;
}
/* Return 1 when |a - b| is strictly below eps, 0 otherwise */
int double_near_abs_eps(double a, double b, double eps)
{
    return fabs(a - b) < eps;
}
/* Apply double_near_abs_eps() to the first len element pairs of a and b.
 * Returns 0 at the first failing pair, 1 if all pairs pass. */
int double_near_abs_eps_array(const double *a, const double *b, double eps,
                              unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!double_near_abs_eps(a[idx], b[idx], eps))
            return 0;
        idx++;
    }
    return 1;
}
/* Print colored text to stderr if the terminal supports it.
 *
 * color is one of the COLOR_* constants; fmt and the remaining arguments are
 * passed to vfprintf(). Whether color can be used is decided on the first
 * call and remembered in a static variable. */
static void color_printf(int color, const char *fmt, ...)
{
static int use_color = -1; /* -1 = not decided yet, 0 = plain, 1 = colored */
va_list arg;
#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
static HANDLE con;
static WORD org_attributes;
if (use_color < 0) {
CONSOLE_SCREEN_BUFFER_INFO con_info;
con = GetStdHandle(STD_ERROR_HANDLE);
if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
/* Remember the current attributes so they can be restored after printing */
org_attributes = con_info.wAttributes;
use_color = 1;
} else
use_color = 0;
}
if (use_color)
/* Replace only the low four attribute bits with the requested color */
SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
if (use_color < 0) {
/* Color needs TERM to be set to something other than "dumb" and fd 2 to be a tty */
const char *term = getenv("TERM");
use_color = term && strcmp(term, "dumb") && isatty(2);
}
if (use_color)
/* Bit 3 of color becomes the first escape parameter, the low three bits the color digit */
fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif
va_start(arg, fmt);
vfprintf(stderr, fmt, arg);
va_end(arg);
if (use_color) {
/* Restore the previous console attributes / reset the escape state */
#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
SetConsoleTextAttribute(con, org_attributes);
#else
fprintf(stderr, "\x1b[0m");
#endif
}
}
/* Free a function tree: every chained version record, both subtrees and the
 * node itself. A NULL tree is accepted. */
static void destroy_func_tree(CheckasmFunc *f)
{
    CheckasmFuncVersion *ver, *following;

    if (!f)
        return;

    /* The first version lives inside the node; only chained ones were allocated */
    for (ver = f->versions.next; ver; ver = following) {
        following = ver->next;
        free(ver);
    }

    destroy_func_tree(f->child[0]);
    destroy_func_tree(f->child[1]);
    free(f);
}
/* Return a zero-filled allocation of size bytes. On failure, print a
 * message, release the function tree and terminate with exit status 1. */
static void *checkasm_malloc(size_t size)
{
    void *block = calloc(1, size);

    if (block)
        return block;

    fprintf(stderr, "checkasm: malloc failed\n");
    destroy_func_tree(state.funcs);
    exit(1);
}
/* Map a cpu flag mask to its name suffix. The cpus[] table is scanned from
 * the last entry backwards; "c" is returned when no entry matches. */
static const char *cpu_suffix(int cpu)
{
    int idx;

    for (idx = (int)FF_ARRAY_ELEMS(cpus) - 1; idx >= 0; idx--) {
        if (cpu & cpus[idx].flag)
            return cpus[idx].suffix;
    }
    return "c";
}
/* qsort() comparator ordering uint16_t samples in ascending order */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    return lhs - rhs;
}
/* Measure the overhead of the timing code (in decicycles).
 *
 * Takes 10000 back-to-back start/stop measurements, sorts them and sums the
 * 5000 samples in the middle. Dividing that sum by 500 gives ten times the
 * mean of those samples. */
static int measure_nop_time(void)
{
uint16_t nops[10000];
int i, nop_sum = 0;
/* Referenced by the Linux perf variants of PERF_START/PERF_STOP */
av_unused const int sysfd = state.sysfd;
uint64_t t = 0;
for (i = 0; i < 10000; i++) {
PERF_START(t);
PERF_STOP(t);
nops[i] = t; /* stored as uint16_t, so larger readings are truncated */
}
qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
/* Skip the lowest and highest quarter of the sorted samples */
for (i = 2500; i < 7500; i++)
nop_sum += nops[i];
return nop_sum / 500;
}
/* Print benchmark results for the whole tree, visiting child[0], then the
 * node, then child[1]. Nodes that only have a version registered with cpu
 * flag 0 are skipped, and so are versions that were never benchmarked. */
static void print_benchs(CheckasmFunc *f)
{
    CheckasmFuncVersion *ver;

    if (!f)
        return;

    print_benchs(f->child[0]);

    /* Only print functions with at least one assembly version */
    if (f->versions.cpu || f->versions.next) {
        for (ver = &f->versions; ver; ver = ver->next) {
            CheckasmPerf *perf = &ver->perf;

            if (!perf->iterations)
                continue;

            int decicycles = (10*perf->cycles/perf->iterations - state.nop_time) / 4;
            printf("%s_%s: %d.%d\n", f->name, cpu_suffix(ver->cpu), decicycles/10, decicycles%10);
        }
    }

    print_benchs(f->child[1]);
}
/* ASCIIbetical sort except preserving natural order for numbers.
 *
 * Returns zero for identical names, otherwise a value whose sign gives the
 * ordering of a relative to b. When the names first differ inside or right
 * after a run of digits and one of the two digit runs is longer, the name
 * with the longer run sorts later regardless of the differing characters. */
static int cmp_func_names(const char *a, const char *b)
{
const char *start = a;
int ascii_diff, digit_diff;
/* Advance to the first differing character (or the common terminator),
 * keeping the byte difference at that position */
for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++);
/* Skip positions where both names still have a digit */
for (; av_isdigit(*a) && av_isdigit(*b); a++, b++);
/* Previous character was a digit and only one name continues with a digit:
 * that name holds the longer number */
if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b)))
return digit_diff;
return ascii_diff;
}
/* Rotate the subtree rooted at f in direction dir and return its new root.
 * The child on the opposite side moves up, takes over f's color, and f is
 * colored red (0). */
static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir)
{
    const int opposite = dir ^ 1;
    CheckasmFunc *pivot = f->child[opposite];

    f->child[opposite] = pivot->child[dir];
    pivot->child[dir]  = f;

    pivot->color = f->color;
    f->color     = 0;

    return pivot;
}
#define is_red(f) ((f) && !(f)->color)

/* Balance a left-leaning red-black tree at the specified node.
 * Two red children are recolored black and the node's color is flipped;
 * then a lone red child[1] is rotated left, or two consecutive red child[0]
 * links are rotated right. *root is updated when a rotation happens. */
static void balance_tree(CheckasmFunc **root)
{
    CheckasmFunc *node = *root;

    if (is_red(node->child[0]) && is_red(node->child[1])) {
        node->color ^= 1;
        node->child[0]->color = 1;
        node->child[1]->color = 1;
    }

    if (is_red(node->child[1]) && !is_red(node->child[0])) {
        *root = rotate_tree(node, 0); /* Rotate left */
    } else if (is_red(node->child[0]) && is_red(node->child[0]->child[0])) {
        *root = rotate_tree(node, 1); /* Rotate right */
    }
}
/* Get a node with the specified name, creating it if it doesn't exist.
 *
 * root points at the link to search from and may be rewritten when a new
 * node is inserted or the subtree is rebalanced. Allocation failure does not
 * return: checkasm_malloc() exits the process. */
static CheckasmFunc *get_func(CheckasmFunc **root, const char *name)
{
CheckasmFunc *f = *root;
if (f) {
/* Search the tree for a matching node */
int cmp = cmp_func_names(name, f->name);
if (cmp) {
/* Descend into child[1] when name sorts after this node, else child[0] */
f = get_func(&f->child[cmp > 0], name);
/* Rebalance the tree on the way up if a new node was inserted.
 * A fresh node is zero-filled, so its versions.func is still NULL here. */
if (!f->versions.func)
balance_tree(root);
}
} else {
/* Allocate and insert a new node into the tree */
int name_length = strlen(name);
/* name[1] in the struct already provides room for the terminator */
f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length);
memcpy(f->name, name, name_length + 1);
}
return f;
}
/* Add flag to the set of forced cpu flags (limited to what the host reports)
 * and run the tests. With a non-zero flag the tests are skipped when the
 * effective flag set did not change; with flag 0 they always run. If a test
 * name was selected, only that test is run. */
static void check_cpu_flag(const char *name, int flag)
{
    const int previous = state.cpu_flag;
    int idx;

    flag |= previous;

    /* Lift the forced mask first so av_get_cpu_flags() reports the host's flags */
    av_force_cpu_flags(-1);
    state.cpu_flag = flag & av_get_cpu_flags();
    av_force_cpu_flags(state.cpu_flag);

    if (flag && state.cpu_flag == previous)
        return;

    state.cpu_flag_name = name;
    for (idx = 0; tests[idx].func; idx++) {
        if (!state.test_name || !strcmp(tests[idx].name, state.test_name)) {
            state.current_test_name = tests[idx].name;
            tests[idx].func();
        }
    }
}
/* Print the pending CPU flag heading in yellow, then clear it so that it is
 * printed only once. */
static void print_cpu_name(void)
{
    const char *pending = state.cpu_flag_name;

    if (!pending)
        return;

    color_printf(COLOR_YELLOW, "%s:\n", pending);
    state.cpu_flag_name = NULL;
}
#if CONFIG_LINUX_PERF
/* Open a hardware cycle counter through perf_event_open and store its
 * descriptor in state.sysfd. The counter starts disabled and excludes kernel
 * and hypervisor cycles. Returns 0 on success, -1 after reporting the error. */
static int bench_init_linux(void)
{
    struct perf_event_attr attr = {
        .type           = PERF_TYPE_HARDWARE,
        .size           = sizeof(struct perf_event_attr),
        .config         = PERF_COUNT_HW_CPU_CYCLES,
        .disabled       = 1, // start counting only on demand
        .exclude_kernel = 1,
        .exclude_hv     = 1,
    };

    printf("benchmarking with Linux Perf Monitoring API\n");

    state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (state.sysfd != -1)
        return 0;

    perror("syscall");
    return -1;
}
#endif
#if !CONFIG_LINUX_PERF
/* Announce benchmarking through AV_READ_TIME and return 0, or report that
 * --bench is unsupported and return -1 when AV_READ_TIME is not defined. */
static int bench_init_ffmpeg(void)
{
#ifndef AV_READ_TIME
    fprintf(stderr, "checkasm: --bench is not supported on your system\n");
    return -1;
#else
    printf("benchmarking with native FFmpeg timers\n");
    return 0;
#endif
}
#endif
/* Set up the configured timing backend, then measure and print the overhead
 * of the timing code. Returns 0 on success or the backend's negative error. */
static int bench_init(void)
{
    int ret;

#if CONFIG_LINUX_PERF
    ret = bench_init_linux();
#else
    ret = bench_init_ffmpeg();
#endif

    if (ret >= 0) {
        state.nop_time = measure_nop_time();
        printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
        ret = 0;
    }

    return ret;
}
/* Close the perf descriptor if one is stored in state.sysfd (Linux perf
 * builds only; a no-op otherwise). */
static void bench_uninit(void)
{
#if CONFIG_LINUX_PERF
    if (state.sysfd <= 0)
        return;

    close(state.sysfd);
#endif
}
/* Program entry point.
 *
 * Arguments, in any order:
 *   --bench[=PATTERN]  benchmark functions whose name starts with PATTERN
 *   --test=NAME        run only the test called NAME
 *   --verbose, -v      enable verbose failure output
 *   anything else      parsed as the decimal random seed
 *
 * Returns 1 when a test failed or benchmarking could not be initialized,
 * otherwise 0. */
int main(int argc, char *argv[])
{
unsigned int seed = av_get_random_seed();
int i, ret = 0;
#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
checkasm_checked_call = checkasm_checked_call_vfp;
#endif
/* Both tables hold only their terminator when nothing is configured */
if (!tests[0].func || !cpus[0].flag) {
fprintf(stderr, "checkasm: no tests to perform\n");
return 0;
}
while (argc > 1) {
if (!strncmp(argv[1], "--bench", 7)) {
if (bench_init() < 0)
return 1;
if (argv[1][7] == '=') {
state.bench_pattern = argv[1] + 8;
state.bench_pattern_len = strlen(state.bench_pattern);
} else
/* Empty pattern of length 0: matches every function name */
state.bench_pattern = "";
} else if (!strncmp(argv[1], "--test=", 7)) {
state.test_name = argv[1] + 7;
} else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
state.verbose = 1;
} else {
seed = strtoul(argv[1], NULL, 10);
}
argc--;
argv++;
}
fprintf(stderr, "checkasm: using random seed %u\n", seed);
av_lfg_init(&checkasm_lfg, seed);
/* First pass with no cpu flags, then one pass per entry in cpus[] */
check_cpu_flag(NULL, 0);
for (i = 0; cpus[i].flag; i++)
check_cpu_flag(cpus[i].name, cpus[i].flag);
if (state.num_failed) {
fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
ret = 1;
} else {
fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
/* Benchmark numbers are only printed when every test passed */
if (state.bench_pattern) {
print_benchs(state.funcs);
}
}
destroy_func_tree(state.funcs);
bench_uninit();
return ret;
}
/* Decide whether or not the specified function needs to be tested and
 * allocate/initialize data structures if needed. Returns a pointer to a
 * reference function if the function should be tested, otherwise NULL.
 *
 * name and the following arguments form a printf-style function name, which
 * must fit in 255 characters. The returned reference is the most recently
 * registered version that has not failed, or func itself when none exists. */
void *checkasm_check_func(void *func, const char *name, ...)
{
char name_buf[256];
void *ref = func;
CheckasmFuncVersion *v;
int name_length;
va_list arg;
va_start(arg, name);
name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
va_end(arg);
/* Reject missing functions and empty, failed or truncated names */
if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
return NULL;
state.current_func = get_func(&state.funcs, name_buf);
/* The root is colored black (1) after every lookup */
state.funcs->color = 1;
v = &state.current_func->versions;
if (v->func) {
CheckasmFuncVersion *prev;
do {
/* Only test functions that haven't already been tested */
if (v->func == func)
return NULL;
/* Later passing versions replace earlier ones as the reference */
if (v->ok)
ref = v->func;
prev = v;
} while ((v = v->next));
/* New implementation: append a version record to the list */
v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
}
v->func = func;
v->ok = 1;
v->cpu = state.cpu_flag;
state.current_func_ver = v;
/* Versions registered during the pass without cpu flags are not counted */
if (state.cpu_flag)
state.num_checked++;
return ref;
}
/* Return 1 when the current function should be benchmarked: no test has
 * failed so far, benchmarking was requested, and the function name starts
 * with the benchmark pattern. Returns 0 otherwise. */
int checkasm_bench_func(void)
{
    if (state.num_failed || !state.bench_pattern)
        return 0;

    return !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
}
/* Indicate that the current test has failed */
void checkasm_fail_func(const char *msg, ...)
{
if (state.current_func_ver->cpu && state.current_func_ver->ok) {
va_list arg;
print_cpu_name();
fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
va_start(arg, msg);
vfprintf(stderr, msg, arg);
va_end(arg);
fprintf(stderr, ")\n");
state.current_func_ver->ok = 0;
state.num_failed++;
}
}
/* Get the benchmark context of the current function */
CheckasmPerf *checkasm_get_perf_context(void)
{
CheckasmPerf *perf = &state.current_func_ver->perf;
memset(perf, 0, sizeof(*perf));
perf->sysfd = state.sysfd;
return perf;
}
/* Print the outcome of all tests performed since the last time this function was called.
 *
 * name and the following arguments form a printf-style label printed after
 * the test name. A line is printed only if functions were counted since the
 * previous report. During the pass without cpu flags nothing is counted, and
 * that pass is used to find the longest label for column alignment. */
void checkasm_report(const char *name, ...)
{
/* Counters at the time of the previous report, and the longest label seen */
static int prev_checked, prev_failed, max_length;
if (state.num_checked > prev_checked) {
int pad_length = max_length + 4;
va_list arg;
print_cpu_name();
/* fprintf/vfprintf return the number of characters written, which is
 * subtracted from the column width to get the remaining padding */
pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
va_start(arg, name);
pad_length -= vfprintf(stderr, name, arg);
va_end(arg);
fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
/* No new failures since the previous report means this group passed */
if (state.num_failed == prev_failed)
color_printf(COLOR_GREEN, "OK");
else
color_printf(COLOR_RED, "FAILED");
fprintf(stderr, "]\n");
prev_checked = state.num_checked;
prev_failed = state.num_failed;
} else if (!state.cpu_flag) {
/* Calculate the amount of padding required to make the output vertically aligned */
int length = strlen(state.current_test_name);
va_list arg;
va_start(arg, name);
/* vsnprintf with a zero-size buffer only computes the formatted length */
length += vsnprintf(NULL, 0, name, arg);
va_end(arg);
if (length > max_length)
max_length = length;
}
}
/* Define checkasm_check_<type>(), which compares two 2-D buffers of `type`.
 *
 * The generated function compares h rows of w elements each. stride1 and
 * stride2 are given in bytes and converted to elements internally. It
 * returns 0 when all rows match. Otherwise it reports a failure tagged with
 * file:line and returns 1; in verbose mode it first prints, for every row,
 * the elements of buf1, the elements of buf2 (both formatted with `fmt`) and
 * a map marking differing elements with 'x' and equal ones with '.'. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *const file, const int line, \
const type *buf1, ptrdiff_t stride1, \
const type *buf2, ptrdiff_t stride2, \
const int w, int h, const char *const name) \
{ \
int y = 0; \
stride1 /= sizeof(*buf1); \
stride2 /= sizeof(*buf2); \
for (y = 0; y < h; y++) \
if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
break; \
if (y == h) \
return 0; \
checkasm_fail_func("%s:%d", file, line); \
if (!state.verbose) \
return 1; \
fprintf(stderr, "%s:\n", name); \
while (h--) { \
for (int x = 0; x < w; x++) \
fprintf(stderr, " " fmt, buf1[x]); \
fprintf(stderr, " "); \
for (int x = 0; x < w; x++) \
fprintf(stderr, " " fmt, buf2[x]); \
fprintf(stderr, " "); \
for (int x = 0; x < w; x++) \
fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
buf1 += stride1; \
buf2 += stride2; \
fprintf(stderr, "\n"); \
} \
return 1; \
}
/* Instantiations: hexadecimal output for unsigned types, decimal for signed */
DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")

282
externals/ffmpeg/tests/checkasm/checkasm.h vendored Executable file
View File

@@ -0,0 +1,282 @@
/*
* Assembly testing and benchmarking tool
* Copyright (c) 2015 Henrik Gramner
* Copyright (c) 2008 Loren Merritt
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef TESTS_CHECKASM_CHECKASM_H
#define TESTS_CHECKASM_CHECKASM_H
#include <stdint.h>
#include "config.h"
#if CONFIG_LINUX_PERF
#include <unistd.h> // read(3)
#include <sys/ioctl.h>
#include <asm/unistd.h>
#include <linux/perf_event.h>
#endif
#include "libavutil/avstring.h"
#include "libavutil/cpu.h"
#include "libavutil/internal.h"
#include "libavutil/lfg.h"
#include "libavutil/timer.h"
/* Entry points of the individual tests. Each one is defined in its own
 * source file and referenced from the tests[] table in checkasm.c. */
void checkasm_check_aacpsdsp(void);
void checkasm_check_afir(void);
void checkasm_check_alacdsp(void);
void checkasm_check_audiodsp(void);
void checkasm_check_blend(void);
void checkasm_check_blockdsp(void);
void checkasm_check_bswapdsp(void);
void checkasm_check_colorspace(void);
void checkasm_check_exrdsp(void);
void checkasm_check_fixed_dsp(void);
void checkasm_check_flacdsp(void);
void checkasm_check_float_dsp(void);
void checkasm_check_fmtconvert(void);
void checkasm_check_g722dsp(void);
void checkasm_check_h264dsp(void);
void checkasm_check_h264pred(void);
void checkasm_check_h264qpel(void);
void checkasm_check_hevc_add_res(void);
void checkasm_check_hevc_idct(void);
void checkasm_check_hevc_sao(void);
void checkasm_check_huffyuvdsp(void);
void checkasm_check_jpeg2000dsp(void);
void checkasm_check_llviddsp(void);
void checkasm_check_llviddspenc(void);
void checkasm_check_nlmeans(void);
void checkasm_check_opusdsp(void);
void checkasm_check_pixblockdsp(void);
void checkasm_check_sbrdsp(void);
void checkasm_check_synth_filter(void);
void checkasm_check_sw_rgb(void);
void checkasm_check_sw_scale(void);
void checkasm_check_utvideodsp(void);
void checkasm_check_v210dec(void);
void checkasm_check_v210enc(void);
void checkasm_check_vf_eq(void);
void checkasm_check_vf_gblur(void);
void checkasm_check_vf_hflip(void);
void checkasm_check_vf_threshold(void);
void checkasm_check_vp8dsp(void);
void checkasm_check_vp9dsp(void);
void checkasm_check_videodsp(void);
/* Driver functions behind the check_func(), bench_new(), fail() and report()
 * macros. Tests normally use the macros rather than calling these directly. */
struct CheckasmPerf;
/* Returns the reference implementation to compare against, or NULL to skip */
void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3);
/* Nonzero when the current function should be benchmarked */
int checkasm_bench_func(void);
/* Marks the current function version as failed; msg is printf-style */
void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
/* Returns the freshly reset benchmark counters of the current function */
struct CheckasmPerf *checkasm_get_perf_context(void);
/* Prints the result line for the tests run since the previous report */
void checkasm_report(const char *name, ...) av_printf_format(1, 2);
/* float compare utilities.
 * All of them return 1 when the values are considered close enough and 0
 * otherwise. The *_array variants check the first len element pairs. */
/* Bit patterns of a and b differ by at most max_ulp */
int float_near_ulp(float a, float b, unsigned max_ulp);
/* |a - b| < eps; prints a diagnostic to stderr on failure */
int float_near_abs_eps(float a, float b, float eps);
/* Passes when either of the two checks above passes */
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
unsigned len);
int float_near_abs_eps_array(const float *a, const float *b, float eps,
unsigned len);
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
unsigned max_ulp, unsigned len);
/* |a - b| < eps for doubles */
int double_near_abs_eps(double a, double b, double eps);
int double_near_abs_eps_array(const double *a, const double *b, double eps,
unsigned len);
/* Shared PRNG; rnd() draws the next value from it */
extern AVLFG checkasm_lfg;
#define rnd() av_lfg_get(&checkasm_lfg)
/* Set by check_func(): the reference implementation and the one under test.
 * Being static, every file that includes this header has its own pair. */
static av_unused void *func_ref, *func_new;
#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
/* Decide whether or not the specified function needs to be tested.
 * Evaluates to the reference function pointer, which is NULL when the
 * function should be skipped. */
#define check_func(func, ...) (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
/* Declare the function prototype. The first argument is the return value, the remaining
 * arguments are the function parameters. Naming parameters is optional.
 * Besides whatever declare_new*() expands to on this platform, this defines
 * the func_type typedef that call_ref() and bench_new() cast to. */
#define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
#define declare_func_float(ret, ...) declare_new_float(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
/* Indicate that the current test has failed; the message is "file:line" of the call site */
#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
/* Print the test outcome */
#define report checkasm_report
/* Call the reference function */
#define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
#if ARCH_X86 && HAVE_X86ASM
/* Verifies that clobbered callee-saved registers are properly saved and restored
* and that either no MMX registers are touched or emms is issued */
void checkasm_checked_call(void *func, ...);
/* Verifies that clobbered callee-saved registers are properly saved and restored
* and issues emms for asm functions which are not required to do so */
void checkasm_checked_call_emms(void *func, ...);
/* Verifies that clobbered callee-saved registers are properly saved and restored
* but doesn't issue emms. Meant for dsp functions returning float or double */
void checkasm_checked_call_float(void *func, ...);
#if ARCH_X86_64
/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
* This is done by clobbering the stack with junk around the stack pointer and calling the
* assembly function through checked_call() with added dummy arguments which forces all
* real arguments to be passed on the stack and not in registers. For 32-bit arguments the
* upper half of the 64-bit register locations on the stack will now contain junk which will
* cause misbehaving functions to either produce incorrect output or segfault. Note that
* even though this works extremely well in practice, it's technically not guaranteed
* and false negatives is theoretically possible, but there can never be any false positives.
*/
void checkasm_stack_clobber(uint64_t clobber, ...);
#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
= (void *)checkasm_checked_call;
#define declare_new_float(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
= (void *)checkasm_checked_call_float;
#define declare_new_emms(cpu_flags, ret, ...) \
ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \
((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \
(void *)checkasm_checked_call;
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
#elif ARCH_X86_32
#define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
#define declare_new_float(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call_float;
#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \
((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \
(void *)checkasm_checked_call;
#define call_new(...) checked_call(func_new, __VA_ARGS__)
#endif
#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
/* Use a dummy argument, to offset the real parameters by 2, not only 1.
* This makes sure that potential 8-byte-alignment of parameters is kept the same
* even when the extra parameters have been removed. */
void checkasm_checked_call_vfp(void *func, int dummy, ...);
void checkasm_checked_call_novfp(void *func, int dummy, ...);
extern void (*checkasm_checked_call)(void *func, int dummy, ...);
#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__, \
int, int, int, int, int, int, int, int, \
int, int, int, int, int, int, int) = (void *)checkasm_checked_call;
#define call_new(...) checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0)
#elif ARCH_AARCH64 && !defined(__APPLE__)
/* Variadic helpers declared here and used by the two macros below. */
void checkasm_stack_clobber(uint64_t clobber, ...);
void checkasm_checked_call(void *func, ...);
/* declare_new(ret, params...): declares a local function pointer named
 * `checked_call` pointing at checkasm_checked_call. Its prototype is
 * (void *, 7 leading ints, params..., 15 trailing ints) and it returns `ret`. */
#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, int, int, __VA_ARGS__,\
int, int, int, int, int, int, int, int,\
int, int, int, int, int, int, int)\
= (void *)checkasm_checked_call;
/* Constant passed repeatedly to checkasm_stack_clobber() by call_new. */
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
/* call_new(args...): a comma expression that first calls
 * checkasm_stack_clobber() with CLOB values, then calls checked_call with
 * func_new, seven zeros, the real arguments and 15 fixed trailing integers.
 * The value of the expression is the result of checked_call. */
#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0))
#else
/* Fallback branch: the declare_new* macros expand to nothing and call_new
 * invokes func_new directly through a cast to func_type. */
#define declare_new(ret, ...)
#define declare_new_float(ret, ...)
#define declare_new_emms(cpu_flags, ret, ...)
/* Call the function */
#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
#endif
/* Branches above that did not define the _emms / _float variants get them
 * here as plain aliases of declare_new (the cpu_flags argument is dropped). */
#ifndef declare_new_emms
#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
#endif
#ifndef declare_new_float
#define declare_new_float(ret, ...) declare_new(ret, __VA_ARGS__)
#endif
/* Benchmark bookkeeping updated by bench_new(). */
typedef struct CheckasmPerf {
/* File descriptor handed to ioctl()/read() by PERF_START/PERF_STOP when
 * CONFIG_LINUX_PERF is enabled. */
int sysfd;
/* Accumulated timer readings added by bench_new(). */
uint64_t cycles;
/* Counter advanced by bench_new() alongside `cycles`. */
int iterations;
} CheckasmPerf;
#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF

#if CONFIG_LINUX_PERF
/* Counter-based timing: reset and enable the counter behind `sysfd`, then
 * disable it and read its value into `t`. `sysfd` must be in scope at the
 * expansion site (bench_new provides it). */
#define PERF_START(t) do {                      \
    ioctl(sysfd, PERF_EVENT_IOC_RESET, 0);      \
    ioctl(sysfd, PERF_EVENT_IOC_ENABLE, 0);     \
} while (0)
#define PERF_STOP(t) do {                       \
    ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);    \
    read(sysfd, &t, sizeof(t));                 \
} while (0)
#else
/* Timer-based timing: `t` holds the start timestamp, then the elapsed time. */
#define PERF_START(t) t = AV_READ_TIME()
#define PERF_STOP(t)  t = AV_READ_TIME() - t
#endif

/* Benchmark the function.
 *
 * When checkasm_bench_func() is true, func_new is timed BENCH_RUNS times,
 * each sample covering four back-to-back calls. A sample is accepted when it
 * is not the first one and is at most four times the running average
 * (t * tcount <= tsum * 4); accepted samples are summed in tsum and counted
 * in tcount.
 *
 * The filtered totals are what gets added to the perf context. Previously
 * only the last raw sample `t` was added and `iterations` was bumped by one,
 * which threw away the filtering above and let a single outlier dominate. */
#define bench_new(...)\
    do {\
        if (checkasm_bench_func()) {\
            struct CheckasmPerf *perf = checkasm_get_perf_context();\
            av_unused const int sysfd = perf->sysfd;\
            func_type *tfunc = func_new;\
            uint64_t tsum = 0;\
            int ti, tcount = 0;\
            uint64_t t = 0; \
            for (ti = 0; ti < BENCH_RUNS; ti++) {\
                PERF_START(t);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                PERF_STOP(t);\
                if (t*tcount <= tsum*4 && ti > 0) {\
                    tsum += t;\
                    tcount++;\
                }\
            }\
            emms_c();\
            perf->cycles     += tsum;\
            perf->iterations += tcount;\
        }\
    } while (0)
#else
/* No timing source available: benchmarking compiles to nothing. */
#define bench_new(...) while(0)
#define PERF_START(t) while(0)
#define PERF_STOP(t) while(0)
#endif
/* Expands to the prototype of checkasm_check_<type>(): it takes the call
 * site's file and line, two buffers of `type` with their strides, a width, a
 * height and a name, and returns int. */
#define DECL_CHECKASM_CHECK_FUNC(type) \
int checkasm_check_##type(const char *const file, const int line, \
const type *const buf1, const ptrdiff_t stride1, \
const type *const buf2, const ptrdiff_t stride2, \
const int w, const int h, const char *const name)
DECL_CHECKASM_CHECK_FUNC(uint8_t);
DECL_CHECKASM_CHECK_FUNC(uint16_t);
DECL_CHECKASM_CHECK_FUNC(int16_t);
DECL_CHECKASM_CHECK_FUNC(int32_t);
/* Two-level paste so that macro arguments are expanded before concatenation. */
#define PASTE(a,b) a ## b
#define CONCAT(a,b) PASTE(a,b)
/* checkasm_check(prefix, ...) calls checkasm_check_<prefix>() with __FILE__
 * and __LINE__ prepended to the remaining arguments. */
#define checkasm_check(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
#endif /* TESTS_CHECKASM_CHECKASM_H */

91
externals/ffmpeg/tests/checkasm/exrdsp.c vendored Executable file
View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2017 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/exrdsp.h"
#include "libavutil/intreadwrite.h"
/* Number of payload bytes processed by each test. */
#define BUF_SIZE 5120
/* Payload plus two paddings. The expansion is parenthesised so the macro
 * stays a single operand in any expression (e.g. 2 * PADDED_BUF_SIZE). */
#define PADDED_BUF_SIZE (BUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE * 2)

/* Fill the first BUF_SIZE bytes of the local buffer `src` with random data,
 * one 32-bit word at a time. Requires a `src` buffer in scope. */
#define randomize_buffers()                         \
    do {                                            \
        int i;                                      \
        for (i = 0; i < BUF_SIZE; i += 4) {         \
            uint32_t r = rnd();                     \
            AV_WN32A(src + i, r);                   \
        }                                           \
    } while (0)
static void check_reorder_pixels(void) {
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t size);
memset(src, 0, PADDED_BUF_SIZE);
memset(dst_ref, 0, PADDED_BUF_SIZE);
memset(dst_new, 0, PADDED_BUF_SIZE);
randomize_buffers();
call_ref(dst_ref, src, BUF_SIZE);
call_new(dst_new, src, BUF_SIZE);
if (memcmp(dst_ref, dst_new, BUF_SIZE))
fail();
bench_new(dst_new, src, BUF_SIZE);
}
static void check_predictor(void) {
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
declare_func(void, uint8_t *src, ptrdiff_t size);
memset(src, 0, PADDED_BUF_SIZE);
randomize_buffers();
memcpy(dst_ref, src, PADDED_BUF_SIZE);
memcpy(dst_new, src, PADDED_BUF_SIZE);
call_ref(dst_ref, BUF_SIZE);
call_new(dst_new, BUF_SIZE);
if (memcmp(dst_ref, dst_new, BUF_SIZE))
fail();
bench_new(dst_new, BUF_SIZE);
}
/* Entry point: initialise an ExrDSPContext and check each of its function
 * pointers, reporting once per function. */
void checkasm_check_exrdsp(void)
{
    ExrDSPContext dsp;

    ff_exrdsp_init(&dsp);

    if (check_func(dsp.reorder_pixels, "reorder_pixels")) {
        check_reorder_pixels();
    }
    report("reorder_pixels");

    if (check_func(dsp.predictor, "predictor")) {
        check_predictor();
    }
    report("predictor");
}

159
externals/ffmpeg/tests/checkasm/fixed_dsp.c vendored Executable file
View File

@@ -0,0 +1,159 @@
/*
* Copyright (c) 2015 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "checkasm.h"
#include "libavutil/common.h"
#include "libavutil/fixed_dsp.h"
#include "libavutil/internal.h"
#include "libavutil/mem.h"
/* Number of elements in every test buffer. */
#define BUF_SIZE 256

/* Fill the local arrays src0, src1 and src2 (in that order, per element) with
 * random values sign-extended from 24 bits. */
#define randomize_buffers()                                 \
    do {                                                    \
        int idx;                                            \
        for (idx = 0; idx < BUF_SIZE; idx++) {              \
            src0[idx] = sign_extend(rnd(), 24);             \
            src1[idx] = sign_extend(rnd(), 24);             \
            src2[idx] = sign_extend(rnd(), 24);             \
        }                                                   \
    } while (0)
static void check_vector_fmul(const int *src0, const int *src1)
{
LOCAL_ALIGNED_32(int, ref, [BUF_SIZE]);
LOCAL_ALIGNED_32(int, new, [BUF_SIZE]);
declare_func(void, int *dst, const int *src0, const int *src1, int len);
call_ref(ref, src0, src1, BUF_SIZE);
call_new(new, src0, src1, BUF_SIZE);
if (memcmp(ref, new, BUF_SIZE * sizeof(int)))
fail();
bench_new(new, src0, src1, BUF_SIZE);
}
static void check_vector_fmul_add(const int *src0, const int *src1, const int *src2)
{
LOCAL_ALIGNED_32(int, ref, [BUF_SIZE]);
LOCAL_ALIGNED_32(int, new, [BUF_SIZE]);
declare_func(void, int *dst, const int *src0, const int *src1, const int *src2, int len);
call_ref(ref, src0, src1, src2, BUF_SIZE);
call_new(new, src0, src1, src2, BUF_SIZE);
if (memcmp(ref, new, BUF_SIZE * sizeof(int)))
fail();
bench_new(new, src0, src1, src2, BUF_SIZE);
}
static void check_vector_fmul_window(const int32_t *src0, const int32_t *src1, const int32_t *win)
{
LOCAL_ALIGNED_32(int32_t, ref, [BUF_SIZE]);
LOCAL_ALIGNED_32(int32_t, new, [BUF_SIZE]);
declare_func(void, int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len);
call_ref(ref, src0, src1, win, BUF_SIZE / 2);
call_new(new, src0, src1, win, BUF_SIZE / 2);
if (memcmp(ref, new, BUF_SIZE * sizeof(int32_t)))
fail();
bench_new(new, src0, src1, win, BUF_SIZE / 2);
}
static void check_vector_fmul_window_scaled(const int32_t *src0, const int32_t *src1, const int32_t *win)
{
LOCAL_ALIGNED_16(int16_t, ref, [BUF_SIZE]);
LOCAL_ALIGNED_16(int16_t, new, [BUF_SIZE]);
declare_func(void, int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits);
call_ref(ref, src0, src1, win, BUF_SIZE / 2, 2);
call_new(new, src0, src1, win, BUF_SIZE / 2, 2);
if (memcmp(ref, new, BUF_SIZE * sizeof(int16_t)))
fail();
bench_new(new, src0, src1, win, BUF_SIZE / 2, 2);
}
static void check_butterflies(const int *src0, const int *src1)
{
LOCAL_ALIGNED_16(int, ref0, [BUF_SIZE]);
LOCAL_ALIGNED_16(int, ref1, [BUF_SIZE]);
LOCAL_ALIGNED_16(int, new0, [BUF_SIZE]);
LOCAL_ALIGNED_16(int, new1, [BUF_SIZE]);
declare_func(void, int *av_restrict src0, int *av_restrict src1, int len);
memcpy(ref0, src0, BUF_SIZE * sizeof(*src0));
memcpy(ref1, src1, BUF_SIZE * sizeof(*src1));
memcpy(new0, src0, BUF_SIZE * sizeof(*src0));
memcpy(new1, src1, BUF_SIZE * sizeof(*src1));
call_ref(ref0, ref1, BUF_SIZE);
call_new(new0, new1, BUF_SIZE);
if (memcmp(ref0, new0, BUF_SIZE * sizeof(*ref0)) ||
memcmp(ref1, new1, BUF_SIZE * sizeof(*ref1)))
fail();
memcpy(new0, src0, BUF_SIZE * sizeof(*src0));
memcpy(new1, src1, BUF_SIZE * sizeof(*src1));
bench_new(new0, new1, BUF_SIZE);
}
/* Both implementations must return exactly the same integer for the same
 * pair of BUF_SIZE-element inputs. */
static void check_scalarproduct_fixed(const int *src0, const int *src1)
{
    int expected, actual;

    declare_func(int, const int *src0, const int *src1, int len);

    expected = call_ref(src0, src1, BUF_SIZE);
    actual   = call_new(src0, src1, BUF_SIZE);
    if (expected != actual)
        fail();

    bench_new(src0, src1, BUF_SIZE);
}
/* Entry point for the fixed-point DSP checks: allocates the context, fills
 * three shared source buffers with random data and runs every check whose
 * function pointer check_func() selects. */
void checkasm_check_fixed_dsp(void)
{
    LOCAL_ALIGNED_32(int32_t, src0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int32_t, src1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int32_t, src2, [BUF_SIZE]);
    AVFixedDSPContext *fdsp = avpriv_alloc_fixed_dsp(1);

    /* The context is dereferenced by every check below; bail out instead of
     * crashing if the allocation failed (same handling as the float_dsp
     * test). */
    if (!fdsp) {
        fprintf(stderr, "fixed_dsp: Out of memory error\n");
        return;
    }

    randomize_buffers();

    if (check_func(fdsp->vector_fmul, "vector_fmul_fixed"))
        check_vector_fmul(src0, src1);
    if (check_func(fdsp->vector_fmul_add, "vector_fmul_add_fixed"))
        check_vector_fmul_add(src0, src1, src2);
    /* vector_fmul_reverse is checked through check_vector_fmul(), which
     * declares the (dst, src0, src1, len) prototype. */
    if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse_fixed"))
        check_vector_fmul(src0, src1);
    if (check_func(fdsp->vector_fmul_window, "vector_fmul_window_fixed"))
        check_vector_fmul_window(src0, src1, src2);
    if (check_func(fdsp->vector_fmul_window_scaled, "vector_fmul_window_scaled_fixed"))
        check_vector_fmul_window_scaled(src0, src1, src2);
    report("vector_fmul");

    if (check_func(fdsp->butterflies_fixed, "butterflies_fixed"))
        check_butterflies(src0, src1);
    report("butterflies_fixed");

    if (check_func(fdsp->scalarproduct_fixed, "scalarproduct_fixed"))
        check_scalarproduct_fixed(src0, src1);
    report("scalarproduct_fixed");

    av_freep(&fdsp);
}

90
externals/ffmpeg/tests/checkasm/flacdsp.c vendored Executable file
View File

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2015 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/flacdsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Bytes per channel buffer and the largest channel count exercised. */
#define BUF_SIZE 256
#define MAX_CHANNELS 8

/* Write the same random 32-bit words into ref_src[ch] and new_src[ch] for
 * every channel. Each word is masked to its low (bits - 2) bits. Uses
 * `channels`, `bits`, `ref_src` and `new_src` from the expansion site. */
#define randomize_buffers()                                             \
    do {                                                                \
        int pos, ch;                                                    \
        for (pos = 0; pos < BUF_SIZE; pos += 4) {                       \
            for (ch = 0; ch < channels; ch++) {                         \
                uint32_t r = rnd() & ((1 << (bits - 2)) - 1);           \
                AV_WN32A(ref_src[ch] + pos, r);                         \
                AV_WN32A(new_src[ch] + pos, r);                         \
            }                                                           \
        }                                                               \
    } while (0)
/* Run one decorrelate function in its reference and tested version on
 * identical random input. The output is compared over BUF_SIZE * channels / 2
 * bytes for 16-bit output and BUF_SIZE * channels bytes otherwise; the source
 * buffers are compared too. */
static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **new_dst, uint8_t **new_src,
                              int channels, int bits)
{
    declare_func(void, uint8_t **out, int32_t **in, int channels, int len, int shift);

    randomize_buffers();

    call_ref(ref_dst, (int32_t **)ref_src, channels, BUF_SIZE / sizeof(int32_t), 8);
    call_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);

    if (memcmp(*ref_dst, *new_dst,
               bits == 16 ? BUF_SIZE * (channels/2) : BUF_SIZE * channels) != 0 ||
        memcmp(*ref_src, *new_src, BUF_SIZE * channels) != 0)
        fail();

    bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
}
/* Entry point: for both sample formats, check decorrelate[0..2] after a
 * 2-channel init, then decorrelate[0] after re-initialising with 2, 4, 6 and
 * 8 channels. */
void checkasm_check_flacdsp(void)
{
    static const char * const names[3] = { "ls", "rs", "ms" };
    static const struct {
        enum AVSampleFormat fmt;
        int bits;
    } fmts[] = {
        { AV_SAMPLE_FMT_S16, 16 },
        { AV_SAMPLE_FMT_S32, 32 },
    };
    LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
    LOCAL_ALIGNED_16(uint8_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
    LOCAL_ALIGNED_16(uint8_t, new_dst, [BUF_SIZE*MAX_CHANNELS]);
    LOCAL_ALIGNED_16(uint8_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
    uint8_t *ref_src[MAX_CHANNELS];
    uint8_t *new_src[MAX_CHANNELS];
    FLACDSPContext h;
    int f, k;

    /* One BUF_SIZE-byte slice of the backing buffer per channel. */
    for (k = 0; k < MAX_CHANNELS; k++) {
        ref_src[k] = &ref_buf[BUF_SIZE*k];
        new_src[k] = &new_buf[BUF_SIZE*k];
    }

    for (f = 0; f < 2; f++) {
        ff_flacdsp_init(&h, fmts[f].fmt, 2, 0);
        for (k = 0; k < 3; k++) {
            if (check_func(h.decorrelate[k], "flac_decorrelate_%s_%d", names[k], fmts[f].bits))
                check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, 2, fmts[f].bits);
        }

        for (k = 2; k <= MAX_CHANNELS; k += 2) {
            ff_flacdsp_init(&h, fmts[f].fmt, k, 0);
            if (check_func(h.decorrelate[0], "flac_decorrelate_indep%d_%d", k, fmts[f].bits))
                check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, k, fmts[f].bits);
        }
    }

    report("decorrelate");
}

338
externals/ffmpeg/tests/checkasm/float_dsp.c vendored Executable file
View File

@@ -0,0 +1,338 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "config.h"
#include <float.h>
#include <stdint.h>
#include "libavutil/float_dsp.h"
#include "libavutil/internal.h"
#include "checkasm.h"
/* Number of elements in every test vector. */
#define LEN 256

/* Fill buf[0..LEN-1] with values obtained from av_bmg_get() on checkasm_lfg,
 * two per call, each scaled by stddev and offset by mean.
 *
 * The expansion deliberately has no trailing semicolon, so that
 * `randomize_buffer(x);` is exactly one statement and is safe inside an
 * unbraced if/else. The argument is parenthesised so any pointer expression
 * may be passed. */
#define randomize_buffer(buf)                                 \
    do {                                                      \
        int i;                                                \
        double bmg[2], stddev = 10.0, mean = 0.0;             \
                                                              \
        for (i = 0; i < LEN; i += 2) {                        \
            av_bmg_get(&checkasm_lfg, bmg);                   \
            (buf)[i]     = bmg[0] * stddev + mean;            \
            (buf)[i + 1] = bmg[1] * stddev + mean;            \
        }                                                     \
    } while (0)
/* Element-wise comparison of reference and tested output. The allowed
 * absolute error grows with the magnitude of the operands and of their
 * product. Only the first mismatch is printed. */
static void test_vector_fmul(const float *src0, const float *src1)
{
    LOCAL_ALIGNED_32(float, cdst, [LEN]);
    LOCAL_ALIGNED_32(float, odst, [LEN]);
    int n;

    declare_func(void, float *dst, const float *src0, const float *src1,
                 int len);

    call_ref(cdst, src0, src1, LEN);
    call_new(odst, src0, src1, LEN);

    for (n = 0; n < LEN; n++) {
        double magnitude = fabs(src0[n]) + fabs(src1[n]) + fabs(src0[n] * src1[n]) + 1.0;

        if (float_near_abs_eps(cdst[n], odst[n], magnitude * 2 * FLT_EPSILON))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    bench_new(odst, src0, src1, LEN);
}
/* Double-precision counterpart of test_vector_fmul: same magnitude-scaled
 * tolerance, based on DBL_EPSILON. */
static void test_vector_dmul(const double *src0, const double *src1)
{
    LOCAL_ALIGNED_32(double, cdst, [LEN]);
    LOCAL_ALIGNED_32(double, odst, [LEN]);
    int n;

    declare_func(void, double *dst, const double *src0, const double *src1,
                 int len);

    call_ref(cdst, src0, src1, LEN);
    call_new(odst, src0, src1, LEN);

    for (n = 0; n < LEN; n++) {
        double magnitude = fabs(src0[n]) + fabs(src1[n]) + fabs(src0[n] * src1[n]) + 1.0;

        if (double_near_abs_eps(cdst[n], odst[n], magnitude * 2 * DBL_EPSILON))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    bench_new(odst, src0, src1, LEN);
}
/* Fixed absolute tolerance used by test_vector_fmul_add(). */
#define ARBITRARY_FMUL_ADD_CONST 0.005

/* Compare the (dst, src0, src1, src2, len) routine against the reference
 * with a fixed absolute tolerance; stop at the first mismatch. */
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
{
    LOCAL_ALIGNED_32(float, cdst, [LEN]);
    LOCAL_ALIGNED_32(float, odst, [LEN]);
    int n;

    declare_func(void, float *dst, const float *src0, const float *src1,
                 const float *src2, int len);

    call_ref(cdst, src0, src1, src2, LEN);
    call_new(odst, src0, src1, src2, LEN);

    for (n = 0; n < LEN; n++) {
        if (float_near_abs_eps(cdst[n], odst[n], ARBITRARY_FMUL_ADD_CONST))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    bench_new(odst, src0, src1, src2, LEN);
}
/* The scalar multiplier is the first element of src1; the tolerance scales
 * with the operands and their product. */
static void test_vector_fmul_scalar(const float *src0, const float *src1)
{
    LOCAL_ALIGNED_16(float, cdst, [LEN]);
    LOCAL_ALIGNED_16(float, odst, [LEN]);
    int n;

    declare_func(void, float *dst, const float *src, float mul, int len);

    call_ref(cdst, src0, src1[0], LEN);
    call_new(odst, src0, src1[0], LEN);

    for (n = 0; n < LEN; n++) {
        double magnitude = fabs(src0[n]) + fabs(src1[0]) + fabs(src0[n] * src1[0]) + 1.0;

        if (float_near_abs_eps(cdst[n], odst[n], magnitude * 2 * FLT_EPSILON))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    bench_new(odst, src0, src1[0], LEN);
}
/* Fixed absolute tolerance used by test_vector_fmul_window(). */
#define ARBITRARY_FMUL_WINDOW_CONST 0.008

/* The routine is called with len = LEN / 2 and all LEN output elements are
 * compared. */
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
{
    LOCAL_ALIGNED_16(float, cdst, [LEN]);
    LOCAL_ALIGNED_16(float, odst, [LEN]);
    int n;

    declare_func(void, float *dst, const float *src0, const float *src1,
                 const float *win, int len);

    call_ref(cdst, src0, src1, win, LEN / 2);
    call_new(odst, src0, src1, win, LEN / 2);

    for (n = 0; n < LEN; n++) {
        if (float_near_abs_eps(cdst[n], odst[n], ARBITRARY_FMUL_WINDOW_CONST))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    bench_new(odst, src0, src1, win, LEN / 2);
}
/* Fixed absolute tolerance used by test_vector_fmac_scalar(). */
#define ARBITRARY_FMAC_SCALAR_CONST 0.005

/* Both destination buffers are preloaded with src2 before the call. The
 * tested destination is reloaded before benchmarking. */
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
{
    LOCAL_ALIGNED_32(float, cdst, [LEN]);
    LOCAL_ALIGNED_32(float, odst, [LEN]);
    const size_t nbytes = LEN * sizeof(*src2);
    int n;

    declare_func(void, float *dst, const float *src, float mul, int len);

    memcpy(cdst, src2, nbytes);
    memcpy(odst, src2, nbytes);

    call_ref(cdst, src0, src1[0], LEN);
    call_new(odst, src0, src1[0], LEN);

    for (n = 0; n < LEN; n++) {
        if (float_near_abs_eps(cdst[n], odst[n], ARBITRARY_FMAC_SCALAR_CONST))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    memcpy(odst, src2, nbytes);
    bench_new(odst, src0, src1[0], LEN);
}
/* Double-precision scalar multiply: the multiplier is src1[0] and the
 * tolerance is magnitude-scaled using DBL_EPSILON. */
static void test_vector_dmul_scalar(const double *src0, const double *src1)
{
    LOCAL_ALIGNED_32(double, cdst, [LEN]);
    LOCAL_ALIGNED_32(double, odst, [LEN]);
    int n;

    declare_func(void, double *dst, const double *src, double mul, int len);

    call_ref(cdst, src0, src1[0], LEN);
    call_new(odst, src0, src1[0], LEN);

    for (n = 0; n < LEN; n++) {
        double magnitude = fabs(src1[0]) + fabs(src0[n]) + fabs(src1[0] * src0[n]) + 1.0;

        if (double_near_abs_eps(cdst[n], odst[n], magnitude * 2 * DBL_EPSILON))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", n,
                cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    bench_new(odst, src0, src1[0], LEN);
}
/* Fixed absolute tolerance used by test_vector_dmac_scalar(). */
#define ARBITRARY_DMAC_SCALAR_CONST 0.005

/* Double-precision counterpart of test_vector_fmac_scalar: destinations are
 * preloaded with src2, and the tested one is reloaded before benchmarking. */
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
{
    LOCAL_ALIGNED_32(double, cdst, [LEN]);
    LOCAL_ALIGNED_32(double, odst, [LEN]);
    const size_t nbytes = LEN * sizeof(*src2);
    int n;

    declare_func(void, double *dst, const double *src, double mul, int len);

    memcpy(cdst, src2, nbytes);
    memcpy(odst, src2, nbytes);

    call_ref(cdst, src0, src1[0], LEN);
    call_new(odst, src0, src1[0], LEN);

    for (n = 0; n < LEN; n++) {
        if (double_near_abs_eps(cdst[n], odst[n], ARBITRARY_DMAC_SCALAR_CONST))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fail();
        break;
    }

    memcpy(odst, src2, nbytes);
    bench_new(odst, src0, src1[0], LEN);
}
/* Both arguments are passed as writable buffers, so each implementation
 * works on its own copies of src0 and src1. A mismatch in either buffer
 * prints both lines for that index. */
static void test_butterflies_float(const float *src0, const float *src1)
{
    LOCAL_ALIGNED_16(float, cdst,  [LEN]);
    LOCAL_ALIGNED_16(float, odst,  [LEN]);
    LOCAL_ALIGNED_16(float, cdst1, [LEN]);
    LOCAL_ALIGNED_16(float, odst1, [LEN]);
    const size_t nbytes = LEN * sizeof(*src0);
    int n;

    declare_func(void, float *av_restrict src0, float *av_restrict src1,
                 int len);

    memcpy(cdst,  src0, nbytes);
    memcpy(cdst1, src1, nbytes);
    memcpy(odst,  src0, nbytes);
    memcpy(odst1, src1, nbytes);

    call_ref(cdst, cdst1, LEN);
    call_new(odst, odst1, LEN);

    for (n = 0; n < LEN; n++) {
        if (float_near_abs_eps(cdst[n], odst[n], FLT_EPSILON) &&
            float_near_abs_eps(cdst1[n], odst1[n], FLT_EPSILON))
            continue;

        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst[n], odst[n], cdst[n] - odst[n]);
        fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                n, cdst1[n], odst1[n], cdst1[n] - odst1[n]);
        fail();
        break;
    }

    /* Restore the inputs overwritten by the checked call. */
    memcpy(odst,  src0, nbytes);
    memcpy(odst1, src1, nbytes);
    bench_new(odst, odst1, LEN);
}
/* Fixed absolute tolerance used by test_scalarproduct_float(). */
#define ARBITRARY_SCALARPRODUCT_CONST 0.2

/* The routine returns a float, so the function is declared through
 * declare_func_float; the two results are compared with a fixed tolerance. */
static void test_scalarproduct_float(const float *src0, const float *src1)
{
    float cprod, oprod;

    declare_func_float(float, const float *src0, const float *src1, int len);

    cprod = call_ref(src0, src1, LEN);
    oprod = call_new(src0, src1, LEN);

    if (float_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST) == 0) {
        fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
                cprod, oprod, cprod - oprod);
        fail();
    }

    bench_new(src0, src1, LEN);
}
/* Entry point for the float DSP checks: allocates the context, randomises
 * six float and three double source vectors, then runs each check whose
 * function pointer check_func() selects, grouped by report(). */
void checkasm_check_float_dsp(void)
{
    LOCAL_ALIGNED_32(float,  src0,     [LEN]);
    LOCAL_ALIGNED_32(float,  src1,     [LEN]);
    LOCAL_ALIGNED_32(float,  src2,     [LEN]);
    LOCAL_ALIGNED_16(float,  src3,     [LEN]);
    LOCAL_ALIGNED_16(float,  src4,     [LEN]);
    LOCAL_ALIGNED_16(float,  src5,     [LEN]);
    LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
    LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
    LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
    AVFloatDSPContext *dsp = avpriv_float_dsp_alloc(1);

    if (dsp == NULL) {
        fprintf(stderr, "floatdsp: Out of memory error\n");
        return;
    }

    /* The buffers are filled in a fixed order. */
    randomize_buffer(src0);
    randomize_buffer(src1);
    randomize_buffer(src2);
    randomize_buffer(src3);
    randomize_buffer(src4);
    randomize_buffer(src5);
    randomize_buffer(dbl_src0);
    randomize_buffer(dbl_src1);
    randomize_buffer(dbl_src2);

    if (check_func(dsp->vector_fmul, "vector_fmul")) {
        test_vector_fmul(src0, src1);
    }
    if (check_func(dsp->vector_fmul_add, "vector_fmul_add")) {
        test_vector_fmul_add(src0, src1, src2);
    }
    if (check_func(dsp->vector_fmul_scalar, "vector_fmul_scalar")) {
        test_vector_fmul_scalar(src3, src4);
    }
    /* vector_fmul_reverse is checked through test_vector_fmul(). */
    if (check_func(dsp->vector_fmul_reverse, "vector_fmul_reverse")) {
        test_vector_fmul(src0, src1);
    }
    if (check_func(dsp->vector_fmul_window, "vector_fmul_window")) {
        test_vector_fmul_window(src3, src4, src5);
    }
    report("vector_fmul");

    if (check_func(dsp->vector_fmac_scalar, "vector_fmac_scalar")) {
        test_vector_fmac_scalar(src0, src1, src2);
    }
    report("vector_fmac");

    if (check_func(dsp->vector_dmul, "vector_dmul")) {
        test_vector_dmul(dbl_src0, dbl_src1);
    }
    if (check_func(dsp->vector_dmul_scalar, "vector_dmul_scalar")) {
        test_vector_dmul_scalar(dbl_src0, dbl_src1);
    }
    report("vector_dmul");

    if (check_func(dsp->vector_dmac_scalar, "vector_dmac_scalar")) {
        test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
    }
    report("vector_dmac");

    if (check_func(dsp->butterflies_float, "butterflies_float")) {
        test_butterflies_float(src3, src4);
    }
    report("butterflies_float");

    if (check_func(dsp->scalarproduct_float, "scalarproduct_float")) {
        test_scalarproduct_float(src3, src4);
    }
    report("scalarproduct_float");

    av_freep(&dsp);
}

105
externals/ffmpeg/tests/checkasm/fmtconvert.c vendored Executable file
View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2015 Janne Grunau
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <math.h>
#include <stdint.h>
#include <string.h>
#include "libavutil/internal.h"
#include "libavutil/common.h"
#include "libavcodec/fmtconvert.h"
#include "checkasm.h"
/* Number of elements in the input and output buffers. */
#define BUF_SIZE 1024

/* Fill in[0..len-1] with rnd() - INT32_MAX and every remaining element up to
 * BUF_SIZE with INT32_MAX. Requires an `in` array in scope. */
#define randomize_input(len)                                \
    do {                                                    \
        int k = 0;                                          \
        while (k < len) {                                   \
            in[k] = rnd() - INT32_MAX;                      \
            k++;                                            \
        }                                                   \
        while (k < BUF_SIZE) {                              \
            in[k] = INT32_MAX;                              \
            k++;                                            \
        }                                                   \
    } while (0)
void checkasm_check_fmtconvert(void)
{
FmtConvertContext c;
LOCAL_ALIGNED(32, float, dst0, [BUF_SIZE]);
LOCAL_ALIGNED(32, float, dst1, [BUF_SIZE]);
LOCAL_ALIGNED(32, int32_t, in, [BUF_SIZE]);
float scale_arr[128];
int length[] = {8, 16, 24, 56, 72, 128, 512, 520, 656, 768, 992};
int i, j;
for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++)
scale_arr[i] = (FF_ARRAY_ELEMS(scale_arr) - FF_ARRAY_ELEMS(scale_arr) / 2) / 13;
ff_fmt_convert_init(&c, NULL);
memset(dst0, 0, sizeof(*dst0) * BUF_SIZE);
memset(dst1, 0, sizeof(*dst1) * BUF_SIZE);
if (check_func(c.int32_to_float_fmul_scalar, "int32_to_float_fmul_scalar")) {
declare_func(void, float *, const int32_t *, float, int);
for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) {
for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
randomize_input(length[j]);
call_ref(dst0, in, scale_arr[i], length[j]);
call_new(dst1, in, scale_arr[i], length[j]);
if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
fail();
break;
}
bench_new(dst1, in, scale_arr[i], length[j]);
}
}
}
if (check_func(c.int32_to_float_fmul_array8, "int32_to_float_fmul_array8")) {
declare_func(void, FmtConvertContext *, float *, const int32_t *,
const float *, int);
for (i = 0; i < 4; i++) {
for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
randomize_input(length[j]);
call_ref(&c, dst0, in, scale_arr, length[j]);
call_new(&c, dst1, in, scale_arr, length[j]);
if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
fail();
fprintf(stderr, "int32_to_float_fmul_array8: len: %d\n", length[j]);
break;
}
bench_new(&c, dst1, in, scale_arr, length[j]);
}
}
}
report("fmtconvert");
}

63
externals/ffmpeg/tests/checkasm/g722dsp.c vendored Executable file
View File

@@ -0,0 +1,63 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/g722.h"
#include "libavcodec/g722dsp.h"
#include "libavcodec/mathops.h"
/* Fill src0 and src1 with identical random values sign-extended from 16
 * bits. Requires both arrays in scope. */
#define randomize_buffers()                                     \
    do {                                                        \
        int n;                                                  \
        for (n = 0; n < PREV_SAMPLES_BUF_SIZE; n++) {           \
            src0[n] = src1[n] = sign_extend(rnd(), 16);         \
        }                                                       \
    } while (0)
/* Slide a window over two identical sample buffers, one sample at a time,
 * and compare the two-element output of the reference and tested functions
 * at each of the PREV_SAMPLES_BUF_SIZE - 24 start positions. */
static void check_qmf(void)
{
    int16_t src0[PREV_SAMPLES_BUF_SIZE];
    int16_t src1[PREV_SAMPLES_BUF_SIZE];
    int ref_out[2], new_out[2];
    int offset;

    declare_func(void, const int16_t *prev_samples, int xout[2]);

    randomize_buffers();

    for (offset = 0; offset < PREV_SAMPLES_BUF_SIZE - 24; offset++) {
        call_ref(src0 + offset, ref_out);
        call_new(src1 + offset, new_out);
        if (memcmp(ref_out, new_out, sizeof(ref_out)) != 0)
            fail();
    }

    bench_new(src1, new_out);
}
/* Entry point: initialise a G722DSPContext and check its apply_qmf
 * function. */
void checkasm_check_g722dsp(void)
{
    G722DSPContext dsp;

    ff_g722dsp_init(&dsp);

    if (check_func(dsp.apply_qmf, "g722_apply_qmf")) {
        check_qmf();
    }
    report("apply_qmf");
}

452
externals/ffmpeg/tests/checkasm/h264dsp.c vendored Executable file
View File

@@ -0,0 +1,452 @@
/*
* Copyright (c) 2016 Martin Storsjo
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/h264dsp.h"
#include "libavcodec/h264data.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* 32-bit masks applied to random words; randomize_buffers() indexes
 * pixel_mask with bit_depth - 8. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
/* NOTE(review): presumably the equivalent masks for the loop-filter tests,
 * which are not visible in this part of the file - confirm at the use site. */
static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };
/* Bytes per pixel / per coefficient, derived from a `bit_depth` variable that
 * must be in scope where these are used. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8))
/* Row stride in bytes used when addressing src/dst in randomize_buffers(). */
#define PIXEL_STRIDE 16
/* Fills `sz` rows of the byte buffers `src` and `dst` with masked random
 * 32-bit words (src first, then dst, for each word), then stores the
 * per-pixel difference src - dst into `coef` as an sz x sz block.
 * For bit_depth 8 the pixels are read as bytes and coef is indexed directly;
 * otherwise pixels are read as uint16_t and coef is written as int32_t.
 * Uses `src`, `dst`, `coef`, `sz` and `bit_depth` from the expansion site. */
#define randomize_buffers() \
do { \
int x, y; \
uint32_t mask = pixel_mask[bit_depth - 8]; \
for (y = 0; y < sz; y++) { \
for (x = 0; x < PIXEL_STRIDE; x += 4) { \
AV_WN32A(src + y * PIXEL_STRIDE + x, rnd() & mask); \
AV_WN32A(dst + y * PIXEL_STRIDE + x, rnd() & mask); \
} \
for (x = 0; x < sz; x++) { \
if (bit_depth == 8) { \
coef[y * sz + x] = src[y * PIXEL_STRIDE + x] - \
dst[y * PIXEL_STRIDE + x]; \
} else { \
((int32_t *)coef)[y * sz + x] = \
((uint16_t *)src)[y * (PIXEL_STRIDE/2) + x] - \
((uint16_t *)dst)[y * (PIXEL_STRIDE/2) + x]; \
} \
} \
} \
} while (0)
/*
 * Generates dct4x4_<size>(), an in-place transform of a 4x4 block of
 * `dctcoef` values:
 *   1. a 4-point butterfly on each row of coef, stored transposed in pass1;
 *   2. the same butterfly on each row of pass1, written back to coef;
 *   3. per-coefficient scaling with rounding, where the scale is chosen by
 *      the parity of the row and column index.
 */
#define dct4x4_impl(size, dctcoef)                                           \
static void dct4x4_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 };         \
    dctcoef pass1[16];                                                       \
    int n;                                                                   \
    for (n = 0; n < 4; n++) {                                                \
        const dctcoef *in = coef + 4 * n;                                    \
        const int sum_outer  = in[0] + in[3];                                \
        const int sum_inner  = in[1] + in[2];                                \
        const int diff_outer = in[0] - in[3];                                \
        const int diff_inner = in[1] - in[2];                                \
        pass1[n +  0] = sum_outer + sum_inner;                               \
        pass1[n +  4] = 2 * diff_outer + diff_inner;                         \
        pass1[n +  8] = sum_outer - sum_inner;                               \
        pass1[n + 12] = diff_outer - 2 * diff_inner;                         \
    }                                                                        \
    for (n = 0; n < 4; n++) {                                                \
        const dctcoef *in = pass1 + 4 * n;                                   \
        dctcoef *out = coef + 4 * n;                                         \
        const int sum_outer  = in[0] + in[3];                                \
        const int sum_inner  = in[1] + in[2];                                \
        const int diff_outer = in[0] - in[3];                                \
        const int diff_inner = in[1] - in[2];                                \
        out[0] = sum_outer + sum_inner;                                      \
        out[1] = 2 * diff_outer + diff_inner;                                \
        out[2] = sum_outer - sum_inner;                                      \
        out[3] = diff_outer - 2 * diff_inner;                                \
    }                                                                        \
    /* n walks the block in raster order: row = n >> 2, column = n & 3. */   \
    for (n = 0; n < 16; n++) {                                               \
        const int idx = ((n >> 2) & 1) + (n & 1);                            \
        coef[n] = (coef[n] * scale[idx] + (1 << 14)) >> 15;                  \
    }                                                                        \
}

/*
 * One 8-point transform: reads eight values from src at multiples of
 * srcstride and writes eight results to dst at multiples of dststride.
 * sXY / dXY are the sum / difference of inputs X and Y; e* and o* are the
 * second-stage terms built from the sums and from the differences.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do {                         \
    const int s07 = (src)[srcstride * 0] + (src)[srcstride * 7];             \
    const int d07 = (src)[srcstride * 0] - (src)[srcstride * 7];             \
    const int s16 = (src)[srcstride * 1] + (src)[srcstride * 6];             \
    const int d16 = (src)[srcstride * 1] - (src)[srcstride * 6];             \
    const int s25 = (src)[srcstride * 2] + (src)[srcstride * 5];             \
    const int d25 = (src)[srcstride * 2] - (src)[srcstride * 5];             \
    const int s34 = (src)[srcstride * 3] + (src)[srcstride * 4];             \
    const int d34 = (src)[srcstride * 3] - (src)[srcstride * 4];             \
    const int e0 = s07 + s34;                                                \
    const int e1 = s16 + s25;                                                \
    const int e2 = s07 - s34;                                                \
    const int e3 = s16 - s25;                                                \
    const int o0 = d16 + d25 + (d07 + (d07 >> 1));                           \
    const int o1 = d07 - d34 - (d25 + (d25 >> 1));                           \
    const int o2 = d07 + d34 - (d16 + (d16 >> 1));                           \
    const int o3 = d16 - d25 + (d34 + (d34 >> 1));                           \
    (dst)[dststride * 0] = e0 + e1;                                          \
    (dst)[dststride * 1] = o0 + (o3 >> 2);                                   \
    (dst)[dststride * 2] = e2 + (e3 >> 1);                                   \
    (dst)[dststride * 3] = o1 + (o2 >> 2);                                   \
    (dst)[dststride * 4] = e0 - e1;                                          \
    (dst)[dststride * 5] = o2 - (o1 >> 2);                                   \
    (dst)[dststride * 6] = (e2 >> 1) - e3;                                   \
    (dst)[dststride * 7] = (o0 >> 2) - o3;                                   \
} while (0)

/*
 * Generates dct8x8_<size>(), an in-place transform of an 8x8 block:
 * DCT8_1D over each stride-8 column of coef into pass1, DCT8_1D over each
 * contiguous row of pass1 back into the stride-8 columns of coef, then
 * per-coefficient scaling with rounding using a 4x4 periodic index map.
 */
#define dct8x8_impl(size, dctcoef)                                           \
static void dct8x8_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[] = {                                             \
        13107 * 20, 11428 * 18, 20972 * 32,                                  \
        12222 * 19, 16777 * 25, 15481 * 24,                                  \
    };                                                                       \
    static const int idxmap[] = {                                            \
        0, 3, 4, 3,                                                          \
        3, 1, 5, 1,                                                          \
        4, 5, 2, 5,                                                          \
        3, 1, 5, 1,                                                          \
    };                                                                       \
    dctcoef pass1[64];                                                       \
    int n;                                                                   \
    for (n = 0; n < 8; n++)                                                  \
        DCT8_1D(coef + n, 8, pass1 + n, 8);                                  \
    for (n = 0; n < 8; n++)                                                  \
        DCT8_1D(pass1 + 8 * n, 1, coef + n, 8);                              \
    /* n walks the block in raster order: row = n >> 3, column = n & 7. */   \
    for (n = 0; n < 64; n++) {                                               \
        const int idx = idxmap[((n >> 3) & 3) * 4 + (n & 3)];                \
        coef[n] = ((int64_t)coef[n] * scale[idx] + (1 << 17)) >> 18;         \
    }                                                                        \
}

/* 16-bit and 32-bit coefficient variants of both transforms. */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)
dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)

/* Dispatch on bit depth: 8-bit content uses int16_t coefficients; any other
 * depth reinterprets the buffer as int32_t coefficients. */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct4x4_32((int32_t *) coef);
        return;
    }
    dct4x4_16(coef);
}

/* Same dispatch as dct4x4(), for the 8x8 transform. */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct8x8_32((int32_t *) coef);
        return;
    }
    dct8x8_16(coef);
}
/*
 * Compare the reference and the optimized single-block "IDCT and add"
 * functions (h264_idct_add, h264_idct_dc_add, h264_idct8_add,
 * h264_idct8_dc_add) for bit depths 8 to 10.
 *
 * For every bit depth and block size (4 and 8) the buffers are randomized,
 * the forward transform is applied to coef, and then both the full and the
 * DC-only variant are run on identical copies of the destination and
 * coefficient buffers.  A mismatch in either the destination pixels or the
 * coefficient buffer after the call is reported as a failure.
 */
static void check_idct(void)
{
LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
/* 32 spare bytes so that dst1 can be placed at a non-zero offset (see align) */
LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);
LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
H264DSPContext h;
int bit_depth, sz, align, dc;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
ff_h264dsp_init(&h, bit_depth, 1);
for (sz = 4; sz <= 8; sz += 4) {
/* randomize_buffers() reads sz and bit_depth and fills src, dst and coef */
randomize_buffers();
if (sz == 4)
dct4x4(coef, bit_depth);
else
dct8x8(coef, bit_depth);
for (dc = 0; dc <= 1; dc++) {
void (*idct)(uint8_t *, int16_t *, int) = NULL;
/* key = block size in the upper bits, DC-only flag in bit 0 */
switch ((sz << 1) | dc) {
case (4 << 1) | 0: idct = h.h264_idct_add; break;
case (4 << 1) | 1: idct = h.h264_idct_dc_add; break;
case (8 << 1) | 0: idct = h.h264_idct8_add; break;
case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break;
}
if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) {
/* repeat with dst1 shifted by whole block rows, for offsets below 16 bytes */
for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
uint8_t *dst1 = dst1_base + align;
if (dc) {
/* DC-only variant: keep just the first coefficient, zero the rest */
memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
memcpy(subcoef0, coef, SIZEOF_COEF);
} else {
memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
}
memcpy(dst0, dst, sz * PIXEL_STRIDE);
memcpy(dst1, dst, sz * PIXEL_STRIDE);
memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
call_ref(dst0, subcoef0, PIXEL_STRIDE);
call_new(dst1, subcoef1, PIXEL_STRIDE);
/* both the pixels and the coefficient buffer must end up identical */
if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
fail();
/* NOTE(review): the benchmark uses a stride of sz * SIZEOF_PIXEL while the
 * checked calls above use PIXEL_STRIDE - confirm this is intentional */
bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);
}
}
}
}
}
}
/*
 * Compare the reference and the optimized multi-block IDCT functions
 * (h264_idct_add16, h264_idct_add16intra, h264_idct8_add4) for bit depths
 * 8 to 10.
 *
 * For each function a 16x16 destination (dst_full), a coefficient buffer
 * (coef_full), the per-block offsets (block_offset) and the non-zero-count
 * table (nnzc) are assembled block by block from freshly randomized and
 * forward-transformed data; the reference and the new implementation are then
 * run on identical copies and both the pixels and the coefficients are
 * compared.
 */
static void check_idct_multiple(void)
{
LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
H264DSPContext h;
int bit_depth, i, y, func;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
ff_h264dsp_init(&h, bit_depth, 1);
for (func = 0; func < 3; func++) {
void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
const char *name;
/* defaults apply to the 4x4 non-intra case; adjusted per function below */
int sz = 4, intra = 0;
int block_offset[16] = { 0 };
switch (func) {
case 0:
idct = h.h264_idct_add16;
name = "h264_idct_add16";
break;
case 1:
idct = h.h264_idct_add16intra;
name = "h264_idct_add16intra";
intra = 1;
break;
case 2:
idct = h.h264_idct8_add4;
name = "h264_idct8_add4";
sz = 8;
break;
}
memset(nnzc, 0, 15 * 8);
memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
/* i counts coefficients; each iteration handles one sz x sz block */
for (i = 0; i < 16 * 16; i += sz * sz) {
/* NOTE(review): these are plain stack arrays, yet randomize_buffers()
 * stores to src/dst with AV_WN32A and coef may be accessed as int32_t -
 * confirm that their alignment is sufficient on all targets */
uint8_t src[8 * 8 * 2];
uint8_t dst[8 * 8 * 2];
int16_t coef[8 * 8 * 2];
int index = i / sz;
int block_y = (index / 16) * sz;
int block_x = index % 16;
/* byte offset of the block's top-left pixel inside the 16-pixel-wide dst_full */
int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
/* 0: no coefficients, 1: first coefficient only, 2: whole block (see below) */
int nnz = rnd() % 3;
randomize_buffers();
if (sz == 4)
dct4x4(coef, bit_depth);
else
dct8x8(coef, bit_depth);
/* copy the block's pixels row by row into its place in dst_full */
for (y = 0; y < sz; y++)
memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
&dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
if (nnz > 1)
nnz = sz * sz;
/* copies nnz coefficients (none, the first one, or all of them) */
memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
coef, nnz * SIZEOF_COEF);
/* for the intra function a lone first coefficient is signalled as nnz == 0 */
if (intra && nnz == 1)
nnz = 0;
nnzc[scan8[i / 16]] = nnz;
block_offset[i / 16] = offset;
}
if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
/* both the pixels and the coefficient buffer must end up identical */
if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
fail();
bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
}
}
}
}
/*
 * Compare the reference and the optimized inter loop filters (luma/chroma,
 * vertical/horizontal, mbaff and 4:2:2 chroma variants) for bit depths
 * 8 to 10.
 *
 * Every filter is exercised with 36 alpha/beta/tc0 combinations.  The whole
 * 1024-byte working buffer is compared after each call, so any write outside
 * the expected area is detected as well, and the benchmark runs on the same
 * in-bounds position (dst1 + off) as the checked call: the filters address
 * pixels at negative offsets from the pointer they are given, so the start
 * of the buffer must not be passed directly.
 */
static void check_loop_filter(void)
{
    LOCAL_ALIGNED_16(uint8_t, dst,  [32 * 16 * 2]);
    LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
    LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
    H264DSPContext h;
    int bit_depth;
    int alphas[36], betas[36];
    int8_t tc0[36][4];

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
                      int alpha, int beta, int8_t *tc0);

    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
        int i, j, a, c;
        uint32_t mask = pixel_mask_lf[bit_depth - 8];
        ff_h264dsp_init(&h, bit_depth, 1);

        /* build the parameter tables from the strongest setting (index 35)
         * downwards; a and c shrink by 10% per step */
        for (i = 35, a = 255, c = 250; i >= 0; i--) {
            alphas[i] = a << (bit_depth - 8);
            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
            tc0[i][0] = tc0[i][3] = (c + 6) / 10;
            tc0[i][1] = (c + 7) / 15;
            tc0[i][2] = (c + 9) / 20;
            a = a*9/10;
            c = c*9/10;
        }

        /* align != 0: always filter at byte offset 8 * 32;
         * align == 0: additionally shift by (j & 15) * 4 bytes */
#define CHECK_LOOP_FILTER(name, align, idc)                             \
        do {                                                            \
            if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
                for (j = 0; j < 36; j++) {                              \
                    intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
                    for (i = 0; i < 1024; i+=4) {                       \
                        AV_WN32A(dst + i, rnd() & mask);                \
                    }                                                   \
                    memcpy(dst0, dst, 32 * 16 * 2);                     \
                    memcpy(dst1, dst, 32 * 16 * 2);                     \
                                                                        \
                    call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
                    call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
                    /* compare the complete buffers, not just a prefix */ \
                    if (memcmp(dst0, dst1, 32 * 16 * 2)) {              \
                        fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
                                "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
                                tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
                        fail();                                         \
                    }                                                   \
                    bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
                }                                                       \
            }                                                           \
        } while (0)

        CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);
        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);

        /* re-initialize with chroma format 2 for the 4:2:2 chroma variants */
        ff_h264dsp_init(&h, bit_depth, 2);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);
#undef CHECK_LOOP_FILTER
    }
}
/*
 * Compare the reference and the optimized intra loop filters (luma/chroma,
 * vertical/horizontal, mbaff and 4:2:2 chroma variants) for bit depths
 * 8 to 10.
 *
 * Every filter is exercised with 36 alpha/beta combinations.  The whole
 * 1024-byte working buffer is compared after each call, so any write outside
 * the expected area is detected as well, and the benchmark runs on the same
 * in-bounds position (dst1 + off) as the checked call: the filters address
 * pixels at negative offsets from the pointer they are given, so the start
 * of the buffer must not be passed directly.
 */
static void check_loop_filter_intra(void)
{
    LOCAL_ALIGNED_16(uint8_t, dst,  [32 * 16 * 2]);
    LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
    LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
    H264DSPContext h;
    int bit_depth;
    int alphas[36], betas[36];

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
                      int alpha, int beta);

    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
        int i, j, a;
        uint32_t mask = pixel_mask_lf[bit_depth - 8];
        ff_h264dsp_init(&h, bit_depth, 1);

        /* build the parameter tables from the strongest setting (index 35)
         * downwards; a shrinks by 10% per step */
        for (i = 35, a = 255; i >= 0; i--) {
            alphas[i] = a << (bit_depth - 8);
            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
            a = a*9/10;
        }

        /* align != 0: always filter at byte offset 8 * 32;
         * align == 0: additionally shift by (j & 15) * 4 bytes */
#define CHECK_LOOP_FILTER(name, align, idc)                             \
        do {                                                            \
            if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
                for (j = 0; j < 36; j++) {                              \
                    intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
                    for (i = 0; i < 1024; i+=4) {                       \
                        AV_WN32A(dst + i, rnd() & mask);                \
                    }                                                   \
                    memcpy(dst0, dst, 32 * 16 * 2);                     \
                    memcpy(dst1, dst, 32 * 16 * 2);                     \
                                                                        \
                    call_ref(dst0 + off, 32, alphas[j], betas[j]);      \
                    call_new(dst1 + off, 32, alphas[j], betas[j]);      \
                    /* compare the complete buffers, not just a prefix */ \
                    if (memcmp(dst0, dst1, 32 * 16 * 2)) {              \
                        fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
                                j, alphas[j], betas[j]);                \
                        fail();                                         \
                    }                                                   \
                    bench_new(dst1 + off, 32, alphas[j], betas[j]);     \
                }                                                       \
            }                                                           \
        } while (0)

        CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,);
        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,);

        /* re-initialize with chroma format 2 for the 4:2:2 chroma variants */
        ff_h264dsp_init(&h, bit_depth, 2);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422);
        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422);
#undef CHECK_LOOP_FILTER
    }
}
/*
 * Entry point of the H.264 DSP tests: runs each group of checks and emits
 * one report per group.  The single-block and multi-block IDCT checks share
 * the "idct" report.
 */
void checkasm_check_h264dsp(void)
{
check_idct();
check_idct_multiple();
report("idct");
check_loop_filter();
report("loop_filter");
check_loop_filter_intra();
report("loop_filter_intra");
}

260
externals/ffmpeg/tests/checkasm/h264pred.c vendored Executable file
View File

@@ -0,0 +1,260 @@
/*
* Copyright (c) 2015 Henrik Gramner
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/h264pred.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Codecs the predictors are initialized for; the index into this table is the
 * "codec" value used as first index of the *_modes name tables below. */
static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 };
/*
 * Test names of the 4x4 prediction modes, indexed [codec][prediction mode]
 * with the codec order of codec_ids.  Modes without an entry stay NULL and
 * are skipped by check_pred_func().  The same table also supplies the names
 * for the 8x8l checks.
 */
static const char * const pred4x4_modes[4][15] = {
{ /* H.264 */
[VERT_PRED ] = "vertical",
[HOR_PRED ] = "horizontal",
[DC_PRED ] = "dc",
[DIAG_DOWN_LEFT_PRED ] = "down_left",
[DIAG_DOWN_RIGHT_PRED] = "down_right",
[VERT_RIGHT_PRED ] = "vertical_right",
[HOR_DOWN_PRED ] = "horizontal_right",
[VERT_LEFT_PRED ] = "vertical_left",
[HOR_UP_PRED ] = "horizontal_up",
[LEFT_DC_PRED ] = "left_dc",
[TOP_DC_PRED ] = "top_dc",
[DC_128_PRED ] = "dc_128",
},
{ /* VP8 */
[VERT_PRED ] = "vertical_vp8",
[HOR_PRED ] = "horizontal_vp8",
[VERT_LEFT_PRED] = "vertical_left_vp8",
[TM_VP8_PRED ] = "tm_vp8",
[DC_127_PRED ] = "dc_127_vp8",
[DC_129_PRED ] = "dc_129_vp8",
},
{ /* RV40 */
[DIAG_DOWN_LEFT_PRED ] = "down_left_rv40",
[VERT_LEFT_PRED ] = "vertical_left_rv40",
[HOR_UP_PRED ] = "horizontal_up_rv40",
[DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
[HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40",
[VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40",
},
{ /* SVQ3 */
[DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
},
};
/*
 * Test names of the 8x8 prediction modes, indexed [codec][prediction mode]
 * with the codec order of codec_ids.  Modes without an entry stay NULL and
 * are skipped by check_pred_func(); the SVQ3 row is left entirely empty.
 */
static const char * const pred8x8_modes[4][11] = {
{ /* H.264 */
[DC_PRED8x8 ] = "dc",
[HOR_PRED8x8 ] = "horizontal",
[VERT_PRED8x8 ] = "vertical",
[PLANE_PRED8x8 ] = "plane",
[LEFT_DC_PRED8x8 ] = "left_dc",
[TOP_DC_PRED8x8 ] = "top_dc",
[DC_128_PRED8x8 ] = "dc_128",
[ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
[ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
[ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
[ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
},
{ /* VP8 */
[PLANE_PRED8x8 ] = "tm_vp8",
[DC_127_PRED8x8] = "dc_127_vp8",
[DC_129_PRED8x8] = "dc_129_vp8",
},
{ /* RV40 */
[DC_PRED8x8 ] = "dc_rv40",
[LEFT_DC_PRED8x8] = "left_dc_rv40",
[TOP_DC_PRED8x8 ] = "top_dc_rv40",
},
/* nothing for SVQ3 */
};
/*
 * Test names of the 16x16 prediction modes, indexed [codec][prediction mode]
 * with the codec order of codec_ids (the mode indices are the *_PRED8x8
 * constants).  Modes without an entry stay NULL and are skipped by
 * check_pred_func().
 */
static const char * const pred16x16_modes[4][9] = {
{ /* H.264 */
[DC_PRED8x8 ] = "dc",
[HOR_PRED8x8 ] = "horizontal",
[VERT_PRED8x8 ] = "vertical",
[PLANE_PRED8x8 ] = "plane",
[LEFT_DC_PRED8x8] = "left_dc",
[TOP_DC_PRED8x8 ] = "top_dc",
[DC_128_PRED8x8 ] = "dc_128",
},
{ /* VP8 */
[PLANE_PRED8x8 ] = "tm_vp8",
[DC_127_PRED8x8] = "dc_127_vp8",
[DC_129_PRED8x8] = "dc_129_vp8",
},
{ /* RV40 */
[PLANE_PRED8x8] = "plane_rv40",
},
{ /* SVQ3 */
[PLANE_PRED8x8] = "plane_svq3",
},
};
/* Masks applied to each random 32-bit word, indexed by bit_depth - 8, so that
 * the 16-bit samples of the 9- and 10-bit cases stay within range. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
/* Bytes per pixel for the bit_depth variable in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Size in bytes of each working buffer (buf0/buf1). */
#define BUF_SIZE (3 * 16 * 17)
/*
 * Register func with checkasm unless mode_name is NULL (mode not available
 * for this codec).  For H.264 the bit depth is appended to the test name.
 * Uses the codec and bit_depth variables of the calling function.
 */
#define check_pred_func(func, name, mode_name) \
(mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ? \
check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) : \
check_func(func, "pred%s_%s", name, mode_name)))
/*
 * Fill buf0 and buf1 with the same masked random data, 32 bits at a time.
 * Uses the buf0, buf1 and bit_depth variables of the calling function.
 */
#define randomize_buffers() \
do { \
uint32_t mask = pixel_mask[bit_depth - 8]; \
int i; \
for (i = 0; i < BUF_SIZE; i += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(buf0 + i, r); \
AV_WN32A(buf1 + i, r); \
} \
} while (0)
#define src0 (buf0 + 4 * 16) /* Offset to allow room for top and left */
#define src1 (buf1 + 4 * 16)
/*
 * Check every available 4x4 intra predictor of the given codec against the
 * reference.  Only run for chroma_format 1; buf0 serves the reference call
 * and buf1 the tested one, and the complete buffers are compared afterwards.
 */
static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    if (chroma_format == 1) {
        /* both calls take their top-right samples from buf0 */
        uint8_t *topright = buf0 + 2*16;
        int mode;
        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride);

        for (mode = 0; mode < 15; mode++) {
            const char *mode_name = pred4x4_modes[codec][mode];

            if (!check_pred_func(h->pred4x4[mode], "4x4", mode_name))
                continue;

            randomize_buffers();
            call_ref(src0, topright, 12*SIZEOF_PIXEL);
            call_new(src1, topright, 12*SIZEOF_PIXEL);
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, topright, 12*SIZEOF_PIXEL);
        }
    }
}
/*
 * Check every available 8x8 intra predictor of the given codec against the
 * reference.  The functions are reported as "8x16" when chroma_format is 2
 * and as "8x8" otherwise; the complete buffers are compared after each call.
 */
static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    const char *size_name = (chroma_format == 2) ? "8x16" : "8x8";
    int mode;
    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);

    for (mode = 0; mode < 11; mode++) {
        const char *mode_name = pred8x8_modes[codec][mode];

        if (!check_pred_func(h->pred8x8[mode], size_name, mode_name))
            continue;

        randomize_buffers();
        call_ref(src0, 24*SIZEOF_PIXEL);
        call_new(src1, 24*SIZEOF_PIXEL);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, 24*SIZEOF_PIXEL);
    }
}
/*
 * Check every available 16x16 intra predictor of the given codec against the
 * reference.  Only run for chroma_format 1; the stride is 48 bytes for every
 * bit depth and the complete buffers are compared after each call.
 */
static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                            int codec, int chroma_format, int bit_depth)
{
    if (chroma_format == 1) {
        int mode;
        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);

        for (mode = 0; mode < 9; mode++) {
            const char *mode_name = pred16x16_modes[codec][mode];

            if (!check_pred_func(h->pred16x16[mode], "16x16", mode_name))
                continue;

            randomize_buffers();
            call_ref(src0, 48);
            call_new(src1, 48);
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, 48);
        }
    }
}
/*
 * Check the 8x8 luma ("8x8l") intra predictors against the reference.  Only
 * run for H.264 with chroma_format 1.  Each mode is tried with all four
 * combinations of top-left / top-right availability, except that the two
 * modes which require the top-left neighbour are not tried without it.
 */
static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                           int codec, int chroma_format, int bit_depth)
{
    if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) {
        int mode;
        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride);

        for (mode = 0; mode < 12; mode++) {
            /* Those modes aren't allowed without top-left according to the spec */
            const int needs_topleft = mode == DIAG_DOWN_RIGHT_PRED ||
                                      mode == VERT_RIGHT_PRED;
            int neighbors;

            if (!check_pred_func(h->pred8x8l[mode], "8x8l", pred4x4_modes[codec][mode]))
                continue;

            /* bit 15 = top-left available, bit 14 = top-right available */
            for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) {
                const int has_topleft  = neighbors & 0x8000;
                const int has_topright = neighbors & 0x4000;

                if (needs_topleft && !has_topleft)
                    continue;

                randomize_buffers();
                call_ref(src0, has_topleft, has_topright, 24*SIZEOF_PIXEL);
                call_new(src1, has_topleft, has_topright, 24*SIZEOF_PIXEL);
                if (memcmp(buf0, buf1, BUF_SIZE))
                    fail();
                bench_new(src1, has_topleft, has_topright, 24*SIZEOF_PIXEL);
            }
        }
    }
}
/* TODO: Add tests for H.264 lossless H/V prediction */
/*
 * Entry point of the intra prediction tests.  Every test group is run for
 * each codec in codec_ids; H.264 is additionally iterated over bit depths
 * 8 to 10 and chroma formats 1 and 2, the other codecs only use bit depth 8
 * and chroma format 1.  One report is emitted per test group.
 */
void checkasm_check_h264pred(void)
{
    static const struct {
        void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
        const char *name;
    } tests[] = {
        { check_pred4x4,   "pred4x4"   },
        { check_pred8x8,   "pred8x8"   },
        { check_pred16x16, "pred16x16" },
        { check_pred8x8l,  "pred8x8l"  },
    };

    LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
    H264PredContext h;
    int test, codec, chroma_format, bit_depth;

    for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) {
        for (codec = 0; codec < 4; codec++) {
            const int codec_id   = codec_ids[codec];
            const int is_h264    = codec_id == AV_CODEC_ID_H264;
            const int max_depth  = is_h264 ? 10 : 8;
            const int max_chroma = is_h264 ?  2 : 1;

            for (bit_depth = 8; bit_depth <= max_depth; bit_depth++) {
                for (chroma_format = 1; chroma_format <= max_chroma; chroma_format++) {
                    ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format);
                    tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth);
                }
            }
        }
        report("%s", tests[test].name);
    }
}

81
externals/ffmpeg/tests/checkasm/h264qpel.c vendored Executable file
View File

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2015 Henrik Gramner
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/h264qpel.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Masks applied to each random 32-bit source word, indexed by bit_depth - 8,
 * so that the 16-bit samples of the 9- and 10-bit cases stay within range. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
/* Bytes per pixel for the bit_depth variable in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Size in bytes of each source and destination buffer. */
#define BUF_SIZE (2 * 16 * (16 + 3 + 4))
/*
 * Fill buf0/buf1 with identical masked random data and dst0/dst1 with
 * identical unmasked random data, 32 bits at a time.  Uses the buf0, buf1,
 * dst0, dst1 and bit_depth variables of the calling function.
 */
#define randomize_buffers() \
do { \
uint32_t mask = pixel_mask[bit_depth - 8]; \
int k; \
for (k = 0; k < BUF_SIZE; k += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(buf0 + k, r); \
AV_WN32A(buf1 + k, r); \
r = rnd(); \
AV_WN32A(dst0 + k, r); \
AV_WN32A(dst1 + k, r); \
} \
} while (0)
#define src0 (buf0 + 3 * 2 * 16) /* h264qpel functions read data from negative src pointer offsets */
#define src1 (buf1 + 3 * 2 * 16)
/*
 * Entry point of the H.264 quarter-pel motion compensation tests.  For the
 * "put" and then the "avg" operation, every bit depth from 8 to 10, every
 * block size (16 >> i; four sizes for put, three for avg) and all 16
 * sub-pixel positions are compared against the reference.  Source and
 * destination buffers are both checked after each call; one report is
 * emitted per operation.
 */
void checkasm_check_h264qpel(void)
{
    LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
    H264QpelContext h;
    int op, bit_depth, i, j;
    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);

    for (op = 0; op < 2; op++) {
        qpel_mc_func (*tab)[16] = op ? h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab;
        const char *op_name = op ? "avg" : "put";
        const int num_sizes = op ? 3 : 4;

        for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
            ff_h264qpel_init(&h, bit_depth);

            for (i = 0; i < num_sizes; i++) {
                const int size = 16 >> i;

                for (j = 0; j < 16; j++) {
                    /* j encodes the sub-pixel position: x in bits 0-1, y in bits 2-3 */
                    if (!check_func(tab[i][j], "%s_h264_qpel_%d_mc%d%d_%d", op_name, size, j & 3, j >> 2, bit_depth))
                        continue;

                    randomize_buffers();
                    call_ref(dst0, src0, size * SIZEOF_PIXEL);
                    call_new(dst1, src1, size * SIZEOF_PIXEL);
                    if (memcmp(buf0, buf1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))
                        fail();
                    bench_new(dst1, src1, size * SIZEOF_PIXEL);
                }
            }
        }
        report("%s", op_name);
    }
}

View File

@@ -0,0 +1,96 @@
/*
* Copyright (c) 2016 Alexandra Hájková
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/intreadwrite.h"
#include "libavcodec/hevcdsp.h"
#include "checkasm.h"
/*
 * Fill size 16-bit elements starting at buf (element indexing: buf + j) with
 * random residuals; each random 16-bit value is shifted right by 3 first.
 */
#define randomize_buffers(buf, size) \
do { \
int j; \
for (j = 0; j < size; j++) { \
int16_t r = rnd(); \
AV_WN16A(buf + j, r >> 3); \
} \
} while (0)
/*
 * Fill size 16-bit words starting at buf with random values limited to
 * 10 bits (0..0x3FF).  The address is computed as buf + j * 2, i.e. buf is
 * expected to be a byte pointer here.
 */
#define randomize_buffers2(buf, size) \
do { \
int j; \
for (j = 0; j < size; j++) \
AV_WN16A(buf + j * 2, rnd() & 0x3FF); \
} while (0)
/*
 * Run the reference and the tested add_residual function on identical random
 * input and compare the destination blocks.
 *
 * size           number of samples in the block
 * stride         destination stride in bytes, as passed to the functions
 * overflow_test  when non-zero, the first residual is forced to 0x8000 to
 *                exercise the res = -32768 case
 *
 * The destination buffers are initialized and copied as size 16-bit words,
 * so the comparison covers exactly those sizeof(int16_t) * size bytes.
 * Comparing only size bytes would leave half of the block unverified
 * whenever the destination samples are two bytes wide.
 */
static void compare_add_res(int size, ptrdiff_t stride, int overflow_test)
{
    LOCAL_ALIGNED_32(int16_t, res0, [32 * 32]);
    LOCAL_ALIGNED_32(int16_t, res1, [32 * 32]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride);

    randomize_buffers(res0, size);
    randomize_buffers2(dst0, size);
    if (overflow_test)
        res0[0] = 0x8000;
    memcpy(res1, res0, sizeof(*res0) * size);
    memcpy(dst1, dst0, sizeof(int16_t) * size);

    call_ref(dst0, res0, stride);
    call_new(dst1, res1, stride);
    /* compare everything that was initialized above */
    if (memcmp(dst0, dst1, sizeof(int16_t) * size))
        fail();
    bench_new(dst1, res1, stride);
}
/*
 * Check the add_residual functions for the block sizes 4x4 up to 32x32.
 * The stride equals the block width in bytes: one byte per sample for
 * bit_depth 8, two bytes per sample above that.  Each function is checked
 * once with plain random data and once with the overflow case.
 */
static void check_add_res(HEVCDSPContext h, int bit_depth)
{
    int log2_size;

    for (log2_size = 2; log2_size <= 5; log2_size++) {
        const int block_size   = 1 << log2_size;
        const int size         = block_size * block_size;
        const ptrdiff_t stride = block_size << (bit_depth > 8);

        if (!check_func(h.add_residual[log2_size - 2], "hevc_add_res_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        compare_add_res(size, stride, 0);
        // overflow test for res = -32768
        compare_add_res(size, stride, 1);
    }
}
/*
 * Entry point of the HEVC add_residual tests: runs the checks for bit depths
 * 8, 9 and 10 and emits a single report.
 */
void checkasm_check_hevc_add_res(void)
{
    HEVCDSPContext h;
    int bit_depth = 8;

    while (bit_depth <= 10) {
        ff_hevc_dsp_init(&h, bit_depth);
        check_add_res(h, bit_depth);
        bit_depth++;
    }
    report("add_residual");
}

105
externals/ffmpeg/tests/checkasm/hevc_idct.c vendored Executable file
View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2016 Alexandra Hájková
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/intreadwrite.h"
#include "libavcodec/hevcdsp.h"
#include "checkasm.h"
/*
 * Fill size 16-bit elements starting at buf (element indexing: buf + j) with
 * random values covering the full int16_t range.
 */
#define randomize_buffers(buf, size) \
do { \
int j; \
for (j = 0; j < size; j++) { \
int16_t r = rnd(); \
AV_WN16A(buf + j, r); \
} \
} while (0)
/*
 * Check the full inverse transforms for the block sizes 4x4 up to 32x32.
 * Both implementations work in place on identical random coefficients, with
 * col_limit set to the block size; the coefficient buffers are compared
 * afterwards.
 */
static void check_idct(HEVCDSPContext h, int bit_depth)
{
    int log2_size;
    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);

    for (log2_size = 2; log2_size <= 5; log2_size++) {
        const int block_size = 1 << log2_size;
        const int size       = block_size * block_size;
        const int col_limit  = block_size;
        declare_func(void, int16_t *coeffs, int col_limit);

        /* the input is regenerated for every size, whether or not it is tested */
        randomize_buffers(coeffs0, size);
        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);

        if (!check_func(h.idct[log2_size - 2], "hevc_idct_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(coeffs0, col_limit);
        call_new(coeffs1, col_limit);
        if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
            fail();
        bench_new(coeffs1, col_limit);
    }
}
/*
 * Check the DC-only inverse transforms for the block sizes 4x4 up to 32x32.
 * Both implementations work in place on identical random coefficients; the
 * coefficient buffers are compared afterwards.
 */
static void check_idct_dc(HEVCDSPContext h, int bit_depth)
{
    int log2_size;
    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);

    for (log2_size = 2; log2_size <= 5; log2_size++) {
        const int block_size = 1 << log2_size;
        const int size       = block_size * block_size;
        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, int16_t *coeffs);

        /* the input is regenerated for every size, whether or not it is tested */
        randomize_buffers(coeffs0, size);
        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);

        if (!check_func(h.idct_dc[log2_size - 2], "hevc_idct_%dx%d_dc_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(coeffs0);
        call_new(coeffs1);
        if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
            fail();
        bench_new(coeffs1);
    }
}
/*
 * Entry point of the HEVC inverse transform tests.  First the DC-only
 * transforms and then the full transforms are checked for bit depths 8, 10
 * and 12, with one report per group.
 */
void checkasm_check_hevc_idct(void)
{
    int pass, bit_depth;

    /* pass 0: DC-only transforms, pass 1: full transforms */
    for (pass = 0; pass < 2; pass++) {
        for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
            HEVCDSPContext h;

            ff_hevc_dsp_init(&h, bit_depth);
            if (pass == 0)
                check_idct_dc(h, bit_depth);
            else
                check_idct(h, bit_depth);
        }
        report("%s", pass == 0 ? "idct_dc" : "idct");
    }
}

150
externals/ffmpeg/tests/checkasm/hevc_sao.c vendored Executable file
View File

@@ -0,0 +1,150 @@
/*
* Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/intreadwrite.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/hevcdsp.h"
#include "checkasm.h"
/* Masks applied to each random 32-bit word, indexed by (bit_depth - 8) >> 1
 * (i.e. for the bit depths 8, 10 and 12). */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
/* Block sizes (width == height) tested for each SAO function index. */
static const uint32_t sao_size[5] = {8, 16, 32, 48, 64};
/* Bytes per pixel for the bit_depth variable in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) //same with sao_edge src_stride
#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) //+2 for top and bottom row, *2 for high bit depth
/* Exclusive upper bound of the random SAO offsets generated below. */
#define OFFSET_THRESH (1 << (bit_depth - 5))
/* Number of entries in the SAO offset table. */
#define OFFSET_LENGTH 5
/*
 * Fill the first size bytes of buf0 and buf1 with the same masked random
 * data, 32 bits at a time.  Uses the bit_depth variable of the caller.
 */
#define randomize_buffers(buf0, buf1, size) \
do { \
uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
int k; \
for (k = 0; k < size; k += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(buf0 + k, r); \
AV_WN32A(buf1 + k, r); \
} \
} while (0)
/*
 * Fill size elements of buf with random values in [0, OFFSET_THRESH).  The
 * two branches differ only in the width of the temporary.  Uses the
 * bit_depth variable of the caller.
 */
#define randomize_buffers2(buf, size) \
do { \
uint32_t max_offset = OFFSET_THRESH; \
int k; \
if (bit_depth == 8) { \
for (k = 0; k < size; k++) { \
uint8_t r = rnd() % max_offset; \
buf[k] = r; \
} \
} else { \
for (k = 0; k < size; k++) { \
uint16_t r = rnd() % max_offset; \
buf[k] = r; \
} \
} \
} while (0)
/*
 * Check the SAO band filters for all block sizes listed in sao_size.  Source
 * data and offsets are regenerated for every size, the destinations are
 * cleared, and the complete destination buffers are compared after the
 * calls.  The left band class is drawn once and reused for all sizes.
 */
static void check_sao_band(HEVCDSPContext h, int bit_depth)
{
    int idx;
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
    int16_t offset_val[OFFSET_LENGTH];
    int left_class = rnd()%32;

    for (idx = 0; idx < 5; idx++) {
        int block_size = sao_size[idx];
        /* the same stride is used for source and destination */
        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
                          int16_t *sao_offset_val, int sao_left_class, int width, int height);

        randomize_buffers(src0, src1, BUF_SIZE);
        randomize_buffers2(offset_val, OFFSET_LENGTH);
        memset(dst0, 0, BUF_SIZE);
        memset(dst1, 0, BUF_SIZE);

        if (!check_func(h.sao_band_filter[idx], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size);
        call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
        if (memcmp(dst0, dst1, BUF_SIZE))
            fail();
        bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
    }
}
/*
 * Check the SAO edge filters for all block sizes listed in sao_size.  Source
 * data and offsets are regenerated for every size, the destinations are
 * cleared, and the complete destination buffers are compared after the
 * calls.  The source pointer is advanced by one row plus the padding so
 * that the area around the block is readable; the edge class (eo) is drawn
 * once and reused for all sizes.
 */
static void check_sao_edge(HEVCDSPContext h, int bit_depth)
{
    int idx;
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
    int16_t offset_val[OFFSET_LENGTH];
    int eo = rnd()%4;

    for (idx = 0; idx < 5; idx++) {
        int block_size = sao_size[idx];
        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
        /* byte offset of the first filtered sample inside the source buffers */
        int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL;
        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
                          int16_t *sao_offset_val, int eo, int width, int height);

        randomize_buffers(src0, src1, BUF_SIZE);
        randomize_buffers2(offset_val, OFFSET_LENGTH);
        memset(dst0, 0, BUF_SIZE);
        memset(dst1, 0, BUF_SIZE);

        if (!check_func(h.sao_edge_filter[idx], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size);
        call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
        if (memcmp(dst0, dst1, BUF_SIZE))
            fail();
        bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
    }
}
/*
 * Entry point of the HEVC SAO tests.  First the band filters and then the
 * edge filters are checked for bit depths 8, 10 and 12, with one report per
 * group.
 */
void checkasm_check_hevc_sao(void)
{
    int pass, bit_depth;

    /* pass 0: band filters, pass 1: edge filters */
    for (pass = 0; pass < 2; pass++) {
        for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
            HEVCDSPContext h;

            ff_hevc_dsp_init(&h, bit_depth);
            if (pass == 0)
                check_sao_band(h, bit_depth);
            else
                check_sao_edge(h, bit_depth);
        }
        report("%s", pass == 0 ? "sao_band" : "sao_edge");
    }
}

81
externals/ffmpeg/tests/checkasm/huffyuvdsp.c vendored Executable file
View File

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2016 Alexandra Hájková
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavcodec/huffyuvdsp.h"
#include "checkasm.h"
/* Fill buf[0 .. size-1] with random values limited to 16 bits. */
#define randomize_buffers(buf, size) \
do { \
int j; \
for (j = 0; j < size; j++) \
buf[j] = rnd() & 0xFFFF; \
} while (0)
/*
 * Compare the reference and the tested add_int16 implementation on width
 * 16-bit elements.
 *
 * c      DSP context providing add_int16
 * mask   mask argument forwarded to add_int16
 * width  number of elements to process
 * name   name the function is registered under
 *
 * The two source buffers hold identical random data and the destination
 * buffers start zeroed.  If any allocation fails, the failure is reported
 * and the test is skipped instead of touching the missing buffers.
 */
static void check_add_int16(HuffYUVDSPContext c, unsigned mask, int width, const char * name)
{
    uint16_t *src0 = av_mallocz(width * sizeof(uint16_t));
    uint16_t *src1 = av_mallocz(width * sizeof(uint16_t));
    uint16_t *dst0 = av_mallocz(width * sizeof(uint16_t));
    uint16_t *dst1 = av_mallocz(width * sizeof(uint16_t));

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint16_t *dst, uint16_t *src, unsigned mask, int w);

    if (!src0 || !src1 || !dst0 || !dst1) {
        fail();
        goto end;
    }

    randomize_buffers(src0, width);
    memcpy(src1, src0, width * sizeof(uint16_t));

    if (check_func(c.add_int16, "%s", name)) {
        call_ref(dst0, src0, mask, width);
        call_new(dst1, src1, mask, width);
        if (memcmp(dst0, dst1, width * sizeof(uint16_t)))
            fail();
        bench_new(dst1, src1, mask, width);
    }

end:
    /* av_free() accepts NULL, so partially failed allocations are fine here */
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
/* Entry point of the huffyuvdsp checks: add_int16 is exercised once with a
 * randomly chosen width and once with a fixed width. */
void checkasm_check_huffyuvdsp(void)
{
    HuffYUVDSPContext dsp;
    /* Random width; always 16 times a value clipped to [16, 128]. */
    const int random_width = 16 * av_clip(rnd(), 16, 128);
    /* Constant width so that benchmark numbers stay comparable. */
    const int fixed_width = 16*128;

    ff_huffyuvdsp_init(&dsp, AV_PIX_FMT_YUV422P);

    check_add_int16(dsp, 65535, random_width, "add_int16_rnd_width");
    report("add_int16_rnd_width");

    check_add_int16(dsp, 65535, fixed_width, "add_int16_128");
    report("add_int16_128");
}

103
externals/ffmpeg/tests/checkasm/jpeg2000dsp.c vendored Executable file
View File

@@ -0,0 +1,103 @@
/*
* Copyright (c) 2015 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "checkasm.h"
#include "libavcodec/jpeg2000dsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Number of samples per plane; every test buffer holds three such planes. */
#define BUF_SIZE 512
/* Fill the caller's local `src` buffer (BUF_SIZE*3 elements) with raw rnd()
 * values.  Relies on a variable named `src` being in scope at the call site. */
#define randomize_buffers() \
do { \
int i; \
for (i = 0; i < BUF_SIZE*3; i++) \
src[i] = rnd(); \
} while (0)
/* Float variant: each element of the caller's `src` becomes rnd() scaled
 * down by (UINT_MAX >> 5). */
#define randomize_buffers_float() \
do { \
int i; \
for (i = 0; i < BUF_SIZE*3; i++) \
src[i] = (float)rnd() / (UINT_MAX >> 5); \
} while (0)
/* Run the reference and the optimized integer transform on identical random
 * input and require byte-identical output in all three planes. */
static void check_rct_int(void)
{
    LOCAL_ALIGNED_32(int32_t, src, [BUF_SIZE*3]);
    LOCAL_ALIGNED_32(int32_t, ref, [BUF_SIZE*3]);
    LOCAL_ALIGNED_32(int32_t, new, [BUF_SIZE*3]);
    /* Each buffer holds three consecutive planes of BUF_SIZE samples. */
    int32_t *ref0 = ref;
    int32_t *ref1 = ref + BUF_SIZE;
    int32_t *ref2 = ref + BUF_SIZE * 2;
    int32_t *new0 = new;
    int32_t *new1 = new + BUF_SIZE;
    int32_t *new2 = new + BUF_SIZE * 2;
    const size_t plane_bytes = BUF_SIZE * sizeof(*src);
    const size_t total_bytes = BUF_SIZE * 3 * sizeof(*src);
    int differs;

    declare_func(void, void *src0, void *src1, void *src2, int csize);

    randomize_buffers();
    memcpy(ref, src, total_bytes);
    memcpy(new, src, total_bytes);

    /* The transform works in place on the three planes. */
    call_ref(ref0, ref1, ref2, BUF_SIZE);
    call_new(new0, new1, new2, BUF_SIZE);

    differs = memcmp(ref0, new0, plane_bytes) ||
              memcmp(ref1, new1, plane_bytes) ||
              memcmp(ref2, new2, plane_bytes);
    if (differs)
        fail();

    /* Restore the pristine input before benchmarking. */
    memcpy(new, src, total_bytes);
    bench_new(new0, new1, new2, BUF_SIZE);
}
/* Run the reference and the optimized float transform on identical random
 * input and require every output sample to agree within 1.0e-5. */
static void check_ict_float(void)
{
    LOCAL_ALIGNED_32(float, src, [BUF_SIZE*3]);
    LOCAL_ALIGNED_32(float, ref, [BUF_SIZE*3]);
    LOCAL_ALIGNED_32(float, new, [BUF_SIZE*3]);
    /* Each buffer holds three consecutive planes of BUF_SIZE samples. */
    float *ref0 = ref;
    float *ref1 = ref + BUF_SIZE;
    float *ref2 = ref + BUF_SIZE * 2;
    float *new0 = new;
    float *new1 = new + BUF_SIZE;
    float *new2 = new + BUF_SIZE * 2;
    const size_t total_bytes = BUF_SIZE * 3 * sizeof(*src);
    int all_close;

    declare_func(void, void *src0, void *src1, void *src2, int csize);

    randomize_buffers_float();
    memcpy(ref, src, total_bytes);
    memcpy(new, src, total_bytes);

    /* The transform works in place on the three planes. */
    call_ref(ref0, ref1, ref2, BUF_SIZE);
    call_new(new0, new1, new2, BUF_SIZE);

    all_close = float_near_abs_eps_array(ref0, new0, 1.0e-5, BUF_SIZE) &&
                float_near_abs_eps_array(ref1, new1, 1.0e-5, BUF_SIZE) &&
                float_near_abs_eps_array(ref2, new2, 1.0e-5, BUF_SIZE);
    if (!all_close)
        fail();

    /* Restore the pristine input before benchmarking. */
    memcpy(new, src, total_bytes);
    bench_new(new0, new1, new2, BUF_SIZE);
}
/* Entry point of the jpeg2000dsp checks: tests both mct_decode variants and
 * reports them under a single "mct_decode" heading. */
void checkasm_check_jpeg2000dsp(void)
{
    Jpeg2000DSPContext dsp;

    ff_jpeg2000dsp_init(&dsp);

    /* Integer variant, stored in the FF_DWT53 slot. */
    if (check_func(dsp.mct_decode[FF_DWT53], "jpeg2000_rct_int")) {
        check_rct_int();
    }

    /* Float variant, stored in the FF_DWT97 slot. */
    if (check_func(dsp.mct_decode[FF_DWT97], "jpeg2000_ict_float")) {
        check_ict_float();
    }

    report("mct_decode");
}

220
externals/ffmpeg/tests/checkasm/llviddsp.c vendored Executable file
View File

@@ -0,0 +1,220 @@
/*
* Copyright (c) 2016 Alexandra Hájková
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavcodec/lossless_videodsp.h"
#include "checkasm.h"
/* Fill `size` bytes starting at `buf` with random bytes from rnd().
 * `buf` is reinterpreted as a byte pointer, so `size` is a byte count
 * regardless of the element type of `buf`. */
#define randomize_buffers(buf, size) \
do { \
int j; \
uint8_t *tmp_buf = (uint8_t *)buf;\
for (j = 0; j < size; j++) \
tmp_buf[j] = rnd() & 0xFF; \
} while (0)
/*
 * Prepare a pair of freshly allocated source buffers: flag a failure if
 * either pointer is NULL, otherwise fill a0 with `width` random elements of
 * `type` and copy them into a1 so both hold identical data.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement,
 * and deliberately not ending in a line-continuation backslash, which would
 * splice whatever follows the macro into its body.
 */
#define init_buffer(a0, a1, type, width)                        \
    do {                                                        \
        if (!(a0) || !(a1)) {                                   \
            fail();                                             \
        } else {                                                \
            randomize_buffers(a0, (width) * sizeof(type));      \
            memcpy(a1, a0, (width) * sizeof(type));             \
        }                                                       \
    } while (0)
/**
 * Compare the reference and the optimized add_bytes implementation.
 *
 * @param c     DSP context whose add_bytes pointer is checked
 * @param width number of bytes to process
 */
static void check_add_bytes(LLVidDSPContext c, int width)
{
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t w);

    if (!dst0 || !dst1 || !src0 || !src1) {
        /* Record the failure and, should fail() return, skip to the cleanup
         * rather than handing NULL buffers to the code below. */
        fail();
        goto end;
    }

    /* Random source data, identical for both implementations. */
    init_buffer(src0, src1, uint8_t, width);

    if (check_func(c.add_bytes, "add_bytes")) {
        call_ref(dst0, src0, width);
        call_new(dst1, src1, width);
        if (memcmp(dst0, dst1, width))
            fail();
        bench_new(dst1, src1, width);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
/**
 * Compare the reference and the optimized add_median_pred implementation,
 * including the left / left_top values they update through pointers.
 *
 * @param c     DSP context whose add_median_pred pointer is checked
 * @param width number of bytes to process
 */
static void check_add_median_pred(LLVidDSPContext c, int width) {
    int A0, A1, B0, B1;
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *diff0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *diff1 = av_mallocz_array(width, sizeof(uint8_t));

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
                      const uint8_t *diff, ptrdiff_t w,
                      int *left, int *left_top);

    /* The destination buffers are checked here as well, matching the other
     * check_* functions of this file. */
    if (!dst0 || !dst1 || !src0 || !src1 || !diff0 || !diff1) {
        /* Should fail() return, skip to the cleanup rather than handing
         * NULL buffers to the code below. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, width);
    init_buffer(diff0, diff1, uint8_t, width);

    /* Identical random start values for both implementations. */
    A0 = rnd() & 0xFF;
    B0 = rnd() & 0xFF;
    A1 = A0;
    B1 = B0;

    if (check_func(c.add_median_pred, "add_median_pred")) {
        call_ref(dst0, src0, diff0, width, &A0, &B0);
        call_new(dst1, src1, diff1, width, &A1, &B1);
        if (memcmp(dst0, dst1, width) || (A0 != A1) || (B0 != B1))
            fail();
        bench_new(dst1, src1, diff1, width, &A1, &B1);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(diff0);
    av_free(diff1);
    av_free(dst0);
    av_free(dst1);
}
/**
 * Compare the reference and the optimized add_left_pred implementation.
 *
 * @param c      DSP context whose add_left_pred pointer is checked
 * @param width  number of bytes to process
 * @param acc    accumulator start value forwarded to both implementations
 * @param report name under which the function is registered with check_func()
 */
static void check_add_left_pred(LLVidDSPContext c, int width, int acc, const char * report)
{
    int res0, res1;
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));

    declare_func_emms(AV_CPU_FLAG_MMX, int, uint8_t *dst, uint8_t *src, ptrdiff_t w, int acc);

    if (!dst0 || !dst1 || !src0 || !src1) {
        /* Record the failure and, should fail() return, skip to the cleanup
         * rather than handing NULL buffers to the code below. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, width);

    if (check_func(c.add_left_pred, "%s", report)) {
        res0 = call_ref(dst0, src0, width, acc);
        res1 = call_new(dst1, src1, width, acc);
        /* Only the low 8 bits of the returned accumulator are compared. */
        if ((res0 & 0xFF) != (res1 & 0xFF) ||
            memcmp(dst0, dst1, width))
            fail();
        bench_new(dst1, src1, width, acc);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
/**
 * Compare the reference and the optimized add_left_pred_int16 implementation.
 *
 * @param c      DSP context whose add_left_pred_int16 pointer is checked
 * @param mask   mask value forwarded unchanged to both implementations
 * @param width  number of 16-bit samples to process
 * @param acc    accumulator start value forwarded to both implementations
 * @param report name under which the function is registered with check_func()
 */
static void check_add_left_pred_16(LLVidDSPContext c, unsigned mask, int width, unsigned acc, const char * report)
{
    int res0, res1;
    uint16_t *dst0 = av_mallocz_array(width, sizeof(uint16_t));
    uint16_t *dst1 = av_mallocz_array(width, sizeof(uint16_t));
    uint16_t *src0 = av_mallocz_array(width, sizeof(uint16_t));
    uint16_t *src1 = av_mallocz_array(width, sizeof(uint16_t));

    declare_func_emms(AV_CPU_FLAG_MMX, int, uint16_t *dst, uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);

    if (!dst0 || !dst1 || !src0 || !src1) {
        /* Record the failure and, should fail() return, skip to the cleanup
         * rather than handing NULL buffers to the code below. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint16_t, width);

    if (check_func(c.add_left_pred_int16, "%s", report)) {
        res0 = call_ref(dst0, src0, mask, width, acc);
        res1 = call_new(dst1, src1, mask, width, acc);
        /* Only the low 16 bits of the returned accumulator are compared.
         * The buffers hold `width` 16-bit samples, so the comparison has to
         * span width * sizeof(uint16_t) bytes to cover the whole output. */
        if ((res0 & 0xFFFF) != (res1 & 0xFFFF) ||
            memcmp(dst0, dst1, width * sizeof(*dst0)))
            fail();
        bench_new(dst1, src1, mask, width, acc);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
/**
 * Compare the reference and the optimized add_gradient_pred implementation.
 * Both work in place on a two-line buffer; the function is pointed at the
 * second line, 32 bytes past its start.
 *
 * @param c DSP context whose add_gradient_pred pointer is checked
 * @param w width passed to the function under test
 */
static void check_add_gradient_pred(LLVidDSPContext c, int w) {
    int src_size, stride;
    uint8_t *src0, *src1;

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *src, const ptrdiff_t stride,
                      const ptrdiff_t width);

    stride = w + 32;
    src_size = (stride + 32) * 2; /* dsp need previous line, and ignore the start of the line */
    src0 = av_mallocz(src_size);
    src1 = av_mallocz(src_size);

    if (!src0 || !src1) {
        /* Record the failure and, should fail() return, skip to the cleanup
         * rather than handing NULL buffers to the code below. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, src_size);

    if (check_func(c.add_gradient_pred, "add_gradient_pred")) {
        call_ref(src0 + stride + 32, stride, w);
        call_new(src1 + stride + 32, stride, w);
        if (memcmp(src0, src1, stride)||/* previous line doesn't change */
            memcmp(src0+stride, src1 + stride, w + 32)) {
            fail();
        }
        bench_new(src1 + stride + 32, stride, w);
    }

end:
    av_free(src0);
    av_free(src1);
}
/* Entry point of the llviddsp checks: runs every check_* helper of this file
 * with one shared random width and reports each under its own name. */
void checkasm_check_llviddsp(void)
{
    LLVidDSPContext dsp;
    /* Random width; always 16 times a value clipped to [16, 128]. */
    const int width = 16 * av_clip(rnd(), 16, 128);
    /* Random 8-bit accumulator start value. */
    const int random_acc = rnd() & 0xFF;

    ff_llviddsp_init(&dsp);

    check_add_bytes(dsp, width);
    report("add_bytes");

    check_add_median_pred(dsp, width);
    report("add_median_pred");

    /* add_left_pred: once with a zero and once with a random accumulator. */
    check_add_left_pred(dsp, width, 0, "add_left_pred_zero");
    report("add_left_pred_zero");

    check_add_left_pred(dsp, width, random_acc, "add_left_pred_rnd_acc");
    report("add_left_pred_rnd_acc");

    check_add_left_pred_16(dsp, 255, width, random_acc, "add_left_pred_int16");
    report("add_left_pred_int16");

    check_add_gradient_pred(dsp, width);
    report("add_gradient_pred");
}

114
externals/ffmpeg/tests/checkasm/llviddspenc.c vendored Executable file
View File

@@ -0,0 +1,114 @@
/*
* Copyright (c) 2016 Alexandra Hájková
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavcodec/lossless_videoencdsp.h"
#include "checkasm.h"
/* Fill `buf` with rnd() values, one AV_WN32 store every 4 bytes up to
 * `size`.  The loop advances in steps of 4, so `size` is expected to be a
 * multiple of 4 to stay inside the buffer. */
#define randomize_buffers(buf, size) \
do { \
int j; \
for (j = 0; j < size; j+=4) \
AV_WN32(buf + j, rnd()); \
} while (0)
/* Test geometries.  check_sub_left_pred() passes s, w and h as the stride,
 * width and height arguments; check_diff_bytes() only uses w. */
static const struct {uint8_t w, h, s;} planes[] = {
{16,16,16}, {21,23,25}, {32,17,48}, {15,128,16}, {128,127,128}
};
/* Upper bounds used to size the test buffers below. */
#define MAX_STRIDE 128
#define MAX_HEIGHT 127
/* Compare the reference and the optimized diff_bytes implementation for the
 * width of every entry of planes[]. */
static void check_diff_bytes(LLVidEncDSPContext *c)
{
    const int nb_planes = sizeof(planes) / sizeof(planes[0]);
    int idx;
    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src2, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src3, [MAX_STRIDE]);

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
                      const uint8_t *src2, intptr_t w);

    memset(dst0, 0, MAX_STRIDE);
    memset(dst1, 0, MAX_STRIDE);

    /* src0/src1 and src2/src3 are identical pairs, one for each
     * implementation. */
    randomize_buffers(src0, MAX_STRIDE);
    memcpy(src1, src0, MAX_STRIDE);
    randomize_buffers(src2, MAX_STRIDE);
    memcpy(src3, src2, MAX_STRIDE);

    if (check_func(c->diff_bytes, "diff_bytes")) {
        for (idx = 0; idx < nb_planes; idx++) {
            const int w = planes[idx].w;

            call_ref(dst0, src0, src2, w);
            call_new(dst1, src1, src3, w);
            if (memcmp(dst0, dst1, w) != 0)
                fail();
        }
        /* Benchmark with the width of the last table entry. */
        bench_new(dst1, src0, src2, planes[nb_planes - 1].w);
    }
}
/* Compare the reference and the optimized sub_left_predict implementation
 * for every geometry listed in planes[]. */
static void check_sub_left_pred(LLVidEncDSPContext *c)
{
    int i;
    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE * MAX_HEIGHT]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE * MAX_HEIGHT]);
    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT]);
    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT]);

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src,
                      ptrdiff_t stride, ptrdiff_t width, int height);

    memset(dst0, 0, MAX_STRIDE * MAX_HEIGHT);
    memset(dst1, 0, MAX_STRIDE * MAX_HEIGHT);

    /* Identical random input for both implementations. */
    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT);

    if (check_func(c->sub_left_predict, "sub_left_predict")) {
        for (i = 0; i < 5; i ++) {
            call_ref(dst0, src0, planes[i].s, planes[i].w, planes[i].h);
            call_new(dst1, src1, planes[i].s, planes[i].w, planes[i].h);
            if (memcmp(dst0, dst1, planes[i].w * planes[i].h)) {
                fail();
                /* Stop at the first mismatch only; leaving the loop
                 * unconditionally would test just the first geometry. */
                break;
            }
        }
        /* Benchmark with the last geometry of the table. */
        bench_new(dst1, src0, planes[4].s, planes[4].w, planes[4].h);
    }
}
/* Entry point of the llviddspenc checks: tests diff_bytes and
 * sub_left_predict and reports each under its own name. */
void checkasm_check_llviddspenc(void)
{
    LLVidEncDSPContext dsp;
    LLVidEncDSPContext *const ctx = &dsp;

    ff_llvidencdsp_init(ctx);

    check_diff_bytes(ctx);
    report("diff_bytes");

    check_sub_left_pred(ctx);
    report("sub_left_predict");
}

103
externals/ffmpeg/tests/checkasm/opusdsp.c vendored Executable file
View File

@@ -0,0 +1,103 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavcodec/opusdsp.h"
#include "checkasm.h"
/* Fill buf[0..len-1] with floats computed as rnd() / (UINT_MAX >> 5) - 16. */
#define randomize_float(buf, len) \
do { \
for (int i = 0; i < len; i++) { \
float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f; \
buf[i] = f; \
} \
} while (0)
/* Absolute tolerance used when comparing reference and optimized output. */
#define EPS 0.005
/* Number of samples processed by each test. */
#define MAX_SIZE (960)
/* Compare the reference and the optimized postfilter for one period.
 * period is between 15 and 1022, inclusive. */
static void test_postfilter(int period)
{
    LOCAL_ALIGNED(16, float, data0, [MAX_SIZE + 1024]);
    LOCAL_ALIGNED(16, float, data1, [MAX_SIZE + 1024]);

    /* This filter can explode very easily, so use a tapset from the codec.
     * In the codec these are usually multiplied by at least 0.09375f,
     * so it's outside the largest filter value, but the filter is still
     * stable, so use it. */
    float gains[3] = { 0.3066406250f, 0.2170410156f, 0.1296386719f };

    /* The codec will always call with an offset which is aligned once
     * (period + 2) is subtracted, but here we have to align it ourselves. */
    const int offset = FFALIGN(period + 2, 4);
    const int total_len = MAX_SIZE + 1024;
    float *ref_data, *new_data;

    declare_func(void, float *data, int period, float *gains, int len);

    /* Same random samples in both buffers. */
    randomize_float(data0, total_len);
    memcpy(data1, data0, total_len * sizeof(float));

    ref_data = data0 + offset;
    new_data = data1 + offset;

    call_ref(ref_data, period, gains, MAX_SIZE);
    call_new(new_data, period, gains, MAX_SIZE);

    if (!float_near_abs_eps_array(ref_data, new_data, EPS, MAX_SIZE))
        fail();

    bench_new(new_data, period, gains, MAX_SIZE);
}
/* Compare the reference and the optimized deemphasis filter: both the
 * filtered samples and the returned coefficient have to agree within EPS. */
static void test_deemphasis(void)
{
    LOCAL_ALIGNED(16, float, src, [FFALIGN(MAX_SIZE, 4)]);
    LOCAL_ALIGNED(16, float, dst0, [FFALIGN(MAX_SIZE, 4)]);
    LOCAL_ALIGNED(16, float, dst1, [FFALIGN(MAX_SIZE, 4)]);
    /* One random start coefficient shared by both implementations. */
    const float initial_coeff = (float)rnd() / (UINT_MAX >> 5) - 16.0f;
    float coeff0 = initial_coeff;
    float coeff1 = initial_coeff;
    int outputs_match;

    declare_func_float(float, float *out, float *in, float coeff, int len);

    randomize_float(src, MAX_SIZE);

    /* Each call returns a coefficient, which replaces the start value. */
    coeff0 = call_ref(dst0, src, coeff0, MAX_SIZE);
    coeff1 = call_new(dst1, src, coeff1, MAX_SIZE);

    outputs_match = float_near_abs_eps(coeff0, coeff1, EPS) &&
                    float_near_abs_eps_array(dst0, dst1, EPS, MAX_SIZE);
    if (!outputs_match)
        fail();

    bench_new(dst1, src, coeff1, MAX_SIZE);
}
/* Entry point of the opusdsp checks: the postfilter is tested with three
 * different periods, followed by the deemphasis filter. */
void checkasm_check_opusdsp(void)
{
    OpusDSP dsp;

    ff_opus_dsp_init(&dsp);

    /* Smallest period accepted by test_postfilter(). */
    if (check_func(dsp.postfilter, "postfilter_15")) {
        test_postfilter(15);
    }
    report("postfilter_15");

    if (check_func(dsp.postfilter, "postfilter_512")) {
        test_postfilter(512);
    }
    report("postfilter_512");

    /* Largest period accepted by test_postfilter(). */
    if (check_func(dsp.postfilter, "postfilter_1022")) {
        test_postfilter(1022);
    }
    report("postfilter_1022");

    if (check_func(dsp.deemphasis, "deemphasis")) {
        test_deemphasis();
    }
    report("deemphasis");
}

111
externals/ffmpeg/tests/checkasm/pixblockdsp.c vendored Executable file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2015 Tiancheng "Timothy" Gu
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/pixblockdsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Number of block positions tested per function. */
#define BUF_UNITS 8
/* Size in bytes of every source and destination test buffer. */
#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS)
/* Refill all six test buffers of the caller, 4 bytes at a time.  Each pair
 * (src10/src11, src20/src21, dst0_/dst1_) receives the same random word, so
 * the two buffers of a pair start out identical.  Relies on variables with
 * exactly these names being in scope at the call site. */
#define randomize_buffers() \
do { \
int i; \
for (i = 0; i < BUF_SIZE; i += 4) { \
uint32_t r = rnd(); \
AV_WN32A(src10 + i, r); \
AV_WN32A(src11 + i, r); \
r = rnd(); \
AV_WN32A(src20 + i, r); \
AV_WN32A(src21 + i, r); \
r = rnd(); \
AV_WN32A(dst0_ + i, r); \
AV_WN32A(dst1_ + i, r); \
} \
} while (0)
/* Compare the reference and the optimized get_pixels-style function at
 * BUF_UNITS different offsets, always with a line size of 8.
 *   type    - element type used to scale the source offset
 *   aligned - non-zero: the source offset grows by an extra 8 bytes per
 *             iteration; zero: by an extra 1 byte per iteration
 * After each call the complete source and destination buffers are compared,
 * so writes outside the intended block are detected too.  Uses the caller's
 * src10, src11, dst0 and dst1 variables, and the buffers refilled by
 * randomize_buffers(). */
#define check_get_pixels(type, aligned) \
do { \
int i; \
declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \
\
for (i = 0; i < BUF_UNITS; i++) { \
int src_offset = i * 64 * sizeof(type) + (aligned ? 8 : 1) * i; \
int dst_offset = i * 64; /* dst must be aligned */ \
randomize_buffers(); \
call_ref(dst0 + dst_offset, src10 + src_offset, 8); \
call_new(dst1 + dst_offset, src11 + src_offset, 8); \
if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) \
fail(); \
bench_new(dst1 + dst_offset, src11 + src_offset, 8); \
} \
} while (0)
/* Compare the reference and the optimized diff_pixels-style function at
 * BUF_UNITS different offsets, always with a stride of 8.
 *   type    - element type used to scale the source offset
 *   aligned - non-zero: the source offset grows by an extra 8 bytes per
 *             iteration; zero: by an extra 1 byte per iteration
 * After each call both source pairs and the destination pair are compared
 * over their full size.  Uses the caller's src10, src11, src20, src21, dst0
 * and dst1 variables, and the buffers refilled by randomize_buffers(). */
#define check_diff_pixels(type, aligned) \
do { \
int i; \
declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
\
for (i = 0; i < BUF_UNITS; i++) { \
int src_offset = i * 64 * sizeof(type) + (aligned ? 8 : 1) * i; \
int dst_offset = i * 64; /* dst must be aligned */ \
randomize_buffers(); \
call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8); \
call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8); \
if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
fail(); \
bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8); \
} \
} while (0)
/* Entry point of the pixblockdsp checks.  The buffer names below are fixed:
 * the randomize_buffers(), check_get_pixels() and check_diff_pixels() macros
 * refer to them directly. */
void checkasm_check_pixblockdsp(void)
{
    LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
    LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
    /* 16-bit views of the destination byte buffers. */
    uint16_t *dst0 = (uint16_t *)dst0_;
    uint16_t *dst1 = (uint16_t *)dst1_;
    PixblockDSPContext dsp;
    AVCodecContext avctx = { .bits_per_raw_sample = 8 };

    ff_pixblockdsp_init(&dsp, &avctx);

    if (check_func(dsp.get_pixels, "get_pixels")) {
        check_get_pixels(uint8_t, 1);
    }
    if (check_func(dsp.get_pixels_unaligned, "get_pixels_unaligned")) {
        check_get_pixels(uint8_t, 0);
    }
    report("get_pixels");

    if (check_func(dsp.diff_pixels, "diff_pixels")) {
        check_diff_pixels(uint8_t, 1);
    }
    if (check_func(dsp.diff_pixels_unaligned, "diff_pixels_unaligned")) {
        check_diff_pixels(uint8_t, 0);
    }
    report("diff_pixels");
}

300
externals/ffmpeg/tests/checkasm/sbrdsp.c vendored Executable file
View File

@@ -0,0 +1,300 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavcodec/sbrdsp.h"
#include <float.h>
#include "checkasm.h"
/* Fill buf[0..len-1] with values computed as rnd() / UINT_MAX. */
#define randomize(buf, len) do { \
int i; \
for (i = 0; i < len; i++) { \
const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX; \
(buf)[i] = f; \
} \
} while (0)
/* Absolute tolerance used by most comparisons in this file. */
#define EPS 0.0001
/* sum64x5 works in place: run both implementations on identical random data
 * and compare the whole buffer afterwards. */
static void test_sum64x5(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
    const int count = 64 + 256;

    declare_func(void, INTFLOAT *z);

    randomize((INTFLOAT *)dst0, count);
    memcpy(dst1, dst0, count * sizeof(INTFLOAT));

    call_ref(dst0);
    call_new(dst1);

    if (!float_near_abs_eps_array(dst0, dst1, EPS, count))
        fail();

    bench_new(dst1);
}
static void test_sum_square(void)
{
INTFLOAT res0;
INTFLOAT res1;
LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
double t = 4 * 256;
declare_func_float(INTFLOAT, INTFLOAT (*x)[2], int n);
randomize((INTFLOAT *)src, 256 * 2);
res0 = call_ref(src, 256);
res1 = call_new(src, 256);
if (!float_near_abs_eps(res0, res1, t * 2 * FLT_EPSILON))
fail();
bench_new(src, 256);
}
/* neg_odd_64 works in place: run both implementations on identical random
 * data and compare all 64 values afterwards. */
static void test_neg_odd_64(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
    const int count = 64;

    declare_func(void, INTFLOAT *x);

    randomize((INTFLOAT *)dst0, count);
    memcpy(dst1, dst0, count * sizeof(INTFLOAT));

    call_ref(dst0);
    call_new(dst1);

    if (!float_near_abs_eps_array(dst0, dst1, EPS, count))
        fail();

    bench_new(dst1);
}
/* qmf_pre_shuffle works in place: run both implementations on identical
 * random data and compare all 128 values afterwards. */
static void test_qmf_pre_shuffle(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
    const int count = 128;

    declare_func(void, INTFLOAT *z);

    randomize((INTFLOAT *)dst0, count);
    memcpy(dst1, dst0, count * sizeof(INTFLOAT));

    call_ref(dst0);
    call_new(dst1);

    if (!float_near_abs_eps_array(dst0, dst1, EPS, count))
        fail();

    bench_new(dst1);
}
static void test_qmf_post_shuffle(void)
{
LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
randomize((INTFLOAT *)src, 64);
call_ref(dst0, src);
call_new(dst1, src);
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
fail();
bench_new(dst1, src);
}
/* Feed the same 64 random inputs to both qmf_deint_neg implementations and
 * compare their 64 outputs. */
static void test_qmf_deint_neg(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
    const int count = 64;

    declare_func(void, INTFLOAT *v, const INTFLOAT *src);

    randomize((INTFLOAT *)src, count);

    call_ref(dst0, src);
    call_new(dst1, src);

    if (!float_near_abs_eps_array(dst0, dst1, EPS, count))
        fail();

    bench_new(dst1, src);
}
/* Feed two random 64-value inputs to both qmf_deint_bfly implementations and
 * compare their 128 outputs. */
static void test_qmf_deint_bfly(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
    const int in_count  = 64;
    const int out_count = 128;

    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);

    /* Start from zeroed outputs. */
    memset(dst0, 0, out_count * sizeof(INTFLOAT));
    memset(dst1, 0, out_count * sizeof(INTFLOAT));

    randomize((INTFLOAT *)src0, in_count);
    randomize((INTFLOAT *)src1, in_count);

    call_ref(dst0, src0, src1);
    call_new(dst1, src0, src1);

    if (!float_near_abs_eps_array(dst0, dst1, EPS, out_count))
        fail();

    bench_new(dst1, src0, src1);
}
/* Feed the same 40x2 random input to both autocorrelate implementations and
 * compare their 3x2x2 outputs. */
static void test_autocorrelate(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
    const int out_count = 3 * 2 * 2;

    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);

    /* Start from zeroed outputs. */
    memset(dst0, 0, out_count * sizeof(INTFLOAT));
    memset(dst1, 0, out_count * sizeof(INTFLOAT));

    randomize((INTFLOAT *)src, 80);

    call_ref(src, dst0);
    call_new(src, dst1);

    /* Compare the outputs as flat arrays. */
    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, out_count))
        fail();

    bench_new(src, dst1);
}
/* Compare the reference and the optimized hf_gen implementation for every
 * even start index in [2, 62], always with an end index of 128. */
static void test_hf_gen(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
    /* Random bw argument, computed as rnd() / UINT_MAX. */
    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
    int i;

    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
                 const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
                 INTFLOAT bw, int start, int end);

    randomize((INTFLOAT *)low, 128 * 2);
    randomize((INTFLOAT *)alpha0, 2);
    randomize((INTFLOAT *)alpha1, 2);

    for (i = 2; i < 64; i += 2) {
        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));

        /* Use the random bw for the compared calls as well, not only for
         * the benchmark; a constant 0.0 would leave the randomised value
         * out of the comparison altogether. */
        call_ref(dst0, low, alpha0, alpha1, bw, i, 128);
        call_new(dst1, low, alpha0, alpha1, bw, i, 128);

        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
            fail();

        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
    }
}
static void test_hf_g_filt(void)
{
LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
const INTFLOAT *g_filt, int m_max, intptr_t ixh);
randomize((INTFLOAT *)high, 128 * 40 * 2);
randomize((INTFLOAT *)g_filt, 128);
call_ref(dst0, high, g_filt, 128, 20);
call_new(dst1, high, g_filt, 128, 20);
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
fail();
bench_new(dst1, high, g_filt, 128, 20);
}
/* Check all four hf_apply_noise variants of the context.  Each variant is
 * run with kx = 0 and kx = 1 on fresh copies of the same random data. */
static void test_hf_apply_noise(const SBRDSPContext *sbrdsp)
{
    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
    const int noise = 0x2a;
    int variant, kx;

    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
                 const AAC_FLOAT *q_filt, int noise,
                 int kx, int m_max);

    randomize((INTFLOAT *)ref, 128 * 2);
    randomize((INTFLOAT *)s_m, 128);
    randomize((INTFLOAT *)q_filt, 128);

    for (variant = 0; variant < 4; variant++) {
        if (check_func(sbrdsp->hf_apply_noise[variant], "hf_apply_noise_%d", variant)) {
            for (kx = 0; kx < 2; kx++) {
                /* The function modifies Y in place, so restart from the
                 * untouched reference data every time. */
                memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
                memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));

                call_ref(dst0, s_m, q_filt, noise, kx, 128);
                call_new(dst1, s_m, q_filt, noise, kx, 128);

                if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
                    fail();

                bench_new(dst1, s_m, q_filt, noise, kx, 128);
            }
        }
    }
}
/* Entry point of the sbrdsp checks: every function pointer of the context is
 * tested by its test_* helper and reported under its own name. */
void checkasm_check_sbrdsp(void)
{
    SBRDSPContext dsp;

    ff_sbrdsp_init(&dsp);

    if (check_func(dsp.sum64x5, "sum64x5")) {
        test_sum64x5();
    }
    report("sum64x5");

    if (check_func(dsp.sum_square, "sum_square")) {
        test_sum_square();
    }
    report("sum_square");

    if (check_func(dsp.neg_odd_64, "neg_odd_64")) {
        test_neg_odd_64();
    }
    report("neg_odd_64");

    if (check_func(dsp.qmf_pre_shuffle, "qmf_pre_shuffle")) {
        test_qmf_pre_shuffle();
    }
    report("qmf_pre_shuffle");

    if (check_func(dsp.qmf_post_shuffle, "qmf_post_shuffle")) {
        test_qmf_post_shuffle();
    }
    report("qmf_post_shuffle");

    if (check_func(dsp.qmf_deint_neg, "qmf_deint_neg")) {
        test_qmf_deint_neg();
    }
    report("qmf_deint_neg");

    if (check_func(dsp.qmf_deint_bfly, "qmf_deint_bfly")) {
        test_qmf_deint_bfly();
    }
    report("qmf_deint_bfly");

    if (check_func(dsp.autocorrelate, "autocorrelate")) {
        test_autocorrelate();
    }
    report("autocorrelate");

    if (check_func(dsp.hf_gen, "hf_gen")) {
        test_hf_gen();
    }
    report("hf_gen");

    if (check_func(dsp.hf_g_filt, "hf_g_filt")) {
        test_hf_g_filt();
    }
    report("hf_g_filt");

    /* hf_apply_noise has several variants; the helper calls check_func()
     * for each of them itself. */
    test_hf_apply_noise(&dsp);
    report("hf_apply_noise");
}

206
externals/ffmpeg/tests/checkasm/sw_rgb.c vendored Executable file
View File

@@ -0,0 +1,206 @@
/*
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libswscale/rgb2rgb.h"
#include "checkasm.h"
/* Fill `buf` with rnd() values, one AV_WN32 store every 4 bytes up to
 * `size`.  The loop advances in steps of 4, so `size` is expected to be a
 * multiple of 4 to stay inside the buffer. */
#define randomize_buffers(buf, size) \
do { \
int j; \
for (j = 0; j < size; j+=4) \
AV_WN32(buf + j, rnd()); \
} while (0)
/* Sizes passed as src_size by check_shuffle_bytes(). */
static const uint8_t width[] = {12, 16, 20, 32, 36, 128};
/* Test geometries; check_uyvy_to_422p() passes w, h and s as the width,
 * height and srcStride arguments. */
static const struct {uint8_t w, h, s;} planes[] = {
{12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128}
};
/* Upper bounds used to size the test buffers below. */
#define MAX_STRIDE 128
#define MAX_HEIGHT 128
/**
 * Compare the reference and the optimized version of one shuffle_bytes
 * function for every size listed in width[].
 *
 * @param func   function pointer to check
 * @param report name under which the function is registered with check_func()
 */
static void check_shuffle_bytes(void * func, const char * report)
{
    const int nb_widths = sizeof(width) / sizeof(width[0]);
    int idx;
    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);

    declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *src, uint8_t *dst, int src_size);

    memset(dst0, 0, MAX_STRIDE);
    memset(dst1, 0, MAX_STRIDE);

    /* Identical random input for both implementations. */
    randomize_buffers(src0, MAX_STRIDE);
    memcpy(src1, src0, MAX_STRIDE);

    if (check_func(func, "%s", report)) {
        for (idx = 0; idx < nb_widths; idx++) {
            call_ref(src0, dst0, width[idx]);
            call_new(src1, dst1, width[idx]);
            /* The whole destination is compared, not just width[idx]
             * bytes. */
            if (memcmp(dst0, dst1, MAX_STRIDE) != 0)
                fail();
        }
        /* Benchmark with the last size of the table. */
        bench_new(src0, dst0, width[nb_widths - 1]);
    }
}
static void check_uyvy_to_422p(void)
{
int i;
LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]);
LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]);
LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]);
LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]);
LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
const uint8_t *src, int width, int height,
int lumStride, int chromStride, int srcStride);
randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT * 2);
memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2);
if (check_func(uyvytoyuv422, "uyvytoyuv422")) {
for (i = 0; i < 6; i ++) {
memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT);
memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT);
memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h,
MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h,
MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) ||
memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) ||
memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT))
fail();
}
bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h,
MAX_STRIDE, MAX_STRIDE / 2, planes[5].s);
}
}
/* Compare the reference and the optimized interleaveBytes implementation for
 * 17 width/height combinations and several sign combinations of the three
 * strides, then benchmark it once with unaligned and once with aligned
 * buffers. */
static void check_interleave_bytes(void)
{
LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
LOCAL_ALIGNED_16(uint8_t, src1_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
LOCAL_ALIGNED_16(uint8_t, dst0_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
LOCAL_ALIGNED_16(uint8_t, dst1_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
// Intentionally using unaligned buffers, as this function doesn't have
// any alignment requirements.
uint8_t *src0 = src0_buf + 1;
uint8_t *src1 = src1_buf + 1;
uint8_t *dst0 = dst0_buf + 2;
uint8_t *dst1 = dst1_buf + 2;
declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *, const uint8_t *,
uint8_t *, int, int, int, int, int);
randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
randomize_buffers(src1, MAX_STRIDE * MAX_HEIGHT);
if (check_func(interleaveBytes, "interleave_bytes")) {
for (int i = 0; i <= 16; i++) {
// Try all widths [1,16], and try one random width.
int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2)));
int h = 1 + (rnd() % (MAX_HEIGHT-2));
int src0_offset = 0, src0_stride = MAX_STRIDE;
int src1_offset = 0, src1_stride = MAX_STRIDE;
int dst_offset = 0, dst_stride = 2 * MAX_STRIDE;
memset(dst0, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
memset(dst1, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
// Try different combinations of negative strides
// (bits 0..2 of i select which of the three strides is negated; a
// negated stride starts at the last row, so the same rows are used)
if (i & 1) {
src0_offset = (h-1)*src0_stride;
src0_stride = -src0_stride;
}
if (i & 2) {
src1_offset = (h-1)*src1_stride;
src1_stride = -src1_stride;
}
if (i & 4) {
dst_offset = (h-1)*dst_stride;
dst_stride = -dst_stride;
}
call_ref(src0 + src0_offset, src1 + src1_offset, dst0 + dst_offset,
w, h, src0_stride, src1_stride, dst_stride);
call_new(src0 + src0_offset, src1 + src1_offset, dst1 + dst_offset,
w, h, src0_stride, src1_stride, dst_stride);
// Check a one pixel-pair edge around the destination area,
// to catch overwrites past the end.
checkasm_check(uint8_t, dst0, 2*MAX_STRIDE, dst1, 2*MAX_STRIDE,
2 * w + 2, h + 1, "dst");
}
// Benchmark with the unaligned buffers and a width of 127.
bench_new(src0, src1, dst1, 127, MAX_HEIGHT,
MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE);
}
if (check_func(interleaveBytes, "interleave_bytes_aligned")) {
// Bench the function in a more typical case, with aligned
// buffers and widths.
bench_new(src0_buf, src1_buf, dst1_buf, 128, MAX_HEIGHT,
MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE);
}
}
/*
 * Entry point of the sw_rgb tests. Runs check_shuffle_bytes() for each of the
 * five shuffle_bytes_* variants, then check_uyvy_to_422p() and
 * check_interleave_bytes(), emitting one report() line after each of them.
 */
void checkasm_check_sw_rgb(void)
{
// presumably installs the function pointers tested below -- TODO confirm
ff_sws_rgb2rgb_init();
check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103");
report("shuffle_bytes_2103");
check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321");
report("shuffle_bytes_0321");
check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230");
report("shuffle_bytes_1230");
check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012");
report("shuffle_bytes_3012");
check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210");
report("shuffle_bytes_3210");
check_uyvy_to_422p();
report("uyvytoyuv422");
check_interleave_bytes();
report("interleave_bytes");
}

134
externals/ffmpeg/tests/checkasm/sw_scale.c vendored Executable file
View File

@@ -0,0 +1,134 @@
/*
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "checkasm.h"
/*
 * Fill `buf` with pseudo-random data, one 32-bit word per step, for byte
 * offsets 0, 4, 8, ... below `size`. When `size` is not a multiple of 4 the
 * last store runs up to 3 bytes past `size`, so the buffer must be padded to
 * the next multiple of 4.
 *
 * Both arguments are parenthesized so that compound expressions expand as
 * intended; each is evaluated more than once, so they must be free of side
 * effects.
 */
#define randomize_buffers(buf, size)          \
    do {                                      \
        int j;                                \
        for (j = 0; j < (size); j += 4)       \
            AV_WN32((buf) + j, rnd());        \
    } while (0)
// Number of output samples requested from the scaler in every call (passed
// as dstW); it also sizes the destination and filterPos arrays.
#define SRC_PIXELS 128
/*
 * Compare the reference and the tested horizontal scaler found in
 * ctx->hcScale for every combination of the bit-depth pairs in
 * hscale_pairs[] and the filter widths in filter_sizes[]. Both versions are
 * run on the same source, filter and filterPos data and must produce
 * byte-identical destination buffers.
 */
static void check_hscale(void)
{
#define MAX_FILTER_WIDTH 40
#define FILTER_SIZES 5
static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 };
#define HSCALE_PAIRS 2
// Each pair is { srcBpc, dstBpc } as assigned to the context below.
static const int hscale_pairs[HSCALE_PAIRS][2] = {
{ 8, 14 },
{ 8, 18 },
};
int i, j, fsi, hpi, width;
struct SwsContext *ctx;
// padded: the last filter window starts at SRC_PIXELS - 1 and spans up to
// MAX_FILTER_WIDTH samples; rounded up to a multiple of 4 because
// randomize_buffers() stores whole 32-bit words.
LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
// padded: MAX_FILTER_WIDTH extra coefficients follow the used part of the
// table (see the random fill below).
LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
// The dst parameter here is either int16_t or int32_t but we use void* to
// just cover both cases.
declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
// NOTE(review): ctx is not checked for NULL, and the test carries on after
// a failed sws_init_context() -- confirm this is acceptable here.
ctx = sws_alloc_context();
if (sws_init_context(ctx, NULL, NULL) < 0)
fail();
randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1);
for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
width = filter_sizes[fsi];
ctx->srcBpc = hscale_pairs[hpi][0];
ctx->dstBpc = hscale_pairs[hpi][1];
ctx->hLumFilterSize = ctx->hChrFilterSize = width;
for (i = 0; i < SRC_PIXELS; i++) {
// Output sample i reads its filter window starting at source sample i.
filterPos[i] = i;
// These filter coefficients are chosen to try to break two corner
// cases, namely:
//
// - Negative filter coefficients. The filters output signed
// values, and it should be possible to end up with negative
// output values.
//
// - Positive clipping. The hscale filter function has clipping
// at (1<<15) - 1
//
// The coefficients sum to the 1.0 point for the hscale
// functions (1 << 14).
for (j = 0; j < width; j++) {
filter[i * width + j] = -((1 << 14) / (width - 1));
}
// One randomly chosen tap per output sample gets the maximum positive value.
filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
}
for (i = 0; i < MAX_FILTER_WIDTH; i++) {
// These values should be unused in SIMD implementations but
// may still be read, random coefficients here should help show
// issues where they are used in error.
filter[SRC_PIXELS * width + i] = rnd();
}
// Called after the context fields above were set, before hcScale is read.
ff_getSwsFunc(ctx);
// NOTE(review): the reported name uses dstBpc + 1 (15 and 19 for the pairs
// above) -- confirm this is the intended label.
if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
fail();
bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
}
}
}
sws_freeContext(ctx);
}
/*
 * Entry point of the sw_scale tests: runs the horizontal scaler check and
 * reports the result under "hscale".
 */
void checkasm_check_sw_scale(void)
{
check_hscale();
report("hscale");
}

121
externals/ffmpeg/tests/checkasm/synth_filter.c vendored Executable file
View File

@@ -0,0 +1,121 @@
/*
* Copyright (c) 2015 Janne Grunau
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "libavutil/internal.h"
#include "libavutil/intfloat.h"
#include "libavcodec/dcadata.h"
#include "libavcodec/synth_filter.h"
#include "checkasm.h"
/* Element count of the input, output and buf2 test arrays. */
#define BUF_SIZE 32

/*
 * Fill the caller's `in` array (BUF_SIZE floats) with pseudo-random values:
 * each element is rnd() divided by (UINT_MAX >> 5), minus 16.
 */
#define randomize_input()                                         \
    do {                                                          \
        int k;                                                    \
        for (k = 0; k < BUF_SIZE; k++)                            \
            in[k] = (float)rnd() / (UINT_MAX >> 5) - 16.0f;       \
    } while (0)
/*
 * Test synth.synth_filter_float against its reference implementation.
 *
 * Both versions are called 20 times in a row on the same random input while
 * each keeps its own history buffer, buf2 buffer and offset, so state carried
 * between calls is covered as well. The window alternates between the
 * perfect and non-perfect 32-band FIR tables. After each call the offsets
 * must be equal and the out/buf2 arrays must match within the tolerance
 * given to float_near_abs_eps_ulp(). A third, separate set of buffers is used
 * for benchmarking so it does not disturb the compared state.
 */
void checkasm_check_synth_filter(void)
{
    FFTContext imdct;
    SynthFilterContext synth;

    ff_mdct_init(&imdct, 6, 1, 1.0);
    ff_synth_filter_init(&synth);

    if (check_func(synth.synth_filter_float, "synth_filter_float")) {
        LOCAL_ALIGNED(32, float, out0, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, out1, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, out_b, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, in, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, buf2_0, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, buf2_1, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, buf2_b, [BUF_SIZE]);
        LOCAL_ALIGNED(32, float, buf0, [512]);
        LOCAL_ALIGNED(32, float, buf1, [512]);
        LOCAL_ALIGNED(32, float, buf_b, [512]);
        float scale = 1.0f;
        int i, offset0 = 0, offset1 = 0, offset_b = 0;
        declare_func(void, FFTContext *, float *, int *, float[32], const float[512],
                     float[32], float[32], float);

        /* All persistent state starts out zeroed for every version. */
        memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE);
        memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE);
        memset(buf2_b, 0, sizeof(*buf2_b) * BUF_SIZE);
        memset(buf0,   0, sizeof(*buf0)   * 512);
        memset(buf1,   0, sizeof(*buf1)   * 512);
        memset(buf_b,  0, sizeof(*buf_b)  * 512);

        /* more than 1 synth_buf_offset wrap-around */
        for (i = 0; i < 20; i++) {
            int j;
            const float *window = (i & 1) ? ff_dca_fir_32bands_perfect
                                          : ff_dca_fir_32bands_nonperfect;

            memset(out0,  0, sizeof(*out0)  * BUF_SIZE);
            memset(out1,  0, sizeof(*out1)  * BUF_SIZE);
            memset(out_b, 0, sizeof(*out_b) * BUF_SIZE);

            randomize_input();

            call_ref(&imdct, buf0, &offset0, buf2_0, window,
                     out0, in, scale);
            call_new(&imdct, buf1, &offset1, buf2_1, window,
                     out1, in, scale);

            if (offset0 != offset1) {
                fail();
                fprintf(stderr, "offsets do not match: %d, %d\n", offset0, offset1);
                break;
            }

            for (j = 0; j < BUF_SIZE; j++) {
                if (!float_near_abs_eps_ulp(out0[j], out1[j], 7.0e-7, 16) ||
                    !float_near_abs_eps_ulp(buf2_0[j], buf2_1[j], 7.0e-7, 16)) {
                    /* Unions give access to the raw bit patterns for the report. */
                    union av_intfloat32 o0, o1, b0, b1;

                    fail();
                    o0.f = out0[j];
                    o1.f = out1[j];
                    b0.f = buf2_0[j];
                    b1.f = buf2_1[j];
                    fprintf(stderr, "out: %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
                            o0.f, o0.i, o1.f, o1.i, fabsf(o0.f - o1.f));
                    fprintf(stderr, "buf2: %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
                            b0.f, b0.i, b1.f, b1.i, fabsf(b0.f - b1.f));
                    /* Only the first mismatching element of a call is printed. */
                    break;
                }
            }

            bench_new(&imdct, buf_b, &offset_b, buf2_b, window,
                      out_b, in, scale);
        }
    }

    ff_mdct_end(&imdct);
    report("synth_filter");
}

105
externals/ffmpeg/tests/checkasm/utvideodsp.c vendored Executable file
View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2017 Jokyo Images
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/utvideodsp.h"
#include "libavutil/intreadwrite.h"
/* Dimensions of the image area that is filled, processed and compared. */
#define WIDTH 240
#define HEIGHT 120
/* Row stride in elements: 240 + 16 = 256, a multiple of 32. */
#define WIDTH_PADDED (WIDTH + 16) /* padded to 32 */
/* Elements per plane buffer, padding columns included. */
#define BUFFER_SIZE (WIDTH_PADDED * HEIGHT)
/*
 * Fill the WIDTH x HEIGHT image area of `buf` (elements of `type`, rows
 * WIDTH_PADDED elements apart) with random values in 0..255. The padding
 * columns of each row are not written.
 */
#define randomize_plane(buf, type)                                    \
    do {                                                              \
        type *line_ = buf;                                            \
        int row_, col_;                                               \
        for (row_ = 0; row_ < HEIGHT; row_++, line_ += WIDTH_PADDED)  \
            for (col_ = 0; col_ < WIDTH; col_++)                      \
                line_[col_] = rnd() & 0xFF;                           \
    } while (0)
/*
 * Compare the image area of two planes row by row, skipping the padding
 * columns. `s` is the element size in bytes. fail() is invoked once for every
 * row that differs.
 */
#define cmp_plane(buf0, buf1, s)                                          \
    do {                                                                  \
        int row_;                                                         \
        for (row_ = 0; row_ < HEIGHT; row_++) {                           \
            const int off_ = row_ * WIDTH_PADDED;                         \
            if (memcmp(buf0 + off_, buf1 + off_, WIDTH * s) != 0)         \
                fail();                                                   \
        }                                                                 \
    } while (0)
/*
 * Body of a restore_rgb_planes test for samples of `type`.
 *
 * Expands to declarations followed by statements (it is not wrapped in a
 * do/while block), so it has to be the only content of the function using it.
 * Three zeroed planes get random data in their image area and are copied to a
 * second set; the reference version then works in place on the first set and
 * the tested version on the second. The image areas of both sets must match
 * afterwards. The second set is reused for benchmarking.
 */
#define CHECK_RESTORE(type)\
LOCAL_ALIGNED_32(type, src_r0, [BUFFER_SIZE]); \
LOCAL_ALIGNED_32(type, src_g0, [BUFFER_SIZE]); \
LOCAL_ALIGNED_32(type, src_b0, [BUFFER_SIZE]); \
LOCAL_ALIGNED_32(type, src_r1, [BUFFER_SIZE]); \
LOCAL_ALIGNED_32(type, src_g1, [BUFFER_SIZE]); \
LOCAL_ALIGNED_32(type, src_b1, [BUFFER_SIZE]); \
declare_func(void, type *src_r, type *src_g, type *src_b, \
ptrdiff_t linesize_r, ptrdiff_t linesize_g, \
ptrdiff_t linesize_b, int width, int height); \
memset(src_r0, 0, BUFFER_SIZE * sizeof(type)); \
memset(src_g0, 0, BUFFER_SIZE * sizeof(type)); \
memset(src_b0, 0, BUFFER_SIZE * sizeof(type)); \
randomize_plane(src_r0, type); \
randomize_plane(src_g0, type); \
randomize_plane(src_b0, type); \
memcpy(src_r1, src_r0, BUFFER_SIZE * sizeof(type)); \
memcpy(src_g1, src_g0, BUFFER_SIZE * sizeof(type)); \
memcpy(src_b1, src_b0, BUFFER_SIZE * sizeof(type)); \
call_ref(src_r0, src_g0, src_b0, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT);\
call_new(src_r1, src_g1, src_b1, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT);\
cmp_plane(src_r0, src_r1, sizeof(type)); \
cmp_plane(src_g0, src_g1, sizeof(type)); \
cmp_plane(src_b0, src_b1, sizeof(type)); \
bench_new(src_r1, src_g1, src_b1, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT)
/* Run the CHECK_RESTORE test body on planes of uint8_t samples. */
static void check_restore_rgb_planes(void) {
CHECK_RESTORE(uint8_t);
}
/* Run the CHECK_RESTORE test body on planes of uint16_t samples. */
static void check_restore_rgb_planes10(void) {
CHECK_RESTORE(uint16_t);
}
/*
 * Entry point of the utvideodsp tests: initializes a DSP context and tests
 * its restore_rgb_planes and restore_rgb_planes10 functions, each with its
 * own report line.
 */
void checkasm_check_utvideodsp(void)
{
    UTVideoDSPContext dsp;

    ff_utvideodsp_init(&dsp);

    if (check_func(dsp.restore_rgb_planes, "restore_rgb_planes")) {
        check_restore_rgb_planes();
    }
    report("restore_rgb_planes");

    if (check_func(dsp.restore_rgb_planes10, "restore_rgb_planes10")) {
        check_restore_rgb_planes10();
    }
    report("restore_rgb_planes10");
}

77
externals/ffmpeg/tests/checkasm/v210dec.c vendored Executable file
View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2019 James Darnley
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/v210dec.h"
/*
 * Build one random 32-bit word made of three independent 10-bit fields at
 * bit offsets 0, 10 and 20. The two top bits are always zero.
 */
static uint32_t get_v210(void)
{
    uint32_t word = 0;

    /* One rnd() call per field, lowest field first. */
    for (int shift = 0; shift <= 20; shift += 10)
        word |= (uint32_t)(rnd() & 0x3ff) << shift;

    return word;
}
/* Base count from which the sizes of all test arrays are derived. */
#define NUM_SAMPLES 2048

/*
 * Write the same sequence of `len` random words from get_v210() to both
 * src0 and src1.
 */
static void randomize_buffers(uint32_t *src0, uint32_t *src1, int len)
{
    int idx;

    for (idx = 0; idx < len; idx++)
        src0[idx] = src1[idx] = get_v210();
}
/*
 * Entry point of the v210 decoder test. With aligned_input cleared, the
 * unpack_frame function is run in its reference and tested versions on
 * identical packed input. The inputs must stay identical and the y/u/v
 * outputs must match over the unpacked width.
 */
void checkasm_check_v210dec(void)
{
    V210DecContext dec;

    dec.aligned_input = 0;
    ff_v210dec_init(&dec);

    if (check_func(dec.unpack_frame, "v210_unpack")) {
        uint32_t src0[NUM_SAMPLES/3];
        uint32_t src1[NUM_SAMPLES/3];
        uint16_t y0[NUM_SAMPLES/2];
        uint16_t y1[NUM_SAMPLES/2];
        uint16_t u0[NUM_SAMPLES/4];
        uint16_t u1[NUM_SAMPLES/4];
        uint16_t v0[NUM_SAMPLES/4];
        uint16_t v1[NUM_SAMPLES/4];
        declare_func(void, const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
        /* Largest multiple of 6 that does not exceed NUM_SAMPLES / 2. */
        const int pixels = NUM_SAMPLES / 2 / 6 * 6;
        const size_t luma_bytes   = pixels * sizeof y0[0];
        const size_t chroma_bytes = pixels/2 * sizeof u0[0];
        int differs;

        randomize_buffers(src0, src1, NUM_SAMPLES/3);

        call_ref(src0, y0, u0, v0, pixels);
        call_new(src1, y1, u1, v1, pixels);

        differs = memcmp(src0, src1, sizeof src0) ||
                  memcmp(y0, y1, luma_bytes)      ||
                  memcmp(u0, u1, chroma_bytes)    ||
                  memcmp(v0, v1, chroma_bytes);
        if (differs)
            fail();

        bench_new(src1, y1, u1, v1, pixels);
    }

    report("v210_unpack");
}

94
externals/ffmpeg/tests/checkasm/v210enc.c vendored Executable file
View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2015 Henrik Gramner
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/v210enc.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Element count of the luma test arrays; the chroma arrays hold half of it. */
#define BUF_SIZE 512

/*
 * Fill the caller's y0/y1, u0/u1 and v0/v1 arrays pairwise with identical
 * random words masked by `mask`, then fill the first width * 8 / 3 bytes
 * (rounded up to a whole word) of dst0/dst1 with identical unmasked random
 * words. Relies on `width` and the eight arrays being in scope.
 */
#define randomize_buffers(mask)                                    \
    do {                                                           \
        const int step_ = 4 / (int)sizeof(*y0);                    \
        int pos_;                                                  \
        uint32_t val_;                                             \
        for (pos_ = 0; pos_ < BUF_SIZE; pos_ += step_) {           \
            val_ = rnd() & mask;                                   \
            AV_WN32A(y0 + pos_, val_);                             \
            AV_WN32A(y1 + pos_, val_);                             \
        }                                                          \
        for (pos_ = 0; pos_ < BUF_SIZE / 2; pos_ += step_) {       \
            val_ = rnd() & mask;                                   \
            AV_WN32A(u0 + pos_, val_);                             \
            AV_WN32A(u1 + pos_, val_);                             \
            val_ = rnd() & mask;                                   \
            AV_WN32A(v0 + pos_, val_);                             \
            AV_WN32A(v1 + pos_, val_);                             \
        }                                                          \
        for (pos_ = 0; pos_ < width * 8 / 3; pos_ += 4) {          \
            val_ = rnd();                                          \
            AV_WN32A(dst0 + pos_, val_);                           \
            AV_WN32A(dst1 + pos_, val_);                           \
        }                                                          \
    } while (0)
/*
 * Test the line packer selected by the preceding check_func() for samples of
 * `type`; `mask` limits the random sample values (see randomize_buffers).
 *
 * Every width that is a multiple of `step` and below BUF_SIZE - 15 is run
 * with a random luma offset of 0..15 elements (chroma offset is half of it).
 * After both versions have run, the input arrays must be unchanged over their
 * full size and the first width * 8 / 3 output bytes must match.
 *
 * The input comparisons are sized in bytes (BUF_SIZE counts elements), so
 * 16-bit inputs are checked completely rather than only their first half.
 */
#define check_pack_line(type, mask)                                                        \
    do {                                                                                   \
        LOCAL_ALIGNED_16(type, y0, [BUF_SIZE]);                                            \
        LOCAL_ALIGNED_16(type, y1, [BUF_SIZE]);                                            \
        LOCAL_ALIGNED_16(type, u0, [BUF_SIZE / 2]);                                        \
        LOCAL_ALIGNED_16(type, u1, [BUF_SIZE / 2]);                                        \
        LOCAL_ALIGNED_16(type, v0, [BUF_SIZE / 2]);                                        \
        LOCAL_ALIGNED_16(type, v1, [BUF_SIZE / 2]);                                        \
        LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE * 8 / 3]);                               \
        LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE * 8 / 3]);                               \
                                                                                           \
        declare_func(void, const type * y, const type * u, const type * v,                 \
                     uint8_t * dst, ptrdiff_t width);                                      \
        ptrdiff_t width, step = 12 / sizeof(type);                                         \
                                                                                           \
        for (width = step; width < BUF_SIZE - 15; width += step) {                         \
            int y_offset  = rnd() & 15;                                                    \
            int uv_offset = y_offset / 2;                                                  \
            randomize_buffers(mask);                                                       \
            call_ref(y0 + y_offset, u0 + uv_offset, v0 + uv_offset, dst0, width);          \
            call_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width);          \
            if (memcmp(y0, y1, BUF_SIZE * sizeof(type)) ||                                 \
                memcmp(u0, u1, BUF_SIZE / 2 * sizeof(type)) ||                             \
                memcmp(v0, v1, BUF_SIZE / 2 * sizeof(type)) ||                             \
                memcmp(dst0, dst1, width * 8 / 3))                                         \
                fail();                                                                    \
            bench_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width);         \
        }                                                                                  \
    } while (0)
/*
 * Entry point of the v210 encoder tests: checks the 8-bit line packer with
 * unrestricted sample bits and the 10-bit line packer with samples limited to
 * 10 bits per 16-bit element, then reports both under "planar_pack".
 */
void checkasm_check_v210enc(void)
{
    V210EncContext enc;

    ff_v210enc_init(&enc);

    if (check_func(enc.pack_line_8, "v210_planar_pack_8")) {
        check_pack_line(uint8_t, 0xffffffff);
    }

    if (check_func(enc.pack_line_10, "v210_planar_pack_10")) {
        check_pack_line(uint16_t, 0x03ff03ff);
    }

    report("planar_pack");
}

149
externals/ffmpeg/tests/checkasm/vf_blend.c vendored Executable file
View File

@@ -0,0 +1,149 @@
/*
* Copyright (c) 2016 Tiancheng "Timothy" Gu
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavfilter/blend.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Each buffer consists of BUF_UNITS units of WIDTH * HEIGHT bytes. */
#define WIDTH 256
#define HEIGHT 256
#define BUF_UNITS 3
#define SIZE_PER_UNIT (WIDTH * HEIGHT)
#define BUF_SIZE (BUF_UNITS * SIZE_PER_UNIT)

/*
 * Initialize the caller's top1/top2, bot1/bot2 and dst1/dst2 buffers in
 * identical pairs.
 *
 * In the first unit every top byte holds its row number and every bottom
 * byte its column number, both truncated to 8 bits. The remaining units of
 * top and bottom, and all of dst, are filled with random words.
 */
#define randomize_buffers()                                        \
    do {                                                           \
        int row_, col_, pos_;                                      \
        for (row_ = 0; row_ < HEIGHT; row_++) {                    \
            for (col_ = 0; col_ < WIDTH; col_++) {                 \
                const int at_ = row_ * WIDTH + col_;               \
                top1[at_] = top2[at_] = row_;                      \
                bot1[at_] = bot2[at_] = col_;                      \
            }                                                      \
        }                                                          \
        for (pos_ = 0; pos_ < BUF_SIZE; pos_ += 4) {               \
            uint32_t r;                                            \
            if (pos_ >= SIZE_PER_UNIT) {                           \
                r = rnd();                                         \
                AV_WN32A(top1 + pos_, r);                          \
                AV_WN32A(top2 + pos_, r);                          \
                r = rnd();                                         \
                AV_WN32A(bot1 + pos_, r);                          \
                AV_WN32A(bot2 + pos_, r);                          \
            }                                                      \
            r = rnd();                                             \
            AV_WN32A(dst1 + pos_, r);                              \
            AV_WN32A(dst2 + pos_, r);                              \
        }                                                          \
    } while (0)
/*
 * Test the blend function selected by the preceding check_func(); `depth` is
 * the value the caller also used to configure it (1 or 2 in this file).
 *
 * For each of the first BUF_UNITS - 1 units the buffers are re-randomized and
 * both versions are run with a source offset that differs per unit, while the
 * destination stays at the start of the unit. Afterwards the complete top,
 * bottom and destination buffers of both versions must be identical. A
 * smaller area is used for benchmarking.
 */
#define check_blend_func(depth)                                                            \
    do {                                                                                   \
        int unit_;                                                                         \
        const int w_ = WIDTH / depth;                                                      \
        declare_func(void, const uint8_t *top, ptrdiff_t top_linesize,                     \
                     const uint8_t *bottom, ptrdiff_t bottom_linesize,                     \
                     uint8_t *dst, ptrdiff_t dst_linesize,                                 \
                     ptrdiff_t width, ptrdiff_t height,                                    \
                     struct FilterParams *param, double *values);                          \
                                                                                           \
        for (unit_ = 0; unit_ < BUF_UNITS - 1; unit_++) {                                  \
            /* dst must be aligned */                                                      \
            const int dst_offset = unit_ * SIZE_PER_UNIT;                                  \
            /* Test various alignments */                                                  \
            const int src_offset = unit_ * SIZE_PER_UNIT +                                 \
                                   (BUF_UNITS - 1 - unit_) * depth;                        \
            randomize_buffers();                                                           \
            call_ref(top1 + src_offset, w_, bot1 + src_offset, w_,                         \
                     dst1 + dst_offset, w_, w_, HEIGHT, &param, NULL);                     \
            call_new(top2 + src_offset, w_, bot2 + src_offset, w_,                         \
                     dst2 + dst_offset, w_, w_, HEIGHT, &param, NULL);                     \
            if (memcmp(top1, top2, BUF_SIZE) ||                                            \
                memcmp(bot1, bot2, BUF_SIZE) ||                                            \
                memcmp(dst1, dst2, BUF_SIZE))                                              \
                fail();                                                                    \
        }                                                                                  \
        bench_new(top2, w_ / 4, bot2, w_ / 4, dst2, w_ / 4,                                \
                  w_ / 4, HEIGHT / 4, &param, NULL);                                       \
    } while (0)
/*
 * Entry point of the blend filter tests.
 *
 * Allocates the six work buffers shared by the helper macros, then selects
 * one blend mode after another with full opacity and tests the resulting
 * function: first the modes with depth 1 (reported as "8bit"), then the
 * modes with depth 2 (reported as "16bit"). All buffers are freed at the end.
 *
 * NOTE(review): the av_malloc() results are not checked before use.
 */
void checkasm_check_blend(void)
{
    uint8_t *top1 = av_malloc(BUF_SIZE);
    uint8_t *top2 = av_malloc(BUF_SIZE);
    uint8_t *bot1 = av_malloc(BUF_SIZE);
    uint8_t *bot2 = av_malloc(BUF_SIZE);
    uint8_t *dst1 = av_malloc(BUF_SIZE);
    uint8_t *dst2 = av_malloc(BUF_SIZE);
    FilterParams param = {
        .opacity = 1.0,
    };

/*
 * Select mode `val`, initialize `param` for `depth` and, when check_func()
 * asks for it, run the comparison. `name` becomes the reported function name.
 */
#define check_and_report(name, val, depth)        \
    param.mode = val;                             \
    ff_blend_init(&param, depth - 1);             \
    if (check_func(param.blend, #name))           \
        check_blend_func(depth);

    check_and_report(addition, BLEND_ADDITION, 1)
    check_and_report(grainmerge, BLEND_GRAINMERGE, 1)
    check_and_report(and, BLEND_AND, 1)
    check_and_report(average, BLEND_AVERAGE, 1)
    check_and_report(darken, BLEND_DARKEN, 1)
    check_and_report(grainextract, BLEND_GRAINEXTRACT, 1)
    check_and_report(hardmix, BLEND_HARDMIX, 1)
    check_and_report(lighten, BLEND_LIGHTEN, 1)
    check_and_report(multiply, BLEND_MULTIPLY, 1)
    check_and_report(or, BLEND_OR, 1)
    check_and_report(phoenix, BLEND_PHOENIX, 1)
    check_and_report(screen, BLEND_SCREEN, 1)
    check_and_report(subtract, BLEND_SUBTRACT, 1)
    check_and_report(xor, BLEND_XOR, 1)
    check_and_report(difference, BLEND_DIFFERENCE, 1)
    check_and_report(extremity, BLEND_EXTREMITY, 1)
    check_and_report(negation, BLEND_NEGATION, 1)

    report("8bit");

    check_and_report(addition_16, BLEND_ADDITION, 2)
    check_and_report(grainmerge_16, BLEND_GRAINMERGE, 2)
    check_and_report(and_16, BLEND_AND, 2)
    check_and_report(average_16, BLEND_AVERAGE, 2)
    check_and_report(darken_16, BLEND_DARKEN, 2)
    check_and_report(grainextract_16, BLEND_GRAINEXTRACT, 2)
    check_and_report(difference_16, BLEND_DIFFERENCE, 2)
    check_and_report(extremity_16, BLEND_EXTREMITY, 2)
    check_and_report(negation_16, BLEND_NEGATION, 2)
    check_and_report(lighten_16, BLEND_LIGHTEN, 2)
    check_and_report(or_16, BLEND_OR, 2)
    check_and_report(phoenix_16, BLEND_PHOENIX, 2)
    check_and_report(subtract_16, BLEND_SUBTRACT, 2)
    /* xor_16 must select the XOR mode, matching the 8-bit entry above. */
    check_and_report(xor_16, BLEND_XOR, 2)

    report("16bit");

    av_freep(&top1);
    av_freep(&top2);
    av_freep(&bot1);
    av_freep(&bot2);
    av_freep(&dst1);
    av_freep(&dst2);
}

View File

@@ -0,0 +1,314 @@
/*
* Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <string.h>
#include "checkasm.h"
#include "libavfilter/colorspacedsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/* Dimensions, in pixels, of the test image. */
#define W 64
#define H 64

/*
 * Fill the three planes referenced by the caller's `src` array with random
 * words masked by bpp_mask[idepth].
 *
 * The first plane gets W * H bytes for depth index 0 and twice that
 * otherwise; the other two planes are shrunk by the caller's ss_w + ss_h
 * subsampling shifts. Relies on `idepth`, `ss_w`, `ss_h` and `src` being in
 * scope.
 */
#define randomize_buffers()                                       \
    do {                                                          \
        const unsigned keep_  = bpp_mask[idepth];                 \
        const int px_bytes_   = idepth ? 2 : 1;                   \
        const int luma_bytes_ = W * H * px_bytes_;                \
        int plane_, pos_;                                         \
        for (plane_ = 0; plane_ < 3; plane_++) {                  \
            const int shift_    = plane_ ? ss_w + ss_h : 0;       \
            const int plane_sz_ = luma_bytes_ >> shift_;          \
            for (pos_ = 0; pos_ < plane_sz_; pos_ += 4) {         \
                unsigned r = rnd() & keep_;                       \
                AV_WN32A(&src[plane_][pos_], r);                  \
            }                                                     \
        }                                                         \
    } while (0)
// Name of each chroma format index (fmt 0..2), used in the reported
// function names.
static const char *format_string[] = {
"444", "422", "420"
};
// Mask applied to every random 32-bit source word, indexed by depth index.
// Index 0 keeps all bits; indices 1 and 2 keep the low 10 and 12 bits of
// each 16-bit half. The depth indices are reported as 8, 10 and 12 bit.
static const unsigned bpp_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
/*
 * Test dsp.yuv2yuv[idepth][odepth][fmt] for every input depth index, output
 * depth index and chroma format. For each entry that check_func() selects,
 * the same randomized source planes are converted by the reference and the
 * tested version; the written part of every destination plane must then be
 * byte-identical. No benchmark is run.
 */
static void check_yuv2yuv(void)
{
declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
uint8_t *src[3], ptrdiff_t src_stride[3],
int w, int h, const int16_t coeff[3][3][8],
const int16_t off[2][8]);
ColorSpaceDSPContext dsp;
int idepth, odepth, fmt, n;
// Every plane buffer is sized for the largest case, 2 bytes per sample.
LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
// `src` is also read by the randomize_buffers() macro.
uint8_t *src[3] = { src_y, src_u, src_v };
LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
// View the flat aligned buffers as the [2][8] and [3][3][8] arrays that the
// tested function takes.
int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf;
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
ff_colorspacedsp_init(&dsp);
// Every coefficient and offset is replicated across all 8 entries of its
// innermost array.
for (n = 0; n < 8; n++) {
offset[0][n] = offset[1][n] = 16;
coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
coeff[0][1][n] = (1 << 7) - 1;
coeff[0][2][n] = -(1 << 8);
coeff[1][0][n] = coeff[2][0][n] = 0;
coeff[1][1][n] = (1 << 14) + (1 << 7);
coeff[1][2][n] = -(1 << 7);
coeff[2][2][n] = (1 << 14) - (1 << 6);
coeff[2][1][n] = 1 << 6;
}
for (idepth = 0; idepth < 3; idepth++) {
for (odepth = 0; odepth < 3; odepth++) {
for (fmt = 0; fmt < 3; fmt++) {
if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
"ff_colorspacedsp_yuv2yuv_%sp%dto%d",
format_string[fmt],
idepth * 2 + 8, odepth * 2 + 8)) {
// Chroma is halved horizontally for fmt 1 and 2, and vertically for fmt 2
// only. ss_w and ss_h are also read by randomize_buffers().
int ss_w = !!fmt, ss_h = fmt == 2;
// Strides in bytes: W for depth index 0, 2 * W otherwise.
int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth;
int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
randomize_buffers();
call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
W, H, coeff, offset);
call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
W, H, coeff, offset);
// Compare only the bytes covered by each plane: stride times the number of
// rows, the latter halved for the chroma planes when ss_h is set.
if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) ||
memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
fail();
}
}
}
}
}
report("yuv2yuv");
}
/*
 * Test dsp.yuv2rgb[idepth][fmt] for every input depth index and chroma
 * format. For each entry that check_func() selects, the same randomized
 * source planes are converted by the reference and the tested version into
 * three int16_t planes of W x H samples, which must be byte-identical
 * afterwards. No benchmark is run.
 */
static void check_yuv2rgb(void)
{
declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
uint8_t *src[3], ptrdiff_t src_stride[3],
int w, int h, const int16_t coeff[3][3][8],
const int16_t off[8]);
ColorSpaceDSPContext dsp;
int idepth, fmt, n;
// Source planes are sized for the largest case, 2 bytes per sample.
LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
// `src` is also read by the randomize_buffers() macro.
uint8_t *src[3] = { src_y, src_u, src_v };
LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
LOCAL_ALIGNED_32(int16_t, offset, [8]);
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
// View the flat aligned buffer as the [3][3][8] array the function takes.
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
ff_colorspacedsp_init(&dsp);
// Every coefficient and the offset are replicated across all 8 entries of
// their innermost array.
for (n = 0; n < 8; n++) {
offset[n] = 16;
coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
coeff[0][1][n] = coeff[2][2][n] = 0;
coeff[0][2][n] = 1 << 13;
coeff[1][1][n] = -(1 << 12);
coeff[1][2][n] = 1 << 12;
coeff[2][1][n] = 1 << 11;
}
for (idepth = 0; idepth < 3; idepth++) {
for (fmt = 0; fmt < 3; fmt++) {
if (check_func(dsp.yuv2rgb[idepth][fmt],
"ff_colorspacedsp_yuv2rgb_%sp%d",
format_string[fmt], idepth * 2 + 8)) {
// Chroma is halved horizontally for fmt 1 and 2, and vertically for fmt 2
// only. ss_w and ss_h are also read by randomize_buffers().
int ss_w = !!fmt, ss_h = fmt == 2;
// Source strides in bytes: W for depth index 0, 2 * W otherwise.
int y_src_stride = W << !!idepth;
int uv_src_stride = y_src_stride >> ss_w;
randomize_buffers();
call_ref(dst0, W, src,
(ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
W, H, coeff, offset);
call_new(dst1, W, src,
(ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
W, H, coeff, offset);
if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
fail();
}
}
}
}
report("yuv2rgb");
}
/*
 * Second version of randomize_buffers(), for int16_t sources: fills the first
 * W * H samples of each of the three planes in the caller's `src` array with
 * a random 15-bit value lowered by (32768 - 28672) / 2.
 */
#undef randomize_buffers
#define randomize_buffers()                                                \
    do {                                                                   \
        int plane_, idx_;                                                  \
        for (plane_ = 0; plane_ < 3; plane_++) {                           \
            for (idx_ = 0; idx_ < W * H; idx_++) {                         \
                src[plane_][idx_] = (int)(rnd() & 0x7fff) -                \
                                    ((32768 - 28672) >> 1);                \
            }                                                              \
        }                                                                  \
    } while (0)
/*
 * Test dsp.rgb2yuv[odepth][fmt] for every output depth index and chroma
 * format. For each entry that check_func() selects, the same randomized
 * int16_t source planes are converted by the reference and the tested
 * version; the written part of every destination plane must then be
 * byte-identical. No benchmark is run.
 */
static void check_rgb2yuv(void)
{
declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
int16_t *src[3], ptrdiff_t src_stride,
int w, int h, const int16_t coeff[3][3][8],
const int16_t off[8]);
ColorSpaceDSPContext dsp;
int odepth, fmt, n;
LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]);
LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
// `src` is also read by the randomize_buffers() macro.
int16_t *src[3] = { src_y, src_u, src_v };
// Destination planes are sized for the largest case, 2 bytes per sample.
LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
LOCAL_ALIGNED_32(int16_t, offset, [8]);
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
// View the flat aligned buffer as the [3][3][8] array the function takes.
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
ff_colorspacedsp_init(&dsp);
// Every coefficient and the offset are replicated across all 8 entries of
// their innermost array; coefficients are scaled by 1 << 14.
for (n = 0; n < 8; n++) {
offset[n] = 16;
// these somewhat resemble bt601/smpte170m coefficients
coeff[0][0][n] = lrint(0.3 * (1 << 14));
coeff[0][1][n] = lrint(0.6 * (1 << 14));
coeff[0][2][n] = lrint(0.1 * (1 << 14));
coeff[1][0][n] = lrint(-0.15 * (1 << 14));
coeff[1][1][n] = lrint(-0.35 * (1 << 14));
coeff[1][2][n] = lrint(0.5 * (1 << 14));
coeff[2][0][n] = lrint(0.5 * (1 << 14));
coeff[2][1][n] = lrint(-0.42 * (1 << 14));
coeff[2][2][n] = lrint(-0.08 * (1 << 14));
}
for (odepth = 0; odepth < 3; odepth++) {
for (fmt = 0; fmt < 3; fmt++) {
if (check_func(dsp.rgb2yuv[odepth][fmt],
"ff_colorspacedsp_rgb2yuv_%sp%d",
format_string[fmt], odepth * 2 + 8)) {
// Chroma is halved horizontally for fmt 1 and 2, and vertically for fmt 2 only.
int ss_w = !!fmt, ss_h = fmt == 2;
// Destination strides in bytes: W for depth index 0, 2 * W otherwise.
int y_dst_stride = W << !!odepth;
int uv_dst_stride = y_dst_stride >> ss_w;
randomize_buffers();
call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
src, W, W, H, coeff, offset);
call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
src, W, W, H, coeff, offset);
// Compare only the bytes covered by each plane: stride times the number of
// rows, the latter halved for the chroma planes when ss_h is set.
if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) ||
memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
fail();
}
}
}
}
report("rgb2yuv");
}
/*
 * Test dsp.multiply3x3, which works in place on three int16_t planes.
 *
 * The first plane set is filled with random data and copied to the second
 * set; the reference version then processes the first set and the tested
 * version the second. All three planes must be byte-identical afterwards.
 * No benchmark is run.
 */
static void check_multiply3x3(void)
{
    /* Matrix entries before scaling by 1 << 14. */
    static const double matrix[3][3] = {
        {  0.85, 0.10, 0.05 },
        { -0.1,  0.95, 0.15 },
        { -0.2,  0.30, 0.90 },
    };
    declare_func(void, int16_t *data[3], ptrdiff_t stride,
                 int w, int h, const int16_t coeff[3][3][8]);
    ColorSpaceDSPContext dsp;
    LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
    LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
    LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
    LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
    LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
    LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
    int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
    /* randomize_buffers() writes through `src`, i.e. into the first plane set. */
    int16_t **src = dst0;
    LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
    int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
    const size_t plane_bytes = W * H * sizeof(int16_t);
    int row, col, lane;

    ff_colorspacedsp_init(&dsp);

    /* Each scaled entry is replicated across all 8 lanes. */
    for (row = 0; row < 3; row++) {
        for (col = 0; col < 3; col++) {
            const int16_t scaled = lrint(matrix[row][col] * (1 << 14));

            for (lane = 0; lane < 8; lane++)
                coeff[row][col][lane] = scaled;
        }
    }

    if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
        int differs;

        randomize_buffers();
        memcpy(dst1_y, dst0_y, plane_bytes);
        memcpy(dst1_u, dst0_u, plane_bytes);
        memcpy(dst1_v, dst0_v, plane_bytes);

        call_ref(dst0, W, W, H, coeff);
        call_new(dst1, W, W, H, coeff);

        differs = memcmp(dst0[0], dst1[0], plane_bytes) ||
                  memcmp(dst0[1], dst1[1], plane_bytes) ||
                  memcmp(dst0[2], dst1[2], plane_bytes);
        if (differs) {
            fail();
        }
    }

    report("multiply3x3");
}
/* Entry point of the colorspace test: runs each per-function check in turn. */
void checkasm_check_colorspace(void)
{
check_yuv2yuv();
check_yuv2rgb();
check_rgb2yuv();
check_multiply3x3();
}

79
externals/ffmpeg/tests/checkasm/vf_eq.c vendored Executable file
View File

@@ -0,0 +1,79 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/vf_eq.h"
#include "libavutil/intreadwrite.h"
#define WIDTH 256
#define HEIGHT 256
#define SRC_STRIDE 256
#define PIXELS (WIDTH * HEIGHT)
#define RANDOM_RANGE 80000
#define SCALE 10000
/*
 * Fill the first `size` bytes starting at `buf` with the low 8 bits of
 * successive rnd() values.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (e.g. `ptr + 1`) are cast and compared as a whole.
 */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int j;                                  \
        uint8_t *tmp_buf = (uint8_t *)(buf);    \
        for (j = 0; j < (size); j++)            \
            tmp_buf[j] = rnd() & 0xFF;          \
    } while (0)
/*
 * Compare the reference and the optimized eq.process implementation on a
 * random WIDTH x HEIGHT 8-bit image, using a contrast and a brightness value
 * derived from rnd().
 */
static void check_eq(void)
{
    LOCAL_ALIGNED_32(uint8_t, src,     [PIXELS]);
    LOCAL_ALIGNED_32(uint8_t, dst_ref, [PIXELS]);
    LOCAL_ALIGNED_32(uint8_t, dst_new, [PIXELS]);
    int w = WIDTH, h = HEIGHT;
    int src_stride = SRC_STRIDE, dst_stride = SRC_STRIDE;
    EQParameters pa;
    EQContext eq;

    declare_func(void, EQParameters *param, uint8_t *dst, int dst_stride,
                 const uint8_t *src, int src_stride, int w, int h);

    /* Contrast is drawn first, brightness second; the divisor turns the
     * integer draw into a fractional value. */
    double rand_contrast   = (int)(rnd() % (RANDOM_RANGE * 2) - RANDOM_RANGE) /
                             (SCALE * 1.0);
    double rand_brightness = (int)(rnd() % (SCALE * 2) - SCALE) /
                             (SCALE * 1.0);

    pa.contrast   = rand_contrast;
    pa.brightness = rand_brightness;

    memset(dst_ref, 0, PIXELS);
    memset(dst_new, 0, PIXELS);
    randomize_buffers(src, PIXELS);

    ff_eq_init(&eq);

    if (!check_func(eq.process, "process"))
        return;

    call_ref(&pa, dst_ref, dst_stride, src, src_stride, w, h);
    call_new(&pa, dst_new, dst_stride, src, src_stride, w, h);

    if (memcmp(dst_ref, dst_new, PIXELS) != 0)
        fail();

    bench_new(&pa, dst_new, dst_stride, src, src_stride, w, h);
}
void checkasm_check_vf_eq(void)
{
check_eq();
report("eq");
}

66
externals/ffmpeg/tests/checkasm/vf_gblur.c vendored Executable file
View File

@@ -0,0 +1,66 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavfilter/gblur.h"
#define WIDTH 256
#define HEIGHT 256
#define PIXELS (WIDTH * HEIGHT)
#define BUF_SIZE (PIXELS * 4)
/*
 * Fill the first `size` floats starting at `buf` with random integral values
 * in [0, 255] (the low 8 bits of rnd(), converted to float).
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments are cast and compared as a whole.
 */
#define randomize_buffers(buf, size)                \
    do {                                            \
        int j;                                      \
        float *tmp_buf = (float *)(buf);            \
        for (j = 0; j < (size); j++)                \
            tmp_buf[j] = (float)(rnd() & 0xFF);     \
    } while (0)
/*
 * Check GBlurContext.horiz_slice: run the reference and the optimized
 * implementation in place on identical random float images and require the
 * results to agree within an absolute epsilon of 0.01.
 *
 * The two work buffers are heap-allocated; if either allocation fails the
 * buffers are never touched and the function under test is marked as failed
 * instead of dereferencing a NULL pointer.
 */
void checkasm_check_vf_gblur(void)
{
    float *dst_ref = av_malloc(BUF_SIZE);
    float *dst_new = av_malloc(BUF_SIZE);
    const int buffers_ok = dst_ref && dst_new;
    int w = WIDTH;
    int h = HEIGHT;
    int steps = 2;
    float nu = 0.101f;
    float bscale = 1.112f;
    GBlurContext s;

    declare_func(void, float *dst, int w, int h, int steps, float nu, float bscale);

    if (buffers_ok) {
        randomize_buffers(dst_ref, PIXELS);
        /* Both implementations filter in place, so start from equal input. */
        memcpy(dst_new, dst_ref, BUF_SIZE);
    }

    ff_gblur_init(&s);

    if (check_func(s.horiz_slice, "horiz_slice")) {
        if (!buffers_ok) {
            /* Out of memory: nothing can be compared. */
            fail();
        } else {
            call_ref(dst_ref, w, h, steps, nu, bscale);
            call_new(dst_new, w, h, steps, nu, bscale);
            if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) {
                fail();
            }
            /* The benchmark runs a single step. */
            bench_new(dst_new, w, h, 1, nu, bscale);
        }
    }
    report("horiz_slice");

    av_freep(&dst_ref);
    av_freep(&dst_new);
}

76
externals/ffmpeg/tests/checkasm/vf_hflip.c vendored Executable file
View File

@@ -0,0 +1,76 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavfilter/hflip.h"
#include "libavutil/intreadwrite.h"
#define WIDTH 256
/* Parenthesized so the macro behaves as a single operand in any expression. */
#define WIDTH_PADDED (256 + 32)

/*
 * Fill the first `size` bytes starting at `buf` with the low 8 bits of
 * successive rnd() values.  Arguments are parenthesized in the expansion so
 * that expression arguments are cast and compared as a whole.
 */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int j;                                  \
        uint8_t *tmp_buf = (uint8_t *)(buf);    \
        for (j = 0; j < (size); j++)            \
            tmp_buf[j] = rnd() & 0xFF;          \
    } while (0)
/*
 * Check FlipContext.flip_line[0] for one pixel step.
 *
 * step        - bytes per pixel; 2 halves the pixel count and sets every
 *               entry of the step array to 2, any other value keeps the
 *               step array at 1
 * report_name - suffix used in the checked function's name ("hflip_<name>")
 *
 * Every line width from 1 up to (but excluding) the pixel count is compared
 * between the reference and the optimized version; the full width is only
 * benchmarked.
 */
static void check_hflip(int step, const char * report_name){
    LOCAL_ALIGNED_32(uint8_t, src,     [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, dst_ref, [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, dst_new, [WIDTH_PADDED]);
    const int wide = step == 2;
    const int w = wide ? WIDTH / 2 : WIDTH;
    const int plane_step = wide ? step : 1;
    int step_array[4];
    int len, plane;
    FlipContext s;

    declare_func(void, const uint8_t *src, uint8_t *dst, int w);

    memset(src,     0, WIDTH_PADDED);
    memset(dst_ref, 0, WIDTH_PADDED);
    memset(dst_new, 0, WIDTH_PADDED);
    randomize_buffers(src, WIDTH_PADDED);

    for (plane = 0; plane < 4; plane++)
        step_array[plane] = plane_step;

    ff_hflip_init(&s, step_array, 4);

    if (!check_func(s.flip_line[0], "hflip_%s", report_name))
        return;

    /* The source pointer handed to the function is the last pixel of the line. */
    for (len = 1; len < w; len++) {
        call_ref(src + (w - 1) * step, dst_ref, len);
        call_new(src + (w - 1) * step, dst_new, len);
        if (memcmp(dst_ref, dst_new, len * step) != 0)
            fail();
    }
    bench_new(src + (w - 1) * step, dst_new, w);
}
/* Entry point of the hflip test: checks the 1-byte and the 2-byte pixel step,
 * reporting each separately. */
void checkasm_check_vf_hflip(void)
{
check_hflip(1, "byte");
report("hflip_byte");
check_hflip(2, "short");
report("hflip_short");
}

113
externals/ffmpeg/tests/checkasm/vf_nlmeans.c vendored Executable file
View File

@@ -0,0 +1,113 @@
/*
* Copyright (c) 2018 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "checkasm.h"
#include "libavfilter/vf_nlmeans.h"
#include "libavutil/avassert.h"
/*
 * Fill `size` bytes starting at `buf` with random data, one 32-bit word at a
 * time (size / 4 words are written).
 *
 * Both arguments are parenthesized in the expansion: without that, a size
 * such as `a + b` would expand to `a + b / 4` and overrun the buffer.
 */
#define randomize_buffer(buf, size) do {        \
    int i;                                      \
    for (i = 0; i < (size) / 4; i++)            \
        ((uint32_t *)(buf))[i] = rnd();         \
} while (0)
/*
 * Check dsp.compute_safe_ssd_integral_image: for every non-zero (offx, offy)
 * offset inside the research window, run the reference and the optimized
 * implementation on the same random source image and require the two
 * integral images to be byte-identical.
 *
 * The three work buffers are heap-allocated; if any allocation fails the
 * function under test is marked as failed and the buffers are never
 * dereferenced.
 */
void checkasm_check_nlmeans(void)
{
    NLMeansDSPContext dsp = {0};

    const int w = 123;  // source width
    const int h = 45;   // source height
    const int p = 3;    // patch half size
    const int r = 2;    // research window half size

    ff_nlmeans_init(&dsp);

    /* See the filter's code for the explanations on the variables */
    if (check_func(dsp.compute_safe_ssd_integral_image, "ssd_integral_image")) {
        int offx, offy;
        const int e = p + r;
        const int ii_w = w + e*2;
        const int ii_h = h + e*2;
        const int ii_lz_32 = FFALIGN(ii_w + 1, 4);
        uint32_t *ii_orig_ref = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_ref));
        uint32_t *ii_orig_new = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_new));
        const int src_lz = FFALIGN(w, 16);
        uint8_t *src = av_mallocz_array(h, src_lz);

        declare_func(void, uint32_t *dst, ptrdiff_t dst_linesize_32,
                     const uint8_t *s1, ptrdiff_t linesize1,
                     const uint8_t *s2, ptrdiff_t linesize2,
                     int w, int h);

        if (!ii_orig_ref || !ii_orig_new || !src) {
            /* Out of memory: nothing can be compared. */
            fail();
        } else {
            /* Skip the first line and the first column of each allocation. */
            uint32_t *ii_ref = ii_orig_ref + ii_lz_32 + 1;
            uint32_t *ii_new = ii_orig_new + ii_lz_32 + 1;

            randomize_buffer(src, h * src_lz);

            for (offy = -r; offy <= r; offy++) {
                for (offx = -r; offx <= r; offx++) {
                    if (offx || offy) {
                        const int s1x = e;
                        const int s1y = e;
                        const int s2x = e + offx;
                        const int s2y = e + offy;
                        const int startx_safe = FFMAX(s1x, s2x);
                        const int starty_safe = FFMAX(s1y, s2y);
                        const int u_endx_safe = FFMIN(s1x + w, s2x + w);
                        const int endy_safe   = FFMIN(s1y + h, s2y + h);
                        /* Width is rounded down to a multiple of 16. */
                        const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
                        const int safe_ph = endy_safe - starty_safe;

                        av_assert0(safe_pw && safe_ph);
                        av_assert0(startx_safe - s1x >= 0); av_assert0(startx_safe - s1x < w);
                        av_assert0(starty_safe - s1y >= 0); av_assert0(starty_safe - s1y < h);
                        av_assert0(startx_safe - s2x >= 0); av_assert0(startx_safe - s2x < w);
                        av_assert0(starty_safe - s2y >= 0); av_assert0(starty_safe - s2y < h);

                        memset(ii_ref, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref));
                        memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));

                        call_ref(ii_ref + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
                                 src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
                                 src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
                                 safe_pw, safe_ph);
                        call_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
                                 src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
                                 src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
                                 safe_pw, safe_ph);

                        if (memcmp(ii_ref, ii_new, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref)))
                            fail();

                        /* Benchmark on a freshly cleared output. */
                        memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
                        bench_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
                                  src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
                                  src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
                                  safe_pw, safe_ph);
                    }
                }
            }
        }

        av_freep(&ii_orig_ref);
        av_freep(&ii_orig_new);
        av_freep(&src);
    }

    report("dsp");
}

View File

@@ -0,0 +1,85 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavfilter/threshold.h"
#include "libavutil/intreadwrite.h"
#define WIDTH 256
/* Parenthesized so the macro behaves as a single operand in any expression. */
#define WIDTH_PADDED (256 + 32)

/*
 * Fill the first `size` bytes starting at `buf` with the low 8 bits of
 * successive rnd() values.  Arguments are parenthesized in the expansion so
 * that expression arguments are cast and compared as a whole.
 */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int j;                                  \
        uint8_t *tmp_buf = (uint8_t *)(buf);    \
        for (j = 0; j < (size); j++)            \
            tmp_buf[j] = rnd() & 0xFF;          \
    } while (0)
/*
 * Check ThresholdContext.threshold for one bit depth.
 *
 * depth - sample depth handed to ff_threshold_init(); 16 halves the width
 *         passed to the function under test.
 *
 * One line of random input is processed by the reference and the optimized
 * version, and the first WIDTH output bytes are compared.
 */
static void check_threshold(int depth){
    LOCAL_ALIGNED_32(uint8_t, in       , [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, threshold, [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, min      , [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, max      , [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, out_ref  , [WIDTH_PADDED]);
    LOCAL_ALIGNED_32(uint8_t, out_new  , [WIDTH_PADDED]);
    /* Inputs first (in the order they are randomized), outputs last. */
    uint8_t *const planes[6] = { in, threshold, min, max, out_ref, out_new };
    ptrdiff_t line_size = WIDTH_PADDED;
    int w = depth == 16 ? WIDTH / 2 : WIDTH;
    int k;

    declare_func(void, const uint8_t *in, const uint8_t *threshold,
                 const uint8_t *min, const uint8_t *max, uint8_t *out,
                 ptrdiff_t ilinesize, ptrdiff_t tlinesize,
                 ptrdiff_t flinesize, ptrdiff_t slinesize,
                 ptrdiff_t olinesize, int w, int h);

    ThresholdContext s;
    s.depth = depth;
    ff_threshold_init(&s);

    for (k = 0; k < 6; k++)
        memset(planes[k], 0, WIDTH_PADDED);

    /* Only the four inputs get random data; the padding stays zero. */
    for (k = 0; k < 4; k++)
        randomize_buffers(planes[k], WIDTH);

    if (!check_func(s.threshold, "threshold%d", depth))
        return;

    call_ref(in, threshold, min, max, out_ref, line_size, line_size, line_size, line_size, line_size, w, 1);
    call_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
    if (memcmp(out_ref, out_new, WIDTH) != 0)
        fail();
    bench_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
}
/* Entry point of the threshold test: checks depth 8 and depth 16, reporting
 * each separately. */
void checkasm_check_vf_threshold(void)
{
check_threshold(8);
report("threshold8");
check_threshold(16);
report("threshold16");
}

89
externals/ffmpeg/tests/checkasm/videodsp.c vendored Executable file
View File

@@ -0,0 +1,89 @@
/*
* Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "checkasm.h"
#include "libavcodec/videodsp.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
/*
 * Fill w * h elements of `src0` with random data, 32 bits at a time.
 * `src0` is not a parameter: it is taken from the scope where the macro is
 * expanded.
 *
 * `w` and `h` are parenthesized in the expansion so that expression
 * arguments multiply as a whole.
 */
#define randomize_buffers(w, h)                                 \
    do {                                                        \
        int i;                                                  \
        for (i = 0; i < (w) * (h) * sizeof(*src0); i += 4)      \
            AV_WN32A(((uint8_t *) src0) + i, rnd());            \
    } while (0)
/*
 * Sweep one coordinate while the other stays fixed: `fix` is set to `fix_val`
 * and `var` runs from `var_start` to `var_end` inclusive.  For each position
 * the reference and the optimized function are called with a source pointer
 * offset by (x, y), the bw * bh output elements are compared, and the
 * optimized function is benchmarked.
 *
 * Relies on dst0, dst1, src0, src1, bw, bh, pw, ph, x and y being in scope at
 * the expansion site.
 */
#define iter_1d(type, fix, fix_val, var, var_start, var_end) \
for (fix = fix_val, var = var_start; var <= var_end; var++) { \
call_ref((type *) dst0, (const type *) (src0 + y * pw + x), \
bw * sizeof(type), pw * sizeof(type), \
bw, bh, x, y, pw, ph); \
call_new((type *) dst1, (const type *) (src1 + y * pw + x), \
bw * sizeof(type), pw * sizeof(type), \
bw, bh, x, y, pw, ph); \
if (memcmp(dst0, dst1, bw * bh * sizeof(type))) \
fail(); \
bench_new((type *) dst1, (const type *) (src1 + y * pw + x),\
bw * sizeof(type), pw * sizeof(type), \
bw, bh, x, y, pw, ph); \
}
/*
 * Test one source-size / block-size combination.  Two identical random source
 * images of src_w x src_h elements are created, then the block position is
 * walked along the four sides of the rectangle
 * [-src_w, src_w] x [-src_h, src_h]: y = -src_h, x = src_w, y = src_h and
 * x = -src_w, in that order.
 *
 * Relies on dst0 and dst1 being in scope at the expansion site.
 */
#define check_emu_edge_size(type, src_w, src_h, dst_w, dst_h) \
do { \
LOCAL_ALIGNED_16(type, src0, [src_w * src_h]); \
LOCAL_ALIGNED_16(type, src1, [src_w * src_h]); \
int bw = dst_w, bh = dst_h; \
int pw = src_w, ph = src_h; \
int y, x; \
randomize_buffers(src_w, src_h); \
memcpy(src1, src0, pw * ph * sizeof(type)); \
iter_1d(type, y, 0 - src_h, x, 0 - src_w, src_w - 0); \
iter_1d(type, x, src_w - 0, y, 0 - src_h, src_h - 0); \
iter_1d(type, y, src_h - 0, x, 0 - src_w, src_w - 0); \
iter_1d(type, x, 0 - src_w, y, 0 - src_h, src_h - 0); \
} while (0)
/*
 * Declare the two 64x64 destination buffers and the signature of the function
 * under test for element type `type`, then test 64x64 blocks against source
 * images of 16x1, 16x16 and 64x64 elements.
 */
#define check_emu_edge(type) \
do { \
LOCAL_ALIGNED_16(type, dst0, [64 * 64]); \
LOCAL_ALIGNED_16(type, dst1, [64 * 64]); \
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, \
void, type *dst, const type *src, \
ptrdiff_t dst_linesize, \
ptrdiff_t src_linesize, \
int block_w, int block_h, \
int src_x, int src_y, \
int src_w, int src_h); \
check_emu_edge_size(type, 16, 1, 64, 64); \
check_emu_edge_size(type, 16, 16, 64, 64); \
check_emu_edge_size(type, 64, 64, 64, 64); \
} while (0)
/* Entry point of the videodsp test: initializes the context with the value 8
 * and checks emulated_edge_mc with uint8_t elements. */
void checkasm_check_videodsp(void)
{
VideoDSPContext vdsp;
ff_videodsp_init(&vdsp, 8);
if (check_func(vdsp.emulated_edge_mc, "emulated_edge_mc_8"))
check_emu_edge(uint8_t);
report("emulated_edge_mc");
}

519
externals/ffmpeg/tests/checkasm/vp8dsp.c vendored Executable file
View File

@@ -0,0 +1,519 @@
/*
* Copyright (c) 2016 Martin Storsjo
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavcodec/avcodec.h"
#include "libavcodec/vp8dsp.h"
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "checkasm.h"
#define PIXEL_STRIDE 16
/*
 * Fill a 4x4 block of `src` and of `dst` (rows `stride` bytes apart) with
 * random bytes, one 32-bit write per row, and store the per-pixel difference
 * src - dst in the 16-entry array `coef` (row-major).
 */
#define randomize_buffers(src, dst, stride, coef) \
do { \
int x, y; \
for (y = 0; y < 4; y++) { \
AV_WN32A((src) + y * (stride), rnd()); \
AV_WN32A((dst) + y * (stride), rnd()); \
for (x = 0; x < 4; x++) \
(coef)[y * 4 + x] = (src)[y * (stride) + x] - \
(dst)[y * (stride) + x]; \
} \
} while (0)
/*
 * In-place 4x4 transform of 16 coefficients stored row-major: a first pass
 * over each row, followed by a second pass over each column.  The tests use
 * it to turn pixel differences into coefficients for the functions they
 * check.
 */
static void dct4x4(int16_t *coef)
{
    int16_t *row, *col;

    /* Pass 1: rows. */
    for (row = coef; row < coef + 16; row += 4) {
        const int sum03  = (row[0] + row[3]) * 8;
        const int sum12  = (row[1] + row[2]) * 8;
        const int diff12 = (row[1] - row[2]) * 8;
        const int diff03 = (row[0] - row[3]) * 8;

        row[0] = sum03 + sum12;
        row[1] = (diff12 * 2217 + diff03 * 5352 + 14500) >> 12;
        row[2] = sum03 - sum12;
        row[3] = (diff03 * 2217 - diff12 * 5352 +  7500) >> 12;
    }

    /* Pass 2: columns (elements are 4 entries apart). */
    for (col = coef; col < coef + 4; col++) {
        const int sum03  = col[0] + col[12];
        const int sum12  = col[4] + col[8];
        const int diff12 = col[4] - col[8];
        const int diff03 = col[0] - col[12];

        col[0]  = (sum03 + sum12 + 7) >> 4;
        col[4]  = ((diff12 * 2217 + diff03 * 5352 + 12000) >> 16) + (diff03 != 0);
        col[8]  = (sum03 - sum12 + 7) >> 4;
        col[12] = (diff03 * 2217 - diff12 * 5352 + 51000) >> 16;
    }
}
/*
 * One 4-point pass shared by both stages of wht4x4(): reads the four values
 * v[0], v[step], v[2 * step] and v[3 * step], combines them, and writes the
 * results back multiplied by `scale`.
 */
static void wht4x4_pass(int16_t *v, int step, int scale)
{
    int s0 = v[0 * step];
    int s1 = v[1 * step];
    int s2 = v[2 * step];
    int s3 = v[3 * step];
    int half;

    s0  += s1;
    s3  -= s2;
    half = (s0 - s3) >> 1;
    s1   = half - s1;
    s2   = half - s2;
    s0  -= s2;
    s3  += s1;

    /* Note the output order: s0, s2, s3, s1. */
    v[0 * step] = s0 * scale;
    v[1 * step] = s2 * scale;
    v[2 * step] = s3 * scale;
    v[3 * step] = s1 * scale;
}

/*
 * In-place 4x4 transform of 16 coefficients stored row-major: a column pass
 * followed by a row pass whose outputs are doubled.
 */
static void wht4x4(int16_t *coef)
{
    int k;

    for (k = 0; k < 4; k++)
        wht4x4_pass(coef + k, 4, 1);

    for (k = 0; k < 4; k++)
        wht4x4_pass(coef + 4 * k, 1, 2);
}
/*
 * Check vp8_idct_add and vp8_idct_dc_add: build coefficients from a random
 * 4x4 source/destination pair via dct4x4(), then require the reference and
 * the optimized version to leave identical pixels and identical coefficient
 * buffers behind.
 */
static void check_idct(void)
{
LOCAL_ALIGNED_16(uint8_t, src, [4 * 4]);
LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4]);
LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);
LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);
LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
VP8DSPContext d;
int dc;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);
ff_vp8dsp_init(&d);
randomize_buffers(src, dst, 4, coef);
dct4x4(coef);
for (dc = 0; dc <= 1; dc++) {
void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d.vp8_idct_dc_add : d.vp8_idct_add;
if (check_func(idct, "vp8_idct_%sadd", dc ? "dc_" : "")) {
// The DC-only variant is given a block whose only non-zero entry is coef[0].
if (dc) {
memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
subcoef0[0] = coef[0];
} else {
memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
}
memcpy(dst0, dst, 4 * 4);
memcpy(dst1, dst, 4 * 4);
memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
// Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a
// multiple of 16. If optimizations want to take advantage of that, this test needs to be
// updated to make it more like the h264dsp tests.
call_ref(dst0, subcoef0, 4);
call_new(dst1, subcoef1, 4);
// The coefficient blocks are compared too, so any change a function makes to
// its block argument must match as well.
if (memcmp(dst0, dst1, 4 * 4) ||
memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
fail();
bench_new(dst1, subcoef1, 4);
}
}
}
/*
 * Check vp8_idct_dc_add4y and vp8_idct_dc_add4uv: four DC-only 4x4 blocks
 * are processed in one call.  The luma variant uses a stride of 16 with the
 * blocks laid out 4 across; the chroma variant uses a stride of 8 with the
 * blocks laid out 2 x 2.
 */
static void check_idct_dc4(void)
{
LOCAL_ALIGNED_16(uint8_t, src, [4 * 4 * 4]);
LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4 * 4]);
LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);
LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
VP8DSPContext d;
int i, chroma;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
ff_vp8dsp_init(&d);
for (chroma = 0; chroma <= 1; chroma++) {
void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d.vp8_idct_dc_add4uv : d.vp8_idct_dc_add4y;
if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
ptrdiff_t stride = chroma ? 8 : 16;
int w = chroma ? 2 : 4;
for (i = 0; i < 4; i++) {
// Pixel position of block i inside the 64-byte test area.
int blockx = 4 * (i % w);
int blocky = 4 * (i / w);
randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
dct4x4(coef[i]);
// Keep only the DC coefficient of each block.
memset(&coef[i][1], 0, 15 * sizeof(int16_t));
}
memcpy(dst0, dst, 4 * 4 * 4);
memcpy(dst1, dst, 4 * 4 * 4);
memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
call_ref(dst0, subcoef0, stride);
call_new(dst1, subcoef1, stride);
if (memcmp(dst0, dst1, 4 * 4 * 4) ||
memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
fail();
bench_new(dst1, subcoef1, stride);
}
}
}
/*
 * Check vp8_luma_dc_wht and vp8_luma_dc_wht_dc: the DC coefficient of each
 * of 16 transformed blocks is collected into `dc`, passed through wht4x4(),
 * and handed to the function under test together with the blocks.  Both the
 * blocks and the dc array are compared afterwards.
 */
static void check_luma_dc_wht(void)
{
LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);
LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
int16_t block[4][4][16];
LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
VP8DSPContext d;
int dc_only;
int blockx, blocky;
declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);
ff_vp8dsp_init(&d);
for (blocky = 0; blocky < 4; blocky++) {
for (blockx = 0; blockx < 4; blockx++) {
// NOTE(review): randomize_buffers() writes these arrays with AV_WN32A, whose
// name suggests an aligned store, yet they carry no explicit alignment --
// confirm this is safe on every supported target.
uint8_t src[16], dst[16];
randomize_buffers(src, dst, 4, block[blocky][blockx]);
dct4x4(block[blocky][blockx]);
// Move the real DC value into `dc` and leave a random value in its place.
dc[blocky * 4 + blockx] = block[blocky][blockx][0];
block[blocky][blockx][0] = rnd();
}
}
wht4x4(dc);
for (dc_only = 0; dc_only <= 1; dc_only++) {
void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d.vp8_luma_dc_wht_dc : d.vp8_luma_dc_wht;
if (check_func(idct, "vp8_luma_dc_wht%s", dc_only ? "_dc" : "")) {
// The DC-only variant is given an array whose only non-zero entry is dc[0].
if (dc_only) {
memset(dc0, 0, 16 * sizeof(int16_t));
dc0[0] = dc[0];
} else {
memcpy(dc0, dc, 16 * sizeof(int16_t));
}
memcpy(dc1, dc0, 16 * sizeof(int16_t));
memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
call_ref(block0, dc0);
call_new(block1, dc1);
if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
memcmp(dc0, dc1, 16 * sizeof(int16_t)))
fail();
bench_new(block1, dc1);
}
}
}
// Row pitch, in bytes, of the motion-compensation source buffer.
#define SRC_BUF_STRIDE 32
// Number of source bytes to randomize: (2 * size + 5) rows when size < 16,
// (size + 5) rows otherwise.  `size` is taken from the expansion site.
#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
// The mc subpixel interpolation filter needs the 2 previous pixels in either
// direction, the +1 is to make sure the actual load addresses always are
// unaligned.
#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)
#undef randomize_buffers
// Fill the first SRC_BUF_SIZE bytes of `buf` (taken from the expansion site)
// with random data, 32 bits at a time.
#define randomize_buffers() \
do { \
int k; \
for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
AV_WN32A(buf + k, rnd()); \
} \
} while (0)
/*
 * Check the put_vp8_epel_pixels_tab and put_vp8_bilinear_pixels_tab function
 * tables: for each table, block size and (dx, dy) filter combination, run the
 * reference and the optimized version on the same random source and compare
 * the size x height output block.
 */
static void check_mc(void)
{
LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
VP8DSPContext d;
int type, k, dx, dy;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int);
ff_vp78dsp_init(&d);
// type 0: epel table, type 1: bilinear table.
for (type = 0; type < 2; type++) {
vp8_mc_func (*tab)[3][3] = type ? d.put_vp8_bilinear_pixels_tab : d.put_vp8_epel_pixels_tab;
// k selects the block shape: width 16, 8 or 4 (hsize = k / 3) and a height
// of 2 * width, width or width / 2 (k % 3); k starts at 1, so 16x32 is skipped.
for (k = 1; k < 8; k++) {
int hsize = k / 3;
int size = 16 >> hsize;
int height = (size << 1) >> (k % 3);
for (dy = 0; dy < 3; dy++) {
for (dx = 0; dx < 3; dx++) {
char str[100];
if (dx || dy) {
if (type == 0) {
static const char *dx_names[] = { "", "h4", "h6" };
static const char *dy_names[] = { "", "v4", "v6" };
snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
} else {
snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
}
} else {
snprintf(str, sizeof(str), "pixels%d", size);
}
if (check_func(tab[hsize][dy][dx], "vp8_put_%s", str)) {
int mx, my;
int i;
// Pick a subpixel position matching the table slot: for epel, index 2 gets
// an even value from {2, 4, 6} and index 1 an odd value from {1, 3, 5, 7};
// for bilinear, any non-zero index gets a value in 1..7.
if (type == 0) {
mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
} else {
mx = dx ? 1 + (rnd() % 7) : 0;
my = dy ? 1 + (rnd() % 7) : 0;
}
randomize_buffers();
for (i = -2; i <= 3; i++) {
int val = (i == -1 || i == 2) ? 0 : 0xff;
// Set pixels in the first row and column to the maximum pattern,
// to test for potential overflows in the filter.
src[i ] = val;
src[i * SRC_BUF_STRIDE] = val;
}
call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
if (memcmp(dst0, dst1, size * height))
fail();
bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
}
}
}
}
}
}
#undef randomize_buffers
// Store c, clipped to 0..255, at position (a, b); b is scaled by `jstride`.
// `buf` and `jstride` are taken from the expansion site.
#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c +/- [0,d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c +/- [d,d+e] (making sure it won't be clipped)
// The unclipped value is also assigned to `o`; the offset is subtracted when
// c >= 128 and added otherwise.
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))
/*
 * Write loop-filter test patterns across an edge in `buf`.
 *
 * Four lines (i = 0, 2, 4, 6) starting at `lineoff` are filled with eight
 * pixels each, at positions -4..3 relative to the edge.  The pixel values are
 * derived from one random value per line, offset by amounts bounded by
 * flim_E, flim_I and hev_thresh.
 *
 * lineoff   - first line (along the edge) to write
 * str       - buffer stride in bytes
 * dir       - non-zero: lines advance by 1 byte and positions across the
 *             edge by `str`; zero: the other way round
 * force_hev - see the comment inside the loop
 */
static void randomize_loopfilter_buffers(int lineoff, int str,
int dir, int flim_E, int flim_I,
int hev_thresh, uint8_t *buf,
int force_hev)
{
uint32_t mask = 0xff;
int off = dir ? lineoff : lineoff * str;
int istride = dir ? 1 : str;
int jstride = dir ? str : 1;
int i;
for (i = 0; i < 8; i += 2) {
// Row 0 will trigger hev for q0/q1, row 2 will trigger hev for p0/p1,
// rows 4 and 6 will not trigger hev.
// force_hev 1 will make sure all rows trigger hev, while force_hev -1
// makes none of them trigger it.
int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
// q side: positions 0..3, each value derived from the previous one.
setpx(idx, 0, q0 = rnd() & mask);
if (i == 0 && force_hev >= 0 || force_hev > 0)
setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
else
setdx(idx, 1, q1 = q0, hev_thresh);
setdx(idx, 2, q2 = q1, flim_I);
setdx(idx, 3, q2, flim_I);
// p side: positions -1..-4, mirrored from q0.
setdx(idx, -1, p0 = q0, flim_E >> 2);
if (i == 2 && force_hev >= 0 || force_hev > 0)
setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
else
setdx(idx, -2, p1 = p0, hev_thresh);
setdx(idx, -3, p2 = p1, flim_I);
setdx(idx, -4, p2, flim_I);
}
}
// Overwrite a w x h rectangle of buf, whose rows are `stride` bytes apart,
// with random byte values.
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        int col;

        for (col = 0; col < w; col++)
            buf[row * stride + col] = rnd() & 0xff;
    }
}
// Shorthand for randomize_loopfilter_buffers(); dir, flim_E, flim_I and
// hev_thresh are taken from the scope where the macro is expanded.
#define randomize_buffers(buf, lineoff, str, force_hev) \
randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)
/*
 * Check the four 16-pixel luma loop filters (horizontal/vertical, normal and
 * inner) with force_hev set to -1, 0 and 1, comparing the whole 16x16 test
 * area after each run.
 */
static void check_loopfilter_16y(void)
{
LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
VP8DSPContext d;
int dir, edge, force_hev;
int flim_E = 20, flim_I = 10, hev_thresh = 7;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int, int, int);
ff_vp8dsp_init(&d);
for (dir = 0; dir < 2; dir++) {
// midoff: distance from the start of the 16x16 test area to the filtered
// edge (4 bytes for dir 0, 4 lines of 16 bytes for dir 1).
// midoff_aligned: offset of the edge from the 16-byte aligned base; it is a
// multiple of 16 in both cases.
int midoff = dir ? 4 * 16 : 4;
int midoff_aligned = dir ? 4 * 16 : 16;
uint8_t *buf0 = base0 + midoff_aligned;
uint8_t *buf1 = base1 + midoff_aligned;
for (edge = 0; edge < 2; edge++) {
void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
switch (dir << 1 | edge) {
case (0 << 1) | 0: func = d.vp8_h_loop_filter16y; break;
case (1 << 1) | 0: func = d.vp8_v_loop_filter16y; break;
case (0 << 1) | 1: func = d.vp8_h_loop_filter16y_inner; break;
case (1 << 1) | 1: func = d.vp8_v_loop_filter16y_inner; break;
}
if (check_func(func, "vp8_loop_filter16y%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
for (force_hev = -1; force_hev <= 1; force_hev++) {
// Random background first, then the edge patterns on lines 0-7 and 8-15.
fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
randomize_buffers(buf0, 0, 16, force_hev);
randomize_buffers(buf0, 8, 16, force_hev);
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
call_new(buf1, 16, flim_E, flim_I, hev_thresh);
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
fail();
}
// Fresh input for the benchmark run.
fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
randomize_buffers(buf0, 0, 16, 0);
randomize_buffers(buf0, 8, 16, 0);
bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
}
}
}
}
/*
 * Check the four 8-pixel chroma loop filters (horizontal/vertical, normal and
 * inner), which take a U and a V pointer per call, with force_hev set to -1,
 * 0 and 1.  Both 16x16 test areas are compared after each run.
 */
static void check_loopfilter_8uv(void)
{
LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
VP8DSPContext d;
int dir, edge, force_hev;
int flim_E = 20, flim_I = 10, hev_thresh = 7;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);
ff_vp8dsp_init(&d);
for (dir = 0; dir < 2; dir++) {
// midoff: distance from the start of the 16x16 test area to the filtered
// edge; midoff_aligned: offset of the edge from the 16-byte aligned base.
int midoff = dir ? 4 * 16 : 4;
int midoff_aligned = dir ? 4 * 16 : 16;
uint8_t *buf0u = base0u + midoff_aligned;
uint8_t *buf0v = base0v + midoff_aligned;
uint8_t *buf1u = base1u + midoff_aligned;
uint8_t *buf1v = base1v + midoff_aligned;
for (edge = 0; edge < 2; edge++) {
void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
switch (dir << 1 | edge) {
case (0 << 1) | 0: func = d.vp8_h_loop_filter8uv; break;
case (1 << 1) | 0: func = d.vp8_v_loop_filter8uv; break;
case (0 << 1) | 1: func = d.vp8_h_loop_filter8uv_inner; break;
case (1 << 1) | 1: func = d.vp8_v_loop_filter8uv_inner; break;
}
if (check_func(func, "vp8_loop_filter8uv%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
for (force_hev = -1; force_hev <= 1; force_hev++) {
// Random background first, then the edge patterns on lines 0-7 of each plane.
fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
randomize_buffers(buf0u, 0, 16, force_hev);
randomize_buffers(buf0v, 0, 16, force_hev);
memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
fail();
}
// Fresh input for the benchmark run.
fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
randomize_buffers(buf0u, 0, 16, 0);
randomize_buffers(buf0v, 0, 16, 0);
bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
}
}
}
}
/*
 * Check the horizontal and vertical "simple" loop filters, which take only
 * the edge limit flim_E.  Test patterns are generated with force_hev = -1.
 */
static void check_loopfilter_simple(void)
{
LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
VP8DSPContext d;
int dir;
// flim_I and hev_thresh are only used to generate the input pattern; they are
// not passed to the function under test.
int flim_E = 20, flim_I = 30, hev_thresh = 0;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
ff_vp8dsp_init(&d);
for (dir = 0; dir < 2; dir++) {
// midoff: distance from the start of the 16x16 test area to the filtered
// edge; midoff_aligned: offset of the edge from the 16-byte aligned base.
int midoff = dir ? 4 * 16 : 4;
int midoff_aligned = dir ? 4 * 16 : 16;
uint8_t *buf0 = base0 + midoff_aligned;
uint8_t *buf1 = base1 + midoff_aligned;
void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d.vp8_v_loop_filter_simple : d.vp8_h_loop_filter_simple;
if (check_func(func, "vp8_loop_filter_simple_%s", dir ? "v" : "h")) {
fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
randomize_buffers(buf0, 0, 16, -1);
randomize_buffers(buf0, 8, 16, -1);
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
call_ref(buf0, 16, flim_E);
call_new(buf1, 16, flim_E);
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
fail();
bench_new(buf0, 16, flim_E);
}
}
}
void checkasm_check_vp8dsp(void)
{
check_idct();
check_idct_dc4();
check_luma_dc_wht();
report("idct");
check_mc();
report("mc");
check_loopfilter_16y();
check_loopfilter_8uv();
check_loopfilter_simple();
report("loopfilter");
}

631
externals/ffmpeg/tests/checkasm/vp9dsp.c vendored Executable file
View File

@@ -0,0 +1,631 @@
/*
* Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <math.h>
#include <string.h>
#include "checkasm.h"
#include "libavcodec/vp9data.h"
#include "libavcodec/vp9.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
/* Masks applied to each 32-bit random word, indexed by (bit_depth - 8) >> 1:
 * every bit for 8 bpp, and the low 10 or 12 bits of each 16-bit half for
 * 10 and 12 bpp. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
/* Bytes per pixel: 1 for 8 bpp, 2 for 10 and 12 bpp. Expects a variable
 * named bit_depth to be in scope where the macro is used. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Intra-prediction variant of randomize_buffers(): writes masked random
 * 32-bit words to `a` from byte offset -4 up to SIZEOF_PIXEL * FFMAX(8, size)
 * and to `l` for size * SIZEOF_PIXEL bytes. Expects a, l, size and bit_depth
 * to be in scope where the macro is used. */
#define randomize_buffers() \
do { \
uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
int k; \
for (k = -4; k < SIZEOF_PIXEL * FFMAX(8, size); k += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(a + k, r); \
} \
for (k = 0; k < size * SIZEOF_PIXEL; k += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(l + k, r); \
} \
} while (0)
/*
 * Test every VP9 intra prediction function (all block sizes, all prediction
 * modes, 8/10/12 bits per pixel).
 *
 * For each function the `a` and `l` input buffers are randomized, the
 * predictor is run through call_ref() and call_new() into separate
 * destination buffers, and the size x size output blocks are compared.
 */
static void check_ipred(void)
{
LOCAL_ALIGNED_32(uint8_t, a_buf, [64 * 2]);
/* `a` starts 64 bytes into a_buf; randomize_buffers() writes from a - 4. */
uint8_t *a = &a_buf[32 * 2];
LOCAL_ALIGNED_32(uint8_t, l, [32 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
VP9DSPContext dsp;
int tx, mode, bit_depth;
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *top);
/* Name fragment used in the reported test name for each prediction mode. */
static const char *const mode_names[N_INTRA_PRED_MODES] = {
[VERT_PRED] = "vert",
[HOR_PRED] = "hor",
[DC_PRED] = "dc",
[DIAG_DOWN_LEFT_PRED] = "diag_downleft",
[DIAG_DOWN_RIGHT_PRED] = "diag_downright",
[VERT_RIGHT_PRED] = "vert_right",
[HOR_DOWN_PRED] = "hor_down",
[VERT_LEFT_PRED] = "vert_left",
[HOR_UP_PRED] = "hor_up",
[TM_VP8_PRED] = "tm",
[LEFT_DC_PRED] = "dc_left",
[TOP_DC_PRED] = "dc_top",
[DC_128_PRED] = "dc_128",
[DC_127_PRED] = "dc_127",
[DC_129_PRED] = "dc_129",
};
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vp9dsp_init(&dsp, bit_depth, 0);
for (tx = 0; tx < 4; tx++) {
/* Block edge length in pixels: 4, 8, 16, 32. */
int size = 4 << tx;
for (mode = 0; mode < N_INTRA_PRED_MODES; mode++) {
if (check_func(dsp.intra_pred[tx][mode], "vp9_%s_%dx%d_%dbpp",
mode_names[mode], size, size, bit_depth)) {
randomize_buffers();
/* The destination stride is exactly one row of the block. */
call_ref(dst0, size * SIZEOF_PIXEL, l, a);
call_new(dst1, size * SIZEOF_PIXEL, l, a);
if (memcmp(dst0, dst1, size * size * SIZEOF_PIXEL))
fail();
bench_new(dst1, size * SIZEOF_PIXEL,l, a);
}
}
}
}
report("ipred");
}
#undef randomize_buffers
/* Inverse-transform variant of randomize_buffers(): fills sz x sz pixels of
 * `dst` and `src` with masked random words and stores the per-pixel
 * difference src - dst into `coef` (as int16_t entries for 8 bpp, as int32_t
 * entries otherwise). Expects y, x, sz, bit_depth, src, dst and coef to be in
 * scope where the macro is used. */
#define randomize_buffers() \
do { \
uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
for (y = 0; y < sz; y++) { \
for (x = 0; x < sz * SIZEOF_PIXEL; x += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(dst + y * sz * SIZEOF_PIXEL + x, r); \
AV_WN32A(src + y * sz * SIZEOF_PIXEL + x, rnd() & mask); \
} \
for (x = 0; x < sz; x++) { \
if (bit_depth == 8) { \
coef[y * sz + x] = src[y * sz + x] - dst[y * sz + x]; \
} else { \
((int32_t *) coef)[y * sz + x] = \
((uint16_t *) src)[y * sz + x] - \
((uint16_t *) dst)[y * sz + x]; \
} \
} \
} \
} while(0)
// wht function copied from libvpx
static void fwht_1d(double *out, const double *in, int sz)
{
double t0 = in[0] + in[1];
double t3 = in[3] - in[2];
double t4 = trunc((t0 - t3) * 0.5);
double t1 = t4 - in[1];
double t2 = t4 - in[2];
out[0] = t0 - t2;
out[1] = t2;
out[2] = t3 + t1;
out[3] = t1;
}
// standard DCT-II
static void fdct_1d(double *out, const double *in, int sz)
{
int k, n;
for (k = 0; k < sz; k++) {
out[k] = 0.0;
for (n = 0; n < sz; n++)
out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (sz * 2.0));
}
out[0] *= M_SQRT1_2;
}
// see "Towards jointly optimal spatial prediction and adaptive transform in
// video/image coding", by J. Han, A. Saxena, and K. Rose
// IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
static void fadst4_1d(double *out, const double *in, int sz)
{
int k, n;
for (k = 0; k < sz; k++) {
out[k] = 0.0;
for (n = 0; n < sz; n++)
out[k] += in[n] * sin(M_PI * (n + 1) * (2 * k + 1) / (sz * 2.0 + 1.0));
}
}
// see "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
// by Jingning Han, Yaowu Xu, and Debargha Mukherjee
// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41418.pdf
static void fadst_1d(double *out, const double *in, int sz)
{
int k, n;
for (k = 0; k < sz; k++) {
out[k] = 0.0;
for (n = 0; n < sz; n++)
out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (sz * 4.0));
}
}
// Signature shared by all 1-D forward transforms in this file.
typedef void (*ftx1d_fn)(double *out, const double *in, int sz);

// Separable 2-D forward transform of an sz x sz block of doubles.
//   out  - receives sz * sz results
//   in   - sz * sz input values
//   tx   - first index into the tables below (0..4)
//   txtp - second index into the tables below (0..3)
//   sz   - edge length of the block
// The first 1-D transform is applied to each run of sz consecutive inputs,
// its output is scaled and stored transposed, and the second 1-D transform
// is then applied to each run of sz values of that intermediate result.
static void ftx_2d(double *out, const double *in, enum TxfmMode tx,
                   enum TxfmType txtp, int sz)
{
    // Scale applied between the two passes, indexed [tx][txtp].
    static const double scaling_factors[5][4] = {
        { 4.0, 16.0 * M_SQRT1_2 / 3.0, 16.0 * M_SQRT1_2 / 3.0, 32.0 / 9.0 },
        { 2.0, 2.0, 2.0, 2.0 },
        { 1.0, 1.0, 1.0, 1.0 },
        { 0.25 },
        { 4.0 }
    };
    // 1-D transform pair { first pass, second pass }, indexed [tx][txtp].
    static const ftx1d_fn ftx1d_tbl[5][4][2] = {
        {
            { fdct_1d,   fdct_1d   },
            { fadst4_1d, fdct_1d   },
            { fdct_1d,   fadst4_1d },
            { fadst4_1d, fadst4_1d },
        }, {
            { fdct_1d,  fdct_1d  },
            { fadst_1d, fdct_1d  },
            { fdct_1d,  fadst_1d },
            { fadst_1d, fadst_1d },
        }, {
            { fdct_1d,  fdct_1d  },
            { fadst_1d, fdct_1d  },
            { fdct_1d,  fadst_1d },
            { fadst_1d, fadst_1d },
        }, {
            { fdct_1d, fdct_1d },
        }, {
            { fwht_1d, fwht_1d },
        },
    };
    const ftx1d_fn first_pass  = ftx1d_tbl[tx][txtp][0];
    const ftx1d_fn second_pass = ftx1d_tbl[tx][txtp][1];
    const double scale = scaling_factors[tx][txtp];
    double scratch[1024];
    int line, k;

    // First pass: transform, scale and transpose into scratch.
    for (line = 0; line < sz; line++) {
        double line_out[32];

        first_pass(line_out, in + line * sz, sz);
        for (k = 0; k < sz; k++) {
            scratch[k * sz + line] = line_out[k] * scale;
        }
    }

    // Second pass: transform the transposed intermediate into out.
    for (line = 0; line < sz; line++) {
        second_pass(out + line * sz, scratch + line * sz, sz);
    }
}
// In-place forward transform of an sz x sz coefficient block.
//   buf       - coefficients; read and written as int16_t when
//               bit_depth == 8, as int32_t otherwise
//   tx, txtp  - forwarded unchanged to ftx_2d()
//   sz        - edge length of the block
//   bit_depth - selects the element type of buf (see above)
// Values are widened to double, transformed with ftx_2d() and rounded back
// with lrint().
static void ftx(int16_t *buf, enum TxfmMode tx,
                enum TxfmType txtp, int sz, int bit_depth)
{
    double samples[1024], result[1024];
    const int count = sz * sz;
    int idx;

    emms_c();

    if (bit_depth == 8) {
        for (idx = 0; idx < count; idx++)
            samples[idx] = buf[idx];
    } else {
        for (idx = 0; idx < count; idx++)
            samples[idx] = ((int32_t *) buf)[idx];
    }

    ftx_2d(result, samples, tx, txtp, sz);

    if (bit_depth == 8) {
        for (idx = 0; idx < count; idx++)
            buf[idx] = lrint(result[idx]);
    } else {
        for (idx = 0; idx < count; idx++)
            ((int32_t *) buf)[idx] = lrint(result[idx]);
    }
}
// Copy only the top-left sub x sub coefficients of an sz x sz block (where
// sz >= sub) from in to out and zero everything else, walking the
// coefficients in the scan order ff_vp9_scans[tx][txtp].
//
// The return value is the scan index at which the first coefficient outside
// the top-left sub x sub area was found; used as the eob it guarantees that
// only those top-left coefficients are non-zero. This makes the transform
// branch into its specialised SIMD paths (e.g. dc-only), so the test gets
// full asm coverage.
//
// Coefficients are int16_t when bit_depth == 8 and 32 bits wide otherwise.
static int copy_subcoefs(int16_t *out, const int16_t *in, enum TxfmMode tx,
                         enum TxfmType txtp, int sz, int sub, int bit_depth)
{
    const int16_t *scan = ff_vp9_scans[tx][txtp];
    const int total = sz * sz;
    int pos = 0;
    int eob;

    // Copy until the scan leaves the top-left sub x sub area.
    while (pos < total) {
        const int rc = scan[pos];

        if (rc % sz >= sub || rc / sz >= sub)
            break;

        if (bit_depth == 8) {
            out[rc] = in[rc];
        } else {
            AV_COPY32(&out[rc * 2], &in[rc * 2]);
        }
        pos++;
    }
    eob = pos;

    // Zero every coefficient from that scan position onwards.
    while (pos < total) {
        const int rc = scan[pos];

        if (bit_depth == 8) {
            out[rc] = 0;
        } else {
            AV_ZERO32(&out[rc * 2]);
        }
        pos++;
    }

    return eob;
}
// Return 1 if the first sz bytes of c are all zero, 0 otherwise.
// The buffer is examined 32 bits (two int16_t entries) at a time.
static int iszero(const int16_t *c, int sz)
{
    const size_t n_entries = sz / sizeof(int16_t);
    int n = 0;

    while (n < n_entries) {
        if (AV_RN32A(&c[n]) != 0)
            return 0;
        n += 2;
    }
    return 1;
}
/* Bytes per coefficient: 2 for 8 bpp, 4 for 10 and 12 bpp. Expects a
 * variable named bit_depth to be in scope where the macro is used. */
#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8))
/*
 * Test the VP9 inverse transform + add functions for every transform size,
 * transform type and bit depth.
 *
 * For each function a random residual (src - dst) is generated, run through
 * the forward transform in this file, optionally reduced to its top-left
 * sub x sub coefficients, and then handed to call_ref() and call_new().
 * The check fails if the two outputs differ or if either coefficient buffer
 * is not all zero after the call.
 */
static void check_itxfm(void)
{
LOCAL_ALIGNED_32(uint8_t, src, [32 * 32 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst, [32 * 32 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]);
LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]);
LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]);
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
VP9DSPContext dsp;
/* y, x (and sz, src, dst, coef, bit_depth) are referenced by name inside
 * randomize_buffers(). */
int y, x, tx, txtp, bit_depth, sub;
/* Name fragment used in the reported test name for each transform type. */
static const char *const txtp_types[N_TXFM_TYPES] = {
[DCT_DCT] = "dct_dct", [DCT_ADST] = "adst_dct",
[ADST_DCT] = "dct_adst", [ADST_ADST] = "adst_adst"
};
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vp9dsp_init(&dsp, bit_depth, 0);
/* The loop deliberately includes tx == N_TXFM_SIZES, the lossless entry
 * (reported as "wht_wht" below). */
for (tx = TX_4X4; tx <= N_TXFM_SIZES /* 4 = lossless */; tx++) {
/* Edge length: 4, 8, 16, 32, and 4 again for the lossless entry. */
int sz = 4 << (tx & 3);
/* From TX_32X32 upwards only the first transform type is tested. */
int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
for (txtp = 0; txtp < n_txtps; txtp++) {
// skip testing sub-IDCTs for WHT or ADST since they don't
// implement it in any of the SIMD functions. If they do,
// consider changing this to ensure we have complete test
// coverage. Test sub=1 for dc-only, then 2, 4, 8, 12, etc,
// since the arm version can distinguish them at that level.
for (sub = (txtp == 0 && tx < 4) ? 1 : sz; sub <= sz;
sub < 4 ? (sub <<= 1) : (sub += 4)) {
if (check_func(dsp.itxfm_add[tx][txtp],
"vp9_inv_%s_%dx%d_sub%d_add_%d",
tx == 4 ? "wht_wht" : txtp_types[txtp],
sz, sz, sub, bit_depth)) {
int eob;
randomize_buffers();
ftx(coef, tx, txtp, sz, bit_depth);
if (sub < sz) {
eob = copy_subcoefs(subcoef0, coef, tx, txtp,
sz, sub, bit_depth);
} else {
/* Full block: keep every coefficient. */
eob = sz * sz;
memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
}
/* Both implementations get identical destination pixels and
 * identical coefficients. */
memcpy(dst0, dst, sz * sz * SIZEOF_PIXEL);
memcpy(dst1, dst, sz * sz * SIZEOF_PIXEL);
memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
call_ref(dst0, sz * SIZEOF_PIXEL, subcoef0, eob);
call_new(dst1, sz * SIZEOF_PIXEL, subcoef1, eob);
/* Besides matching output, both coefficient buffers must be all
 * zero after the call. */
if (memcmp(dst0, dst1, sz * sz * SIZEOF_PIXEL) ||
!iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
!iszero(subcoef1, sz * sz * SIZEOF_COEF))
fail();
bench_new(dst, sz * SIZEOF_PIXEL, coef, eob);
}
}
}
}
}
report("itxfm");
}
#undef randomize_buffers
/* setpx(a, b, c): store pixel value c at element index (a) + (b) * jstride of
 * buf0, clipped to 8 bits when SIZEOF_PIXEL == 1 and to bit_depth bits
 * otherwise. Expects buf0, jstride and bit_depth to be in scope where the
 * macro is used. */
#define setpx(a,b,c) \
do { \
if (SIZEOF_PIXEL == 1) { \
buf0[(a) + (b) * jstride] = av_clip_uint8(c); \
} else { \
((uint16_t *)buf0)[(a) + (b) * jstride] = av_clip_uintp2(c, bit_depth); \
} \
} while (0)
// c can be an assignment and must not be put under ()
/* setdx(a, b, c, d): like setpx(), with -(d) + rnd() % (2 * (d) + 1) added to
 * c. When c is an assignment such as "p0 = q0", the added terms become part
 * of the assigned expression. */
#define setdx(a,b,c,d) setpx(a,b,c-(d)+(rnd()%((d)*2+1)))
/* setsx(a, b, c, d): setdx() with d shifted left by (bit_depth - 8). */
#define setsx(a,b,c,d) setdx(a,b,c,(d) << (bit_depth - 8))
/*
 * Fill eight lines of a loop filter test buffer with pixel patterns, two
 * lines per pattern (named flat16, flat8, regular and off below).
 *
 *   bidx      - index into the E, F and I threshold arrays
 *   lineoff   - offset of the first of the eight lines
 *   str       - stride between pixels across the edge when dir is non-zero
 *   bit_depth - bits per pixel; selects the pixel type and the value range
 *   dir       - 0 or 1; selects how lines and pixels map onto buf0
 *   E, F, I   - spreads for the random offsets applied around the edge
 *   H, buf1   - not referenced in this function
 *   buf0      - buffer written through the setpx()/setsx() macros
 *
 * The macros reference buf0, jstride and bit_depth by name and call rnd(),
 * so the order of the statements determines the generated data.
 */
static void randomize_loopfilter_buffers(int bidx, int lineoff, int str,
int bit_depth, int dir, const int *E,
const int *F, const int *H, const int *I,
uint8_t *buf0, uint8_t *buf1)
{
uint32_t mask = (1 << bit_depth) - 1;
/* off: element index of the first line; istride: step between lines;
 * jstride: step between pixels within a line (across the edge). */
int off = dir ? lineoff : lineoff * 16;
int istride = dir ? 1 : 16;
int jstride = dir ? str : 1;
int i, j;
/* 7 pixels on each side stay within F[bidx] of p0 / q0. */
for (i = 0; i < 2; i++) /* flat16 */ {
int idx = off + i * istride, p0, q0;
setpx(idx, 0, q0 = rnd() & mask);
setsx(idx, -1, p0 = q0, E[bidx] >> 2);
for (j = 1; j < 8; j++) {
setsx(idx, -1 - j, p0, F[bidx]);
setsx(idx, j, q0, F[bidx]);
}
}
/* 3 pixels on each side stay within F[bidx]; the outer 4 are random. */
for (i = 2; i < 4; i++) /* flat8 */ {
int idx = off + i * istride, p0, q0;
setpx(idx, 0, q0 = rnd() & mask);
setsx(idx, -1, p0 = q0, E[bidx] >> 2);
for (j = 1; j < 4; j++) {
setsx(idx, -1 - j, p0, F[bidx]);
setsx(idx, j, q0, F[bidx]);
}
for (j = 4; j < 8; j++) {
setpx(idx, -1 - j, rnd() & mask);
setpx(idx, j, rnd() & mask);
}
}
/* Each of the inner 4 pixels per side is derived from its neighbour with a
 * spread of I[bidx]; the outer 4 are random. */
for (i = 4; i < 6; i++) /* regular */ {
int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
setpx(idx, 0, q0 = rnd() & mask);
setsx(idx, 1, q1 = q0, I[bidx]);
setsx(idx, 2, q2 = q1, I[bidx]);
setsx(idx, 3, q2, I[bidx]);
setsx(idx, -1, p0 = q0, E[bidx] >> 2);
setsx(idx, -2, p1 = p0, I[bidx]);
setsx(idx, -3, p2 = p1, I[bidx]);
setsx(idx, -4, p2, I[bidx]);
for (j = 4; j < 8; j++) {
setpx(idx, -1 - j, rnd() & mask);
setpx(idx, j, rnd() & mask);
}
}
/* All 16 pixels are independent random values. */
for (i = 6; i < 8; i++) /* off */ {
int idx = off + i * istride;
for (j = 0; j < 8; j++) {
setpx(idx, -1 - j, rnd() & mask);
setpx(idx, j, rnd() & mask);
}
}
}
/* Loop filter variant of randomize_buffers(): forwards to
 * randomize_loopfilter_buffers(). Expects bit_depth, dir, E, F, H, I, buf0
 * and buf1 to be in scope where the macro is used. */
#define randomize_buffers(bidx, lineoff, str) \
randomize_loopfilter_buffers(bidx, lineoff, str, bit_depth, dir, \
E, F, H, I, buf0, buf1)
/*
 * Test the VP9 loop filters (loop_filter_8, loop_filter_16 and
 * loop_filter_mix2) in both directions at 8/10/12 bits per pixel.
 *
 * Each check randomizes buf0, copies it to buf1, runs call_ref() on buf0 and
 * call_new() on buf1 and compares the buffers starting midoff bytes before
 * the pointer that was passed to the filter.
 */
static void check_loopfilter(void)
{
LOCAL_ALIGNED_32(uint8_t, base0, [32 + 16 * 16 * 2]);
LOCAL_ALIGNED_32(uint8_t, base1, [32 + 16 * 16 * 2]);
VP9DSPContext dsp;
int dir, wd, wd2, bit_depth;
static const char *const dir_name[2] = { "h", "v" };
/* Two sets of filter parameters; index 1 is only used by the mix2 tests. */
static const int E[2] = { 20, 28 }, I[2] = { 10, 16 };
static const int H[2] = { 7, 11 }, F[2] = { 1, 1 };
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vp9dsp_init(&dsp, bit_depth, 0);
for (dir = 0; dir < 2; dir++) {
/* midoff: bytes between the start of the compared area and the pointer
 * handed to the filter. midoff_aligned: offset of that pointer inside the
 * base array. */
int midoff = (dir ? 8 * 8 : 8) * SIZEOF_PIXEL;
int midoff_aligned = (dir ? 8 * 8 : 16) * SIZEOF_PIXEL;
uint8_t *buf0 = base0 + midoff_aligned;
uint8_t *buf1 = base1 + midoff_aligned;
for (wd = 0; wd < 3; wd++) {
// 4/8/16wd_8px
if (check_func(dsp.loop_filter_8[wd][dir],
"vp9_loop_filter_%s_%d_8_%dbpp",
dir_name[dir], 4 << wd, bit_depth)) {
randomize_buffers(0, 0, 8);
memcpy(buf1 - midoff, buf0 - midoff,
16 * 8 * SIZEOF_PIXEL);
/* Stride is 16 pixels for dir == 0 and 8 pixels for dir == 1. */
call_ref(buf0, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
call_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 8 * SIZEOF_PIXEL))
fail();
bench_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
}
}
/* The remaining tests cover 16 lines, so the offsets are recomputed. */
midoff = (dir ? 16 * 8 : 8) * SIZEOF_PIXEL;
midoff_aligned = (dir ? 16 * 8 : 16) * SIZEOF_PIXEL;
buf0 = base0 + midoff_aligned;
buf1 = base1 + midoff_aligned;
// 16wd_16px loopfilter
if (check_func(dsp.loop_filter_16[dir],
"vp9_loop_filter_%s_16_16_%dbpp",
dir_name[dir], bit_depth)) {
/* Two groups of eight lines, both with parameter set 0. */
randomize_buffers(0, 0, 16);
randomize_buffers(0, 8, 16);
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
call_ref(buf0, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
call_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
fail();
bench_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
}
for (wd = 0; wd < 2; wd++) {
for (wd2 = 0; wd2 < 2; wd2++) {
// mix2 loopfilter
if (check_func(dsp.loop_filter_mix2[wd][wd2][dir],
"vp9_loop_filter_mix2_%s_%d%d_16_%dbpp",
dir_name[dir], 4 << wd, 4 << wd2, bit_depth)) {
/* First eight lines use parameter set 0, the next eight use set 1. */
randomize_buffers(0, 0, 16);
randomize_buffers(1, 8, 16);
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
/* M(a) packs both parameter sets into one int: a[0] in the low 8 bits,
 * a[1] shifted left by 8. */
#define M(a) (((a)[1] << 8) | (a)[0])
call_ref(buf0, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
call_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
fail();
bench_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
#undef M
}
}
}
}
}
report("loopfilter");
}
#undef setsx
#undef setpx
#undef setdx
#undef randomize_buffers
/* Helper macros for the motion compensation test. They expect size,
 * bit_depth and buf to be in scope where they are used. */
/* Bytes in a size x size destination block. */
#define DST_BUF_SIZE (size * size * SIZEOF_PIXEL)
/* Source stride in pixels. */
#define SRC_BUF_STRIDE 72
/* Bytes of source randomized per test: size + 7 rows of SRC_BUF_STRIDE. */
#define SRC_BUF_SIZE ((size + 7) * SRC_BUF_STRIDE * SIZEOF_PIXEL)
/* Source pointer handed to the functions: 3 rows and 3 pixels into buf. */
#define src (buf + 3 * SIZEOF_PIXEL * (SRC_BUF_STRIDE + 1))
/* Motion compensation variant of randomize_buffers(): fills SRC_BUF_SIZE
 * bytes of buf with masked random words and, when op == 1, fills dst0 and
 * dst1 with identical random data. Additionally expects op, dst0 and dst1 to
 * be in scope. */
#define randomize_buffers() \
do { \
uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
int k; \
for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(buf + k, r); \
} \
if (op == 1) { \
for (k = 0; k < DST_BUF_SIZE; k += 4) { \
uint32_t r = rnd() & mask; \
AV_WN32A(dst0 + k, r); \
AV_WN32A(dst1 + k, r); \
} \
} \
} while (0)
/*
 * Test the VP9 motion compensation functions: both ops ("put" and "avg"),
 * 8/10/12 bits per pixel, block sizes 64 down to 4, four filters and all
 * four combinations of horizontal/vertical subpel interpolation.
 *
 * Each function is run through call_ref() and call_new() on the same random
 * source and the size x size destination blocks are compared.
 */
static void check_mc(void)
{
LOCAL_ALIGNED_32(uint8_t, buf, [72 * 72 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst0, [64 * 64 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2]);
VP9DSPContext dsp;
int op, hsize, bit_depth, filter, dx, dy;
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
int h, int mx, int my);
static const char *const filter_names[4] = {
"8tap_smooth", "8tap_regular", "8tap_sharp", "bilin"
};
/* Indexed [dy][dx]. */
static const char *const subpel_names[2][2] = { { "", "h" }, { "v", "hv" } };
static const char *const op_names[2] = { "put", "avg" };
char str[256];
for (op = 0; op < 2; op++) {
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vp9dsp_init(&dsp, bit_depth, 0);
for (hsize = 0; hsize < 5; hsize++) {
/* Block edge length in pixels: 64, 32, 16, 8, 4. */
int size = 64 >> hsize;
for (filter = 0; filter < 4; filter++) {
for (dx = 0; dx < 2; dx++) {
for (dy = 0; dy < 2; dy++) {
/* Functions without subpel interpolation are named without the
 * filter, e.g. "put64". */
if (dx || dy) {
snprintf(str, sizeof(str),
"%s_%s_%d%s", op_names[op],
filter_names[filter], size,
subpel_names[dy][dx]);
} else {
snprintf(str, sizeof(str),
"%s%d", op_names[op], size);
}
if (check_func(dsp.mc[hsize][filter][op][dx][dy],
"vp9_%s_%dbpp", str, bit_depth)) {
/* Random subpel position in 1..14 on each interpolated axis, 0 on
 * an axis that is not interpolated. */
int mx = dx ? 1 + (rnd() % 14) : 0;
int my = dy ? 1 + (rnd() % 14) : 0;
randomize_buffers();
call_ref(dst0, size * SIZEOF_PIXEL,
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
call_new(dst1, size * SIZEOF_PIXEL,
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
if (memcmp(dst0, dst1, DST_BUF_SIZE))
fail();
/* The two skips below only bypass benchmarking; the correctness
 * check above has already run. */
// simd implementations for each filter of subpel
// functions are identical
if (filter >= 1 && filter <= 2) continue;
// 10/12 bpp for bilin are identical
if (bit_depth == 12 && filter == 3) continue;
bench_new(dst1, size * SIZEOF_PIXEL,
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
}
}
}
}
}
}
}
report("mc");
}
/*
 * Entry point for the VP9 DSP tests. Each check_*() function called here
 * ends with its own report() call.
 */
void checkasm_check_vp9dsp(void)
{
check_ipred();
check_itxfm();
check_loopfilter();
check_mc();
}

View File

@@ -0,0 +1,244 @@
;*****************************************************************************
;* Assembly testing and benchmarking tool
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2012 Henrik Gramner
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
;*****************************************************************************
; Symbols declared with cglobal below get the "checkasm" prefix.
%define private_prefix checkasm
%include "libavutil/x86/x86inc.asm"
SECTION_RODATA
; NUL-terminated messages passed to fail_func by the report_fail macro.
error_message: db "failed to preserve register", 0
error_message_emms: db "failed to issue emms", 0
%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
ALIGN 16
; x6..x15: 16-byte patterns that the WIN64 path of CHECKED_CALL loads into
; m6..m15 before the call and XORs against afterwards.
x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
; n7..n14: 8-byte patterns that CHECKED_CALL loads into the general-purpose
; registers r14 downwards (15-free_regs of them) before the call and XORs
; against afterwards.
n7: dq 0x21f86d66c8ca00ce
n8: dq 0x75b6ba21077c48ad
n9: dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif
SECTION .text
; fail_func is defined outside this file; report_fail calls it with the
; address of an error message.
cextern fail_func
; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; int checkasm_stack_clobber(uint64_t clobber, ...)
;-----------------------------------------------------------------------------
; Overwrites the argsize = (max_args+6)*8 bytes directly below the caller's
; stack pointer with the value of the first argument (r0), 8 bytes at a time.
cglobal stack_clobber, 1,2
; Clobber the stack with junk below the stack pointer
%define argsize (max_args+6)*8
SUB rsp, argsize
; r1 counts down from the highest 8-byte slot to slot 0.
mov r1, argsize-8
.loop:
mov [rsp+r1], r0
sub r1, 8
; Loop while r1 has not gone below zero.
jge .loop
ADD rsp, argsize
RET
; Per-ABI setup for CHECKED_CALL: free_regs determines how many registers are
; verified (15-free_regs of them, counting down from r14), and
; DECLARE_REG_TMP selects the register behind t0, which CHECKED_CALL uses to
; hold the function pointer.
%if WIN64
%assign free_regs 7
DECLARE_REG_TMP 4
%else
%assign free_regs 9
DECLARE_REG_TMP 7
%endif
; report_fail msg
;   %1 - label of the message whose address is passed to fail_func in r0
; rax and rdx (the return value of the tested function) are copied to r9 and
; r10 around the call and restored afterwards.
; NOTE(review): this relies on the x86inc names r9/r10 mapping to registers
; that fail_func preserves -- confirm against x86inc.asm.
%macro report_fail 1
mov r9, rax
mov r10, rdx
lea r0, [%1]
; NOTE(review): presumably clears al because fail_func is variadic -- confirm.
xor eax, eax
call fail_func
mov rdx, r10
mov rax, r9
%endmacro
;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;-----------------------------------------------------------------------------
; CHECKED_CALL [suffix]
;   %1 - optional name suffix; "_emms" and "_float" select how the x87/MMX
;        state is handled after the call (see the end of the macro).
; Calls the function pointer passed as the first argument with the remaining
; arguments, after filling registers with known patterns, and reports a
; failure through fail_func if any of those patterns changed.
INIT_XMM
%macro CHECKED_CALL 0-1
cglobal checked_call%1, 2,15,16,max_args*8+8
; Keep the function pointer in t0; r0 is reloaded with an argument below.
mov t0, r0
; All arguments have been pushed on the stack instead of registers in order to
; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
mov r0, r6mp
mov r1, r7mp
mov r2, r8mp
mov r3, r9mp
%if UNIX64
mov r4, r10mp
mov r5, r11mp
; Copy the remaining max_args-6 arguments from the caller's stack into the
; local stack area, starting at [rsp].
%assign i 6
%rep max_args-6
mov r9, [rsp+stack_offset+(i+1)*8]
mov [rsp+(i-6)*8], r9
%assign i i+1
%endrep
%else ; WIN64
; Copy the remaining max_args-4 arguments from the caller's stack into the
; local stack area, starting at [rsp+4*8].
%assign i 4
%rep max_args-4
mov r9, [rsp+stack_offset+(i+7)*8]
mov [rsp+i*8], r9
%assign i i+1
%endrep
; Move possible floating-point arguments to the correct registers
movq m0, r0
movq m1, r1
movq m2, r2
movq m3, r3
; Load the patterns x6..x15 into m6..m15.
%assign i 6
%rep 16-6
mova m %+ i, [x %+ i]
%assign i i+1
%endrep
%endif
; Load the patterns n14, n13, ... into r14, r13, ... (15-free_regs registers).
%assign i 14
%rep 15-free_regs
mov r %+ i, [n %+ i]
%assign i i-1
%endrep
call t0
; XOR each register with its pattern and accumulate the results in r14;
; r14 ends up zero only if every pattern is still intact.
%assign i 14
%rep 15-free_regs
xor r %+ i, [n %+ i]
or r14, r %+ i
%assign i i-1
%endrep
%if WIN64
; Same check for m6..m15: XOR with the patterns, accumulate in m6 and fold
; the result into r14.
%assign i 6
%rep 16-6
pxor m %+ i, [x %+ i]
por m6, m %+ i
%assign i i+1
%endrep
packsswb m6, m6
movq r5, m6
or r14, r5
%endif
; Call fail_func() with a descriptive message to mark it as a failure
; if the called function didn't preserve all callee-saved registers.
; Save the return value located in rdx:rax first to prevent clobbering.
; The zero flag tested here comes from the last "or r14, ..." above.
jz .clobber_ok
report_fail error_message
.clobber_ok:
; _emms variant: always execute emms after the call.
; _float variant: no check at all.
; default: store the x87 environment to [rsp] and report a failure (then
; execute emms) unless the word at offset 8 is 0xffff.
%ifidn %1, _emms
emms
%elifnidn %1, _float
fstenv [rsp]
cmp word [rsp + 8], 0xffff
je .emms_ok
report_fail error_message_emms
emms
.emms_ok:
%endif
RET
%endmacro
%else
; just random numbers to reduce the chance of incidental match
; n3..n6 are immediates loaded into r3..r6 before the call and XORed against
; afterwards by the 32-bit CHECKED_CALL.
%define n3 dword 0x6549315c
%define n4 dword 0xe02f3e23
%define n5 dword 0xb78d0d1d
%define n6 dword 0x33627ba7
; report_fail msg (32-bit)
;   %1 - label of the message; its address is stored at [esp] as the stack
;        argument for fail_func
; eax and edx (the return value of the tested function) are copied to r3 and
; r4 around the call and restored afterwards.
; NOTE(review): this relies on the x86inc names r3/r4 mapping to registers
; that fail_func preserves -- confirm against x86inc.asm.
%macro report_fail 1
mov r3, eax
mov r4, edx
lea r0, [%1]
mov [esp], r0
call fail_func
mov edx, r4
mov eax, r3
%endmacro
; CHECKED_CALL [suffix] (32-bit)
;   %1 - optional name suffix; "_emms" and "_float" select how the x87/MMX
;        state is handled after the call (see the end of the macro).
%macro CHECKED_CALL 0-1
;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;-----------------------------------------------------------------------------
cglobal checked_call%1, 1,7
; Fill r3..r6 with the known patterns.
mov r3, n3
mov r4, n4
mov r5, n5
mov r6, n6
; Push max_args dwords read from the caller's stack so the callee gets its own
; copy of the arguments. The source offset is constant; esp drops by 4 per
; push, so each push reads the next lower dword.
%rep max_args
PUSH dword [esp+20+max_args*4]
%endrep
call r0
; XOR each register with its pattern and OR the results together; the final
; result in r3 is zero only if all four patterns are intact.
xor r3, n3
xor r4, n4
xor r5, n5
xor r6, n6
or r3, r4
or r5, r6
or r3, r5
jz .clobber_ok
report_fail error_message
.clobber_ok:
; _emms variant: always execute emms after the call.
; _float variant: no check at all.
; default: store the x87 environment to [esp] and report a failure (then
; execute emms) unless the word at offset 8 is 0xffff.
%ifidn %1, _emms
emms
%elifnidn %1, _float
fstenv [esp]
cmp word [esp + 8], 0xffff
je .emms_ok
report_fail error_message_emms
emms
.emms_ok:
%endif
; Drop the argument copies pushed above.
add esp, max_args*4
REP_RET
%endmacro
%endif ; ARCH_X86_64
; Instantiate the three variants: checked_call (verifies the x87 state after
; the call), checked_call_emms (always issues emms) and checked_call_float
; (no x87 state check).
CHECKED_CALL
CHECKED_CALL _emms
CHECKED_CALL _float