early-access version 1432
This commit is contained in:
1
externals/ffmpeg/tests/checkasm/.gitignore
vendored
Executable file
1
externals/ffmpeg/tests/checkasm/.gitignore
vendored
Executable file
@@ -0,0 +1 @@
|
||||
/checkasm
|
85
externals/ffmpeg/tests/checkasm/Makefile
vendored
Executable file
85
externals/ffmpeg/tests/checkasm/Makefile
vendored
Executable file
@@ -0,0 +1,85 @@
|
||||
# Build rules for the checkasm test binary.
#
# Each library section collects the test objects enabled by the current
# configuration; they are then merged into CHECKASMOBJS, prefixed with
# tests/checkasm/ and linked into $(CHECKASM).

# libavcodec tests
# subsystems
AVCODECOBJS-$(CONFIG_AUDIODSP)          += audiodsp.o
AVCODECOBJS-$(CONFIG_BLOCKDSP)          += blockdsp.o
AVCODECOBJS-$(CONFIG_BSWAPDSP)          += bswapdsp.o
AVCODECOBJS-$(CONFIG_FLACDSP)           += flacdsp.o
AVCODECOBJS-$(CONFIG_FMTCONVERT)        += fmtconvert.o
AVCODECOBJS-$(CONFIG_G722DSP)           += g722dsp.o
AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
AVCODECOBJS-$(CONFIG_H264QPEL)          += h264qpel.o
AVCODECOBJS-$(CONFIG_LLVIDDSP)          += llviddsp.o
AVCODECOBJS-$(CONFIG_LLVIDENCDSP)       += llviddspenc.o
AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o

# decoders/encoders
AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
                                           sbrdsp.o
AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
AVCODECOBJS-$(CONFIG_EXR_DECODER)       += exrdsp.o
AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER)   += huffyuvdsp.o
AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
AVCODECOBJS-$(CONFIG_OPUS_DECODER)      += opusdsp.o
AVCODECOBJS-$(CONFIG_PIXBLOCKDSP)       += pixblockdsp.o
AVCODECOBJS-$(CONFIG_HEVC_DECODER)      += hevc_add_res.o hevc_idct.o hevc_sao.o
AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER)   += utvideodsp.o
AVCODECOBJS-$(CONFIG_V210_DECODER)      += v210dec.o
AVCODECOBJS-$(CONFIG_V210_ENCODER)      += v210enc.o
AVCODECOBJS-$(CONFIG_VP9_DECODER)       += vp9dsp.o

CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)

# libavfilter tests
AVFILTEROBJS-$(CONFIG_AFIR_FILTER)       += af_afir.o
AVFILTEROBJS-$(CONFIG_BLEND_FILTER)      += vf_blend.o
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)      += vf_hflip.o
AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)    += vf_nlmeans.o

CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)

# swscale tests
SWSCALEOBJS                             += sw_rgb.o sw_scale.o

CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)

# libavutil tests
AVUTILOBJS                              += fixed_dsp.o
AVUTILOBJS                              += float_dsp.o

CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS)

# architecture-specific call wrappers
CHECKASMOBJS-$(ARCH_AARCH64)            += aarch64/checkasm.o
CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL)   += arm/checkasm.o
CHECKASMOBJS-$(HAVE_X86ASM)             += x86/checkasm.o

CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
# $(sort) also removes duplicate entries
CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))

-include $(CHECKASMOBJS:.o=.d)

# The object directories are order-only prerequisites: they must exist before
# compiling, but their timestamps must not trigger rebuilds.
CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
$(CHECKASMOBJS): | $(CHECKASMDIRS)
OUTDIRS += $(CHECKASMDIRS)

tests/checkasm/checkasm.o: CFLAGS += -Umain

CHECKASM := tests/checkasm/checkasm$(EXESUF)

$(CHECKASM): $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS)
	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS-avcodec) $(EXTRALIBS-avfilter) $(EXTRALIBS-avformat) $(EXTRALIBS-avutil) $(EXTRALIBS-swresample) $(EXTRALIBS)

checkasm: $(CHECKASM)

testclean:: checkasmclean

checkasmclean:
	$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)

# Neither target produces a file of its own name; declaring both phony keeps a
# stray file called "checkasm" or "checkasmclean" from suppressing the rule.
.PHONY: checkasm checkasmclean
|
261
externals/ffmpeg/tests/checkasm/aacpsdsp.c
vendored
Executable file
261
externals/ffmpeg/tests/checkasm/aacpsdsp.c
vendored
Executable file
@@ -0,0 +1,261 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "libavcodec/aacpsdsp.h"
|
||||
#include "libavutil/intfloat.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
#define N 32
|
||||
#define STRIDE 128
|
||||
#define BUF_SIZE (N * STRIDE)
|
||||
|
||||
/*
 * Fill the first `len` elements of `buf` with pseudo-random values, each
 * computed as (INTFLOAT)rnd() / UINT_MAX.
 *
 * Both arguments are parenthesized in the expansion, so an expression such
 * as `cond ? a : b` may be passed as `len` without being re-associated with
 * the `i < ...` comparison.
 */
#define randomize(buf, len) do {                                \
    int i;                                                      \
    for (i = 0; i < (len); i++) {                               \
        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
        (buf)[i] = f;                                           \
    }                                                           \
} while (0)
|
||||
|
||||
#define EPS 0.005
|
||||
|
||||
/*
 * Zero the `bits` lowest bits of the 32-bit representation of each of the
 * first `len` elements of `buf`, in place.
 */
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
    while (len-- > 0) {
        /* Reinterpret the value as an integer through the union. */
        union av_intfloat32 pun = { .f = *buf };

        pun.i &= (0xffffffff << bits);
        *buf++ = pun.f;
    }
}
|
||||
|
||||
static void test_add_squares(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
|
||||
|
||||
declare_func(void, INTFLOAT *dst,
|
||||
const INTFLOAT (*src)[2], int n);
|
||||
|
||||
randomize((INTFLOAT *)src, BUF_SIZE * 2);
|
||||
randomize(dst0, BUF_SIZE);
|
||||
memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
|
||||
call_ref(dst0, src, BUF_SIZE);
|
||||
call_new(dst1, src, BUF_SIZE);
|
||||
if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
|
||||
fail();
|
||||
bench_new(dst1, src, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void test_mul_pair_single(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
|
||||
|
||||
declare_func(void, INTFLOAT (*dst)[2],
|
||||
INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
|
||||
|
||||
randomize((INTFLOAT *)src0, BUF_SIZE * 2);
|
||||
randomize(src1, BUF_SIZE);
|
||||
call_ref(dst0, src0, src1, BUF_SIZE);
|
||||
call_new(dst1, src0, src1, BUF_SIZE);
|
||||
if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
|
||||
fail();
|
||||
bench_new(dst1, src0, src1, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void test_hybrid_analysis(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
|
||||
|
||||
declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
|
||||
const INTFLOAT (*filter)[8][2],
|
||||
ptrdiff_t stride, int n);
|
||||
|
||||
randomize((INTFLOAT *)in, 13 * 2);
|
||||
randomize((INTFLOAT *)filter, N * 8 * 2);
|
||||
|
||||
randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
|
||||
memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
|
||||
|
||||
call_ref(dst0, in, filter, STRIDE, N);
|
||||
call_new(dst1, in, filter, STRIDE, N);
|
||||
|
||||
if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
|
||||
fail();
|
||||
bench_new(dst1, in, filter, STRIDE, N);
|
||||
}
|
||||
|
||||
static void test_hybrid_analysis_ileave(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
|
||||
|
||||
declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
|
||||
int i, int len);
|
||||
|
||||
randomize((INTFLOAT *)out0, 91 * 32 * 2);
|
||||
randomize((INTFLOAT *)in, 2 * 38 * 64);
|
||||
memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
|
||||
|
||||
/* len is hardcoded to 32 as that's the only value used in
|
||||
libavcodec. asm functions are likely to be optimized
|
||||
hardcoding this value in their loops and could fail with
|
||||
anything else.
|
||||
i is hardcoded to the two values currently used by the
|
||||
aac decoder because the arm neon implementation is
|
||||
micro-optimized for them and will fail for almost every
|
||||
other value. */
|
||||
call_ref(out0, in, 3, 32);
|
||||
call_new(out1, in, 3, 32);
|
||||
|
||||
/* the function just moves data around, so memcmp is enough */
|
||||
if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
|
||||
fail();
|
||||
|
||||
call_ref(out0, in, 5, 32);
|
||||
call_new(out1, in, 5, 32);
|
||||
|
||||
if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
|
||||
fail();
|
||||
|
||||
bench_new(out1, in, 3, 32);
|
||||
}
|
||||
|
||||
static void test_hybrid_synthesis_deint(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
|
||||
|
||||
declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
|
||||
int i, int len);
|
||||
|
||||
randomize((INTFLOAT *)in, 91 * 32 * 2);
|
||||
randomize((INTFLOAT *)out0, 2 * 38 * 64);
|
||||
memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
|
||||
|
||||
/* len is hardcoded to 32 as that's the only value used in
|
||||
libavcodec. asm functions are likely to be optimized
|
||||
hardcoding this value in their loops and could fail with
|
||||
anything else.
|
||||
i is hardcoded to the two values currently used by the
|
||||
aac decoder because the arm neon implementation is
|
||||
micro-optimized for them and will fail for almost every
|
||||
other value. */
|
||||
call_ref(out0, in, 3, 32);
|
||||
call_new(out1, in, 3, 32);
|
||||
|
||||
/* the function just moves data around, so memcmp is enough */
|
||||
if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
|
||||
fail();
|
||||
|
||||
call_ref(out0, in, 5, 32);
|
||||
call_new(out1, in, 5, 32);
|
||||
|
||||
if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
|
||||
fail();
|
||||
|
||||
bench_new(out1, in, 3, 32);
|
||||
}
|
||||
|
||||
/*
 * Test both entries of psdsp->stereo_interpolate[]. For each entry that
 * check_func() selects, the reference and the new implementation process
 * private copies of the same random l/r buffers with the same h/h_step
 * coefficients; the results must agree within EPS. The new implementation is
 * then benchmarked on fresh copies of the inputs.
 */
static void test_stereo_interpolate(PSDSPContext *psdsp)
{
    int variant;
    LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);

    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);

    randomize((INTFLOAT *)l, BUF_SIZE * 2);
    randomize((INTFLOAT *)r, BUF_SIZE * 2);

    for (variant = 0; variant < 2; variant++) {
        if (!check_func(psdsp->stereo_interpolate[variant], "ps_stereo_interpolate%s", variant ? "_ipdopd" : ""))
            continue;

        /* The tested functions work in place: give each its own copy. */
        memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
        memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
        memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
        memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));

        randomize((INTFLOAT *)h, 2 * 4);
        randomize((INTFLOAT *)h_step, 2 * 4);
        // Clear the least significant 14 bits of h_step, to avoid
        // divergence when accumulating h_step BUF_SIZE times into
        // a float variable which may or may not have extra intermediate
        // precision. Therefore clear roughly log2(BUF_SIZE) less
        // significant bits, to get the same result regardless of any
        // extra precision in the accumulator.
        clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);

        call_ref(l0, r0, h, h_step, BUF_SIZE);
        call_new(l1, r1, h, h_step, BUF_SIZE);
        if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
            !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)) {
            fail();
        }

        /* Benchmark from the pristine inputs again. */
        memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
        memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
        bench_new(l1, r1, h, h_step, BUF_SIZE);
    }
}
|
||||
|
||||
void checkasm_check_aacpsdsp(void)
|
||||
{
|
||||
PSDSPContext psdsp;
|
||||
|
||||
ff_psdsp_init(&psdsp);
|
||||
|
||||
if (check_func(psdsp.add_squares, "ps_add_squares"))
|
||||
test_add_squares();
|
||||
report("add_squares");
|
||||
|
||||
if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
|
||||
test_mul_pair_single();
|
||||
report("mul_pair_single");
|
||||
|
||||
if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
|
||||
test_hybrid_analysis();
|
||||
report("hybrid_analysis");
|
||||
|
||||
if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
|
||||
test_hybrid_analysis_ileave();
|
||||
report("hybrid_analysis_ileave");
|
||||
|
||||
if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
|
||||
test_hybrid_synthesis_deint();
|
||||
report("hybrid_synthesis_deint");
|
||||
|
||||
test_stereo_interpolate(&psdsp);
|
||||
report("stereo_interpolate");
|
||||
}
|
198
externals/ffmpeg/tests/checkasm/aarch64/checkasm.S
vendored
Executable file
198
externals/ffmpeg/tests/checkasm/aarch64/checkasm.S
vendored
Executable file
@@ -0,0 +1,198 @@
|
||||
/****************************************************************************
|
||||
* Assembly testing and benchmarking tool
|
||||
* Copyright (c) 2015 Martin Storsjo
|
||||
* Copyright (c) 2015 Janne Grunau
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
// Bit patterns loaded into registers before the tested function is called
// and compared against them afterwards. checkasm_checked_call reads the
// table sequentially: the first 8 quads seed d8-d15, the next 10 seed
// x19-x28. The same table is reused as "garbage" for x8-x17.
const register_init, align=4
        .quad 0x21f86d66c8ca00ce, 0x75b6ba21077c48ad    // d8,  d9
        .quad 0xed56bb2dcb3c7736, 0x8bda43d3fd1a7e06    // d10, d11
        .quad 0xb64a9c9e5d318408, 0xdf9a54b303f1d3a3    // d12, d13
        .quad 0x4a75479abd64e097, 0x249214109d5d1c88    // d14, d15
        .quad 0x1a1b2550a612b48c, 0x79445c159ce79064    // x19, x20
        .quad 0x2eed899d5a28ddcd, 0x86b2536fcd8cf636    // x21, x22
        .quad 0xb0856806085e7943, 0x3f2bf84fc0fcca4e    // x23, x24
        .quad 0xacbd382dcf5b8de2, 0xd229e1f5b281303f    // x25, x26
        .quad 0x71aeaff20b095fd9, 0xab63e2e11fa38ed9    // x27, x28
endconst
|
||||
|
||||
|
||||
// NUL-terminated messages handed to checkasm_fail_func by
// checkasm_checked_call: the first when a checked register does not hold its
// register_init pattern after the call, the second when the stack canary
// comparison fails.
const error_message_register
|
||||
.asciz "failed to preserve register"
|
||||
error_message_stack:
|
||||
.asciz "stack clobbered"
|
||||
endconst
|
||||
|
||||
|
||||
// Upper bound on the number of arguments taken by any tested asm function.
#define MAX_ARGS 15

// Number of bytes overwritten by checkasm_stack_clobber: 8 bytes per
// argument slot, rounded up to the next multiple of 16.
#define CLOBBER_STACK (((8 * MAX_ARGS) + 15) & ~15)
|
||||
|
||||
// checkasm_stack_clobber: overwrite the CLOBBER_STACK bytes directly below
// the current stack pointer with copies of the incoming x0/x1 values, then
// restore sp and return. Uses x2 as byte counter and x3 to hold the
// original sp.
function checkasm_stack_clobber, export=1
|
||||
mov x3, sp
|
||||
mov x2, #CLOBBER_STACK
|
||||
1:
|
||||
// Each iteration pushes one 16-byte pair and counts it off.
stp x0, x1, [sp, #-16]!
|
||||
subs x2, x2, #16
|
||||
b.gt 1b
|
||||
mov sp, x3
|
||||
ret
|
||||
endfunc
|
||||
|
||||
// Stack space reserved by checkasm_checked_call for the copied stack
// arguments, rounded up to a multiple of 16, + 16 for stack canary reference.
//
// The rounding is parenthesized explicitly. Without the inner parentheses
// the value depends on operator precedence: GNU as binds '&' tighter than
// '+', but with C-style precedence "& ~15 + 16" is "& 0" and the whole
// expression collapses to 0.
#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)
|
||||
|
||||
// checkasm_checked_call: call the function whose address is passed in x0
// and verify afterwards that it preserved x19-x28, the low 64 bits of
// v8-v15 (d8-d15) and the stack beyond its own stack parameters.
//
// The arguments for the target are read from the caller's stack: the eight
// in-register arguments from [x29, #16] onwards, followed by MAX_ARGS-8
// stack arguments, followed by a word holding the number of stack
// parameters the target really takes.
//
// On a mismatch checkasm_fail_func is called with error_message_stack or
// error_message_register. The target's x0/x1 are saved across the checks and
// are the values in x0/x1 on return.
function checkasm_checked_call, export=1
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
stp x19, x20, [sp, #-16]!
|
||||
stp x21, x22, [sp, #-16]!
|
||||
stp x23, x24, [sp, #-16]!
|
||||
stp x25, x26, [sp, #-16]!
|
||||
stp x27, x28, [sp, #-16]!
|
||||
stp d8, d9, [sp, #-16]!
|
||||
stp d10, d11, [sp, #-16]!
|
||||
stp d12, d13, [sp, #-16]!
|
||||
stp d14, d15, [sp, #-16]!
|
||||
|
||||
// Seed every register that is checked after the call with its
// register_init pattern.
movrel x9, register_init
|
||||
ldp d8, d9, [x9], #16
|
||||
ldp d10, d11, [x9], #16
|
||||
ldp d12, d13, [x9], #16
|
||||
ldp d14, d15, [x9], #16
|
||||
ldp x19, x20, [x9], #16
|
||||
ldp x21, x22, [x9], #16
|
||||
ldp x23, x24, [x9], #16
|
||||
ldp x25, x26, [x9], #16
|
||||
ldp x27, x28, [x9], #16
|
||||
|
||||
// Reserve the outgoing argument area and copy the stack arguments into it.
sub sp, sp, #ARG_STACK
|
||||
.equ pos, 0
|
||||
.rept MAX_ARGS-8
|
||||
// Skip the first 8 args, that are loaded into registers
|
||||
ldr x9, [x29, #16 + 8*8 + pos]
|
||||
str x9, [sp, #pos]
|
||||
.equ pos, pos + 8
|
||||
.endr
|
||||
|
||||
// Fill x8-x17 with garbage. This doesn't have to be preserved,
|
||||
// but avoids relying on them having any particular value.
|
||||
movrel x9, register_init
|
||||
ldp x10, x11, [x9], #32
|
||||
ldp x12, x13, [x9], #32
|
||||
ldp x14, x15, [x9], #32
|
||||
ldp x16, x17, [x9], #32
|
||||
ldp x8, x9, [x9]
|
||||
|
||||
// For stack overflows, the callee is free to overwrite the parameters
|
||||
// that were passed on the stack (if any), so we can only check after
|
||||
// that point. First figure out how many parameters the function
|
||||
// really took on the stack:
|
||||
ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
|
||||
// Load the first non-parameter value from the stack, that should be
|
||||
// left untouched by the function. Store a copy of it inverted, so that
|
||||
// e.g. overwriting everything with zero would be noticed.
|
||||
ldr x2, [sp, x2, lsl #3]
|
||||
mvn x2, x2
|
||||
str x2, [sp, #ARG_STACK-8]
|
||||
|
||||
// Load the in-register arguments
|
||||
// (x0 still holds the target address, so move it out of the way first)
mov x12, x0
|
||||
ldp x0, x1, [x29, #16]
|
||||
ldp x2, x3, [x29, #32]
|
||||
ldp x4, x5, [x29, #48]
|
||||
ldp x6, x7, [x29, #64]
|
||||
// Call the target function
|
||||
blr x12
|
||||
|
||||
// Load the number of stack parameters, stack canary and its reference
|
||||
ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
|
||||
ldr x2, [sp, x2, lsl #3]
|
||||
ldr x3, [sp, #ARG_STACK-8]
|
||||
|
||||
add sp, sp, #ARG_STACK
|
||||
// Keep the target's return registers; they are reloaded at label 0.
stp x0, x1, [sp, #-16]!
|
||||
|
||||
// Undo the inversion of the stored reference and compare with the canary.
mvn x3, x3
|
||||
cmp x2, x3
|
||||
b.ne 2f
|
||||
|
||||
movrel x9, register_init
|
||||
movi v3.8h, #0
|
||||
|
||||
// Accumulate in v3 the XOR differences between the low halves of two
// vector registers and the next 16 bytes of register_init.
.macro check_reg_neon reg1, reg2
|
||||
ldr q1, [x9], #16
|
||||
uzp1 v2.2d, v\reg1\().2d, v\reg2\().2d
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
orr v3.16b, v3.16b, v1.16b
|
||||
.endm
|
||||
check_reg_neon 8, 9
|
||||
check_reg_neon 10, 11
|
||||
check_reg_neon 12, 13
|
||||
check_reg_neon 14, 15
|
||||
uqxtn v3.8b, v3.8h
|
||||
umov x3, v3.d[0]
|
||||
|
||||
// Accumulate in x3 the XOR differences between two general purpose
// registers and the next two quads of register_init.
.macro check_reg reg1, reg2
|
||||
ldp x0, x1, [x9], #16
|
||||
eor x0, x0, \reg1
|
||||
eor x1, x1, \reg2
|
||||
orr x3, x3, x0
|
||||
orr x3, x3, x1
|
||||
.endm
|
||||
check_reg x19, x20
|
||||
check_reg x21, x22
|
||||
check_reg x23, x24
|
||||
check_reg x25, x26
|
||||
check_reg x27, x28
|
||||
|
||||
// x3 == 0 means every checked register still held its pattern.
cbz x3, 0f
|
||||
|
||||
movrel x0, error_message_register
|
||||
b 1f
|
||||
2:
|
||||
movrel x0, error_message_stack
|
||||
1:
|
||||
bl X(checkasm_fail_func)
|
||||
0:
|
||||
ldp x0, x1, [sp], #16
|
||||
ldp d14, d15, [sp], #16
|
||||
ldp d12, d13, [sp], #16
|
||||
ldp d10, d11, [sp], #16
|
||||
ldp d8, d9, [sp], #16
|
||||
ldp x27, x28, [sp], #16
|
||||
ldp x25, x26, [sp], #16
|
||||
ldp x23, x24, [sp], #16
|
||||
ldp x21, x22, [sp], #16
|
||||
ldp x19, x20, [sp], #16
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
endfunc
|
95
externals/ffmpeg/tests/checkasm/af_afir.c
vendored
Executable file
95
externals/ffmpeg/tests/checkasm/af_afir.c
vendored
Executable file
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavfilter/af_afir.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "checkasm.h"
|
||||
|
||||
#define LEN 256
|
||||
|
||||
/*
 * Fill buf[0 .. LEN*2+8-1] with values drawn two at a time from
 * av_bmg_get(), scaled by stddev (10.0) and shifted by mean (0.0).
 *
 * The expansion is a single statement without a trailing semicolon, so
 * `randomize_buffer(x);` can be used anywhere a statement is allowed,
 * including as the body of an if/else.
 */
#define randomize_buffer(buf)                                 \
    do {                                                      \
        int i;                                                \
        double bmg[2], stddev = 10.0, mean = 0.0;             \
                                                              \
        for (i = 0; i < LEN*2+8; i += 2) {                    \
            av_bmg_get(&checkasm_lfg, bmg);                   \
            (buf)[i]     = bmg[0] * stddev + mean;            \
            (buf)[i + 1] = bmg[1] * stddev + mean;            \
        }                                                     \
    } while (0)
|
||||
|
||||
static void test_fcmul_add(const float *src0, const float *src1, const float *src2)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, cdst, [LEN*2+8]);
|
||||
LOCAL_ALIGNED_32(float, odst, [LEN*2+8]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *sum, const float *t, const float *c,
|
||||
ptrdiff_t len);
|
||||
|
||||
memcpy(cdst, src0, (LEN*2+8) * sizeof(float));
|
||||
memcpy(odst, src0, (LEN*2+8) * sizeof(float));
|
||||
call_ref(cdst, src1, src2, LEN);
|
||||
call_new(odst, src1, src2, LEN);
|
||||
for (i = 0; i <= LEN*2; i++) {
|
||||
int idx = i & ~1;
|
||||
float cre = src2[idx];
|
||||
float cim = src2[idx + 1];
|
||||
float tre = src1[idx];
|
||||
float tim = src1[idx + 1];
|
||||
double t = fabs(src0[i]) +
|
||||
fabs(tre) + fabs(tim) + fabs(cre) + fabs(cim) +
|
||||
fabs(tre * cre) + fabs(tim * cim) +
|
||||
fabs(tre * cim) + fabs(tim * cre) +
|
||||
fabs(tre * cre - tim * cim) +
|
||||
fabs(tre * cim + tim * cre) +
|
||||
fabs(cdst[i]) + 1.0;
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
memcpy(odst, src0, (LEN*2+8) * sizeof(float));
|
||||
bench_new(odst, src1, src2, LEN);
|
||||
}
|
||||
|
||||
void checkasm_check_afir(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, src0, [LEN*2+8]);
|
||||
LOCAL_ALIGNED_32(float, src1, [LEN*2+8]);
|
||||
LOCAL_ALIGNED_32(float, src2, [LEN*2+8]);
|
||||
AudioFIRDSPContext fir = { 0 };
|
||||
|
||||
ff_afir_init(&fir);
|
||||
|
||||
randomize_buffer(src0);
|
||||
randomize_buffer(src1);
|
||||
randomize_buffer(src2);
|
||||
|
||||
if (check_func(fir.fcmul_add, "fcmul_add"))
|
||||
test_fcmul_add(src0, src1, src2);
|
||||
report("fcmul_add");
|
||||
}
|
119
externals/ffmpeg/tests/checkasm/alacdsp.c
vendored
Executable file
119
externals/ffmpeg/tests/checkasm/alacdsp.c
vendored
Executable file
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2015 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/alacdsp.h"
|
||||
#include "libavcodec/mathops.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
|
||||
#define BUF_SIZE 256
|
||||
#define MAX_CHANNELS 2
|
||||
|
||||
/*
 * Fill ref_buf and new_buf (both taken from the enclosing scope) with the
 * same BUF_SIZE*MAX_CHANNELS values, each being sign_extend(rnd(), 24).
 */
#define randomize_buffers()                                     \
    do {                                                        \
        int idx = 0;                                            \
        while (idx < BUF_SIZE*MAX_CHANNELS) {                   \
            const int32_t sample = sign_extend(rnd(), 24);      \
            ref_buf[idx] = sample;                              \
            new_buf[idx] = sample;                              \
            idx++;                                              \
        }                                                       \
    } while (0)
|
||||
|
||||
static void check_decorrelate_stereo(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
|
||||
LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
|
||||
int32_t *ref[2] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1] };
|
||||
int32_t *new[2] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1] };
|
||||
ALACDSPContext c;
|
||||
|
||||
ff_alacdsp_init(&c);
|
||||
if (check_func(c.decorrelate_stereo, "alac_decorrelate_stereo")) {
|
||||
int len = (rnd() & 0xFF) + 1;
|
||||
int shift = rnd() & 0x1F;
|
||||
int weight = rnd() & 0xFF;
|
||||
declare_func(void, int32_t *buf[2], int len, int shift, int weight);
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(ref, len, shift, weight);
|
||||
call_new(new, len, shift, weight);
|
||||
if (memcmp(ref[0], new[0], len * sizeof(int32_t)) ||
|
||||
memcmp(ref[1], new[1], len * sizeof(int32_t)))
|
||||
fail();
|
||||
bench_new(new, BUF_SIZE, shift, weight);
|
||||
}
|
||||
|
||||
report("decorrelate_stereo");
|
||||
}
|
||||
|
||||
#undef randomize_buffers
/*
 * For every sample index below BUF_SIZE and every channel below `ch`, store
 * one sign_extend(rnd(), 24) value into both ref[] and new[] and one
 * rnd() & 0xFF value into both ref_ebb[] and new_ebb[]. All arrays and `ch`
 * are taken from the enclosing scope.
 */
#define randomize_buffers()                                     \
    do {                                                        \
        int smp, chan;                                          \
        for (smp = 0; smp < BUF_SIZE; smp++) {                  \
            for (chan = 0; chan < ch; chan++) {                 \
                const int32_t sample = sign_extend(rnd(), 24);  \
                const int32_t extra  = rnd() & 0xFF;            \
                ref[chan][smp]     = sample;                    \
                new[chan][smp]     = sample;                    \
                ref_ebb[chan][smp] = extra;                     \
                new_ebb[chan][smp] = extra;                     \
            }                                                   \
        }                                                       \
    } while (0)
|
||||
|
||||
static void check_append_extra_bits(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS*2]);
|
||||
LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS*2]);
|
||||
int32_t *ref[2] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1] };
|
||||
int32_t *new[2] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1] };
|
||||
int32_t *ref_ebb[2] = { &ref_buf[BUF_SIZE*2], &ref_buf[BUF_SIZE*3] };
|
||||
int32_t *new_ebb[2] = { &new_buf[BUF_SIZE*2], &new_buf[BUF_SIZE*3] };
|
||||
ALACDSPContext c;
|
||||
static const char * const channels[2] = { "mono", "stereo" };
|
||||
int ch;
|
||||
|
||||
ff_alacdsp_init(&c);
|
||||
for (ch = 1; ch <= 2; ch++) {
|
||||
if (check_func(c.append_extra_bits[ch-1], "alac_append_extra_bits_%s", channels[ch-1])) {
|
||||
int len = (rnd() & 0xFF) + 1;
|
||||
declare_func(void, int32_t *buf[2], int32_t *ebb[2], int ebits, int ch, int len);
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(ref, ref_ebb, 8, ch, len);
|
||||
call_new(new, new_ebb, 8, ch, len);
|
||||
if ( memcmp(ref[0], new[0], len * sizeof(int32_t)) ||
|
||||
(ch == 2 && memcmp(ref[1], new[1], len * sizeof(int32_t))))
|
||||
fail();
|
||||
bench_new(new, new_ebb, 8, ch, BUF_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
report("append_extra_bits");
|
||||
}
|
||||
|
||||
/* Entry point of the alacdsp tests: runs both test groups in order. */
void checkasm_check_alacdsp(void)
{
    /* Each group initializes its own ALACDSPContext and reports itself. */
    check_decorrelate_stereo();
    check_append_extra_bits();
}
|
197
externals/ffmpeg/tests/checkasm/arm/checkasm.S
vendored
Executable file
197
externals/ffmpeg/tests/checkasm/arm/checkasm.S
vendored
Executable file
@@ -0,0 +1,197 @@
|
||||
/****************************************************************************
|
||||
* Assembly testing and benchmarking tool
|
||||
* Copyright (c) 2015 Martin Storsjo
|
||||
* Copyright (c) 2015 Janne Grunau
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
/* override fpu so that NEON instructions are rejected */
|
||||
#if HAVE_VFP
|
||||
FPU .fpu vfp
|
||||
ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch
|
||||
#endif
|
||||
|
||||
/* Bit patterns loaded into callee-saved registers before the tested function
 * is called and compared against them afterwards. In the vfp variant, vldm
 * loads all eight quads into d8-d15; ldm loads the first 32 bytes into
 * r4-r11. */
const register_init, align=3
        .quad 0x21f86d66c8ca00ce, 0x75b6ba21077c48ad
        .quad 0xed56bb2dcb3c7736, 0x8bda43d3fd1a7e06
        .quad 0xb64a9c9e5d318408, 0xdf9a54b303f1d3a3
        .quad 0x4a75479abd64e097, 0x249214109d5d1c88
endconst
|
||||
|
||||
/* NUL-terminated format strings whose address is placed in r0 before
 * checkasm_fail_func is called. The checking code keeps the differing FPSCR
 * bits, or the number of the register being checked, in r1 at that point;
 * presumably that value is consumed as the %x / %d argument. */
const error_message_fpscr
|
||||
.asciz "failed to preserve register FPSCR, changed bits: %x"
|
||||
error_message_gpr:
|
||||
.asciz "failed to preserve register r%d"
|
||||
error_message_vfp:
|
||||
.asciz "failed to preserve register d%d"
|
||||
error_message_stack:
|
||||
.asciz "failed to preserve stack"
|
||||
endconst
|
||||
|
||||
/* Max number of args used by any asm function. */
#define MAX_ARGS 15

/* Bytes needed for the copied stack arguments: one 4-byte word for each of
 * the MAX_ARGS - 4 arguments. The expansion is parenthesized so that the
 * macro behaves as a single value inside any larger expression. */
#define ARG_STACK (4*(MAX_ARGS - 4))

/* Align the used stack space to 8 to preserve the stack alignment.
 * +8 for stack canary reference.
 * `pushed` is an assembler symbol, set with .equ by the code using this
 * macro, holding the number of bytes already pushed. */
#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8)
|
||||
|
||||
@ clobbercheck variant: emits the exported function
@ checkasm_checked_call_<variant>.
@
@ The generated function takes the target function pointer in r0 and the
@ first two target arguments in r2/r3 (the incoming r1 is not forwarded);
@ the remaining target arguments are read from the caller stack.
@ Before the call r4-r11 (and d8-d15 for the vfp variant) are loaded from
@ register_init. After the call a stack canary, d8-d15 and FPSCR (vfp
@ variant only) and r4-r11 are verified; on a mismatch checkasm_fail_func
@ is called with one of the error_message_* strings in r0 and a detail
@ value in r1. The r0/r1 pair returned by the target is handed back to the
@ caller unchanged.
.macro clobbercheck variant
|
||||
@ bytes pushed by the prologue below: r4-r11 and lr
.equ pushed, 4*9
|
||||
function checkasm_checked_call_\variant, export=1
|
||||
push {r4-r11, lr}
|
||||
.ifc \variant, vfp
|
||||
vpush {d8-d15}
|
||||
fmrx r4, FPSCR
|
||||
push {r4}
|
||||
@ account for d8-d15 (16 words) and the saved FPSCR word
.equ pushed, pushed + 16*4 + 4
|
||||
.endif
|
||||
|
||||
@ fill the registers under test with the known register_init pattern
movrel r12, register_init
|
||||
.ifc \variant, vfp
|
||||
vldm r12, {d8-d15}
|
||||
.endif
|
||||
ldm r12, {r4-r11}
|
||||
|
||||
sub sp, sp, #ARG_STACK_A
|
||||
@ copy MAX_ARGS-4 words from the caller stack into the new frame; the
@ first 8 bytes of caller stack arguments are skipped here because they
@ are loaded into r2/r3 further down
.equ pos, 0
|
||||
.rept MAX_ARGS-4
|
||||
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
|
||||
str r12, [sp, #pos]
|
||||
.equ pos, pos + 4
|
||||
.endr
|
||||
|
||||
@ For stack overflows, the callee is free to overwrite the parameters
|
||||
@ that were passed on the stack (if any), so we can only check after
|
||||
@ that point. First figure out how many parameters the function
|
||||
@ really took on the stack:
|
||||
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
|
||||
@ Load the first non-parameter value from the stack, that should be
|
||||
@ left untouched by the function. Store a copy of it inverted, so that
|
||||
@ e.g. overwriting everything with zero would be noticed.
|
||||
ldr r12, [sp, r12, lsl #2]
|
||||
mvn r12, r12
|
||||
str r12, [sp, #ARG_STACK_A - 4]
|
||||
|
||||
@ move the target pointer out of r0 and shift the arguments into r0-r3
mov r12, r0
|
||||
mov r0, r2
|
||||
mov r1, r3
|
||||
ldrd r2, r3, [sp, #ARG_STACK_A + pushed]
|
||||
@ Call the target function
|
||||
blx r12
|
||||
|
||||
@ Load the number of stack parameters, stack canary and its reference
|
||||
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
|
||||
ldr r2, [sp, r12, lsl #2]
|
||||
ldr r3, [sp, #ARG_STACK_A - 4]
|
||||
|
||||
add sp, sp, #ARG_STACK_A
|
||||
@ keep the return value of the target safe while the checks use r0/r1
push {r0, r1}
|
||||
|
||||
@ undo the inversion of the stored reference and compare with the canary
mvn r3, r3
|
||||
cmp r2, r3
|
||||
bne 5f
|
||||
|
||||
movrel r12, register_init
|
||||
.ifc \variant, vfp
|
||||
@ compare one d register against the 8-byte register_init entry at
@ index offset; branches to 4f on any differing bit
.macro check_reg_vfp, dreg, offset
|
||||
ldrd r2, r3, [r12, #8 * (\offset)]
|
||||
vmov r0, lr, \dreg
|
||||
eor r2, r2, r0
|
||||
eor r3, r3, lr
|
||||
orrs r2, r2, r3
|
||||
bne 4f
|
||||
.endm
|
||||
|
||||
.irp n, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
@ keep track of the checked double/SIMD register
|
||||
mov r1, #\n
|
||||
check_reg_vfp d\n, \n-8
|
||||
.endr
|
||||
.purgem check_reg_vfp
|
||||
|
||||
fmrx r1, FPSCR
|
||||
@ the FPSCR value saved in the prologue sits right above the r0/r1 pair
@ pushed after the call
ldr r3, [sp, #8]
|
||||
eor r1, r1, r3
|
||||
@ Ignore changes in bits 0-4 and 7
|
||||
bic r1, r1, #0x9f
|
||||
@ Ignore changes in the topmost 5 bits
|
||||
bics r1, r1, #0xf8000000
|
||||
bne 3f
|
||||
.endif
|
||||
|
||||
@ keep track of the checked GPR
|
||||
mov r1, #4
|
||||
@ compare reg1 (and reg2 when given) against the next two words of
@ register_init, advancing r12 by 8 and r1 by 2; branches to 2f on mismatch
.macro check_reg reg1, reg2=
|
||||
ldrd r2, r3, [r12], #8
|
||||
eors r2, r2, \reg1
|
||||
bne 2f
|
||||
add r1, r1, #1
|
||||
.ifnb \reg2
|
||||
eors r3, r3, \reg2
|
||||
bne 2f
|
||||
.endif
|
||||
add r1, r1, #1
|
||||
.endm
|
||||
check_reg r4, r5
|
||||
check_reg r6, r7
|
||||
@ r9 is a volatile register in the ios ABI
|
||||
#ifdef __APPLE__
|
||||
check_reg r8
|
||||
#else
|
||||
check_reg r8, r9
|
||||
#endif
|
||||
check_reg r10, r11
|
||||
.purgem check_reg
|
||||
|
||||
@ everything matched: skip the error reporting
b 0f
|
||||
@ error paths: select the message in r0 (r1 already holds the detail
@ value set by the failing check), then fall into the common call at 1
5:
|
||||
movrel r0, error_message_stack
|
||||
b 1f
|
||||
4:
|
||||
movrel r0, error_message_vfp
|
||||
b 1f
|
||||
3:
|
||||
movrel r0, error_message_fpscr
|
||||
b 1f
|
||||
2:
|
||||
movrel r0, error_message_gpr
|
||||
1:
|
||||
bl X(checkasm_fail_func)
|
||||
0:
|
||||
@ restore the return value of the target
pop {r0, r1}
|
||||
.ifc \variant, vfp
|
||||
@ restore the FPSCR and d8-d15 values saved in the prologue
pop {r2}
|
||||
fmxr FPSCR, r2
|
||||
vpop {d8-d15}
|
||||
.endif
|
||||
pop {r4-r11, pc}
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
@ Instantiate the checked-call wrappers: the vfp variant is only assembled
@ when VFP or NEON is configured, the novfp variant is always emitted.
#if HAVE_VFP || HAVE_NEON
|
||||
clobbercheck vfp
|
||||
#endif
|
||||
clobbercheck novfp
|
146
externals/ffmpeg/tests/checkasm/audiodsp.c
vendored
Executable file
146
externals/ffmpeg/tests/checkasm/audiodsp.c
vendored
Executable file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libavcodec/audiodsp.h"
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Element count of every test buffer in this file; also the length passed to bench_new(). */
#define MAX_SIZE (32 * 128)
|
||||
|
||||
/*
 * Fill the first len elements of buf with floats derived from rnd():
 * (float)rnd() / (UINT_MAX >> 5) - 16.0f.
 * Macro arguments are parenthesized so that arbitrary expressions can be
 * passed; they must not refer to a variable named i, which is declared
 * locally by the macro.
 */
#define randomize_float(buf, len)                               \
    do {                                                        \
        int i;                                                  \
        for (i = 0; i < (len); i++) {                           \
            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
            (buf)[i] = f;                                       \
        }                                                       \
    } while (0)
|
||||
|
||||
/*
 * Fill the first len elements of buf with random signed values that fit in
 * "bits" bits: a random value in [0, 2^bits) is offset by -2^(bits - 1).
 * "size" selects the element width (uint<size>_t / AV_WN<size>A).
 * Macro arguments are parenthesized so that expressions such as
 * "v1_bits + 1" expand safely; they must not refer to a variable named i.
 */
#define randomize_int(buf, len, size, bits)                         \
    do {                                                            \
        int i;                                                      \
        for (i = 0; i < (len); i++) {                               \
            uint ## size ## _t r = rnd() & ((1LL << (bits)) - 1);   \
            AV_WN ## size ## A((buf) + i, -(1LL << ((bits) - 1)) + r); \
        }                                                           \
    } while (0)
|
||||
|
||||
void checkasm_check_audiodsp(void)
|
||||
{
|
||||
AudioDSPContext adsp;
|
||||
|
||||
ff_audiodsp_init(&adsp);
|
||||
|
||||
if (check_func(adsp.scalarproduct_int16, "audiodsp.scalarproduct_int16")) {
|
||||
LOCAL_ALIGNED(32, int16_t, v1, [MAX_SIZE]);
|
||||
LOCAL_ALIGNED(32, int16_t, v2, [MAX_SIZE]);
|
||||
unsigned int len_bits_minus4, v1_bits, v2_bits, len;
|
||||
int32_t res0, res1;
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, int32_t, const int16_t *v1, const int16_t *v2, int len);
|
||||
|
||||
// generate random 5-12bit vector length
|
||||
len_bits_minus4 = rnd() % 8;
|
||||
len = rnd() & ((1 << len_bits_minus4) - 1);
|
||||
len = 16 * FFMAX(len, 1);
|
||||
|
||||
// generate the bit counts for each of the vectors such that the result
|
||||
// fits into int32
|
||||
v1_bits = 1 + rnd() % 15;
|
||||
v2_bits = FFMIN(32 - (len_bits_minus4 + 4) - v1_bits - 1, 15);
|
||||
|
||||
randomize_int(v1, MAX_SIZE, 16, v1_bits + 1);
|
||||
randomize_int(v2, MAX_SIZE, 16, v2_bits + 1);
|
||||
|
||||
res0 = call_ref(v1, v2, len);
|
||||
res1 = call_new(v1, v2, len);
|
||||
if (res0 != res1)
|
||||
fail();
|
||||
bench_new(v1, v2, MAX_SIZE);
|
||||
}
|
||||
|
||||
if (check_func(adsp.vector_clip_int32, "audiodsp.vector_clip_int32")) {
|
||||
LOCAL_ALIGNED(32, int32_t, src, [MAX_SIZE]);
|
||||
LOCAL_ALIGNED(32, int32_t, dst0, [MAX_SIZE]);
|
||||
LOCAL_ALIGNED(32, int32_t, dst1, [MAX_SIZE]);
|
||||
int32_t val1, val2, min, max;
|
||||
int len;
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
|
||||
val1 = ((int32_t)rnd());
|
||||
val1 = FFSIGN(val1) * (val1 & ((1 << 24) - 1));
|
||||
val2 = ((int32_t)rnd());
|
||||
val2 = FFSIGN(val2) * (val2 & ((1 << 24) - 1));
|
||||
|
||||
min = FFMIN(val1, val2);
|
||||
max = FFMAX(val1, val2);
|
||||
|
||||
randomize_int(src, MAX_SIZE, 32, 32);
|
||||
|
||||
len = rnd() % 128;
|
||||
len = 32 * FFMAX(len, 1);
|
||||
|
||||
call_ref(dst0, src, min, max, len);
|
||||
call_new(dst1, src, min, max, len);
|
||||
if (memcmp(dst0, dst1, len * sizeof(*dst0)))
|
||||
fail();
|
||||
bench_new(dst1, src, min, max, MAX_SIZE);
|
||||
}
|
||||
|
||||
if (check_func(adsp.vector_clipf, "audiodsp.vector_clipf")) {
|
||||
LOCAL_ALIGNED(32, float, src, [MAX_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, dst0, [MAX_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, dst1, [MAX_SIZE]);
|
||||
float val1, val2, min, max;
|
||||
int i, len;
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
|
||||
int len, float min, float max);
|
||||
|
||||
val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
|
||||
val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
|
||||
|
||||
min = FFMIN(val1, val2);
|
||||
max = FFMAX(val1, val2);
|
||||
|
||||
randomize_float(src, MAX_SIZE);
|
||||
|
||||
len = rnd() % 128;
|
||||
len = 16 * FFMAX(len, 1);
|
||||
|
||||
call_ref(dst0, src, len, min, max);
|
||||
call_new(dst1, src, len, min, max);
|
||||
for (i = 0; i < len; i++) {
|
||||
if (!float_near_ulp_array(dst0, dst1, 3, len))
|
||||
fail();
|
||||
}
|
||||
bench_new(dst1, src, MAX_SIZE, min, max);
|
||||
}
|
||||
|
||||
report("audiodsp");
|
||||
}
|
68
externals/ffmpeg/tests/checkasm/blockdsp.c
vendored
Executable file
68
externals/ffmpeg/tests/checkasm/blockdsp.c
vendored
Executable file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
#include "libavcodec/blockdsp.h"
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/*
 * Write the same random 16-bit values into the first "size" elements of
 * buf0 and buf1 (both taken from the expanding scope).
 * "size" is parenthesized so that any expression can be passed safely.
 */
#define randomize_buffers(size)             \
    do {                                    \
        int i;                              \
        for (i = 0; i < (size); i++) {      \
            uint16_t r = rnd();             \
            AV_WN16A(buf0 + i, r);          \
            AV_WN16A(buf1 + i, r);          \
        }                                   \
    } while (0)
|
||||
|
||||
/*
 * Test BlockDSPContext member "func" (looked up in the local context h):
 * both buffers receive identical random data, the reference runs on buf0 and
 * the new implementation on buf1, and the first "size" elements must match.
 * "size" is parenthesized wherever it takes part in an expression.
 */
#define check_clear(func, size)                                     \
    do {                                                            \
        if (check_func(h.func, "blockdsp." #func)) {                \
            declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block); \
            randomize_buffers(size);                                \
            call_ref(buf0);                                         \
            call_new(buf1);                                         \
            if (memcmp(buf0, buf1, sizeof(*buf0) * (size)))         \
                fail();                                             \
            bench_new(buf0);                                        \
        }                                                           \
    } while (0)
|
||||
|
||||
void checkasm_check_blockdsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint16_t, buf0, [6 * 8 * 8]);
|
||||
LOCAL_ALIGNED_32(uint16_t, buf1, [6 * 8 * 8]);
|
||||
|
||||
AVCodecContext avctx = { 0 };
|
||||
BlockDSPContext h;
|
||||
|
||||
ff_blockdsp_init(&h, &avctx);
|
||||
|
||||
check_clear(clear_block, 8 * 8);
|
||||
check_clear(clear_blocks, 8 * 8 * 6);
|
||||
|
||||
report("blockdsp");
|
||||
}
|
76
externals/ffmpeg/tests/checkasm/bswapdsp.c
vendored
Executable file
76
externals/ffmpeg/tests/checkasm/bswapdsp.c
vendored
Executable file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/bswapdsp.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/* Size in bytes of each of the src/dst test buffers. */
#define BUF_SIZE 512
|
||||
|
||||
/*
 * Refill the four test buffers of the expanding scope, one 32-bit word at a
 * time: src0/src1 receive one shared random word, dst0/dst1 another one, so
 * each pair starts out identical.
 */
#define randomize_buffers()                     \
    do {                                        \
        int pos = 0;                            \
        while (pos < BUF_SIZE) {                \
            uint32_t word = rnd();              \
            AV_WN32A(src0 + pos, word);         \
            AV_WN32A(src1 + pos, word);         \
            word = rnd();                       \
            AV_WN32A(dst0 + pos, word);         \
            AV_WN32A(dst1 + pos, word);         \
            pos += 4;                           \
        }                                       \
    } while (0)
|
||||
|
||||
/*
 * Test a byte-swap function working on elements of "type" for every width
 * w from 0 up to (but excluding) the number of elements that fit in BUF_SIZE.
 * The start offset changes with w to cover different alignments. The whole
 * src and dst buffers are compared afterwards, so writes outside the
 * requested range are detected as well.
 */
#define check_bswap(type)                                                       \
    do {                                                                        \
        int w = 0;                                                              \
        declare_func(void, type *dst, const type *src, int w);                  \
                                                                                \
        while (w < BUF_SIZE / sizeof(type)) {                                   \
            /* Test various alignments */                                       \
            int offset = (BUF_SIZE / sizeof(type) - w) & 15;                    \
            randomize_buffers();                                                \
            call_ref((type *)dst0 + offset, (type *)src0 + offset, w);          \
            call_new((type *)dst1 + offset, (type *)src1 + offset, w);          \
            if (memcmp(src0, src1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))   \
                fail();                                                         \
            bench_new((type *)dst1 + offset, (type *)src1 + offset, w);         \
            w++;                                                                \
        }                                                                       \
    } while (0)
|
||||
|
||||
void checkasm_check_bswapdsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, src0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, src1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
|
||||
BswapDSPContext h;
|
||||
|
||||
ff_bswapdsp_init(&h);
|
||||
|
||||
if (check_func(h.bswap_buf, "bswap_buf"))
|
||||
check_bswap(uint32_t);
|
||||
|
||||
if (check_func(h.bswap16_buf, "bswap16_buf"))
|
||||
check_bswap(uint16_t);
|
||||
|
||||
report("bswap");
|
||||
}
|
881
externals/ffmpeg/tests/checkasm/checkasm.c
vendored
Executable file
881
externals/ffmpeg/tests/checkasm/checkasm.c
vendored
Executable file
@@ -0,0 +1,881 @@
|
||||
/*
|
||||
* Assembly testing and benchmarking tool
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
* Copyright (c) 2008 Loren Merritt
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if CONFIG_LINUX_PERF
|
||||
# ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE // for syscall (performance monitoring API)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/intfloat.h"
|
||||
#include "libavutil/random_seed.h"
|
||||
|
||||
#if HAVE_IO_H
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
/*
 * Color codes accepted by color_printf(). With the Windows console API they
 * are console attribute bits; otherwise they are the small integers that
 * color_printf() inserts into its escape sequence.
 */
#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
|
||||
#include <windows.h>
|
||||
#define COLOR_RED FOREGROUND_RED
|
||||
#define COLOR_GREEN FOREGROUND_GREEN
|
||||
#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
|
||||
#else
|
||||
#define COLOR_RED 1
|
||||
#define COLOR_GREEN 2
|
||||
#define COLOR_YELLOW 3
|
||||
#endif
|
||||
|
||||
#if HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* Without isatty() every descriptor is treated as a terminal. */
#if !HAVE_ISATTY
|
||||
#define isatty(fd) 1
|
||||
#endif
|
||||
|
||||
#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
|
||||
#include "libavutil/arm/cpu.h"
|
||||
|
||||
/* Checked-call wrapper in use; starts as the non-VFP variant and is switched
 * to checkasm_checked_call_vfp in main() when VFP or NEON is available. */
void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
|
||||
#endif
|
||||
|
||||
/*
 * List of tests to invoke.
 *
 * Each entry maps the name accepted by --test= to its entry point; the table
 * is terminated by an entry whose func is NULL. Every entry must be guarded
 * by the same CONFIG_* symbol that builds the corresponding object file in
 * the Makefile, otherwise the table either references a function that was
 * not compiled or silently omits one that was.
 */
static const struct {
    const char *name;
    void (*func)(void);
} tests[] = {
#if CONFIG_AVCODEC
    #if CONFIG_AAC_DECODER
        { "aacpsdsp", checkasm_check_aacpsdsp },
        { "sbrdsp",   checkasm_check_sbrdsp },
    #endif
    #if CONFIG_ALAC_DECODER
        { "alacdsp", checkasm_check_alacdsp },
    #endif
    #if CONFIG_AUDIODSP
        { "audiodsp", checkasm_check_audiodsp },
    #endif
    #if CONFIG_BLOCKDSP
        { "blockdsp", checkasm_check_blockdsp },
    #endif
    #if CONFIG_BSWAPDSP
        { "bswapdsp", checkasm_check_bswapdsp },
    #endif
    #if CONFIG_DCA_DECODER
        { "synth_filter", checkasm_check_synth_filter },
    #endif
    #if CONFIG_EXR_DECODER
        { "exrdsp", checkasm_check_exrdsp },
    #endif
    #if CONFIG_FLACDSP
        { "flacdsp", checkasm_check_flacdsp },
    #endif
    #if CONFIG_FMTCONVERT
        { "fmtconvert", checkasm_check_fmtconvert },
    #endif
    #if CONFIG_G722DSP
        { "g722dsp", checkasm_check_g722dsp },
    #endif
    #if CONFIG_H264DSP
        { "h264dsp", checkasm_check_h264dsp },
    #endif
    #if CONFIG_H264PRED
        { "h264pred", checkasm_check_h264pred },
    #endif
    #if CONFIG_H264QPEL
        { "h264qpel", checkasm_check_h264qpel },
    #endif
    #if CONFIG_HEVC_DECODER
        { "hevc_add_res", checkasm_check_hevc_add_res },
        { "hevc_idct", checkasm_check_hevc_idct },
        { "hevc_sao", checkasm_check_hevc_sao },
    #endif
    #if CONFIG_HUFFYUV_DECODER
        { "huffyuvdsp", checkasm_check_huffyuvdsp },
    #endif
    #if CONFIG_JPEG2000_DECODER
        { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
    #endif
    /* llviddsp.o is built under CONFIG_LLVIDDSP, so guard it the same way */
    #if CONFIG_LLVIDDSP
        { "llviddsp", checkasm_check_llviddsp },
    #endif
    #if CONFIG_LLVIDENCDSP
        { "llviddspenc", checkasm_check_llviddspenc },
    #endif
    #if CONFIG_OPUS_DECODER
        { "opusdsp", checkasm_check_opusdsp },
    #endif
    #if CONFIG_PIXBLOCKDSP
        { "pixblockdsp", checkasm_check_pixblockdsp },
    #endif
    #if CONFIG_UTVIDEO_DECODER
        { "utvideodsp", checkasm_check_utvideodsp },
    #endif
    #if CONFIG_V210_DECODER
        { "v210dec", checkasm_check_v210dec },
    #endif
    #if CONFIG_V210_ENCODER
        { "v210enc", checkasm_check_v210enc },
    #endif
    #if CONFIG_VP8DSP
        { "vp8dsp", checkasm_check_vp8dsp },
    #endif
    #if CONFIG_VP9_DECODER
        { "vp9dsp", checkasm_check_vp9dsp },
    #endif
    #if CONFIG_VIDEODSP
        { "videodsp", checkasm_check_videodsp },
    #endif
#endif
#if CONFIG_AVFILTER
    #if CONFIG_AFIR_FILTER
        { "af_afir", checkasm_check_afir },
    #endif
    #if CONFIG_BLEND_FILTER
        { "vf_blend", checkasm_check_blend },
    #endif
    #if CONFIG_COLORSPACE_FILTER
        { "vf_colorspace", checkasm_check_colorspace },
    #endif
    #if CONFIG_EQ_FILTER
        { "vf_eq", checkasm_check_vf_eq },
    #endif
    #if CONFIG_GBLUR_FILTER
        { "vf_gblur", checkasm_check_vf_gblur },
    #endif
    #if CONFIG_HFLIP_FILTER
        { "vf_hflip", checkasm_check_vf_hflip },
    #endif
    #if CONFIG_NLMEANS_FILTER
        { "vf_nlmeans", checkasm_check_nlmeans },
    #endif
    #if CONFIG_THRESHOLD_FILTER
        { "vf_threshold", checkasm_check_vf_threshold },
    #endif
#endif
#if CONFIG_SWSCALE
    { "sw_rgb", checkasm_check_sw_rgb },
    { "sw_scale", checkasm_check_sw_scale },
#endif
#if CONFIG_AVUTIL
    { "fixed_dsp", checkasm_check_fixed_dsp },
    { "float_dsp", checkasm_check_float_dsp },
#endif
    { NULL }
};
|
||||
|
||||
/* List of cpu flags to check */
|
||||
/*
 * Terminated by a zeroed entry. check_cpu_flag() is invoked once per entry in
 * table order; cpu_suffix() scans the table from the end, so for a mask with
 * several bits set the entry listed last provides the suffix.
 */
static const struct {
|
||||
/* heading passed to check_cpu_flag() for this flag */
const char *name;
|
||||
/* suffix returned by cpu_suffix(), appended to benchmark names */
const char *suffix;
|
||||
/* AV_CPU_FLAG_* bit mask */
int flag;
|
||||
} cpus[] = {
|
||||
#if ARCH_AARCH64
|
||||
{ "ARMV8", "armv8", AV_CPU_FLAG_ARMV8 },
|
||||
{ "NEON", "neon", AV_CPU_FLAG_NEON },
|
||||
#elif ARCH_ARM
|
||||
{ "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE },
|
||||
{ "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 },
|
||||
{ "ARMV6T2", "armv6t2", AV_CPU_FLAG_ARMV6T2 },
|
||||
{ "VFP", "vfp", AV_CPU_FLAG_VFP },
|
||||
{ "VFP_VM", "vfp_vm", AV_CPU_FLAG_VFP_VM },
|
||||
{ "VFPV3", "vfp3", AV_CPU_FLAG_VFPV3 },
|
||||
{ "NEON", "neon", AV_CPU_FLAG_NEON },
|
||||
#elif ARCH_PPC
|
||||
{ "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC },
|
||||
{ "VSX", "vsx", AV_CPU_FLAG_VSX },
|
||||
{ "POWER8", "power8", AV_CPU_FLAG_POWER8 },
|
||||
#elif ARCH_X86
|
||||
{ "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
|
||||
{ "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT },
|
||||
{ "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW },
|
||||
{ "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
|
||||
{ "SSE", "sse", AV_CPU_FLAG_SSE },
|
||||
{ "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
|
||||
{ "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
|
||||
{ "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
|
||||
{ "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 },
|
||||
{ "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 },
|
||||
{ "AES-NI", "aesni", AV_CPU_FLAG_AESNI },
|
||||
{ "AVX", "avx", AV_CPU_FLAG_AVX },
|
||||
{ "XOP", "xop", AV_CPU_FLAG_XOP },
|
||||
{ "FMA3", "fma3", AV_CPU_FLAG_FMA3 },
|
||||
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
|
||||
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
|
||||
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
|
||||
#endif
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/* One implementation of a tested function; versions of the same function
 * form a singly linked list. */
typedef struct CheckasmFuncVersion {
|
||||
/* next version in the list, NULL at the end */
struct CheckasmFuncVersion *next;
|
||||
/* the implementation; get_func() treats NULL as "node was just created" */
void *func;
|
||||
int ok;
|
||||
/* cpu flag mask handed to cpu_suffix(); no matching flag yields "c" */
int cpu;
|
||||
/* benchmark data read by print_benchs() (cycles, iterations) */
CheckasmPerf perf;
|
||||
} CheckasmFuncVersion;
|
||||
|
||||
/* Binary search tree node */
|
||||
typedef struct CheckasmFunc {
|
||||
/* child[0] holds names ordered before this one by cmp_func_names(),
 * child[1] those ordered after it */
struct CheckasmFunc *child[2];
|
||||
/* first version, stored inline; further versions hang off versions.next */
CheckasmFuncVersion versions;
|
||||
uint8_t color; /* 0 = red, 1 = black */
|
||||
/* NUL-terminated name; get_func() allocates the extra bytes it needs
 * right behind the struct */
char name[1];
|
||||
} CheckasmFunc;
|
||||
|
||||
/* Internal state */
|
||||
static struct {
|
||||
/* root of the function tree (printed by print_benchs, freed by destroy_func_tree) */
CheckasmFunc *funcs;
|
||||
CheckasmFunc *current_func;
|
||||
CheckasmFuncVersion *current_func_ver;
|
||||
/* name of the tests[] entry currently running, set by check_cpu_flag() */
const char *current_test_name;
|
||||
/* non-NULL once --bench was given; text after "--bench=" or "" */
const char *bench_pattern;
|
||||
int bench_pattern_len;
|
||||
int num_checked;
|
||||
int num_failed;
|
||||
|
||||
/* perf */
|
||||
/* timer overhead from measure_nop_time(), in decicycles */
int nop_time;
|
||||
/* descriptor returned by perf_event_open when CONFIG_LINUX_PERF is used */
int sysfd;
|
||||
|
||||
/* cpu flags currently forced through av_force_cpu_flags() */
int cpu_flag;
|
||||
/* pending heading for print_cpu_name(); reset to NULL once printed */
const char *cpu_flag_name;
|
||||
/* value of --test=, NULL to run every test */
const char *test_name;
|
||||
int verbose;
|
||||
} state;
|
||||
|
||||
/* PRNG state */
|
||||
AVLFG checkasm_lfg;
|
||||
|
||||
/* float compare support code */
|
||||
/* Return the value of bit 31 (the sign bit) of the float stored in u. */
static int is_negative(union av_intfloat32 u)
{
    int sign = u.i >> 31;

    return sign;
}
|
||||
|
||||
int float_near_ulp(float a, float b, unsigned max_ulp)
|
||||
{
|
||||
union av_intfloat32 x, y;
|
||||
|
||||
x.f = a;
|
||||
y.f = b;
|
||||
|
||||
if (is_negative(x) != is_negative(y)) {
|
||||
// handle -0.0 == +0.0
|
||||
return a == b;
|
||||
}
|
||||
|
||||
if (llabs((int64_t)x.i - y.i) <= max_ulp)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Element-wise float_near_ulp() over len elements.
 * Returns 0 at the first pair that is too far apart, 1 otherwise (including
 * when len is 0).
 */
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
                         unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_ulp(a[idx], b[idx], max_ulp))
            return 0;
        idx++;
    }

    return 1;
}
|
||||
|
||||
/*
 * Return 1 when |a - b| is strictly smaller than eps. Otherwise a diagnostic
 * with both values, the difference and eps is written to stderr and 0 is
 * returned.
 */
int float_near_abs_eps(float a, float b, float eps)
{
    const float abs_diff = fabsf(a - b);
    const int within_eps = abs_diff < eps;

    if (!within_eps)
        fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps);

    return within_eps;
}
|
||||
|
||||
/*
 * Element-wise float_near_abs_eps() over len elements.
 * Stops at the first failing pair and returns 0; returns 1 when every pair
 * passes.
 */
int float_near_abs_eps_array(const float *a, const float *b, float eps,
                             unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_abs_eps(a[idx], b[idx], eps))
            return 0;
        idx++;
    }

    return 1;
}
|
||||
|
||||
/*
 * Accept a and b when they are within max_ulp of each other; only if that
 * fails fall back to the absolute-epsilon comparison.
 */
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
{
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    return float_near_abs_eps(a, b, eps) != 0;
}
|
||||
|
||||
/*
 * Element-wise float_near_abs_eps_ulp() over len elements.
 * Returns 0 at the first failing pair, 1 when every pair passes.
 */
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
                                 unsigned max_ulp, unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!float_near_abs_eps_ulp(a[idx], b[idx], eps, max_ulp))
            return 0;
        idx++;
    }

    return 1;
}
|
||||
|
||||
/* Return 1 when |a - b| is strictly smaller than eps, 0 otherwise. */
int double_near_abs_eps(double a, double b, double eps)
{
    return fabs(a - b) < eps;
}
|
||||
|
||||
/*
 * Element-wise double_near_abs_eps() over len elements.
 * Returns 0 at the first failing pair, 1 when every pair passes.
 */
int double_near_abs_eps_array(const double *a, const double *b, double eps,
                              unsigned len)
{
    unsigned idx = 0;

    while (idx < len) {
        if (!double_near_abs_eps(a[idx], b[idx], eps))
            return 0;
        idx++;
    }

    return 1;
}
|
||||
|
||||
/*
 * printf-style output to stderr, colored when the terminal supports it.
 *
 * Color support is probed on the first call and cached: through the console
 * API on Windows, otherwise from $TERM (set and not "dumb") together with
 * isatty(2). The color is reset again after the text has been printed.
 */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1; /* -1 until the first call has probed */
    va_list args;

#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;

        con = GetStdHandle(STD_ERROR_HANDLE);
        use_color = 0;
        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
            /* remember the attributes so they can be restored after printing */
            org_attributes = con_info.wAttributes;
            use_color = 1;
        }
    }
    if (use_color)
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        const char *term = getenv("TERM");

        use_color = term && strcmp(term, "dumb") && isatty(2);
    }
    if (use_color)
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    if (!use_color)
        return;

#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
|
||||
|
||||
/* Deallocate a tree */
|
||||
static void destroy_func_tree(CheckasmFunc *f)
|
||||
{
|
||||
if (f) {
|
||||
CheckasmFuncVersion *v = f->versions.next;
|
||||
while (v) {
|
||||
CheckasmFuncVersion *next = v->next;
|
||||
free(v);
|
||||
v = next;
|
||||
}
|
||||
|
||||
destroy_func_tree(f->child[0]);
|
||||
destroy_func_tree(f->child[1]);
|
||||
free(f);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate a zero-initialized block, clean up and exit on failure */
|
||||
static void *checkasm_malloc(size_t size)
|
||||
{
|
||||
void *ptr = calloc(1, size);
|
||||
if (!ptr) {
|
||||
fprintf(stderr, "checkasm: malloc failed\n");
|
||||
destroy_func_tree(state.funcs);
|
||||
exit(1);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* Get the suffix of the specified cpu flag */
|
||||
static const char *cpu_suffix(int cpu)
|
||||
{
|
||||
int i = FF_ARRAY_ELEMS(cpus);
|
||||
|
||||
while (--i >= 0)
|
||||
if (cpu & cpus[i].flag)
|
||||
return cpus[i].suffix;
|
||||
|
||||
return "c";
|
||||
}
|
||||
|
||||
/* qsort() comparator ordering uint16_t values ascending. */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    return lhs - rhs;
}
|
||||
|
||||
/* Measure the overhead of the timing code (in decicycles) */
|
||||
static int measure_nop_time(void)
|
||||
{
|
||||
uint16_t nops[10000];
|
||||
int i, nop_sum = 0;
|
||||
av_unused const int sysfd = state.sysfd;
|
||||
|
||||
uint64_t t = 0;
|
||||
for (i = 0; i < 10000; i++) {
|
||||
PERF_START(t);
|
||||
PERF_STOP(t);
|
||||
nops[i] = t;
|
||||
}
|
||||
|
||||
qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
|
||||
for (i = 2500; i < 7500; i++)
|
||||
nop_sum += nops[i];
|
||||
|
||||
return nop_sum / 500;
|
||||
}
|
||||
|
||||
/* Print benchmark results */
|
||||
static void print_benchs(CheckasmFunc *f)
|
||||
{
|
||||
if (f) {
|
||||
print_benchs(f->child[0]);
|
||||
|
||||
/* Only print functions with at least one assembly version */
|
||||
if (f->versions.cpu || f->versions.next) {
|
||||
CheckasmFuncVersion *v = &f->versions;
|
||||
do {
|
||||
CheckasmPerf *p = &v->perf;
|
||||
if (p->iterations) {
|
||||
int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
|
||||
printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
|
||||
}
|
||||
} while ((v = v->next));
|
||||
}
|
||||
|
||||
print_benchs(f->child[1]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * ASCIIbetical comparison, except that numbers keep their natural order.
 * When the first difference lies inside or directly after a run of digits,
 * the name whose digit run is longer sorts after the other one.
 * Returns a negative, zero or positive value like strcmp().
 */
static int cmp_func_names(const char *a, const char *b)
{
    const char *const first = a;
    int ascii_diff, digit_diff;

    /* advance over the common prefix */
    for (;;) {
        ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b;
        if (ascii_diff || !*a)
            break;
        a++;
        b++;
    }

    /* advance over digits present in both names after the difference */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }

    if (a > first && av_isdigit(a[-1])) {
        digit_diff = av_isdigit(*a) - av_isdigit(*b);
        if (digit_diff)
            return digit_diff;
    }

    return ascii_diff;
}
|
||||
|
||||
/* Perform a tree rotation in the specified direction and return the new root */
|
||||
static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir)
|
||||
{
|
||||
CheckasmFunc *r = f->child[dir^1];
|
||||
f->child[dir^1] = r->child[dir];
|
||||
r->child[dir] = f;
|
||||
r->color = f->color;
|
||||
f->color = 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
/* A node is red when it exists and its color field is 0; NULL counts as not red. */
#define is_red(f) ((f) && !(f)->color)
|
||||
|
||||
/* Balance a left-leaning red-black tree at the specified node */
|
||||
static void balance_tree(CheckasmFunc **root)
|
||||
{
|
||||
CheckasmFunc *f = *root;
|
||||
|
||||
if (is_red(f->child[0]) && is_red(f->child[1])) {
|
||||
f->color ^= 1;
|
||||
f->child[0]->color = f->child[1]->color = 1;
|
||||
}
|
||||
|
||||
if (!is_red(f->child[0]) && is_red(f->child[1]))
|
||||
*root = rotate_tree(f, 0); /* Rotate left */
|
||||
else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
|
||||
*root = rotate_tree(f, 1); /* Rotate right */
|
||||
}
|
||||
|
||||
/* Get a node with the specified name, creating it if it doesn't exist */
|
||||
static CheckasmFunc *get_func(CheckasmFunc **root, const char *name)
|
||||
{
|
||||
CheckasmFunc *f = *root;
|
||||
|
||||
if (f) {
|
||||
/* Search the tree for a matching node */
|
||||
int cmp = cmp_func_names(name, f->name);
|
||||
if (cmp) {
|
||||
f = get_func(&f->child[cmp > 0], name);
|
||||
|
||||
/* Rebalance the tree on the way up if a new node was inserted */
|
||||
if (!f->versions.func)
|
||||
balance_tree(root);
|
||||
}
|
||||
} else {
|
||||
/* Allocate and insert a new node into the tree */
|
||||
int name_length = strlen(name);
|
||||
f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length);
|
||||
memcpy(f->name, name, name_length + 1);
|
||||
}
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/* Perform tests and benchmarks for the specified cpu flag if supported by the host */
|
||||
static void check_cpu_flag(const char *name, int flag)
|
||||
{
|
||||
int old_cpu_flag = state.cpu_flag;
|
||||
|
||||
flag |= old_cpu_flag;
|
||||
av_force_cpu_flags(-1);
|
||||
state.cpu_flag = flag & av_get_cpu_flags();
|
||||
av_force_cpu_flags(state.cpu_flag);
|
||||
|
||||
if (!flag || state.cpu_flag != old_cpu_flag) {
|
||||
int i;
|
||||
|
||||
state.cpu_flag_name = name;
|
||||
for (i = 0; tests[i].func; i++) {
|
||||
if (state.test_name && strcmp(tests[i].name, state.test_name))
|
||||
continue;
|
||||
state.current_test_name = tests[i].name;
|
||||
tests[i].func();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Print the name of the current CPU flag, but only do it once */
|
||||
static void print_cpu_name(void)
|
||||
{
|
||||
if (state.cpu_flag_name) {
|
||||
color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
|
||||
state.cpu_flag_name = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_LINUX_PERF
|
||||
static int bench_init_linux(void)
|
||||
{
|
||||
struct perf_event_attr attr = {
|
||||
.type = PERF_TYPE_HARDWARE,
|
||||
.size = sizeof(struct perf_event_attr),
|
||||
.config = PERF_COUNT_HW_CPU_CYCLES,
|
||||
.disabled = 1, // start counting only on demand
|
||||
.exclude_kernel = 1,
|
||||
.exclude_hv = 1,
|
||||
};
|
||||
|
||||
printf("benchmarking with Linux Perf Monitoring API\n");
|
||||
|
||||
state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
|
||||
if (state.sysfd == -1) {
|
||||
perror("syscall");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !CONFIG_LINUX_PERF
|
||||
/*
 * Benchmark setup for the AV_READ_TIME based timers.
 * Returns 0 when AV_READ_TIME is defined; otherwise prints an error and
 * returns -1.
 */
static int bench_init_ffmpeg(void)
{
    int status;

#ifdef AV_READ_TIME
    printf("benchmarking with native FFmpeg timers\n");
    status = 0;
#else
    fprintf(stderr, "checkasm: --bench is not supported on your system\n");
    status = -1;
#endif

    return status;
}
|
||||
#endif
|
||||
|
||||
static int bench_init(void)
|
||||
{
|
||||
#if CONFIG_LINUX_PERF
|
||||
int ret = bench_init_linux();
|
||||
#else
|
||||
int ret = bench_init_ffmpeg();
|
||||
#endif
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
state.nop_time = measure_nop_time();
|
||||
printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Close the perf event descriptor when one with a positive value is stored
 * in state.sysfd. Does nothing in builds without CONFIG_LINUX_PERF. */
static void bench_uninit(void)
{
#if CONFIG_LINUX_PERF
    if (state.sysfd <= 0)
        return;

    close(state.sysfd);
#endif
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
unsigned int seed = av_get_random_seed();
|
||||
int i, ret = 0;
|
||||
|
||||
#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
|
||||
if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
|
||||
checkasm_checked_call = checkasm_checked_call_vfp;
|
||||
#endif
|
||||
|
||||
if (!tests[0].func || !cpus[0].flag) {
|
||||
fprintf(stderr, "checkasm: no tests to perform\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (argc > 1) {
|
||||
if (!strncmp(argv[1], "--bench", 7)) {
|
||||
if (bench_init() < 0)
|
||||
return 1;
|
||||
if (argv[1][7] == '=') {
|
||||
state.bench_pattern = argv[1] + 8;
|
||||
state.bench_pattern_len = strlen(state.bench_pattern);
|
||||
} else
|
||||
state.bench_pattern = "";
|
||||
} else if (!strncmp(argv[1], "--test=", 7)) {
|
||||
state.test_name = argv[1] + 7;
|
||||
} else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
|
||||
state.verbose = 1;
|
||||
} else {
|
||||
seed = strtoul(argv[1], NULL, 10);
|
||||
}
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
fprintf(stderr, "checkasm: using random seed %u\n", seed);
|
||||
av_lfg_init(&checkasm_lfg, seed);
|
||||
|
||||
check_cpu_flag(NULL, 0);
|
||||
for (i = 0; cpus[i].flag; i++)
|
||||
check_cpu_flag(cpus[i].name, cpus[i].flag);
|
||||
|
||||
if (state.num_failed) {
|
||||
fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
|
||||
ret = 1;
|
||||
} else {
|
||||
fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
|
||||
if (state.bench_pattern) {
|
||||
print_benchs(state.funcs);
|
||||
}
|
||||
}
|
||||
|
||||
destroy_func_tree(state.funcs);
|
||||
bench_uninit();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Decide whether or not the specified function needs to be tested and
|
||||
* allocate/initialize data structures if needed. Returns a pointer to a
|
||||
* reference function if the function should be tested, otherwise NULL */
|
||||
void *checkasm_check_func(void *func, const char *name, ...)
|
||||
{
|
||||
char name_buf[256];
|
||||
void *ref = func;
|
||||
CheckasmFuncVersion *v;
|
||||
int name_length;
|
||||
va_list arg;
|
||||
|
||||
va_start(arg, name);
|
||||
name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
|
||||
va_end(arg);
|
||||
|
||||
if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
|
||||
return NULL;
|
||||
|
||||
state.current_func = get_func(&state.funcs, name_buf);
|
||||
state.funcs->color = 1;
|
||||
v = &state.current_func->versions;
|
||||
|
||||
if (v->func) {
|
||||
CheckasmFuncVersion *prev;
|
||||
do {
|
||||
/* Only test functions that haven't already been tested */
|
||||
if (v->func == func)
|
||||
return NULL;
|
||||
|
||||
if (v->ok)
|
||||
ref = v->func;
|
||||
|
||||
prev = v;
|
||||
} while ((v = v->next));
|
||||
|
||||
v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
|
||||
}
|
||||
|
||||
v->func = func;
|
||||
v->ok = 1;
|
||||
v->cpu = state.cpu_flag;
|
||||
state.current_func_ver = v;
|
||||
|
||||
if (state.cpu_flag)
|
||||
state.num_checked++;
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Decide whether or not the current function needs to be benchmarked */
|
||||
int checkasm_bench_func(void)
|
||||
{
|
||||
return !state.num_failed && state.bench_pattern &&
|
||||
!strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
|
||||
}
|
||||
|
||||
/* Indicate that the current test has failed */
|
||||
void checkasm_fail_func(const char *msg, ...)
|
||||
{
|
||||
if (state.current_func_ver->cpu && state.current_func_ver->ok) {
|
||||
va_list arg;
|
||||
|
||||
print_cpu_name();
|
||||
fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
|
||||
va_start(arg, msg);
|
||||
vfprintf(stderr, msg, arg);
|
||||
va_end(arg);
|
||||
fprintf(stderr, ")\n");
|
||||
|
||||
state.current_func_ver->ok = 0;
|
||||
state.num_failed++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the benchmark context of the current function */
|
||||
CheckasmPerf *checkasm_get_perf_context(void)
|
||||
{
|
||||
CheckasmPerf *perf = &state.current_func_ver->perf;
|
||||
memset(perf, 0, sizeof(*perf));
|
||||
perf->sysfd = state.sysfd;
|
||||
return perf;
|
||||
}
|
||||
|
||||
/* Print the outcome of all tests performed since the last time this function was called */
|
||||
void checkasm_report(const char *name, ...)
|
||||
{
|
||||
static int prev_checked, prev_failed, max_length;
|
||||
|
||||
if (state.num_checked > prev_checked) {
|
||||
int pad_length = max_length + 4;
|
||||
va_list arg;
|
||||
|
||||
print_cpu_name();
|
||||
pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
|
||||
va_start(arg, name);
|
||||
pad_length -= vfprintf(stderr, name, arg);
|
||||
va_end(arg);
|
||||
fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
|
||||
|
||||
if (state.num_failed == prev_failed)
|
||||
color_printf(COLOR_GREEN, "OK");
|
||||
else
|
||||
color_printf(COLOR_RED, "FAILED");
|
||||
fprintf(stderr, "]\n");
|
||||
|
||||
prev_checked = state.num_checked;
|
||||
prev_failed = state.num_failed;
|
||||
} else if (!state.cpu_flag) {
|
||||
/* Calculate the amount of padding required to make the output vertically aligned */
|
||||
int length = strlen(state.current_test_name);
|
||||
va_list arg;
|
||||
|
||||
va_start(arg, name);
|
||||
length += vsnprintf(NULL, 0, name, arg);
|
||||
va_end(arg);
|
||||
|
||||
if (length > max_length)
|
||||
max_length = length;
|
||||
}
|
||||
}
|
||||
|
||||
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
|
||||
int checkasm_check_##type(const char *const file, const int line, \
|
||||
const type *buf1, ptrdiff_t stride1, \
|
||||
const type *buf2, ptrdiff_t stride2, \
|
||||
const int w, int h, const char *const name) \
|
||||
{ \
|
||||
int y = 0; \
|
||||
stride1 /= sizeof(*buf1); \
|
||||
stride2 /= sizeof(*buf2); \
|
||||
for (y = 0; y < h; y++) \
|
||||
if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
|
||||
break; \
|
||||
if (y == h) \
|
||||
return 0; \
|
||||
checkasm_fail_func("%s:%d", file, line); \
|
||||
if (!state.verbose) \
|
||||
return 1; \
|
||||
fprintf(stderr, "%s:\n", name); \
|
||||
while (h--) { \
|
||||
for (int x = 0; x < w; x++) \
|
||||
fprintf(stderr, " " fmt, buf1[x]); \
|
||||
fprintf(stderr, " "); \
|
||||
for (int x = 0; x < w; x++) \
|
||||
fprintf(stderr, " " fmt, buf2[x]); \
|
||||
fprintf(stderr, " "); \
|
||||
for (int x = 0; x < w; x++) \
|
||||
fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
|
||||
buf1 += stride1; \
|
||||
buf2 += stride2; \
|
||||
fprintf(stderr, "\n"); \
|
||||
} \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
|
||||
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
|
||||
DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
|
||||
DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
|
282
externals/ffmpeg/tests/checkasm/checkasm.h
vendored
Executable file
282
externals/ffmpeg/tests/checkasm/checkasm.h
vendored
Executable file
@@ -0,0 +1,282 @@
|
||||
/*
|
||||
* Assembly testing and benchmarking tool
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
* Copyright (c) 2008 Loren Merritt
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef TESTS_CHECKASM_CHECKASM_H
|
||||
#define TESTS_CHECKASM_CHECKASM_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
#if CONFIG_LINUX_PERF
|
||||
#include <unistd.h> // read(3)
|
||||
#include <sys/ioctl.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <linux/perf_event.h>
|
||||
#endif
|
||||
|
||||
#include "libavutil/avstring.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/lfg.h"
|
||||
#include "libavutil/timer.h"
|
||||
|
||||
void checkasm_check_aacpsdsp(void);
|
||||
void checkasm_check_afir(void);
|
||||
void checkasm_check_alacdsp(void);
|
||||
void checkasm_check_audiodsp(void);
|
||||
void checkasm_check_blend(void);
|
||||
void checkasm_check_blockdsp(void);
|
||||
void checkasm_check_bswapdsp(void);
|
||||
void checkasm_check_colorspace(void);
|
||||
void checkasm_check_exrdsp(void);
|
||||
void checkasm_check_fixed_dsp(void);
|
||||
void checkasm_check_flacdsp(void);
|
||||
void checkasm_check_float_dsp(void);
|
||||
void checkasm_check_fmtconvert(void);
|
||||
void checkasm_check_g722dsp(void);
|
||||
void checkasm_check_h264dsp(void);
|
||||
void checkasm_check_h264pred(void);
|
||||
void checkasm_check_h264qpel(void);
|
||||
void checkasm_check_hevc_add_res(void);
|
||||
void checkasm_check_hevc_idct(void);
|
||||
void checkasm_check_hevc_sao(void);
|
||||
void checkasm_check_huffyuvdsp(void);
|
||||
void checkasm_check_jpeg2000dsp(void);
|
||||
void checkasm_check_llviddsp(void);
|
||||
void checkasm_check_llviddspenc(void);
|
||||
void checkasm_check_nlmeans(void);
|
||||
void checkasm_check_opusdsp(void);
|
||||
void checkasm_check_pixblockdsp(void);
|
||||
void checkasm_check_sbrdsp(void);
|
||||
void checkasm_check_synth_filter(void);
|
||||
void checkasm_check_sw_rgb(void);
|
||||
void checkasm_check_sw_scale(void);
|
||||
void checkasm_check_utvideodsp(void);
|
||||
void checkasm_check_v210dec(void);
|
||||
void checkasm_check_v210enc(void);
|
||||
void checkasm_check_vf_eq(void);
|
||||
void checkasm_check_vf_gblur(void);
|
||||
void checkasm_check_vf_hflip(void);
|
||||
void checkasm_check_vf_threshold(void);
|
||||
void checkasm_check_vp8dsp(void);
|
||||
void checkasm_check_vp9dsp(void);
|
||||
void checkasm_check_videodsp(void);
|
||||
|
||||
struct CheckasmPerf;
|
||||
|
||||
void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3);
|
||||
int checkasm_bench_func(void);
|
||||
void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
|
||||
struct CheckasmPerf *checkasm_get_perf_context(void);
|
||||
void checkasm_report(const char *name, ...) av_printf_format(1, 2);
|
||||
|
||||
/* float compare utilities */
|
||||
int float_near_ulp(float a, float b, unsigned max_ulp);
|
||||
int float_near_abs_eps(float a, float b, float eps);
|
||||
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
|
||||
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
|
||||
unsigned len);
|
||||
int float_near_abs_eps_array(const float *a, const float *b, float eps,
|
||||
unsigned len);
|
||||
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
|
||||
unsigned max_ulp, unsigned len);
|
||||
int double_near_abs_eps(double a, double b, double eps);
|
||||
int double_near_abs_eps_array(const double *a, const double *b, double eps,
|
||||
unsigned len);
|
||||
|
||||
extern AVLFG checkasm_lfg;
|
||||
#define rnd() av_lfg_get(&checkasm_lfg)
|
||||
|
||||
static av_unused void *func_ref, *func_new;
|
||||
|
||||
#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
|
||||
|
||||
/* Decide whether or not the specified function needs to be tested */
|
||||
#define check_func(func, ...) (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
|
||||
|
||||
/* Declare the function prototype. The first argument is the return value, the remaining
|
||||
* arguments are the function parameters. Naming parameters is optional. */
|
||||
#define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
|
||||
#define declare_func_float(ret, ...) declare_new_float(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
|
||||
#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
|
||||
|
||||
/* Indicate that the current test has failed */
|
||||
#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
|
||||
|
||||
/* Print the test outcome */
|
||||
#define report checkasm_report
|
||||
|
||||
/* Call the reference function */
|
||||
#define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
|
||||
|
||||
#if ARCH_X86 && HAVE_X86ASM
|
||||
/* Verifies that clobbered callee-saved registers are properly saved and restored
|
||||
* and that either no MMX registers are touched or emms is issued */
|
||||
void checkasm_checked_call(void *func, ...);
|
||||
/* Verifies that clobbered callee-saved registers are properly saved and restored
|
||||
* and issues emms for asm functions which are not required to do so */
|
||||
void checkasm_checked_call_emms(void *func, ...);
|
||||
/* Verifies that clobbered callee-saved registers are properly saved and restored
|
||||
* but doesn't issue emms. Meant for dsp functions returning float or double */
|
||||
void checkasm_checked_call_float(void *func, ...);
|
||||
|
||||
#if ARCH_X86_64
|
||||
/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
|
||||
* This is done by clobbering the stack with junk around the stack pointer and calling the
|
||||
* assembly function through checked_call() with added dummy arguments which forces all
|
||||
* real arguments to be passed on the stack and not in registers. For 32-bit arguments the
|
||||
* upper half of the 64-bit register locations on the stack will now contain junk which will
|
||||
* cause misbehaving functions to either produce incorrect output or segfault. Note that
|
||||
* even though this works extremely well in practice, it's technically not guaranteed
|
||||
* and false negatives are theoretically possible, but there can never be any false positives.
|
||||
*/
|
||||
void checkasm_stack_clobber(uint64_t clobber, ...);
|
||||
#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
|
||||
= (void *)checkasm_checked_call;
|
||||
#define declare_new_float(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
|
||||
= (void *)checkasm_checked_call_float;
|
||||
#define declare_new_emms(cpu_flags, ret, ...) \
|
||||
ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \
|
||||
((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \
|
||||
(void *)checkasm_checked_call;
|
||||
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
|
||||
#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
|
||||
CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
|
||||
checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
|
||||
#elif ARCH_X86_32
|
||||
#define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
|
||||
#define declare_new_float(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call_float;
|
||||
#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \
|
||||
((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \
|
||||
(void *)checkasm_checked_call;
|
||||
#define call_new(...) checked_call(func_new, __VA_ARGS__)
|
||||
#endif
|
||||
#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
|
||||
/* Use a dummy argument, to offset the real parameters by 2, not only 1.
|
||||
* This makes sure that potential 8-byte-alignment of parameters is kept the same
|
||||
* even when the extra parameters have been removed. */
|
||||
void checkasm_checked_call_vfp(void *func, int dummy, ...);
|
||||
void checkasm_checked_call_novfp(void *func, int dummy, ...);
|
||||
extern void (*checkasm_checked_call)(void *func, int dummy, ...);
|
||||
#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__, \
|
||||
int, int, int, int, int, int, int, int, \
|
||||
int, int, int, int, int, int, int) = (void *)checkasm_checked_call;
|
||||
#define call_new(...) checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0)
|
||||
#elif ARCH_AARCH64 && !defined(__APPLE__)
|
||||
void checkasm_stack_clobber(uint64_t clobber, ...);
|
||||
void checkasm_checked_call(void *func, ...);
|
||||
#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, int, int, __VA_ARGS__,\
|
||||
int, int, int, int, int, int, int, int,\
|
||||
int, int, int, int, int, int, int)\
|
||||
= (void *)checkasm_checked_call;
|
||||
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
|
||||
#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
|
||||
CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
|
||||
checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
|
||||
7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0))
|
||||
#else
|
||||
#define declare_new(ret, ...)
|
||||
#define declare_new_float(ret, ...)
|
||||
#define declare_new_emms(cpu_flags, ret, ...)
|
||||
/* Call the function */
|
||||
#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifndef declare_new_emms
|
||||
#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
|
||||
#endif
|
||||
#ifndef declare_new_float
|
||||
#define declare_new_float(ret, ...) declare_new(ret, __VA_ARGS__)
|
||||
#endif
|
||||
|
||||
/* Benchmark record kept for each tested function version.
 *   sysfd      - descriptor copied from the global state by
 *                checkasm_get_perf_context(); read by the Linux perf variants
 *                of PERF_START()/PERF_STOP()
 *   cycles     - timing value accumulated by bench_new()
 *   iterations - measurement count accumulated by bench_new() */
typedef struct CheckasmPerf {
|
||||
int sysfd;
|
||||
uint64_t cycles;
|
||||
int iterations;
|
||||
} CheckasmPerf;
|
||||
|
||||
#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
|
||||
|
||||
#if CONFIG_LINUX_PERF
|
||||
#define PERF_START(t) do { \
|
||||
ioctl(sysfd, PERF_EVENT_IOC_RESET, 0); \
|
||||
ioctl(sysfd, PERF_EVENT_IOC_ENABLE, 0); \
|
||||
} while (0)
|
||||
#define PERF_STOP(t) do { \
|
||||
ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0); \
|
||||
read(sysfd, &t, sizeof(t)); \
|
||||
} while (0)
|
||||
#else
|
||||
#define PERF_START(t) t = AV_READ_TIME()
|
||||
#define PERF_STOP(t) t = AV_READ_TIME() - t
|
||||
#endif
|
||||
|
||||
/* Benchmark the function.
 *
 * When checkasm_bench_func() allows it, the function under test is timed
 * BENCH_RUNS times, each sample covering four back-to-back calls with the
 * given arguments. The first sample is discarded as warm-up, and a sample is
 * also discarded when it exceeds four times the running mean of the accepted
 * ones (t*tcount > tsum*4). The sum and count of the accepted samples are
 * then added to the perf context, so cycles/iterations is the filtered mean
 * of one four-call sample. */
#define bench_new(...)\
    do {\
        if (checkasm_bench_func()) {\
            struct CheckasmPerf *perf = checkasm_get_perf_context();\
            av_unused const int sysfd = perf->sysfd;\
            func_type *tfunc = func_new;\
            uint64_t tsum = 0;\
            int ti, tcount = 0;\
            uint64_t t = 0; \
            for (ti = 0; ti < BENCH_RUNS; ti++) {\
                PERF_START(t);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                PERF_STOP(t);\
                if (t*tcount <= tsum*4 && ti > 0) {\
                    tsum += t;\
                    tcount++;\
                }\
            }\
            emms_c();\
            /* Store the filtered totals, not just the last raw sample. */\
            perf->cycles += tsum;\
            perf->iterations += tcount;\
        }\
    } while (0)
|
||||
#else
|
||||
#define bench_new(...) while(0)
|
||||
#define PERF_START(t) while(0)
|
||||
#define PERF_STOP(t) while(0)
|
||||
#endif
|
||||
|
||||
#define DECL_CHECKASM_CHECK_FUNC(type) \
|
||||
int checkasm_check_##type(const char *const file, const int line, \
|
||||
const type *const buf1, const ptrdiff_t stride1, \
|
||||
const type *const buf2, const ptrdiff_t stride2, \
|
||||
const int w, const int h, const char *const name)
|
||||
|
||||
DECL_CHECKASM_CHECK_FUNC(uint8_t);
|
||||
DECL_CHECKASM_CHECK_FUNC(uint16_t);
|
||||
DECL_CHECKASM_CHECK_FUNC(int16_t);
|
||||
DECL_CHECKASM_CHECK_FUNC(int32_t);
|
||||
|
||||
#define PASTE(a,b) a ## b
|
||||
#define CONCAT(a,b) PASTE(a,b)
|
||||
|
||||
#define checkasm_check(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
|
||||
|
||||
#endif /* TESTS_CHECKASM_CHECKASM_H */
|
91
externals/ffmpeg/tests/checkasm/exrdsp.c
vendored
Executable file
91
externals/ffmpeg/tests/checkasm/exrdsp.c
vendored
Executable file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2017 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/exrdsp.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define BUF_SIZE 5120
/* Buffer size plus AV_INPUT_BUFFER_PADDING_SIZE on both ends. Parenthesized
 * so the sum stays intact when the macro is used inside a larger expression. */
#define PADDED_BUF_SIZE (BUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE * 2)
|
||||
|
||||
/* Fill the first BUF_SIZE bytes of the caller's `src` buffer with random
 * data, one 32-bit word from rnd() at a time. */
#define randomize_buffers()                     \
    do {                                        \
        int off = 0;                            \
        while (off < BUF_SIZE) {                \
            uint32_t word = rnd();              \
            AV_WN32A(src + off, word);          \
            off += 4;                           \
        }                                       \
    } while (0)
|
||||
|
||||
static void check_reorder_pixels(void) {
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
|
||||
|
||||
declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t size);
|
||||
|
||||
memset(src, 0, PADDED_BUF_SIZE);
|
||||
memset(dst_ref, 0, PADDED_BUF_SIZE);
|
||||
memset(dst_new, 0, PADDED_BUF_SIZE);
|
||||
randomize_buffers();
|
||||
call_ref(dst_ref, src, BUF_SIZE);
|
||||
call_new(dst_new, src, BUF_SIZE);
|
||||
if (memcmp(dst_ref, dst_new, BUF_SIZE))
|
||||
fail();
|
||||
bench_new(dst_new, src, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void check_predictor(void) {
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
|
||||
|
||||
declare_func(void, uint8_t *src, ptrdiff_t size);
|
||||
|
||||
memset(src, 0, PADDED_BUF_SIZE);
|
||||
randomize_buffers();
|
||||
memcpy(dst_ref, src, PADDED_BUF_SIZE);
|
||||
memcpy(dst_new, src, PADDED_BUF_SIZE);
|
||||
call_ref(dst_ref, BUF_SIZE);
|
||||
call_new(dst_new, BUF_SIZE);
|
||||
if (memcmp(dst_ref, dst_new, BUF_SIZE))
|
||||
fail();
|
||||
bench_new(dst_new, BUF_SIZE);
|
||||
}
|
||||
|
||||
void checkasm_check_exrdsp(void)
|
||||
{
|
||||
ExrDSPContext h;
|
||||
|
||||
ff_exrdsp_init(&h);
|
||||
|
||||
if (check_func(h.reorder_pixels, "reorder_pixels"))
|
||||
check_reorder_pixels();
|
||||
|
||||
report("reorder_pixels");
|
||||
|
||||
if (check_func(h.predictor, "predictor"))
|
||||
check_predictor();
|
||||
|
||||
report("predictor");
|
||||
}
|
159
externals/ffmpeg/tests/checkasm/fixed_dsp.c
vendored
Executable file
159
externals/ffmpeg/tests/checkasm/fixed_dsp.c
vendored
Executable file
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
* Copyright (c) 2015 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "checkasm.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/fixed_dsp.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#define BUF_SIZE 256
|
||||
|
||||
/* Fill the caller's src0, src1 and src2 arrays (BUF_SIZE elements each) with
 * random values passed through sign_extend(..., 24). */
#define randomize_buffers()                                 \
    do {                                                    \
        int n = 0;                                          \
        while (n < BUF_SIZE) {                              \
            src0[n] = sign_extend(rnd(), 24);               \
            src1[n] = sign_extend(rnd(), 24);               \
            src2[n] = sign_extend(rnd(), 24);               \
            n++;                                            \
        }                                                   \
    } while (0)
|
||||
|
||||
static void check_vector_fmul(const int *src0, const int *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int, ref, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int, new, [BUF_SIZE]);
|
||||
|
||||
declare_func(void, int *dst, const int *src0, const int *src1, int len);
|
||||
|
||||
call_ref(ref, src0, src1, BUF_SIZE);
|
||||
call_new(new, src0, src1, BUF_SIZE);
|
||||
if (memcmp(ref, new, BUF_SIZE * sizeof(int)))
|
||||
fail();
|
||||
bench_new(new, src0, src1, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void check_vector_fmul_add(const int *src0, const int *src1, const int *src2)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int, ref, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int, new, [BUF_SIZE]);
|
||||
|
||||
declare_func(void, int *dst, const int *src0, const int *src1, const int *src2, int len);
|
||||
|
||||
call_ref(ref, src0, src1, src2, BUF_SIZE);
|
||||
call_new(new, src0, src1, src2, BUF_SIZE);
|
||||
if (memcmp(ref, new, BUF_SIZE * sizeof(int)))
|
||||
fail();
|
||||
bench_new(new, src0, src1, src2, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void check_vector_fmul_window(const int32_t *src0, const int32_t *src1, const int32_t *win)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int32_t, ref, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int32_t, new, [BUF_SIZE]);
|
||||
|
||||
declare_func(void, int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len);
|
||||
|
||||
call_ref(ref, src0, src1, win, BUF_SIZE / 2);
|
||||
call_new(new, src0, src1, win, BUF_SIZE / 2);
|
||||
if (memcmp(ref, new, BUF_SIZE * sizeof(int32_t)))
|
||||
fail();
|
||||
bench_new(new, src0, src1, win, BUF_SIZE / 2);
|
||||
}
|
||||
|
||||
static void check_vector_fmul_window_scaled(const int32_t *src0, const int32_t *src1, const int32_t *win)
|
||||
{
|
||||
LOCAL_ALIGNED_16(int16_t, ref, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(int16_t, new, [BUF_SIZE]);
|
||||
|
||||
declare_func(void, int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits);
|
||||
|
||||
call_ref(ref, src0, src1, win, BUF_SIZE / 2, 2);
|
||||
call_new(new, src0, src1, win, BUF_SIZE / 2, 2);
|
||||
if (memcmp(ref, new, BUF_SIZE * sizeof(int16_t)))
|
||||
fail();
|
||||
bench_new(new, src0, src1, win, BUF_SIZE / 2, 2);
|
||||
}
|
||||
|
||||
static void check_butterflies(const int *src0, const int *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_16(int, ref0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(int, ref1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(int, new0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(int, new1, [BUF_SIZE]);
|
||||
|
||||
declare_func(void, int *av_restrict src0, int *av_restrict src1, int len);
|
||||
|
||||
memcpy(ref0, src0, BUF_SIZE * sizeof(*src0));
|
||||
memcpy(ref1, src1, BUF_SIZE * sizeof(*src1));
|
||||
memcpy(new0, src0, BUF_SIZE * sizeof(*src0));
|
||||
memcpy(new1, src1, BUF_SIZE * sizeof(*src1));
|
||||
|
||||
call_ref(ref0, ref1, BUF_SIZE);
|
||||
call_new(new0, new1, BUF_SIZE);
|
||||
if (memcmp(ref0, new0, BUF_SIZE * sizeof(*ref0)) ||
|
||||
memcmp(ref1, new1, BUF_SIZE * sizeof(*ref1)))
|
||||
fail();
|
||||
memcpy(new0, src0, BUF_SIZE * sizeof(*src0));
|
||||
memcpy(new1, src1, BUF_SIZE * sizeof(*src1));
|
||||
bench_new(new0, new1, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void check_scalarproduct_fixed(const int *src0, const int *src1)
|
||||
{
|
||||
int ref, new;
|
||||
|
||||
declare_func(int, const int *src0, const int *src1, int len);
|
||||
|
||||
ref = call_ref(src0, src1, BUF_SIZE);
|
||||
new = call_new(src0, src1, BUF_SIZE);
|
||||
if (ref != new)
|
||||
fail();
|
||||
bench_new(src0, src1, BUF_SIZE);
|
||||
}
|
||||
|
||||
void checkasm_check_fixed_dsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int32_t, src0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int32_t, src1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int32_t, src2, [BUF_SIZE]);
|
||||
AVFixedDSPContext *fdsp = avpriv_alloc_fixed_dsp(1);
|
||||
|
||||
randomize_buffers();
|
||||
if (check_func(fdsp->vector_fmul, "vector_fmul_fixed"))
|
||||
check_vector_fmul(src0, src1);
|
||||
if (check_func(fdsp->vector_fmul_add, "vector_fmul_add_fixed"))
|
||||
check_vector_fmul_add(src0, src1, src2);
|
||||
if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse_fixed"))
|
||||
check_vector_fmul(src0, src1);
|
||||
if (check_func(fdsp->vector_fmul_window, "vector_fmul_window_fixed"))
|
||||
check_vector_fmul_window(src0, src1, src2);
|
||||
if (check_func(fdsp->vector_fmul_window_scaled, "vector_fmul_window_scaled_fixed"))
|
||||
check_vector_fmul_window_scaled(src0, src1, src2);
|
||||
report("vector_fmul");
|
||||
if (check_func(fdsp->butterflies_fixed, "butterflies_fixed"))
|
||||
check_butterflies(src0, src1);
|
||||
report("butterflies_fixed");
|
||||
if (check_func(fdsp->scalarproduct_fixed, "scalarproduct_fixed"))
|
||||
check_scalarproduct_fixed(src0, src1);
|
||||
report("scalarproduct_fixed");
|
||||
|
||||
av_freep(&fdsp);
|
||||
}
|
90
externals/ffmpeg/tests/checkasm/flacdsp.c
vendored
Executable file
90
externals/ffmpeg/tests/checkasm/flacdsp.c
vendored
Executable file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (c) 2015 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/flacdsp.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define BUF_SIZE 256
|
||||
#define MAX_CHANNELS 8
|
||||
|
||||
/* Write the same random 32-bit words, masked to the low (bits - 2) bits,
 * into the first BUF_SIZE bytes of every channel of the caller's ref_src and
 * new_src buffers. Uses `channels` and `bits` from the calling scope. */
#define randomize_buffers()                                         \
    do {                                                            \
        int pos, ch;                                                \
        for (pos = 0; pos < BUF_SIZE; pos += 4) {                   \
            for (ch = 0; ch < channels; ch++) {                     \
                uint32_t word = rnd() & ((1 << (bits - 2)) - 1);    \
                AV_WN32A(ref_src[ch] + pos, word);                  \
                AV_WN32A(new_src[ch] + pos, word);                  \
            }                                                       \
        }                                                           \
    } while (0)
|
||||
|
||||
static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **new_dst, uint8_t **new_src,
|
||||
int channels, int bits) {
|
||||
declare_func(void, uint8_t **out, int32_t **in, int channels, int len, int shift);
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(ref_dst, (int32_t **)ref_src, channels, BUF_SIZE / sizeof(int32_t), 8);
|
||||
call_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
|
||||
if (memcmp(*ref_dst, *new_dst, bits == 16 ? BUF_SIZE * (channels/2) : BUF_SIZE * channels) ||
|
||||
memcmp(*ref_src, *new_src, BUF_SIZE * channels))
|
||||
fail();
|
||||
bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
|
||||
}
|
||||
|
||||
void checkasm_check_flacdsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
|
||||
LOCAL_ALIGNED_16(uint8_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
|
||||
LOCAL_ALIGNED_16(uint8_t, new_dst, [BUF_SIZE*MAX_CHANNELS]);
|
||||
LOCAL_ALIGNED_16(uint8_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
|
||||
uint8_t *ref_src[] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1], &ref_buf[BUF_SIZE*2], &ref_buf[BUF_SIZE*3],
|
||||
&ref_buf[BUF_SIZE*4], &ref_buf[BUF_SIZE*5], &ref_buf[BUF_SIZE*6], &ref_buf[BUF_SIZE*7] };
|
||||
uint8_t *new_src[] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1], &new_buf[BUF_SIZE*2], &new_buf[BUF_SIZE*3],
|
||||
&new_buf[BUF_SIZE*4], &new_buf[BUF_SIZE*5], &new_buf[BUF_SIZE*6], &new_buf[BUF_SIZE*7] };
|
||||
static const char * const names[3] = { "ls", "rs", "ms" };
|
||||
static const struct {
|
||||
enum AVSampleFormat fmt;
|
||||
int bits;
|
||||
} fmts[] = {
|
||||
{ AV_SAMPLE_FMT_S16, 16 },
|
||||
{ AV_SAMPLE_FMT_S32, 32 },
|
||||
};
|
||||
FLACDSPContext h;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
ff_flacdsp_init(&h, fmts[i].fmt, 2, 0);
|
||||
for (j = 0; j < 3; j++)
|
||||
if (check_func(h.decorrelate[j], "flac_decorrelate_%s_%d", names[j], fmts[i].bits))
|
||||
check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, 2, fmts[i].bits);
|
||||
for (j = 2; j <= MAX_CHANNELS; j += 2) {
|
||||
ff_flacdsp_init(&h, fmts[i].fmt, j, 0);
|
||||
if (check_func(h.decorrelate[0], "flac_decorrelate_indep%d_%d", j, fmts[i].bits))
|
||||
check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, j, fmts[i].bits);
|
||||
}
|
||||
}
|
||||
|
||||
report("decorrelate");
|
||||
}
|
338
externals/ffmpeg/tests/checkasm/float_dsp.c
vendored
Executable file
338
externals/ffmpeg/tests/checkasm/float_dsp.c
vendored
Executable file
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "checkasm.h"
|
||||
|
||||
#define LEN 256

/*
 * Fill buf[0..LEN) with random values: each av_bmg_get() call yields a pair
 * of numbers which are scaled by stddev (10.0) and offset by mean (0.0)
 * before being stored in two consecutive elements.
 *
 * The macro deliberately does NOT end in a semicolon, so that
 * "randomize_buffer(x);" is exactly one statement and stays usable in an
 * unbraced if/else. The argument is parenthesised so any pointer
 * expression can be passed.
 */
#define randomize_buffer(buf)                                 \
do {                                                          \
    int i;                                                    \
    double bmg[2], stddev = 10.0, mean = 0.0;                 \
                                                              \
    for (i = 0; i < LEN; i += 2) {                            \
        av_bmg_get(&checkasm_lfg, bmg);                       \
        (buf)[i]     = bmg[0] * stddev + mean;                \
        (buf)[i + 1] = bmg[1] * stddev + mean;                \
    }                                                         \
} while (0)
|
||||
|
||||
static void test_vector_fmul(const float *src0, const float *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_32(float, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *dst, const float *src0, const float *src1,
|
||||
int len);
|
||||
|
||||
call_ref(cdst, src0, src1, LEN);
|
||||
call_new(odst, src0, src1, LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
bench_new(odst, src0, src1, LEN);
|
||||
}
|
||||
|
||||
static void test_vector_dmul(const double *src0, const double *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_32(double, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_32(double, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, double *dst, const double *src0, const double *src1,
|
||||
int len);
|
||||
|
||||
call_ref(cdst, src0, src1, LEN);
|
||||
call_new(odst, src0, src1, LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
|
||||
if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
bench_new(odst, src0, src1, LEN);
|
||||
}
|
||||
|
||||
#define ARBITRARY_FMUL_ADD_CONST 0.005
|
||||
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_32(float, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
|
||||
call_ref(cdst, src0, src1, src2, LEN);
|
||||
call_new(odst, src0, src1, src2, LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
bench_new(odst, src0, src1, src2, LEN);
|
||||
}
|
||||
|
||||
static void test_vector_fmul_scalar(const float *src0, const float *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_16(float, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *dst, const float *src, float mul, int len);
|
||||
|
||||
call_ref(cdst, src0, src1[0], LEN);
|
||||
call_new(odst, src0, src1[0], LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
bench_new(odst, src0, src1[0], LEN);
|
||||
}
|
||||
|
||||
#define ARBITRARY_FMUL_WINDOW_CONST 0.008
|
||||
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
|
||||
{
|
||||
LOCAL_ALIGNED_16(float, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *dst, const float *src0, const float *src1,
|
||||
const float *win, int len);
|
||||
|
||||
call_ref(cdst, src0, src1, win, LEN / 2);
|
||||
call_new(odst, src0, src1, win, LEN / 2);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
bench_new(odst, src0, src1, win, LEN / 2);
|
||||
}
|
||||
|
||||
#define ARBITRARY_FMAC_SCALAR_CONST 0.005
|
||||
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_32(float, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *dst, const float *src, float mul, int len);
|
||||
|
||||
memcpy(cdst, src2, LEN * sizeof(*src2));
|
||||
memcpy(odst, src2, LEN * sizeof(*src2));
|
||||
|
||||
call_ref(cdst, src0, src1[0], LEN);
|
||||
call_new(odst, src0, src1[0], LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
memcpy(odst, src2, LEN * sizeof(*src2));
|
||||
bench_new(odst, src0, src1[0], LEN);
|
||||
}
|
||||
|
||||
static void test_vector_dmul_scalar(const double *src0, const double *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_32(double, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_32(double, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, double *dst, const double *src, double mul, int len);
|
||||
|
||||
call_ref(cdst, src0, src1[0], LEN);
|
||||
call_new(odst, src0, src1[0], LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
|
||||
if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
|
||||
cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
bench_new(odst, src0, src1[0], LEN);
|
||||
}
|
||||
|
||||
#define ARBITRARY_DMAC_SCALAR_CONST 0.005
|
||||
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
|
||||
{
|
||||
LOCAL_ALIGNED_32(double, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_32(double, odst, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, double *dst, const double *src, double mul, int len);
|
||||
|
||||
memcpy(cdst, src2, LEN * sizeof(*src2));
|
||||
memcpy(odst, src2, LEN * sizeof(*src2));
|
||||
call_ref(cdst, src0, src1[0], LEN);
|
||||
call_new(odst, src0, src1[0], LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
memcpy(odst, src2, LEN * sizeof(*src2));
|
||||
bench_new(odst, src0, src1[0], LEN);
|
||||
}
|
||||
|
||||
static void test_butterflies_float(const float *src0, const float *src1)
|
||||
{
|
||||
LOCAL_ALIGNED_16(float, cdst, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, odst, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, cdst1, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, odst1, [LEN]);
|
||||
int i;
|
||||
|
||||
declare_func(void, float *av_restrict src0, float *av_restrict src1,
|
||||
int len);
|
||||
|
||||
memcpy(cdst, src0, LEN * sizeof(*src0));
|
||||
memcpy(cdst1, src1, LEN * sizeof(*src1));
|
||||
memcpy(odst, src0, LEN * sizeof(*src0));
|
||||
memcpy(odst1, src1, LEN * sizeof(*src1));
|
||||
|
||||
call_ref(cdst, cdst1, LEN);
|
||||
call_new(odst, odst1, LEN);
|
||||
for (i = 0; i < LEN; i++) {
|
||||
if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON) ||
|
||||
!float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst[i], odst[i], cdst[i] - odst[i]);
|
||||
fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
|
||||
i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
memcpy(odst, src0, LEN * sizeof(*src0));
|
||||
memcpy(odst1, src1, LEN * sizeof(*src1));
|
||||
bench_new(odst, odst1, LEN);
|
||||
}
|
||||
|
||||
#define ARBITRARY_SCALARPRODUCT_CONST 0.2
|
||||
static void test_scalarproduct_float(const float *src0, const float *src1)
|
||||
{
|
||||
float cprod, oprod;
|
||||
|
||||
declare_func_float(float, const float *src0, const float *src1, int len);
|
||||
|
||||
cprod = call_ref(src0, src1, LEN);
|
||||
oprod = call_new(src0, src1, LEN);
|
||||
if (!float_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
|
||||
fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
|
||||
cprod, oprod, cprod - oprod);
|
||||
fail();
|
||||
}
|
||||
bench_new(src0, src1, LEN);
|
||||
}
|
||||
|
||||
void checkasm_check_float_dsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, src0, [LEN]);
|
||||
LOCAL_ALIGNED_32(float, src1, [LEN]);
|
||||
LOCAL_ALIGNED_32(float, src2, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, src3, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, src4, [LEN]);
|
||||
LOCAL_ALIGNED_16(float, src5, [LEN]);
|
||||
LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
|
||||
LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
|
||||
LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
|
||||
AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(1);
|
||||
|
||||
if (!fdsp) {
|
||||
fprintf(stderr, "floatdsp: Out of memory error\n");
|
||||
return;
|
||||
}
|
||||
|
||||
randomize_buffer(src0);
|
||||
randomize_buffer(src1);
|
||||
randomize_buffer(src2);
|
||||
randomize_buffer(src3);
|
||||
randomize_buffer(src4);
|
||||
randomize_buffer(src5);
|
||||
randomize_buffer(dbl_src0);
|
||||
randomize_buffer(dbl_src1);
|
||||
randomize_buffer(dbl_src2);
|
||||
|
||||
if (check_func(fdsp->vector_fmul, "vector_fmul"))
|
||||
test_vector_fmul(src0, src1);
|
||||
if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
|
||||
test_vector_fmul_add(src0, src1, src2);
|
||||
if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
|
||||
test_vector_fmul_scalar(src3, src4);
|
||||
if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
|
||||
test_vector_fmul(src0, src1);
|
||||
if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
|
||||
test_vector_fmul_window(src3, src4, src5);
|
||||
report("vector_fmul");
|
||||
if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
|
||||
test_vector_fmac_scalar(src0, src1, src2);
|
||||
report("vector_fmac");
|
||||
if (check_func(fdsp->vector_dmul, "vector_dmul"))
|
||||
test_vector_dmul(dbl_src0, dbl_src1);
|
||||
if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
|
||||
test_vector_dmul_scalar(dbl_src0, dbl_src1);
|
||||
report("vector_dmul");
|
||||
if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
|
||||
test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
|
||||
report("vector_dmac");
|
||||
if (check_func(fdsp->butterflies_float, "butterflies_float"))
|
||||
test_butterflies_float(src3, src4);
|
||||
report("butterflies_float");
|
||||
if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
|
||||
test_scalarproduct_float(src3, src4);
|
||||
report("scalarproduct_float");
|
||||
|
||||
av_freep(&fdsp);
|
||||
}
|
105
externals/ffmpeg/tests/checkasm/fmtconvert.c
vendored
Executable file
105
externals/ffmpeg/tests/checkasm/fmtconvert.c
vendored
Executable file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Janne Grunau
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavcodec/fmtconvert.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
#define BUF_SIZE 1024

/*
 * Fill the first `len` entries of `in` with rnd() - INT32_MAX and set every
 * remaining entry up to BUF_SIZE to INT32_MAX.
 * Expects an `in` array in the calling scope.
 */
#define randomize_input(len)                                  \
    do {                                                      \
        int pos = 0;                                          \
        while (pos < (len)) {                                 \
            in[pos] = rnd() - INT32_MAX;                      \
            pos++;                                            \
        }                                                     \
        while (pos < BUF_SIZE)                                \
            in[pos++] = INT32_MAX;                            \
    } while (0)
|
||||
|
||||
void checkasm_check_fmtconvert(void)
|
||||
{
|
||||
FmtConvertContext c;
|
||||
LOCAL_ALIGNED(32, float, dst0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, dst1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, int32_t, in, [BUF_SIZE]);
|
||||
float scale_arr[128];
|
||||
int length[] = {8, 16, 24, 56, 72, 128, 512, 520, 656, 768, 992};
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++)
|
||||
scale_arr[i] = (FF_ARRAY_ELEMS(scale_arr) - FF_ARRAY_ELEMS(scale_arr) / 2) / 13;
|
||||
|
||||
ff_fmt_convert_init(&c, NULL);
|
||||
|
||||
memset(dst0, 0, sizeof(*dst0) * BUF_SIZE);
|
||||
memset(dst1, 0, sizeof(*dst1) * BUF_SIZE);
|
||||
|
||||
if (check_func(c.int32_to_float_fmul_scalar, "int32_to_float_fmul_scalar")) {
|
||||
declare_func(void, float *, const int32_t *, float, int);
|
||||
|
||||
for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) {
|
||||
for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
|
||||
|
||||
randomize_input(length[j]);
|
||||
|
||||
call_ref(dst0, in, scale_arr[i], length[j]);
|
||||
call_new(dst1, in, scale_arr[i], length[j]);
|
||||
|
||||
if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
|
||||
fail();
|
||||
break;
|
||||
}
|
||||
|
||||
bench_new(dst1, in, scale_arr[i], length[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (check_func(c.int32_to_float_fmul_array8, "int32_to_float_fmul_array8")) {
|
||||
declare_func(void, FmtConvertContext *, float *, const int32_t *,
|
||||
const float *, int);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
|
||||
|
||||
randomize_input(length[j]);
|
||||
|
||||
call_ref(&c, dst0, in, scale_arr, length[j]);
|
||||
call_new(&c, dst1, in, scale_arr, length[j]);
|
||||
|
||||
if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
|
||||
fail();
|
||||
fprintf(stderr, "int32_to_float_fmul_array8: len: %d\n", length[j]);
|
||||
break;
|
||||
}
|
||||
|
||||
bench_new(&c, dst1, in, scale_arr, length[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
report("fmtconvert");
|
||||
}
|
63
externals/ffmpeg/tests/checkasm/g722dsp.c
vendored
Executable file
63
externals/ffmpeg/tests/checkasm/g722dsp.c
vendored
Executable file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/g722.h"
|
||||
#include "libavcodec/g722dsp.h"
|
||||
#include "libavcodec/mathops.h"
|
||||
|
||||
/*
 * Fill src0 and src1 (both PREV_SAMPLES_BUF_SIZE entries long) with the same
 * sequence of random values, each passed through sign_extend(..., 16).
 * Expects `src0` and `src1` in the calling scope.
 */
#define randomize_buffers()                                   \
    do {                                                      \
        int n = 0;                                            \
        while (n < PREV_SAMPLES_BUF_SIZE) {                   \
            src1[n] = sign_extend(rnd(), 16);                 \
            src0[n] = src1[n];                                \
            n++;                                              \
        }                                                     \
    } while (0)
|
||||
|
||||
static void check_qmf(void) {
|
||||
int16_t src0[PREV_SAMPLES_BUF_SIZE];
|
||||
int16_t src1[PREV_SAMPLES_BUF_SIZE];
|
||||
const int16_t *tmp0 = src0;
|
||||
const int16_t *tmp1 = src1;
|
||||
int dst0[2], dst1[2];
|
||||
int i;
|
||||
|
||||
declare_func(void, const int16_t *prev_samples, int xout[2]);
|
||||
|
||||
randomize_buffers();
|
||||
for (i = 0; i < PREV_SAMPLES_BUF_SIZE - 24; i++) {
|
||||
call_ref(tmp0++, dst0);
|
||||
call_new(tmp1++, dst1);
|
||||
if (memcmp(dst0, dst1, sizeof(dst0)))
|
||||
fail();
|
||||
}
|
||||
bench_new(src1, dst1);
|
||||
}
|
||||
|
||||
void checkasm_check_g722dsp(void)
|
||||
{
|
||||
G722DSPContext h;
|
||||
|
||||
ff_g722dsp_init(&h);
|
||||
|
||||
if (check_func(h.apply_qmf, "g722_apply_qmf"))
|
||||
check_qmf();
|
||||
|
||||
report("apply_qmf");
|
||||
}
|
452
externals/ffmpeg/tests/checkasm/h264dsp.c
vendored
Executable file
452
externals/ffmpeg/tests/checkasm/h264dsp.c
vendored
Executable file
@@ -0,0 +1,452 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Martin Storsjo
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/h264dsp.h"
|
||||
#include "libavcodec/h264data.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/* Per-bit-depth masks applied to random 32-bit words; indexed by bit_depth - 8. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };

/* Bytes per pixel / per coefficient for the bit_depth in the calling scope. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define SIZEOF_COEF  (2 * ((bit_depth + 7) / 8))
/* Distance in bytes between two rows of the small src/dst pixel blocks. */
#define PIXEL_STRIDE 16

/*
 * Fill `sz` rows of src and dst with masked random pixels, then store the
 * per-pixel difference src - dst of the top-left sz x sz area into coef
 * (16-bit entries for 8-bit depth, 32-bit entries otherwise).
 * Expects `bit_depth`, `sz`, `src`, `dst` and `coef` in the calling scope.
 */
#define randomize_buffers()                                                  \
    do {                                                                     \
        int row, col;                                                        \
        const uint32_t pix_mask = pixel_mask[bit_depth - 8];                 \
        for (row = 0; row < sz; row++) {                                     \
            for (col = 0; col < PIXEL_STRIDE; col += 4) {                    \
                AV_WN32A(src + row * PIXEL_STRIDE + col, rnd() & pix_mask);  \
                AV_WN32A(dst + row * PIXEL_STRIDE + col, rnd() & pix_mask);  \
            }                                                                \
            for (col = 0; col < sz; col++) {                                 \
                if (bit_depth != 8) {                                        \
                    ((int32_t *)coef)[row * sz + col] =                      \
                        ((uint16_t *)src)[row * (PIXEL_STRIDE/2) + col] -    \
                        ((uint16_t *)dst)[row * (PIXEL_STRIDE/2) + col];     \
                } else {                                                     \
                    coef[row * sz + col] = src[row * PIXEL_STRIDE + col] -   \
                                           dst[row * PIXEL_STRIDE + col];    \
                }                                                            \
            }                                                                \
        }                                                                    \
    } while (0)
|
||||
|
||||
/*
 * Define dct4x4_<size>(): an in-place 4x4 transform on `coef`.
 * Pass 1 transforms each row of coef and stores the result transposed in
 * tmp; pass 2 transforms each row of tmp back into coef. Every coefficient
 * is finally multiplied by a position-dependent factor and shifted right
 * by 15 with rounding.
 */
#define dct4x4_impl(size, dctcoef)                                           \
static void dct4x4_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 };         \
    dctcoef tmp[16];                                                         \
    int r, k;                                                                \
    for (r = 0; r < 4; r++) {                                                \
        const dctcoef *in = coef + r*4;                                      \
        const int s03 = in[0] + in[3];                                       \
        const int s12 = in[1] + in[2];                                       \
        const int d03 = in[0] - in[3];                                       \
        const int d12 = in[1] - in[2];                                       \
        tmp[r + 4*0] = s03 + s12;                                            \
        tmp[r + 4*1] = 2*d03 + d12;                                          \
        tmp[r + 4*2] = s03 - s12;                                            \
        tmp[r + 4*3] = d03 - 2*d12;                                          \
    }                                                                        \
    for (r = 0; r < 4; r++) {                                                \
        const dctcoef *in = tmp + r*4;                                       \
        dctcoef *out = coef + r*4;                                           \
        const int s03 = in[0] + in[3];                                       \
        const int s12 = in[1] + in[2];                                       \
        const int d03 = in[0] - in[3];                                       \
        const int d12 = in[1] - in[2];                                       \
        out[0] = s03 + s12;                                                  \
        out[1] = 2*d03 + d12;                                                \
        out[2] = s03 - s12;                                                  \
        out[3] = d03 - 2*d12;                                                \
    }                                                                        \
    for (k = 0; k < 16; k++) {                                               \
        const int idx = ((k >> 2) & 1) + (k & 1);                            \
        coef[k] = (coef[k] * scale[idx] + (1 << 14)) >> 15;                  \
    }                                                                        \
}

/*
 * One 8-point transform pass: reads eight values from src (spaced
 * srcstride apart) and writes eight values to dst (spaced dststride apart).
 * All inputs are read before any output is written.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do {                         \
    const int sum07 = (src)[(srcstride) * 0] + (src)[(srcstride) * 7];       \
    const int dif07 = (src)[(srcstride) * 0] - (src)[(srcstride) * 7];       \
    const int sum16 = (src)[(srcstride) * 1] + (src)[(srcstride) * 6];       \
    const int dif16 = (src)[(srcstride) * 1] - (src)[(srcstride) * 6];       \
    const int sum25 = (src)[(srcstride) * 2] + (src)[(srcstride) * 5];       \
    const int dif25 = (src)[(srcstride) * 2] - (src)[(srcstride) * 5];       \
    const int sum34 = (src)[(srcstride) * 3] + (src)[(srcstride) * 4];       \
    const int dif34 = (src)[(srcstride) * 3] - (src)[(srcstride) * 4];       \
    const int ev0 = sum07 + sum34;                                           \
    const int ev1 = sum16 + sum25;                                           \
    const int ev2 = sum07 - sum34;                                           \
    const int ev3 = sum16 - sum25;                                           \
    const int od0 = dif16 + dif25 + (dif07 + (dif07 >> 1));                  \
    const int od1 = dif07 - dif34 - (dif25 + (dif25 >> 1));                  \
    const int od2 = dif07 + dif34 - (dif16 + (dif16 >> 1));                  \
    const int od3 = dif16 - dif25 + (dif34 + (dif34 >> 1));                  \
    (dst)[(dststride) * 0] = ev0 + ev1;                                      \
    (dst)[(dststride) * 1] = od0 + (od3 >> 2);                               \
    (dst)[(dststride) * 2] = ev2 + (ev3 >> 1);                               \
    (dst)[(dststride) * 3] = od1 + (od2 >> 2);                               \
    (dst)[(dststride) * 4] = ev0 - ev1;                                      \
    (dst)[(dststride) * 5] = od2 - (od1 >> 2);                               \
    (dst)[(dststride) * 6] = (ev2 >> 1) - ev3;                               \
    (dst)[(dststride) * 7] = (od0 >> 2) - od3;                               \
} while (0)

/*
 * Define dct8x8_<size>(): an in-place 8x8 transform on `coef`.
 * Columns of coef are transformed into columns of tmp, rows of tmp are then
 * transformed into columns of coef, and every coefficient is multiplied by
 * a factor picked through idxmap and shifted right by 18 with rounding
 * (the product is formed in 64 bits).
 */
#define dct8x8_impl(size, dctcoef)                                           \
static void dct8x8_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[] = {                                             \
        13107 * 20, 11428 * 18, 20972 * 32,                                  \
        12222 * 19, 16777 * 25, 15481 * 24,                                  \
    };                                                                       \
    static const int idxmap[] = {                                            \
        0, 3, 4, 3,                                                          \
        3, 1, 5, 1,                                                          \
        4, 5, 2, 5,                                                          \
        3, 1, 5, 1,                                                          \
    };                                                                       \
    dctcoef tmp[64];                                                         \
    int n, k;                                                                \
    for (n = 0; n < 8; n++)                                                  \
        DCT8_1D(coef + n, 8, tmp + n, 8);                                    \
                                                                             \
    for (n = 0; n < 8; n++)                                                  \
        DCT8_1D(tmp + 8*n, 1, coef + n, 8);                                  \
                                                                             \
    for (k = 0; k < 64; k++) {                                               \
        const int idx = idxmap[((k >> 3) & 3) * 4 + (k & 3)];                \
        coef[k] = ((int64_t)coef[k] * scale[idx] + (1 << 17)) >> 18;         \
    }                                                                        \
}

/* 16-bit coefficient variants are used for 8-bit depth, 32-bit otherwise. */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)

dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)
|
||||
|
||||
/*
 * Apply the 4x4 transform in place, treating coef as 16-bit entries for
 * 8-bit depth and as 32-bit entries for every other depth.
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct4x4_32((int32_t *) coef);
        return;
    }
    dct4x4_16(coef);
}
|
||||
|
||||
/*
 * Apply the 8x8 transform in place, treating coef as 16-bit entries for
 * 8-bit depth and as 32-bit entries for every other depth.
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct8x8_32((int32_t *) coef);
        return;
    }
    dct8x8_16(coef);
}
|
||||
|
||||
|
||||
static void check_idct(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);
|
||||
LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
|
||||
LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
|
||||
LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
|
||||
H264DSPContext h;
|
||||
int bit_depth, sz, align, dc;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||
ff_h264dsp_init(&h, bit_depth, 1);
|
||||
for (sz = 4; sz <= 8; sz += 4) {
|
||||
randomize_buffers();
|
||||
|
||||
if (sz == 4)
|
||||
dct4x4(coef, bit_depth);
|
||||
else
|
||||
dct8x8(coef, bit_depth);
|
||||
|
||||
for (dc = 0; dc <= 1; dc++) {
|
||||
void (*idct)(uint8_t *, int16_t *, int) = NULL;
|
||||
switch ((sz << 1) | dc) {
|
||||
case (4 << 1) | 0: idct = h.h264_idct_add; break;
|
||||
case (4 << 1) | 1: idct = h.h264_idct_dc_add; break;
|
||||
case (8 << 1) | 0: idct = h.h264_idct8_add; break;
|
||||
case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break;
|
||||
}
|
||||
if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) {
|
||||
for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
|
||||
uint8_t *dst1 = dst1_base + align;
|
||||
if (dc) {
|
||||
memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
|
||||
memcpy(subcoef0, coef, SIZEOF_COEF);
|
||||
} else {
|
||||
memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
|
||||
}
|
||||
memcpy(dst0, dst, sz * PIXEL_STRIDE);
|
||||
memcpy(dst1, dst, sz * PIXEL_STRIDE);
|
||||
memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
|
||||
call_ref(dst0, subcoef0, PIXEL_STRIDE);
|
||||
call_new(dst1, subcoef1, PIXEL_STRIDE);
|
||||
if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
|
||||
memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
|
||||
fail();
|
||||
bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void check_idct_multiple(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
|
||||
H264DSPContext h;
|
||||
int bit_depth, i, y, func;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||
ff_h264dsp_init(&h, bit_depth, 1);
|
||||
for (func = 0; func < 3; func++) {
|
||||
void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
|
||||
const char *name;
|
||||
int sz = 4, intra = 0;
|
||||
int block_offset[16] = { 0 };
|
||||
switch (func) {
|
||||
case 0:
|
||||
idct = h.h264_idct_add16;
|
||||
name = "h264_idct_add16";
|
||||
break;
|
||||
case 1:
|
||||
idct = h.h264_idct_add16intra;
|
||||
name = "h264_idct_add16intra";
|
||||
intra = 1;
|
||||
break;
|
||||
case 2:
|
||||
idct = h.h264_idct8_add4;
|
||||
name = "h264_idct8_add4";
|
||||
sz = 8;
|
||||
break;
|
||||
}
|
||||
memset(nnzc, 0, 15 * 8);
|
||||
memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
|
||||
for (i = 0; i < 16 * 16; i += sz * sz) {
|
||||
uint8_t src[8 * 8 * 2];
|
||||
uint8_t dst[8 * 8 * 2];
|
||||
int16_t coef[8 * 8 * 2];
|
||||
int index = i / sz;
|
||||
int block_y = (index / 16) * sz;
|
||||
int block_x = index % 16;
|
||||
int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
|
||||
int nnz = rnd() % 3;
|
||||
|
||||
randomize_buffers();
|
||||
if (sz == 4)
|
||||
dct4x4(coef, bit_depth);
|
||||
else
|
||||
dct8x8(coef, bit_depth);
|
||||
|
||||
for (y = 0; y < sz; y++)
|
||||
memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
|
||||
&dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
|
||||
|
||||
if (nnz > 1)
|
||||
nnz = sz * sz;
|
||||
memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
|
||||
coef, nnz * SIZEOF_COEF);
|
||||
|
||||
if (intra && nnz == 1)
|
||||
nnz = 0;
|
||||
|
||||
nnzc[scan8[i / 16]] = nnz;
|
||||
block_offset[i / 16] = offset;
|
||||
}
|
||||
|
||||
if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
|
||||
memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
|
||||
memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
|
||||
memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
|
||||
memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
|
||||
call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
|
||||
call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
|
||||
if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
|
||||
memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
|
||||
fail();
|
||||
bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void check_loop_filter(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
|
||||
H264DSPContext h;
|
||||
int bit_depth;
|
||||
int alphas[36], betas[36];
|
||||
int8_t tc0[36][4];
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||
int i, j, a, c;
|
||||
uint32_t mask = pixel_mask_lf[bit_depth - 8];
|
||||
ff_h264dsp_init(&h, bit_depth, 1);
|
||||
for (i = 35, a = 255, c = 250; i >= 0; i--) {
|
||||
alphas[i] = a << (bit_depth - 8);
|
||||
betas[i] = (i + 1) / 2 << (bit_depth - 8);
|
||||
tc0[i][0] = tc0[i][3] = (c + 6) / 10;
|
||||
tc0[i][1] = (c + 7) / 15;
|
||||
tc0[i][2] = (c + 9) / 20;
|
||||
a = a*9/10;
|
||||
c = c*9/10;
|
||||
}
|
||||
|
||||
#define CHECK_LOOP_FILTER(name, align, idc) \
|
||||
do { \
|
||||
if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
|
||||
for (j = 0; j < 36; j++) { \
|
||||
intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
|
||||
for (i = 0; i < 1024; i+=4) { \
|
||||
AV_WN32A(dst + i, rnd() & mask); \
|
||||
} \
|
||||
memcpy(dst0, dst, 32 * 16 * 2); \
|
||||
memcpy(dst1, dst, 32 * 16 * 2); \
|
||||
\
|
||||
call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
|
||||
call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
|
||||
if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
|
||||
fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
|
||||
"tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
|
||||
tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
|
||||
fail(); \
|
||||
} \
|
||||
bench_new(dst1, 32, alphas[j], betas[j], tc0[j]); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);
|
||||
CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);
|
||||
|
||||
ff_h264dsp_init(&h, bit_depth, 2);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);
|
||||
#undef CHECK_LOOP_FILTER
|
||||
}
|
||||
}
|
||||
|
||||
static void check_loop_filter_intra(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
|
||||
H264DSPContext h;
|
||||
int bit_depth;
|
||||
int alphas[36], betas[36];
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta);
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||
int i, j, a;
|
||||
uint32_t mask = pixel_mask_lf[bit_depth - 8];
|
||||
ff_h264dsp_init(&h, bit_depth, 1);
|
||||
for (i = 35, a = 255; i >= 0; i--) {
|
||||
alphas[i] = a << (bit_depth - 8);
|
||||
betas[i] = (i + 1) / 2 << (bit_depth - 8);
|
||||
a = a*9/10;
|
||||
}
|
||||
|
||||
#define CHECK_LOOP_FILTER(name, align, idc) \
|
||||
do { \
|
||||
if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
|
||||
for (j = 0; j < 36; j++) { \
|
||||
intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
|
||||
for (i = 0; i < 1024; i+=4) { \
|
||||
AV_WN32A(dst + i, rnd() & mask); \
|
||||
} \
|
||||
memcpy(dst0, dst, 32 * 16 * 2); \
|
||||
memcpy(dst1, dst, 32 * 16 * 2); \
|
||||
\
|
||||
call_ref(dst0 + off, 32, alphas[j], betas[j]); \
|
||||
call_new(dst1 + off, 32, alphas[j], betas[j]); \
|
||||
if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
|
||||
fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
|
||||
j, alphas[j], betas[j]); \
|
||||
fail(); \
|
||||
} \
|
||||
bench_new(dst1, 32, alphas[j], betas[j]); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,);
|
||||
CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,);
|
||||
|
||||
ff_h264dsp_init(&h, bit_depth, 2);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422);
|
||||
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422);
|
||||
#undef CHECK_LOOP_FILTER
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Entry point of the h264dsp checks: runs every test group in this file and
 * emits one report line per group.
 */
void checkasm_check_h264dsp(void)
{
    /* Both IDCT test functions are reported together under "idct". */
    check_idct();
    check_idct_multiple();
    report("idct");

    check_loop_filter();
    report("loop_filter");

    check_loop_filter_intra();
    report("loop_filter_intra");
}
|
260
externals/ffmpeg/tests/checkasm/h264pred.c
vendored
Executable file
260
externals/ffmpeg/tests/checkasm/h264pred.c
vendored
Executable file
@@ -0,0 +1,260 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/h264pred.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/* Codec variants under test; the index into this table is the first index of
 * the pred*_modes name tables below. */
static const int codec_ids[4] = {
    AV_CODEC_ID_H264,
    AV_CODEC_ID_VP8,
    AV_CODEC_ID_RV40,
    AV_CODEC_ID_SVQ3,
};
|
||||
|
||||
static const char * const pred4x4_modes[4][15] = {
|
||||
{ /* H.264 */
|
||||
[VERT_PRED ] = "vertical",
|
||||
[HOR_PRED ] = "horizontal",
|
||||
[DC_PRED ] = "dc",
|
||||
[DIAG_DOWN_LEFT_PRED ] = "down_left",
|
||||
[DIAG_DOWN_RIGHT_PRED] = "down_right",
|
||||
[VERT_RIGHT_PRED ] = "vertical_right",
|
||||
[HOR_DOWN_PRED ] = "horizontal_right",
|
||||
[VERT_LEFT_PRED ] = "vertical_left",
|
||||
[HOR_UP_PRED ] = "horizontal_up",
|
||||
[LEFT_DC_PRED ] = "left_dc",
|
||||
[TOP_DC_PRED ] = "top_dc",
|
||||
[DC_128_PRED ] = "dc_128",
|
||||
},
|
||||
{ /* VP8 */
|
||||
[VERT_PRED ] = "vertical_vp8",
|
||||
[HOR_PRED ] = "horizontal_vp8",
|
||||
[VERT_LEFT_PRED] = "vertical_left_vp8",
|
||||
[TM_VP8_PRED ] = "tm_vp8",
|
||||
[DC_127_PRED ] = "dc_127_vp8",
|
||||
[DC_129_PRED ] = "dc_129_vp8",
|
||||
},
|
||||
{ /* RV40 */
|
||||
[DIAG_DOWN_LEFT_PRED ] = "down_left_rv40",
|
||||
[VERT_LEFT_PRED ] = "vertical_left_rv40",
|
||||
[HOR_UP_PRED ] = "horizontal_up_rv40",
|
||||
[DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
|
||||
[HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40",
|
||||
[VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40",
|
||||
},
|
||||
{ /* SVQ3 */
|
||||
[DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
|
||||
},
|
||||
};
|
||||
|
||||
static const char * const pred8x8_modes[4][11] = {
|
||||
{ /* H.264 */
|
||||
[DC_PRED8x8 ] = "dc",
|
||||
[HOR_PRED8x8 ] = "horizontal",
|
||||
[VERT_PRED8x8 ] = "vertical",
|
||||
[PLANE_PRED8x8 ] = "plane",
|
||||
[LEFT_DC_PRED8x8 ] = "left_dc",
|
||||
[TOP_DC_PRED8x8 ] = "top_dc",
|
||||
[DC_128_PRED8x8 ] = "dc_128",
|
||||
[ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
|
||||
[ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
|
||||
[ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
|
||||
[ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
|
||||
},
|
||||
{ /* VP8 */
|
||||
[PLANE_PRED8x8 ] = "tm_vp8",
|
||||
[DC_127_PRED8x8] = "dc_127_vp8",
|
||||
[DC_129_PRED8x8] = "dc_129_vp8",
|
||||
},
|
||||
{ /* RV40 */
|
||||
[DC_PRED8x8 ] = "dc_rv40",
|
||||
[LEFT_DC_PRED8x8] = "left_dc_rv40",
|
||||
[TOP_DC_PRED8x8 ] = "top_dc_rv40",
|
||||
},
|
||||
/* nothing for SVQ3 */
|
||||
};
|
||||
|
||||
static const char * const pred16x16_modes[4][9] = {
|
||||
{ /* H.264 */
|
||||
[DC_PRED8x8 ] = "dc",
|
||||
[HOR_PRED8x8 ] = "horizontal",
|
||||
[VERT_PRED8x8 ] = "vertical",
|
||||
[PLANE_PRED8x8 ] = "plane",
|
||||
[LEFT_DC_PRED8x8] = "left_dc",
|
||||
[TOP_DC_PRED8x8 ] = "top_dc",
|
||||
[DC_128_PRED8x8 ] = "dc_128",
|
||||
},
|
||||
{ /* VP8 */
|
||||
[PLANE_PRED8x8 ] = "tm_vp8",
|
||||
[DC_127_PRED8x8] = "dc_127_vp8",
|
||||
[DC_129_PRED8x8] = "dc_129_vp8",
|
||||
},
|
||||
{ /* RV40 */
|
||||
[PLANE_PRED8x8] = "plane_rv40",
|
||||
},
|
||||
{ /* SVQ3 */
|
||||
[PLANE_PRED8x8] = "plane_svq3",
|
||||
},
|
||||
};
|
||||
|
||||
/* Per-32-bit-word mask applied to random data, indexed by bit_depth - 8. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };

/* Bytes per pixel for the bit_depth in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Size in bytes of each of the two test buffers. */
#define BUF_SIZE (3 * 16 * 17)

/*
 * Wrapper around check_func() that skips modes without a name (NULL
 * mode_name). H.264 functions get the bit depth appended to the reported
 * name; the other codecs do not. Expects `codec` and `bit_depth` in scope.
 */
#define check_pred_func(func, name, mode_name)                                    \
    (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?                       \
                   check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) : \
                   check_func(func, "pred%s_%s", name, mode_name)))

/*
 * Fill buf0 and buf1 with the same masked random data, 32 bits at a time.
 * Expects `buf0`, `buf1` and `bit_depth` in scope.
 */
#define randomize_buffers()                              \
    do {                                                 \
        uint32_t px_mask = pixel_mask[bit_depth - 8];    \
        int pos;                                         \
        for (pos = 0; pos < BUF_SIZE; pos += 4) {        \
            uint32_t word = rnd() & px_mask;             \
            AV_WN32A(buf0 + pos, word);                  \
            AV_WN32A(buf1 + pos, word);                  \
        }                                                \
    } while (0)

#define src0 (buf0 + 4 * 16) /* Offset to allow room for top and left */
#define src1 (buf1 + 4 * 16)
|
||||
|
||||
/**
 * Check every named 4x4 prediction function of the given codec.
 *
 * Only runs for chroma_format 1. Reference and tested functions work on
 * identically randomised buffers, which must be byte-identical afterwards.
 */
static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    /* Both the reference and the tested call read topright from buf0. */
    uint8_t *topright = buf0 + 2*16;
    int pred_mode;
    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride);

    if (chroma_format != 1)
        return;

    for (pred_mode = 0; pred_mode < 15; pred_mode++) {
        if (!check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode]))
            continue;

        randomize_buffers();
        call_ref(src0, topright, 12*SIZEOF_PIXEL);
        call_new(src1, topright, 12*SIZEOF_PIXEL);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, topright, 12*SIZEOF_PIXEL);
    }
}
|
||||
|
||||
/**
 * Check every named 8x8 prediction function of the given codec.
 *
 * With chroma_format 2 the functions are reported as "8x16", otherwise as
 * "8x8". Reference and tested functions work on identically randomised
 * buffers, which must be byte-identical afterwards.
 */
static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    const char *block_name = (chroma_format == 2) ? "8x16" : "8x8";
    int pred_mode;
    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);

    for (pred_mode = 0; pred_mode < 11; pred_mode++) {
        if (!check_pred_func(h->pred8x8[pred_mode], block_name,
                             pred8x8_modes[codec][pred_mode]))
            continue;

        randomize_buffers();
        call_ref(src0, 24*SIZEOF_PIXEL);
        call_new(src1, 24*SIZEOF_PIXEL);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, 24*SIZEOF_PIXEL);
    }
}
|
||||
|
||||
/**
 * Check every named 16x16 prediction function of the given codec.
 *
 * Only runs for chroma_format 1. The stride is a fixed 48 bytes for every
 * bit depth (it is not scaled by SIZEOF_PIXEL).
 */
static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                            int codec, int chroma_format, int bit_depth)
{
    int pred_mode;
    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);

    if (chroma_format != 1)
        return;

    for (pred_mode = 0; pred_mode < 9; pred_mode++) {
        if (!check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode]))
            continue;

        randomize_buffers();
        call_ref(src0, 48);
        call_new(src1, 48);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, 48);
    }
}
|
||||
|
||||
/**
 * Check the 8x8l prediction functions.
 *
 * Only runs for H.264 with chroma_format 1. Each mode is exercised with all
 * four combinations of the topleft/topright availability flags, except the
 * combinations the in-code comment marks as disallowed by the spec.
 * Mode names are taken from pred4x4_modes.
 */
static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                           int codec, int chroma_format, int bit_depth)
{
    int pred_mode, combo;
    declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride);

    if (chroma_format != 1 || codec_ids[codec] != AV_CODEC_ID_H264)
        return;

    for (pred_mode = 0; pred_mode < 12; pred_mode++) {
        if (!check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode]))
            continue;

        /* neighbors takes the values 0, 0x4000, 0x8000 and 0xc000 */
        for (combo = 0; combo < 4; combo++) {
            const int neighbors     = combo * 0x4000;
            const int has_topleft   = neighbors & 0x8000;
            const int has_topright  = neighbors & 0x4000;
            const int needs_topleft = pred_mode == DIAG_DOWN_RIGHT_PRED ||
                                      pred_mode == VERT_RIGHT_PRED;

            if (needs_topleft && !has_topleft)
                continue; /* Those aren't allowed according to the spec */

            randomize_buffers();
            call_ref(src0, has_topleft, has_topright, 24*SIZEOF_PIXEL);
            call_new(src1, has_topleft, has_topright, 24*SIZEOF_PIXEL);
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, has_topleft, has_topright, 24*SIZEOF_PIXEL);
        }
    }
}
|
||||
|
||||
/* TODO: Add tests for H.264 lossless H/V prediction */
|
||||
|
||||
void checkasm_check_h264pred(void)
|
||||
{
|
||||
static const struct {
|
||||
void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
|
||||
const char *name;
|
||||
} tests[] = {
|
||||
{ check_pred4x4, "pred4x4" },
|
||||
{ check_pred8x8, "pred8x8" },
|
||||
{ check_pred16x16, "pred16x16" },
|
||||
{ check_pred8x8l, "pred8x8l" },
|
||||
};
|
||||
|
||||
LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
|
||||
H264PredContext h;
|
||||
int test, codec, chroma_format, bit_depth;
|
||||
|
||||
for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) {
|
||||
for (codec = 0; codec < 4; codec++) {
|
||||
int codec_id = codec_ids[codec];
|
||||
for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++)
|
||||
for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) {
|
||||
ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format);
|
||||
tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth);
|
||||
}
|
||||
}
|
||||
report("%s", tests[test].name);
|
||||
}
|
||||
}
|
81
externals/ffmpeg/tests/checkasm/h264qpel.c
vendored
Executable file
81
externals/ffmpeg/tests/checkasm/h264qpel.c
vendored
Executable file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/h264qpel.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/* Per-32-bit-word mask applied to random source data, indexed by bit_depth - 8. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };

/* Bytes per pixel for the bit_depth in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Size in bytes of each source and destination buffer. */
#define BUF_SIZE (2 * 16 * (16 + 3 + 4))

/*
 * Fill buf0/buf1 with identical masked random data and dst0/dst1 with
 * identical unmasked random data, 32 bits at a time. Expects `buf0`, `buf1`,
 * `dst0`, `dst1` and `bit_depth` in scope.
 */
#define randomize_buffers()                              \
    do {                                                 \
        uint32_t px_mask = pixel_mask[bit_depth - 8];    \
        int pos;                                         \
        for (pos = 0; pos < BUF_SIZE; pos += 4) {        \
            uint32_t word = rnd() & px_mask;             \
            AV_WN32A(buf0 + pos, word);                  \
            AV_WN32A(buf1 + pos, word);                  \
            word = rnd();                                \
            AV_WN32A(dst0 + pos, word);                  \
            AV_WN32A(dst1 + pos, word);                  \
        }                                                \
    } while (0)

#define src0 (buf0 + 3 * 2 * 16) /* h264qpel functions read data from negative src pointer offsets */
#define src1 (buf1 + 3 * 2 * 16)
|
||||
|
||||
void checkasm_check_h264qpel(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
|
||||
H264QpelContext h;
|
||||
int op, bit_depth, i, j;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
|
||||
for (op = 0; op < 2; op++) {
|
||||
qpel_mc_func (*tab)[16] = op ? h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab;
|
||||
const char *op_name = op ? "avg" : "put";
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||
ff_h264qpel_init(&h, bit_depth);
|
||||
for (i = 0; i < (op ? 3 : 4); i++) {
|
||||
int size = 16 >> i;
|
||||
for (j = 0; j < 16; j++)
|
||||
if (check_func(tab[i][j], "%s_h264_qpel_%d_mc%d%d_%d", op_name, size, j & 3, j >> 2, bit_depth)) {
|
||||
randomize_buffers();
|
||||
call_ref(dst0, src0, size * SIZEOF_PIXEL);
|
||||
call_new(dst1, src1, size * SIZEOF_PIXEL);
|
||||
if (memcmp(buf0, buf1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))
|
||||
fail();
|
||||
bench_new(dst1, src1, size * SIZEOF_PIXEL);
|
||||
}
|
||||
}
|
||||
}
|
||||
report("%s", op_name);
|
||||
}
|
||||
}
|
96
externals/ffmpeg/tests/checkasm/hevc_add_res.c
vendored
Executable file
96
externals/ffmpeg/tests/checkasm/hevc_add_res.c
vendored
Executable file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Alexandra Hájková
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#include "libavcodec/hevcdsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Fill `size` 16-bit elements of buf with random int16_t values shifted
 * right by 3. */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int idx;                                \
        for (idx = 0; idx < size; idx++) {      \
            int16_t val = rnd();                \
            AV_WN16A(buf + idx, val >> 3);      \
        }                                       \
    } while (0)

/* Write `size` random 10-bit values (rnd() & 0x3FF) as 16-bit words at
 * consecutive 2-unit offsets from buf. */
#define randomize_buffers2(buf, size)                   \
    do {                                                \
        int idx;                                        \
        for (idx = 0; idx < size; idx++)                \
            AV_WN16A(buf + idx * 2, rnd() & 0x3FF);     \
    } while (0)
|
||||
|
||||
static void compare_add_res(int size, ptrdiff_t stride, int overflow_test)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int16_t, res0, [32 * 32]);
|
||||
LOCAL_ALIGNED_32(int16_t, res1, [32 * 32]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride);
|
||||
|
||||
randomize_buffers(res0, size);
|
||||
randomize_buffers2(dst0, size);
|
||||
if (overflow_test)
|
||||
res0[0] = 0x8000;
|
||||
memcpy(res1, res0, sizeof(*res0) * size);
|
||||
memcpy(dst1, dst0, sizeof(int16_t) * size);
|
||||
|
||||
call_ref(dst0, res0, stride);
|
||||
call_new(dst1, res1, stride);
|
||||
if (memcmp(dst0, dst1, size))
|
||||
fail();
|
||||
bench_new(dst1, res1, stride);
|
||||
}
|
||||
|
||||
/**
 * Check the add_residual functions for block sizes 4x4 to 32x32 at the
 * given bit depth, once with plain random input and once with the first
 * residual forced to -32768.
 */
static void check_add_res(HEVCDSPContext h, int bit_depth)
{
    int log2_size;

    for (log2_size = 2; log2_size <= 5; log2_size++) {
        const int block_size   = 1 << log2_size;
        const int size         = block_size * block_size;
        /* the stride doubles when bit_depth is above 8 */
        const ptrdiff_t stride = block_size << (bit_depth > 8);

        if (!check_func(h.add_residual[log2_size - 2], "hevc_add_res_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        compare_add_res(size, stride, 0);
        // overflow test for res = -32768
        compare_add_res(size, stride, 1);
    }
}
|
||||
|
||||
void checkasm_check_hevc_add_res(void)
|
||||
{
|
||||
int bit_depth;
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||
HEVCDSPContext h;
|
||||
|
||||
ff_hevc_dsp_init(&h, bit_depth);
|
||||
check_add_res(h, bit_depth);
|
||||
}
|
||||
report("add_residual");
|
||||
}
|
105
externals/ffmpeg/tests/checkasm/hevc_idct.c
vendored
Executable file
105
externals/ffmpeg/tests/checkasm/hevc_idct.c
vendored
Executable file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Alexandra Hájková
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#include "libavcodec/hevcdsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Fill `size` 16-bit elements of buf with random int16_t values. */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int idx;                                \
        for (idx = 0; idx < size; idx++) {      \
            int16_t val = rnd();                \
            AV_WN16A(buf + idx, val);           \
        }                                       \
    } while (0)
|
||||
|
||||
/**
 * Check the IDCT functions for block sizes 4x4 to 32x32 at the given bit
 * depth. Both implementations transform identical random coefficients in
 * place with col_limit equal to the block size; the results must match.
 */
static void check_idct(HEVCDSPContext h, int bit_depth)
{
    int log2_size;
    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);
    declare_func(void, int16_t *coeffs, int col_limit);

    for (log2_size = 2; log2_size <= 5; log2_size++) {
        const int block_size = 1 << log2_size;
        const int size       = block_size * block_size;
        const int col_limit  = block_size;

        /* Buffers are re-randomised for every size, whether or not the
         * function ends up being tested. */
        randomize_buffers(coeffs0, size);
        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);

        if (!check_func(h.idct[log2_size - 2], "hevc_idct_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(coeffs0, col_limit);
        call_new(coeffs1, col_limit);
        if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
            fail();
        bench_new(coeffs1, col_limit);
    }
}
|
||||
|
||||
/**
 * Check the DC-only IDCT functions for block sizes 4x4 to 32x32 at the
 * given bit depth. Both implementations process identical random
 * coefficients in place; the results must match.
 */
static void check_idct_dc(HEVCDSPContext h, int bit_depth)
{
    int log2_size;
    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);
    declare_func_emms(AV_CPU_FLAG_MMXEXT, void, int16_t *coeffs);

    for (log2_size = 2; log2_size <= 5; log2_size++) {
        const int block_size = 1 << log2_size;
        const int size       = block_size * block_size;

        /* Buffers are re-randomised for every size, whether or not the
         * function ends up being tested. */
        randomize_buffers(coeffs0, size);
        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);

        if (!check_func(h.idct_dc[log2_size - 2], "hevc_idct_%dx%d_dc_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(coeffs0);
        call_new(coeffs1);
        if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
            fail();
        bench_new(coeffs1);
    }
}
|
||||
|
||||
void checkasm_check_hevc_idct(void)
|
||||
{
|
||||
int bit_depth;
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
HEVCDSPContext h;
|
||||
|
||||
ff_hevc_dsp_init(&h, bit_depth);
|
||||
check_idct_dc(h, bit_depth);
|
||||
}
|
||||
report("idct_dc");
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
HEVCDSPContext h;
|
||||
|
||||
ff_hevc_dsp_init(&h, bit_depth);
|
||||
check_idct(h, bit_depth);
|
||||
}
|
||||
report("idct");
|
||||
}
|
150
externals/ffmpeg/tests/checkasm/hevc_sao.c
vendored
Executable file
150
externals/ffmpeg/tests/checkasm/hevc_sao.c
vendored
Executable file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#include "libavcodec/avcodec.h"
|
||||
|
||||
#include "libavcodec/hevcdsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Per-32-bit-word mask applied to random pixels, indexed by (bit_depth - 8) >> 1. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
/* Block sizes tested; the index is also the index into the filter tables. */
static const uint32_t sao_size[5] = {8, 16, 32, 48, 64};

/* Bytes per pixel for the bit_depth in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) /* same with sao_edge src_stride */
#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) /* +2 for top and bottom row, *2 for high bit depth */
/* Exclusive upper bound of the random offset values. */
#define OFFSET_THRESH (1 << (bit_depth - 5))
/* Number of entries in the offset table. */
#define OFFSET_LENGTH 5

/* Fill the first `size` bytes of buf0 and buf1 with identical masked random
 * data, 32 bits at a time. Expects `bit_depth` in scope. */
#define randomize_buffers(buf0, buf1, size)                     \
    do {                                                        \
        uint32_t px_mask = pixel_mask[(bit_depth - 8) >> 1];    \
        int pos;                                                \
        for (pos = 0; pos < size; pos += 4) {                   \
            uint32_t word = rnd() & px_mask;                    \
            AV_WN32A(buf0 + pos, word);                         \
            AV_WN32A(buf1 + pos, word);                         \
        }                                                       \
    } while (0)

/* Fill `size` elements of buf with random values below OFFSET_THRESH.
 * Expects `bit_depth` in scope. */
#define randomize_buffers2(buf, size)                           \
    do {                                                        \
        uint32_t limit = OFFSET_THRESH;                         \
        int pos;                                                \
        if (bit_depth == 8) {                                   \
            for (pos = 0; pos < size; pos++) {                  \
                uint8_t val = rnd() % limit;                    \
                buf[pos] = val;                                 \
            }                                                   \
        } else {                                                \
            for (pos = 0; pos < size; pos++) {                  \
                uint16_t val = rnd() % limit;                   \
                buf[pos] = val;                                 \
            }                                                   \
        }                                                       \
    } while (0)
|
||||
|
||||
/**
 * Check the SAO band filters for all five block sizes at the given bit
 * depth. Both implementations filter identical random source data into
 * zeroed destinations, which must match over the whole buffer.
 */
static void check_sao_band(HEVCDSPContext h, int bit_depth)
{
    int i;
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
    int16_t offset_val[OFFSET_LENGTH];
    /* drawn once and shared by all block sizes */
    int left_class = rnd()%32;
    /* the same stride is used for source and destination */
    const ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
                      int16_t *sao_offset_val, int sao_left_class, int width, int height);

    for (i = 0; i <= 4; i++) {
        const int block_size = sao_size[i];

        randomize_buffers(src0, src1, BUF_SIZE);
        randomize_buffers2(offset_val, OFFSET_LENGTH);
        memset(dst0, 0, BUF_SIZE);
        memset(dst1, 0, BUF_SIZE);

        if (!check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size);
        call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
        if (memcmp(dst0, dst1, BUF_SIZE))
            fail();
        bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
    }
}
|
||||
|
||||
/**
 * Check the SAO edge filters for all five block sizes at the given bit
 * depth. The source pointer is advanced by one row plus the padding so the
 * filter has data before it; destinations must match over the whole buffer.
 */
static void check_sao_edge(HEVCDSPContext h, int bit_depth)
{
    int i;
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
    int16_t offset_val[OFFSET_LENGTH];
    /* drawn once and shared by all block sizes */
    int eo = rnd()%4;
    const ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
    const int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL;
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
                      int16_t *sao_offset_val, int eo, int width, int height);

    for (i = 0; i <= 4; i++) {
        const int block_size = sao_size[i];

        randomize_buffers(src0, src1, BUF_SIZE);
        randomize_buffers2(offset_val, OFFSET_LENGTH);
        memset(dst0, 0, BUF_SIZE);
        memset(dst1, 0, BUF_SIZE);

        if (!check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth))
            continue;

        call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size);
        call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
        if (memcmp(dst0, dst1, BUF_SIZE))
            fail();
        bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
    }
}
|
||||
|
||||
void checkasm_check_hevc_sao(void)
|
||||
{
|
||||
int bit_depth;
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
HEVCDSPContext h;
|
||||
|
||||
ff_hevc_dsp_init(&h, bit_depth);
|
||||
check_sao_band(h, bit_depth);
|
||||
}
|
||||
report("sao_band");
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
HEVCDSPContext h;
|
||||
|
||||
ff_hevc_dsp_init(&h, bit_depth);
|
||||
check_sao_edge(h, bit_depth);
|
||||
}
|
||||
report("sao_edge");
|
||||
}
|
81
externals/ffmpeg/tests/checkasm/huffyuvdsp.c
vendored
Executable file
81
externals/ffmpeg/tests/checkasm/huffyuvdsp.c
vendored
Executable file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Alexandra Hájková
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#include "libavcodec/huffyuvdsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Fill `size` elements of buf with random values in the range 0..0xFFFF. */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int idx;                                \
        for (idx = 0; idx < size; idx++)        \
            buf[idx] = rnd() & 0xFFFF;          \
    } while (0)
|
||||
|
||||
/*
 * Compare the reference and the optimized add_int16 implementation on
 * `width` random 16-bit samples, then benchmark the optimized one.
 *
 * c     - DSP context whose add_int16 pointer is tested
 * mask  - forwarded unchanged to add_int16
 * width - number of uint16_t elements allocated and processed
 * name  - test name handed to check_func()
 */
static void check_add_int16(HuffYUVDSPContext c, unsigned mask, int width, const char * name)
{
    uint16_t *src0 = av_mallocz(width * sizeof(uint16_t));
    uint16_t *src1 = av_mallocz(width * sizeof(uint16_t));
    uint16_t *dst0 = av_mallocz(width * sizeof(uint16_t));
    uint16_t *dst1 = av_mallocz(width * sizeof(uint16_t));

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint16_t *dst, uint16_t *src, unsigned mask, int w);

    if (!src0 || !src1 || !dst0 || !dst1) {
        /* Flag the failure and stop here rather than writing through NULL. */
        fail();
        goto end;
    }

    /* Both implementations must see identical input. */
    randomize_buffers(src0, width);
    memcpy(src1, src0, width * sizeof(uint16_t));

    if (check_func(c.add_int16, "%s", name)) {
        call_ref(dst0, src0, mask, width);
        call_new(dst1, src1, mask, width);
        if (memcmp(dst0, dst1, width * sizeof(uint16_t)))
            fail();
        bench_new(dst1, src1, mask, width);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
|
||||
|
||||
void checkasm_check_huffyuvdsp(void)
|
||||
{
|
||||
HuffYUVDSPContext c;
|
||||
int width = 16 * av_clip(rnd(), 16, 128);
|
||||
|
||||
ff_huffyuvdsp_init(&c, AV_PIX_FMT_YUV422P);
|
||||
|
||||
/*! test width not multiple of mmsize */
|
||||
check_add_int16(c, 65535, width, "add_int16_rnd_width");
|
||||
report("add_int16_rnd_width");
|
||||
|
||||
/*! test always with the same size (for perf test) */
|
||||
check_add_int16(c, 65535, 16*128, "add_int16_128");
|
||||
report("add_int16_128");
|
||||
}
|
103
externals/ffmpeg/tests/checkasm/jpeg2000dsp.c
vendored
Executable file
103
externals/ffmpeg/tests/checkasm/jpeg2000dsp.c
vendored
Executable file
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2015 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/jpeg2000dsp.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define BUF_SIZE 512

/* Fill the caller's `src` array (3 * BUF_SIZE elements) with rnd() values. */
#define randomize_buffers()                          \
    do {                                             \
        int k;                                       \
        for (k = 0; k != BUF_SIZE * 3; ++k)          \
            src[k] = rnd();                          \
    } while (0)
|
||||
|
||||
/* Fill the caller's `src` array (3 * BUF_SIZE elements) with random floats,
 * each computed as rnd() scaled down by (UINT_MAX >> 5). */
#define randomize_buffers_float()                        \
    do {                                                 \
        int k;                                           \
        for (k = 0; k != BUF_SIZE * 3; ++k)              \
            src[k] = (float)rnd() / (UINT_MAX >> 5);     \
    } while (0)
|
||||
|
||||
static void check_rct_int(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int32_t, src, [BUF_SIZE*3]);
|
||||
LOCAL_ALIGNED_32(int32_t, ref, [BUF_SIZE*3]);
|
||||
LOCAL_ALIGNED_32(int32_t, new, [BUF_SIZE*3]);
|
||||
int32_t *ref0 = &ref[BUF_SIZE*0], *new0 = &new[BUF_SIZE*0];
|
||||
int32_t *ref1 = &ref[BUF_SIZE*1], *new1 = &new[BUF_SIZE*1];
|
||||
int32_t *ref2 = &ref[BUF_SIZE*2], *new2 = &new[BUF_SIZE*2];
|
||||
|
||||
declare_func(void, void *src0, void *src1, void *src2, int csize);
|
||||
|
||||
randomize_buffers();
|
||||
memcpy(ref, src, BUF_SIZE * 3 * sizeof(*src));
|
||||
memcpy(new, src, BUF_SIZE * 3 * sizeof(*src));
|
||||
call_ref(ref0, ref1, ref2, BUF_SIZE);
|
||||
call_new(new0, new1, new2, BUF_SIZE);
|
||||
if (memcmp(ref0, new0, BUF_SIZE * sizeof(*src)) ||
|
||||
memcmp(ref1, new1, BUF_SIZE * sizeof(*src)) ||
|
||||
memcmp(ref2, new2, BUF_SIZE * sizeof(*src)))
|
||||
fail();
|
||||
memcpy(new, src, BUF_SIZE * 3 * sizeof(*src));
|
||||
bench_new(new0, new1, new2, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void check_ict_float(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(float, src, [BUF_SIZE*3]);
|
||||
LOCAL_ALIGNED_32(float, ref, [BUF_SIZE*3]);
|
||||
LOCAL_ALIGNED_32(float, new, [BUF_SIZE*3]);
|
||||
float *ref0 = &ref[BUF_SIZE*0], *new0 = &new[BUF_SIZE*0];
|
||||
float *ref1 = &ref[BUF_SIZE*1], *new1 = &new[BUF_SIZE*1];
|
||||
float *ref2 = &ref[BUF_SIZE*2], *new2 = &new[BUF_SIZE*2];
|
||||
|
||||
declare_func(void, void *src0, void *src1, void *src2, int csize);
|
||||
|
||||
randomize_buffers_float();
|
||||
memcpy(ref, src, BUF_SIZE * 3 * sizeof(*src));
|
||||
memcpy(new, src, BUF_SIZE * 3 * sizeof(*src));
|
||||
call_ref(ref0, ref1, ref2, BUF_SIZE);
|
||||
call_new(new0, new1, new2, BUF_SIZE);
|
||||
if (!float_near_abs_eps_array(ref0, new0, 1.0e-5, BUF_SIZE) ||
|
||||
!float_near_abs_eps_array(ref1, new1, 1.0e-5, BUF_SIZE) ||
|
||||
!float_near_abs_eps_array(ref2, new2, 1.0e-5, BUF_SIZE))
|
||||
fail();
|
||||
memcpy(new, src, BUF_SIZE * 3 * sizeof(*src));
|
||||
bench_new(new0, new1, new2, BUF_SIZE);
|
||||
}
|
||||
|
||||
void checkasm_check_jpeg2000dsp(void)
|
||||
{
|
||||
Jpeg2000DSPContext h;
|
||||
|
||||
ff_jpeg2000dsp_init(&h);
|
||||
|
||||
if (check_func(h.mct_decode[FF_DWT53], "jpeg2000_rct_int"))
|
||||
check_rct_int();
|
||||
if (check_func(h.mct_decode[FF_DWT97], "jpeg2000_ict_float"))
|
||||
check_ict_float();
|
||||
|
||||
report("mct_decode");
|
||||
}
|
220
externals/ffmpeg/tests/checkasm/llviddsp.c
vendored
Executable file
220
externals/ffmpeg/tests/checkasm/llviddsp.c
vendored
Executable file
@@ -0,0 +1,220 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Alexandra Hájková
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#include "libavcodec/lossless_videodsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill the first `size` bytes of `buf` with random bytes (rnd() & 0xFF).
 * `buf` may point to any element type; it is accessed as uint8_t.
 * Both arguments are parenthesized so that expression arguments (for
 * example a pointer offset or a conditional size) expand as intended.
 */
#define randomize_buffers(buf, size)              \
    do {                                          \
        int j;                                    \
        uint8_t *tmp_buf = (uint8_t *)(buf);      \
        for (j = 0; j < (size); j++)              \
            tmp_buf[j] = rnd() & 0xFF;            \
    } while (0)
|
||||
|
||||
/*
 * Prepare a pair of identical random input buffers: `a0` receives
 * `width` random elements of `type` and `a1` a byte-exact copy.
 * If either pointer is NULL the test is failed and the buffers are left
 * untouched. Wrapped in do/while(0) so the macro is a single statement.
 */
#define init_buffer(a0, a1, type, width)                          \
    do {                                                          \
        if (!(a0) || !(a1)) {                                     \
            fail();                                               \
        } else {                                                  \
            randomize_buffers(a0, (width) * sizeof(type));        \
            memcpy(a1, a0, (width) * sizeof(type));               \
        }                                                         \
    } while (0)
|
||||
|
||||
/*
 * Compare the reference and the optimized add_bytes implementation on
 * `width` random bytes and benchmark the optimized one.
 * An allocation failure is reported through fail() and the test is skipped.
 */
static void check_add_bytes(LLVidDSPContext c, int width)
{
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t w);

    if (!dst0 || !dst1 || !src0 || !src1) {
        /* Do not run the functions on NULL buffers. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, width);

    if (check_func(c.add_bytes, "add_bytes")) {
        call_ref(dst0, src0, width);
        call_new(dst1, src1, width);
        if (memcmp(dst0, dst1, width))
            fail();
        bench_new(dst1, src1, width);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
|
||||
|
||||
/*
 * Compare the reference and the optimized add_median_pred implementation.
 * Besides the `width` output bytes, the in/out `left` and `left_top` values
 * (A0/B0 for the reference, A1/B1 for the optimized call) must match.
 * An allocation failure is reported through fail() and the test is skipped;
 * this also covers dst0/dst1.
 */
static void check_add_median_pred(LLVidDSPContext c, int width) {
    int A0, A1, B0, B1;
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *diff0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *diff1 = av_mallocz_array(width, sizeof(uint8_t));
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
                      const uint8_t *diff, ptrdiff_t w,
                      int *left, int *left_top);

    if (!dst0 || !dst1 || !src0 || !src1 || !diff0 || !diff1) {
        /* Do not run the functions on NULL buffers. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, width);
    init_buffer(diff0, diff1, uint8_t, width);

    /* Identical random starting state for both implementations. */
    A0 = rnd() & 0xFF;
    B0 = rnd() & 0xFF;
    A1 = A0;
    B1 = B0;

    if (check_func(c.add_median_pred, "add_median_pred")) {
        call_ref(dst0, src0, diff0, width, &A0, &B0);
        call_new(dst1, src1, diff1, width, &A1, &B1);
        if (memcmp(dst0, dst1, width) || (A0 != A1) || (B0 != B1))
            fail();
        bench_new(dst1, src1, diff1, width, &A1, &B1);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(diff0);
    av_free(diff1);
    av_free(dst0);
    av_free(dst1);
}
|
||||
|
||||
/*
 * Compare the reference and the optimized add_left_pred implementation on
 * `width` random bytes with starting accumulator `acc`. The output buffers
 * and the low 8 bits of the return values must match. `report` is the test
 * name handed to check_func().
 * An allocation failure is reported through fail() and the test is skipped.
 */
static void check_add_left_pred(LLVidDSPContext c, int width, int acc, const char * report)
{
    int res0, res1;
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    uint8_t *src0 = av_mallocz_array(width, sizeof(uint8_t));
    uint8_t *src1 = av_mallocz_array(width, sizeof(uint8_t));
    declare_func_emms(AV_CPU_FLAG_MMX, int, uint8_t *dst, uint8_t *src, ptrdiff_t w, int acc);

    if (!dst0 || !dst1 || !src0 || !src1) {
        /* Do not run the functions on NULL buffers. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, width);

    if (check_func(c.add_left_pred, "%s", report)) {
        res0 = call_ref(dst0, src0, width, acc);
        res1 = call_new(dst1, src1, width, acc);
        if ((res0 & 0xFF) != (res1 & 0xFF) ||
            memcmp(dst0, dst1, width))
            fail();
        bench_new(dst1, src1, width, acc);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
|
||||
|
||||
/*
 * Compare the reference and the optimized add_left_pred_int16
 * implementation on `width` random 16-bit samples. The complete output
 * (width * sizeof(uint16_t) bytes) and the low 16 bits of the return values
 * must match. `report` is the test name handed to check_func().
 * An allocation failure is reported through fail() and the test is skipped.
 */
static void check_add_left_pred_16(LLVidDSPContext c, unsigned mask, int width, unsigned acc, const char * report)
{
    int res0, res1;
    uint16_t *dst0 = av_mallocz_array(width, sizeof(uint16_t));
    uint16_t *dst1 = av_mallocz_array(width, sizeof(uint16_t));
    uint16_t *src0 = av_mallocz_array(width, sizeof(uint16_t));
    uint16_t *src1 = av_mallocz_array(width, sizeof(uint16_t));
    declare_func_emms(AV_CPU_FLAG_MMX, int, uint16_t *dst, uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);

    if (!dst0 || !dst1 || !src0 || !src1) {
        /* Do not run the functions on NULL buffers. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint16_t, width);

    if (check_func(c.add_left_pred_int16, "%s", report)) {
        res0 = call_ref(dst0, src0, mask, width, acc);
        res1 = call_new(dst1, src1, mask, width, acc);
        /* Compare in bytes: `width` counts 16-bit elements, not bytes. */
        if ((res0 & 0xFFFF) != (res1 & 0xFFFF) ||
            memcmp(dst0, dst1, width * sizeof(*dst0)))
            fail();
        bench_new(dst1, src1, mask, width, acc);
    }

end:
    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
|
||||
|
||||
/*
 * Compare the reference and the optimized add_gradient_pred implementation.
 * Each buffer holds two lines of `stride` = w + 32 bytes plus padding; the
 * function is called on the second line, 32 bytes in, with width `w`.
 * Afterwards the first line and the whole second line must be identical in
 * both buffers.
 * An allocation failure is reported through fail() and the test is skipped.
 */
static void check_add_gradient_pred(LLVidDSPContext c, int w) {
    int src_size, stride;
    uint8_t *src0, *src1;
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *src, const ptrdiff_t stride,
                      const ptrdiff_t width);

    stride = w + 32;
    src_size = (stride + 32) * 2; /* dsp need previous line, and ignore the start of the line */
    src0 = av_mallocz(src_size);
    src1 = av_mallocz(src_size);

    if (!src0 || !src1) {
        /* Do not run the functions on NULL buffers. */
        fail();
        goto end;
    }

    init_buffer(src0, src1, uint8_t, src_size);

    if (check_func(c.add_gradient_pred, "add_gradient_pred")) {
        call_ref(src0 + stride + 32, stride, w);
        call_new(src1 + stride + 32, stride, w);
        if (memcmp(src0, src1, stride) || /* previous line doesn't change */
            memcmp(src0 + stride, src1 + stride, w + 32)) {
            fail();
        }
        bench_new(src1 + stride + 32, stride, w);
    }

end:
    av_free(src0);
    av_free(src1);
}
|
||||
|
||||
void checkasm_check_llviddsp(void)
|
||||
{
|
||||
LLVidDSPContext c;
|
||||
int width = 16 * av_clip(rnd(), 16, 128);
|
||||
int accRnd = rnd() & 0xFF;
|
||||
|
||||
ff_llviddsp_init(&c);
|
||||
|
||||
check_add_bytes(c, width);
|
||||
report("add_bytes");
|
||||
|
||||
check_add_median_pred(c, width);
|
||||
report("add_median_pred");
|
||||
|
||||
check_add_left_pred(c, width, 0, "add_left_pred_zero");
|
||||
report("add_left_pred_zero");
|
||||
|
||||
check_add_left_pred(c, width, accRnd, "add_left_pred_rnd_acc");
|
||||
report("add_left_pred_rnd_acc");
|
||||
|
||||
check_add_left_pred_16(c, 255, width, accRnd, "add_left_pred_int16");
|
||||
report("add_left_pred_int16");
|
||||
|
||||
check_add_gradient_pred(c, width);
|
||||
report("add_gradient_pred");
|
||||
}
|
114
externals/ffmpeg/tests/checkasm/llviddspenc.c
vendored
Executable file
114
externals/ffmpeg/tests/checkasm/llviddspenc.c
vendored
Executable file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Alexandra Hájková
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#include "libavcodec/lossless_videoencdsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill `buf` with random data, one 32-bit rnd() value per 4 bytes, for
 * byte offsets 0, 4, 8, ... below `size`. Arguments are parenthesized so
 * that expression arguments expand with the intended precedence.
 */
#define randomize_buffers(buf, size)         \
    do {                                     \
        int j;                               \
        for (j = 0; j < (size); j += 4)      \
            AV_WN32((buf) + j, rnd());       \
    } while (0)
|
||||
|
||||
/* Plane geometries used by the tests below: width, height and stride. */
static const struct {
    uint8_t w, h, s;
} planes[] = {
    {  16,  16,  16 },
    {  21,  23,  25 },
    {  32,  17,  48 },
    {  15, 128,  16 },
    { 128, 127, 128 },
};

#define MAX_STRIDE 128
#define MAX_HEIGHT 127
|
||||
|
||||
/*
 * Compare the reference and the optimized diff_bytes implementation for the
 * width of every entry in planes[], then benchmark the optimized one with
 * the last entry's width.
 */
static void check_diff_bytes(LLVidEncDSPContext *c)
{
    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src2, [MAX_STRIDE]);
    LOCAL_ALIGNED_32(uint8_t, src3, [MAX_STRIDE]);
    int plane;

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
                      const uint8_t *src2, intptr_t w);

    memset(dst0, 0, MAX_STRIDE);
    memset(dst1, 0, MAX_STRIDE);

    /* src1 mirrors src0 and src3 mirrors src2. */
    randomize_buffers(src0, MAX_STRIDE);
    memcpy(src1, src0, MAX_STRIDE);
    randomize_buffers(src2, MAX_STRIDE);
    memcpy(src3, src2, MAX_STRIDE);

    if (!check_func(c->diff_bytes, "diff_bytes"))
        return;

    for (plane = 0; plane < 5; plane++) {
        const int w = planes[plane].w;

        call_ref(dst0, src0, src2, w);
        call_new(dst1, src1, src3, w);
        if (memcmp(dst0, dst1, w))
            fail();
    }

    bench_new(dst1, src0, src2, planes[4].w);
}
|
||||
|
||||
/*
 * Compare the reference and the optimized sub_left_predict implementation
 * for every geometry in planes[] (the first w * h output bytes must match),
 * then benchmark the optimized one with the last geometry.
 */
static void check_sub_left_pred(LLVidEncDSPContext *c)
{
    const int nb_planes = sizeof(planes) / sizeof(planes[0]);
    int i;
    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE * MAX_HEIGHT]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE * MAX_HEIGHT]);
    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT]);
    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT]);

    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src,
                      ptrdiff_t stride, ptrdiff_t width, int height);

    memset(dst0, 0, MAX_STRIDE * MAX_HEIGHT);
    memset(dst1, 0, MAX_STRIDE * MAX_HEIGHT);
    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT);

    if (check_func(c->sub_left_predict, "sub_left_predict")) {
        /* Every geometry is verified; the loop must not stop after the
         * first entry. */
        for (i = 0; i < nb_planes; i++) {
            call_ref(dst0, src0, planes[i].s, planes[i].w, planes[i].h);
            call_new(dst1, src1, planes[i].s, planes[i].w, planes[i].h);
            if (memcmp(dst0, dst1, planes[i].w * planes[i].h))
                fail();
        }
        bench_new(dst1, src0, planes[nb_planes - 1].s,
                  planes[nb_planes - 1].w, planes[nb_planes - 1].h);
    }
}
|
||||
|
||||
void checkasm_check_llviddspenc(void)
|
||||
{
|
||||
LLVidEncDSPContext c;
|
||||
ff_llvidencdsp_init(&c);
|
||||
|
||||
check_diff_bytes(&c);
|
||||
report("diff_bytes");
|
||||
|
||||
check_sub_left_pred(&c);
|
||||
report("sub_left_predict");
|
||||
}
|
103
externals/ffmpeg/tests/checkasm/opusdsp.c
vendored
Executable file
103
externals/ffmpeg/tests/checkasm/opusdsp.c
vendored
Executable file
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "libavcodec/opusdsp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill the first `len` floats of `buf` with random values computed as
 * rnd() / (UINT_MAX >> 5) - 16. Arguments are parenthesized so that
 * expression arguments expand with the intended precedence.
 */
#define randomize_float(buf, len)                                   \
    do {                                                            \
        for (int i = 0; i < (len); i++) {                           \
            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;       \
            (buf)[i] = f;                                           \
        }                                                           \
    } while (0)

#define EPS 0.005
#define MAX_SIZE (960)
|
||||
|
||||
/* period is between 15 and 1022, inclusive */
|
||||
static void test_postfilter(int period)
|
||||
{
|
||||
LOCAL_ALIGNED(16, float, data0, [MAX_SIZE + 1024]);
|
||||
LOCAL_ALIGNED(16, float, data1, [MAX_SIZE + 1024]);
|
||||
|
||||
/* This filter can explode very easily, so use a tapset from the codec.
|
||||
* In the codec these are usually multiplied by at least 0.09375f,
|
||||
* so its outside the largest filter value, but the filter is still stable
|
||||
* so use it. */
|
||||
float gains[3] = { 0.3066406250f, 0.2170410156f, 0.1296386719f };
|
||||
|
||||
/* The codec will always call with an offset which is aligned once
|
||||
* (period + 2) is subtracted, but here we have to align it outselves. */
|
||||
int offset = FFALIGN(period + 2, 4);
|
||||
|
||||
declare_func(void, float *data, int period, float *gains, int len);
|
||||
|
||||
randomize_float(data0, MAX_SIZE + 1024);
|
||||
memcpy(data1, data0, (MAX_SIZE + 1024)*sizeof(float));
|
||||
|
||||
call_ref(data0 + offset, period, gains, MAX_SIZE);
|
||||
call_new(data1 + offset, period, gains, MAX_SIZE);
|
||||
|
||||
if (!float_near_abs_eps_array(data0 + offset, data1 + offset, EPS, MAX_SIZE))
|
||||
fail();
|
||||
bench_new(data1 + offset, period, gains, MAX_SIZE);
|
||||
}
|
||||
|
||||
static void test_deemphasis(void)
|
||||
{
|
||||
LOCAL_ALIGNED(16, float, src, [FFALIGN(MAX_SIZE, 4)]);
|
||||
LOCAL_ALIGNED(16, float, dst0, [FFALIGN(MAX_SIZE, 4)]);
|
||||
LOCAL_ALIGNED(16, float, dst1, [FFALIGN(MAX_SIZE, 4)]);
|
||||
float coeff0 = (float)rnd() / (UINT_MAX >> 5) - 16.0f, coeff1 = coeff0;
|
||||
|
||||
declare_func_float(float, float *out, float *in, float coeff, int len);
|
||||
|
||||
randomize_float(src, MAX_SIZE);
|
||||
|
||||
coeff0 = call_ref(dst0, src, coeff0, MAX_SIZE);
|
||||
coeff1 = call_new(dst1, src, coeff1, MAX_SIZE);
|
||||
|
||||
if (!float_near_abs_eps(coeff0, coeff1, EPS) ||
|
||||
!float_near_abs_eps_array(dst0, dst1, EPS, MAX_SIZE))
|
||||
fail();
|
||||
bench_new(dst1, src, coeff1, MAX_SIZE);
|
||||
}
|
||||
|
||||
void checkasm_check_opusdsp(void)
|
||||
{
|
||||
OpusDSP ctx;
|
||||
ff_opus_dsp_init(&ctx);
|
||||
|
||||
if (check_func(ctx.postfilter, "postfilter_15"))
|
||||
test_postfilter(15);
|
||||
report("postfilter_15");
|
||||
|
||||
if (check_func(ctx.postfilter, "postfilter_512"))
|
||||
test_postfilter(512);
|
||||
report("postfilter_512");
|
||||
|
||||
if (check_func(ctx.postfilter, "postfilter_1022"))
|
||||
test_postfilter(1022);
|
||||
report("postfilter_1022");
|
||||
|
||||
if (check_func(ctx.deemphasis, "deemphasis"))
|
||||
test_deemphasis();
|
||||
report("deemphasis");
|
||||
}
|
111
externals/ffmpeg/tests/checkasm/pixblockdsp.c
vendored
Executable file
111
externals/ffmpeg/tests/checkasm/pixblockdsp.c
vendored
Executable file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Tiancheng "Timothy" Gu
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/pixblockdsp.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define BUF_UNITS 8
#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS)

/*
 * Refill the caller's buffers with random data, 4 bytes at a time.
 * Each pair (src10/src11, src20/src21, dst0_/dst1_) receives identical
 * content, drawn from its own rnd() value.
 */
#define randomize_buffers()                              \
    do {                                                 \
        int pos;                                         \
        for (pos = 0; pos < BUF_SIZE; pos += 4) {        \
            uint32_t val;                                \
                                                         \
            val = rnd();                                 \
            AV_WN32A(src10 + pos, val);                  \
            AV_WN32A(src11 + pos, val);                  \
                                                         \
            val = rnd();                                 \
            AV_WN32A(src20 + pos, val);                  \
            AV_WN32A(src21 + pos, val);                  \
                                                         \
            val = rnd();                                 \
            AV_WN32A(dst0_ + pos, val);                  \
            AV_WN32A(dst1_ + pos, val);                  \
        }                                                \
    } while (0)
|
||||
|
||||
/*
 * Check the function selected by the preceding check_func() BUF_UNITS
 * times at increasing offsets. Source offsets advance by an extra 8 bytes
 * per unit when `aligned` is set and by 1 byte otherwise. The sources must
 * stay identical and the full destination buffers must match.
 */
#define check_get_pixels(type, aligned)                                                                        \
    do {                                                                                                       \
        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);  \
        int unit;                                                                                              \
                                                                                                               \
        for (unit = 0; unit < BUF_UNITS; unit++) {                                                             \
            int src_offset = unit * 64 * sizeof(type) + (aligned ? 8 : 1) * unit;                              \
            int dst_offset = unit * 64; /* dst must be aligned */                                              \
                                                                                                               \
            randomize_buffers();                                                                               \
            call_ref(dst0 + dst_offset, src10 + src_offset, 8);                                                \
            call_new(dst1 + dst_offset, src11 + src_offset, 8);                                                \
            if (memcmp(src10, src11, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))                                \
                fail();                                                                                        \
            bench_new(dst1 + dst_offset, src11 + src_offset, 8);                                               \
        }                                                                                                      \
    } while (0)
|
||||
|
||||
/*
 * Two-source variant of the pixel check: runs the function selected by the
 * preceding check_func() BUF_UNITS times at increasing offsets. Both source
 * pairs must stay identical and the full destination buffers must match.
 */
#define check_diff_pixels(type, aligned)                                                                                              \
    do {                                                                                                                              \
        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
        int unit;                                                                                                                     \
                                                                                                                                      \
        for (unit = 0; unit < BUF_UNITS; unit++) {                                                                                    \
            int src_offset = unit * 64 * sizeof(type) + (aligned ? 8 : 1) * unit;                                                     \
            int dst_offset = unit * 64; /* dst must be aligned */                                                                     \
                                                                                                                                      \
            randomize_buffers();                                                                                                      \
            call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8);                                                   \
            call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);                                                   \
            if (memcmp(src10, src11, BUF_SIZE) ||                                                                                     \
                memcmp(src20, src21, BUF_SIZE) ||                                                                                     \
                memcmp(dst0, dst1, BUF_SIZE))                                                                                         \
                fail();                                                                                                               \
            bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);                                                  \
        }                                                                                                                             \
    } while (0)
|
||||
|
||||
void checkasm_check_pixblockdsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
|
||||
uint16_t *dst0 = (uint16_t *)dst0_;
|
||||
uint16_t *dst1 = (uint16_t *)dst1_;
|
||||
PixblockDSPContext h;
|
||||
AVCodecContext avctx = {
|
||||
.bits_per_raw_sample = 8,
|
||||
};
|
||||
|
||||
ff_pixblockdsp_init(&h, &avctx);
|
||||
|
||||
if (check_func(h.get_pixels, "get_pixels"))
|
||||
check_get_pixels(uint8_t, 1);
|
||||
if (check_func(h.get_pixels_unaligned, "get_pixels_unaligned"))
|
||||
check_get_pixels(uint8_t, 0);
|
||||
|
||||
report("get_pixels");
|
||||
|
||||
if (check_func(h.diff_pixels, "diff_pixels"))
|
||||
check_diff_pixels(uint8_t, 1);
|
||||
if (check_func(h.diff_pixels_unaligned, "diff_pixels_unaligned"))
|
||||
check_diff_pixels(uint8_t, 0);
|
||||
|
||||
report("diff_pixels");
|
||||
}
|
300
externals/ffmpeg/tests/checkasm/sbrdsp.c
vendored
Executable file
300
externals/ffmpeg/tests/checkasm/sbrdsp.c
vendored
Executable file
@@ -0,0 +1,300 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "libavcodec/sbrdsp.h"
|
||||
#include <float.h>
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill the first `len` elements of `buf` with random values computed as
 * rnd() / UINT_MAX. Both arguments are parenthesized so that expression
 * arguments expand with the intended precedence.
 */
#define randomize(buf, len) do {                                \
    int i;                                                      \
    for (i = 0; i < (len); i++) {                               \
        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
        (buf)[i] = f;                                           \
    }                                                           \
} while (0)

#define EPS 0.0001
|
||||
|
||||
static void test_sum64x5(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
|
||||
|
||||
declare_func(void, INTFLOAT *z);
|
||||
|
||||
randomize((INTFLOAT *)dst0, 64 + 256);
|
||||
memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
|
||||
call_ref(dst0);
|
||||
call_new(dst1);
|
||||
if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
|
||||
fail();
|
||||
bench_new(dst1);
|
||||
}
|
||||
|
||||
static void test_sum_square(void)
|
||||
{
|
||||
INTFLOAT res0;
|
||||
INTFLOAT res1;
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
|
||||
double t = 4 * 256;
|
||||
|
||||
declare_func_float(INTFLOAT, INTFLOAT (*x)[2], int n);
|
||||
|
||||
randomize((INTFLOAT *)src, 256 * 2);
|
||||
res0 = call_ref(src, 256);
|
||||
res1 = call_new(src, 256);
|
||||
if (!float_near_abs_eps(res0, res1, t * 2 * FLT_EPSILON))
|
||||
fail();
|
||||
bench_new(src, 256);
|
||||
}
|
||||
|
||||
static void test_neg_odd_64(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
|
||||
|
||||
declare_func(void, INTFLOAT *x);
|
||||
|
||||
randomize((INTFLOAT *)dst0, 64);
|
||||
memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
|
||||
call_ref(dst0);
|
||||
call_new(dst1);
|
||||
if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
|
||||
fail();
|
||||
bench_new(dst1);
|
||||
}
|
||||
|
||||
static void test_qmf_pre_shuffle(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
|
||||
|
||||
declare_func(void, INTFLOAT *z);
|
||||
|
||||
randomize((INTFLOAT *)dst0, 128);
|
||||
memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
|
||||
call_ref(dst0);
|
||||
call_new(dst1);
|
||||
if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
|
||||
fail();
|
||||
bench_new(dst1);
|
||||
}
|
||||
|
||||
static void test_qmf_post_shuffle(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
|
||||
|
||||
declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
|
||||
|
||||
randomize((INTFLOAT *)src, 64);
|
||||
call_ref(dst0, src);
|
||||
call_new(dst1, src);
|
||||
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
|
||||
fail();
|
||||
bench_new(dst1, src);
|
||||
}
|
||||
|
||||
static void test_qmf_deint_neg(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
|
||||
|
||||
declare_func(void, INTFLOAT *v, const INTFLOAT *src);
|
||||
|
||||
randomize((INTFLOAT *)src, 64);
|
||||
call_ref(dst0, src);
|
||||
call_new(dst1, src);
|
||||
if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
|
||||
fail();
|
||||
bench_new(dst1, src);
|
||||
}
|
||||
|
||||
static void test_qmf_deint_bfly(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
|
||||
|
||||
declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
|
||||
|
||||
memset(dst0, 0, 128 * sizeof(INTFLOAT));
|
||||
memset(dst1, 0, 128 * sizeof(INTFLOAT));
|
||||
|
||||
randomize((INTFLOAT *)src0, 64);
|
||||
randomize((INTFLOAT *)src1, 64);
|
||||
call_ref(dst0, src0, src1);
|
||||
call_new(dst1, src0, src1);
|
||||
if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
|
||||
fail();
|
||||
bench_new(dst1, src0, src1);
|
||||
}
|
||||
|
||||
static void test_autocorrelate(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
|
||||
|
||||
declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
|
||||
|
||||
memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
|
||||
memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
|
||||
|
||||
randomize((INTFLOAT *)src, 80);
|
||||
call_ref(src, dst0);
|
||||
call_new(src, dst1);
|
||||
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
|
||||
fail();
|
||||
bench_new(src, dst1);
|
||||
}
|
||||
|
||||
static void test_hf_gen(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
|
||||
INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
|
||||
int i;
|
||||
|
||||
declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
|
||||
const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
|
||||
INTFLOAT bw, int start, int end);
|
||||
|
||||
randomize((INTFLOAT *)low, 128 * 2);
|
||||
randomize((INTFLOAT *)alpha0, 2);
|
||||
randomize((INTFLOAT *)alpha1, 2);
|
||||
for (i = 2; i < 64; i += 2) {
|
||||
memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
|
||||
memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
|
||||
call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128);
|
||||
call_new(dst1, low, alpha0, alpha1, 0.0, i, 128);
|
||||
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
|
||||
fail();
|
||||
bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_hf_g_filt(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
|
||||
|
||||
declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
|
||||
const INTFLOAT *g_filt, int m_max, intptr_t ixh);
|
||||
|
||||
randomize((INTFLOAT *)high, 128 * 40 * 2);
|
||||
randomize((INTFLOAT *)g_filt, 128);
|
||||
|
||||
call_ref(dst0, high, g_filt, 128, 20);
|
||||
call_new(dst1, high, g_filt, 128, 20);
|
||||
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
|
||||
fail();
|
||||
bench_new(dst1, high, g_filt, 128, 20);
|
||||
}
|
||||
|
||||
static void test_hf_apply_noise(const SBRDSPContext *sbrdsp)
|
||||
{
|
||||
LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
|
||||
LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
|
||||
LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
|
||||
int noise = 0x2a;
|
||||
int i, j;
|
||||
|
||||
declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
|
||||
const AAC_FLOAT *q_filt, int noise,
|
||||
int kx, int m_max);
|
||||
|
||||
randomize((INTFLOAT *)ref, 128 * 2);
|
||||
randomize((INTFLOAT *)s_m, 128);
|
||||
randomize((INTFLOAT *)q_filt, 128);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (check_func(sbrdsp->hf_apply_noise[i], "hf_apply_noise_%d", i)) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
|
||||
memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
|
||||
call_ref(dst0, s_m, q_filt, noise, j, 128);
|
||||
call_new(dst1, s_m, q_filt, noise, j, 128);
|
||||
if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
|
||||
fail();
|
||||
bench_new(dst1, s_m, q_filt, noise, j, 128);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void checkasm_check_sbrdsp(void)
|
||||
{
|
||||
SBRDSPContext sbrdsp;
|
||||
|
||||
ff_sbrdsp_init(&sbrdsp);
|
||||
|
||||
if (check_func(sbrdsp.sum64x5, "sum64x5"))
|
||||
test_sum64x5();
|
||||
report("sum64x5");
|
||||
|
||||
if (check_func(sbrdsp.sum_square, "sum_square"))
|
||||
test_sum_square();
|
||||
report("sum_square");
|
||||
|
||||
if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
|
||||
test_neg_odd_64();
|
||||
report("neg_odd_64");
|
||||
|
||||
if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
|
||||
test_qmf_pre_shuffle();
|
||||
report("qmf_pre_shuffle");
|
||||
|
||||
if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
|
||||
test_qmf_post_shuffle();
|
||||
report("qmf_post_shuffle");
|
||||
|
||||
if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
|
||||
test_qmf_deint_neg();
|
||||
report("qmf_deint_neg");
|
||||
|
||||
if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
|
||||
test_qmf_deint_bfly();
|
||||
report("qmf_deint_bfly");
|
||||
|
||||
if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
|
||||
test_autocorrelate();
|
||||
report("autocorrelate");
|
||||
|
||||
if (check_func(sbrdsp.hf_gen, "hf_gen"))
|
||||
test_hf_gen();
|
||||
report("hf_gen");
|
||||
|
||||
if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
|
||||
test_hf_g_filt();
|
||||
report("hf_g_filt");
|
||||
|
||||
test_hf_apply_noise(&sbrdsp);
|
||||
report("hf_apply_noise");
|
||||
}
|
206
externals/ffmpeg/tests/checkasm/sw_rgb.c
vendored
Executable file
206
externals/ffmpeg/tests/checkasm/sw_rgb.c
vendored
Executable file
@@ -0,0 +1,206 @@
|
||||
/*
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#include "libswscale/rgb2rgb.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill `size` bytes starting at `buf` with values from rnd(), written four
 * bytes at a time with AV_WN32. When `size` is not a multiple of 4 the last
 * write extends past `size`, so the buffer must have room for that.
 * Arguments are parenthesised so that expression arguments expand safely.
 */
#define randomize_buffers(buf, size)          \
    do {                                      \
        int j;                                \
        for (j = 0; j < (size); j += 4)       \
            AV_WN32((buf) + j, rnd());        \
    } while (0)
|
||||
|
||||
static const uint8_t width[] = {12, 16, 20, 32, 36, 128};
|
||||
static const struct {uint8_t w, h, s;} planes[] = {
|
||||
{12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128}
|
||||
};
|
||||
|
||||
#define MAX_STRIDE 128
|
||||
#define MAX_HEIGHT 128
|
||||
|
||||
static void check_shuffle_bytes(void * func, const char * report)
|
||||
{
|
||||
int i;
|
||||
LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
memset(dst0, 0, MAX_STRIDE);
|
||||
memset(dst1, 0, MAX_STRIDE);
|
||||
randomize_buffers(src0, MAX_STRIDE);
|
||||
memcpy(src1, src0, MAX_STRIDE);
|
||||
|
||||
if (check_func(func, "%s", report)) {
|
||||
for (i = 0; i < 6; i ++) {
|
||||
call_ref(src0, dst0, width[i]);
|
||||
call_new(src1, dst1, width[i]);
|
||||
if (memcmp(dst0, dst1, MAX_STRIDE))
|
||||
fail();
|
||||
}
|
||||
bench_new(src0, dst0, width[5]);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_uyvy_to_422p(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
const uint8_t *src, int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
|
||||
randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT * 2);
|
||||
memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2);
|
||||
|
||||
if (check_func(uyvytoyuv422, "uyvytoyuv422")) {
|
||||
for (i = 0; i < 6; i ++) {
|
||||
memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT);
|
||||
memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT);
|
||||
memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
|
||||
memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
|
||||
memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
|
||||
memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
|
||||
|
||||
call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h,
|
||||
MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
|
||||
call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h,
|
||||
MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
|
||||
if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) ||
|
||||
memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) ||
|
||||
memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT))
|
||||
fail();
|
||||
}
|
||||
bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h,
|
||||
MAX_STRIDE, MAX_STRIDE / 2, planes[5].s);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_interleave_bytes(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
|
||||
LOCAL_ALIGNED_16(uint8_t, src1_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
|
||||
// Intentionally using unaligned buffers, as this function doesn't have
|
||||
// any alignment requirements.
|
||||
uint8_t *src0 = src0_buf + 1;
|
||||
uint8_t *src1 = src1_buf + 1;
|
||||
uint8_t *dst0 = dst0_buf + 2;
|
||||
uint8_t *dst1 = dst1_buf + 2;
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *, const uint8_t *,
|
||||
uint8_t *, int, int, int, int, int);
|
||||
|
||||
randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
|
||||
randomize_buffers(src1, MAX_STRIDE * MAX_HEIGHT);
|
||||
|
||||
if (check_func(interleaveBytes, "interleave_bytes")) {
|
||||
for (int i = 0; i <= 16; i++) {
|
||||
// Try all widths [1,16], and try one random width.
|
||||
|
||||
int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2)));
|
||||
int h = 1 + (rnd() % (MAX_HEIGHT-2));
|
||||
|
||||
int src0_offset = 0, src0_stride = MAX_STRIDE;
|
||||
int src1_offset = 0, src1_stride = MAX_STRIDE;
|
||||
int dst_offset = 0, dst_stride = 2 * MAX_STRIDE;
|
||||
|
||||
memset(dst0, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
|
||||
memset(dst1, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
|
||||
|
||||
// Try different combinations of negative strides
|
||||
if (i & 1) {
|
||||
src0_offset = (h-1)*src0_stride;
|
||||
src0_stride = -src0_stride;
|
||||
}
|
||||
if (i & 2) {
|
||||
src1_offset = (h-1)*src1_stride;
|
||||
src1_stride = -src1_stride;
|
||||
}
|
||||
if (i & 4) {
|
||||
dst_offset = (h-1)*dst_stride;
|
||||
dst_stride = -dst_stride;
|
||||
}
|
||||
|
||||
call_ref(src0 + src0_offset, src1 + src1_offset, dst0 + dst_offset,
|
||||
w, h, src0_stride, src1_stride, dst_stride);
|
||||
call_new(src0 + src0_offset, src1 + src1_offset, dst1 + dst_offset,
|
||||
w, h, src0_stride, src1_stride, dst_stride);
|
||||
// Check a one pixel-pair edge around the destination area,
|
||||
// to catch overwrites past the end.
|
||||
checkasm_check(uint8_t, dst0, 2*MAX_STRIDE, dst1, 2*MAX_STRIDE,
|
||||
2 * w + 2, h + 1, "dst");
|
||||
}
|
||||
|
||||
bench_new(src0, src1, dst1, 127, MAX_HEIGHT,
|
||||
MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE);
|
||||
}
|
||||
if (check_func(interleaveBytes, "interleave_bytes_aligned")) {
|
||||
// Bench the function in a more typical case, with aligned
|
||||
// buffers and widths.
|
||||
bench_new(src0_buf, src1_buf, dst1_buf, 128, MAX_HEIGHT,
|
||||
MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE);
|
||||
}
|
||||
}
|
||||
|
||||
void checkasm_check_sw_rgb(void)
|
||||
{
|
||||
ff_sws_rgb2rgb_init();
|
||||
|
||||
check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103");
|
||||
report("shuffle_bytes_2103");
|
||||
|
||||
check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321");
|
||||
report("shuffle_bytes_0321");
|
||||
|
||||
check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230");
|
||||
report("shuffle_bytes_1230");
|
||||
|
||||
check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012");
|
||||
report("shuffle_bytes_3012");
|
||||
|
||||
check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210");
|
||||
report("shuffle_bytes_3210");
|
||||
|
||||
check_uyvy_to_422p();
|
||||
report("uyvytoyuv422");
|
||||
|
||||
check_interleave_bytes();
|
||||
report("interleave_bytes");
|
||||
}
|
134
externals/ffmpeg/tests/checkasm/sw_scale.c
vendored
Executable file
134
externals/ffmpeg/tests/checkasm/sw_scale.c
vendored
Executable file
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/*
 * Fill `size` bytes starting at `buf` with values from rnd(), written four
 * bytes at a time with AV_WN32. When `size` is not a multiple of 4 the last
 * write extends past `size`, so the buffer must have room for that.
 * Arguments are parenthesised so that expression arguments expand safely.
 */
#define randomize_buffers(buf, size)          \
    do {                                      \
        int j;                                \
        for (j = 0; j < (size); j += 4)       \
            AV_WN32((buf) + j, rnd());        \
    } while (0)
|
||||
|
||||
#define SRC_PIXELS 128
|
||||
|
||||
static void check_hscale(void)
|
||||
{
|
||||
#define MAX_FILTER_WIDTH 40
|
||||
#define FILTER_SIZES 5
|
||||
static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 };
|
||||
|
||||
#define HSCALE_PAIRS 2
|
||||
static const int hscale_pairs[HSCALE_PAIRS][2] = {
|
||||
{ 8, 14 },
|
||||
{ 8, 18 },
|
||||
};
|
||||
|
||||
int i, j, fsi, hpi, width;
|
||||
struct SwsContext *ctx;
|
||||
|
||||
// padded
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
|
||||
LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
|
||||
LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
|
||||
|
||||
// padded
|
||||
LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
|
||||
LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
|
||||
|
||||
// The dst parameter here is either int16_t or int32_t but we use void* to
|
||||
// just cover both cases.
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
|
||||
const uint8_t *src, const int16_t *filter,
|
||||
const int32_t *filterPos, int filterSize);
|
||||
|
||||
ctx = sws_alloc_context();
|
||||
if (sws_init_context(ctx, NULL, NULL) < 0)
|
||||
fail();
|
||||
|
||||
randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1);
|
||||
|
||||
for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
|
||||
for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
|
||||
width = filter_sizes[fsi];
|
||||
|
||||
ctx->srcBpc = hscale_pairs[hpi][0];
|
||||
ctx->dstBpc = hscale_pairs[hpi][1];
|
||||
ctx->hLumFilterSize = ctx->hChrFilterSize = width;
|
||||
|
||||
for (i = 0; i < SRC_PIXELS; i++) {
|
||||
filterPos[i] = i;
|
||||
|
||||
// These filter cofficients are chosen to try break two corner
|
||||
// cases, namely:
|
||||
//
|
||||
// - Negative filter coefficients. The filters output signed
|
||||
// values, and it should be possible to end up with negative
|
||||
// output values.
|
||||
//
|
||||
// - Positive clipping. The hscale filter function has clipping
|
||||
// at (1<<15) - 1
|
||||
//
|
||||
// The coefficients sum to the 1.0 point for the hscale
|
||||
// functions (1 << 14).
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
filter[i * width + j] = -((1 << 14) / (width - 1));
|
||||
}
|
||||
filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_FILTER_WIDTH; i++) {
|
||||
// These values should be unused in SIMD implementations but
|
||||
// may still be read, random coefficients here should help show
|
||||
// issues where they are used in error.
|
||||
|
||||
filter[SRC_PIXELS * width + i] = rnd();
|
||||
}
|
||||
ff_getSwsFunc(ctx);
|
||||
|
||||
if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
|
||||
memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
|
||||
memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
|
||||
|
||||
call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
|
||||
call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
|
||||
if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
|
||||
fail();
|
||||
bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
|
||||
}
|
||||
}
|
||||
}
|
||||
sws_freeContext(ctx);
|
||||
}
|
||||
|
||||
/* Entry point for the swscale checks: run the hscale check and report it. */
void checkasm_check_sw_scale(void)
{
    check_hscale();

    report("hscale");
}
|
121
externals/ffmpeg/tests/checkasm/synth_filter.c
vendored
Executable file
121
externals/ffmpeg/tests/checkasm/synth_filter.c
vendored
Executable file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Janne Grunau
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intfloat.h"
|
||||
#include "libavcodec/dcadata.h"
|
||||
#include "libavcodec/synth_filter.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
#define BUF_SIZE 32
|
||||
|
||||
/*
 * Fill the caller's `in` array (BUF_SIZE floats) with values derived from
 * rnd(): each value is scaled by (UINT_MAX >> 5) and offset by -16.
 */
#define randomize_input()                                              \
    do {                                                               \
        int k;                                                         \
        for (k = 0; k < BUF_SIZE; k++)                                 \
            in[k] = (float)rnd() / (UINT_MAX >> 5) - 16.0f;            \
    } while (0)
|
||||
|
||||
void checkasm_check_synth_filter(void)
|
||||
{
|
||||
FFTContext imdct;
|
||||
SynthFilterContext synth;
|
||||
|
||||
ff_mdct_init(&imdct, 6, 1, 1.0);
|
||||
ff_synth_filter_init(&synth);
|
||||
|
||||
if (check_func(synth.synth_filter_float, "synth_filter_float")) {
|
||||
LOCAL_ALIGNED(32, float, out0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, out1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, out_b, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, in, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, buf2_0, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, buf2_1, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, buf2_b, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED(32, float, buf0, [512]);
|
||||
LOCAL_ALIGNED(32, float, buf1, [512]);
|
||||
LOCAL_ALIGNED(32, float, buf_b, [512]);
|
||||
float scale = 1.0f;
|
||||
int i, offset0 = 0, offset1 = 0, offset_b = 0;
|
||||
|
||||
declare_func(void, FFTContext *, float *, int *, float[32], const float[512],
|
||||
float[32], float[32], float);
|
||||
|
||||
memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE);
|
||||
memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE);
|
||||
memset(buf2_b, 0, sizeof(*buf2_b) * BUF_SIZE);
|
||||
memset(buf0, 0, sizeof(*buf2_0) * 512);
|
||||
memset(buf1, 0, sizeof(*buf2_1) * 512);
|
||||
memset(buf_b, 0, sizeof(*buf2_b) * 512);
|
||||
|
||||
/* more than 1 synth_buf_offset wrap-around */
|
||||
for (i = 0; i < 20; i++) {
|
||||
int j;
|
||||
const float * window = (i & 1) ? ff_dca_fir_32bands_perfect : ff_dca_fir_32bands_nonperfect;
|
||||
|
||||
memset(out0, 0, sizeof(*out0) * BUF_SIZE);
|
||||
memset(out1, 0, sizeof(*out1) * BUF_SIZE);
|
||||
memset(out_b, 0, sizeof(*out_b) * BUF_SIZE);
|
||||
|
||||
randomize_input();
|
||||
|
||||
call_ref(&imdct, buf0, &offset0, buf2_0, window,
|
||||
out0, in, scale);
|
||||
call_new(&imdct, buf1, &offset1, buf2_1, window,
|
||||
out1, in, scale);
|
||||
|
||||
if (offset0 != offset1) {
|
||||
fail();
|
||||
fprintf(stderr, "offsets do not match: %d, %d", offset0, offset1);
|
||||
break;
|
||||
}
|
||||
|
||||
for (j = 0; j < BUF_SIZE; j++) {
|
||||
if (!float_near_abs_eps_ulp(out0[j], out1[j], 7.0e-7, 16) ||
|
||||
!float_near_abs_eps_ulp(buf2_0[j], buf2_1[j], 7.0e-7, 16)) {
|
||||
union av_intfloat32 o0, o1, b0, b1;
|
||||
|
||||
fail();
|
||||
o0.f = out0[j]; o1.f = out1[j];
|
||||
b0.f = buf2_0[j], b1.f = buf2_1[j];
|
||||
fprintf(stderr, "out: %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
|
||||
o0.f, o0.i, o1.f, o1.i, fabsf(o0.f - o1.f));
|
||||
fprintf(stderr, "buf2: %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
|
||||
b0.f, b0.i, b1.f, b1.i, fabsf(b0.f - b1.f));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bench_new(&imdct, buf_b, &offset_b, buf2_b, window,
|
||||
out_b, in, scale);
|
||||
}
|
||||
}
|
||||
ff_mdct_end(&imdct);
|
||||
|
||||
report("synth_filter");
|
||||
}
|
105
externals/ffmpeg/tests/checkasm/utvideodsp.c
vendored
Executable file
105
externals/ffmpeg/tests/checkasm/utvideodsp.c
vendored
Executable file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Jokyo Images
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/utvideodsp.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define WIDTH 240
|
||||
#define HEIGHT 120
|
||||
#define WIDTH_PADDED (WIDTH + 16) /* padded to 32 */
|
||||
#define BUFFER_SIZE (WIDTH_PADDED * HEIGHT)
|
||||
|
||||
|
||||
/*
 * Fill the first WIDTH elements of each of the HEIGHT rows of `buf` (row
 * pitch WIDTH_PADDED elements of `type`) with random values in 0..255. The
 * padding elements of each row are not written.
 */
#define randomize_plane(buf, type)                                      \
    do {                                                                \
        int row, col;                                                   \
        type *line = buf;                                               \
        for (row = 0; row < HEIGHT; row++, line += WIDTH_PADDED)        \
            for (col = 0; col < WIDTH; col++)                           \
                line[col] = rnd() & 0xFF;                               \
    } while (0)
|
||||
|
||||
/*
 * Compare two planes row by row, looking only at the first WIDTH elements
 * (of `s` bytes each) of every row; fail() is called for each row that
 * differs.
 */
#define cmp_plane(buf0, buf1, s)                                        \
    do {                                                                \
        int row;                                                        \
        for (row = 0; row < HEIGHT; row++) {                            \
            const int row_start = row*WIDTH_PADDED;                     \
            if (memcmp(buf0 + row_start, buf1 + row_start, WIDTH *s))   \
                fail();                                                 \
        }                                                               \
    } while (0)
|
||||
|
||||
|
||||
/*
 * Body of a restore_rgb_planes check for sample type `type`.
 *
 * Declares three planes for the reference version and three for the new
 * version, gives both sets identical random contents, runs both
 * implementations in place and compares the visible part of every plane.
 * Expands to declarations plus statements, so it must be the entire body of
 * the calling function; the caller supplies the final semicolon.
 */
#define CHECK_RESTORE(type)\
    LOCAL_ALIGNED_32(type, r_ref, [BUFFER_SIZE]);                                           \
    LOCAL_ALIGNED_32(type, g_ref, [BUFFER_SIZE]);                                           \
    LOCAL_ALIGNED_32(type, b_ref, [BUFFER_SIZE]);                                           \
    LOCAL_ALIGNED_32(type, r_new, [BUFFER_SIZE]);                                           \
    LOCAL_ALIGNED_32(type, g_new, [BUFFER_SIZE]);                                           \
    LOCAL_ALIGNED_32(type, b_new, [BUFFER_SIZE]);                                           \
    declare_func(void, type *src_r, type *src_g, type *src_b,                               \
                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,                                \
                 ptrdiff_t linesize_b, int width, int height);                              \
    /* Zero first so the row padding is identical in both sets. */                          \
    memset(r_ref, 0, BUFFER_SIZE * sizeof(*r_ref));                                         \
    memset(g_ref, 0, BUFFER_SIZE * sizeof(*g_ref));                                         \
    memset(b_ref, 0, BUFFER_SIZE * sizeof(*b_ref));                                         \
    randomize_plane(r_ref, type);                                                           \
    randomize_plane(g_ref, type);                                                           \
    randomize_plane(b_ref, type);                                                           \
    memcpy(r_new, r_ref, BUFFER_SIZE * sizeof(*r_new));                                     \
    memcpy(g_new, g_ref, BUFFER_SIZE * sizeof(*g_new));                                     \
    memcpy(b_new, b_ref, BUFFER_SIZE * sizeof(*b_new));                                     \
    call_ref(r_ref, g_ref, b_ref, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT); \
    call_new(r_new, g_new, b_new, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT); \
    cmp_plane(r_ref, r_new, sizeof(type));                                                  \
    cmp_plane(g_ref, g_new, sizeof(type));                                                  \
    cmp_plane(b_ref, b_new, sizeof(type));                                                  \
    bench_new(r_new, g_new, b_new, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT)
|
||||
|
||||
static void check_restore_rgb_planes(void) {
|
||||
CHECK_RESTORE(uint8_t);
|
||||
}
|
||||
|
||||
static void check_restore_rgb_planes10(void) {
|
||||
CHECK_RESTORE(uint16_t);
|
||||
}
|
||||
|
||||
void checkasm_check_utvideodsp(void)
|
||||
{
|
||||
UTVideoDSPContext h;
|
||||
|
||||
ff_utvideodsp_init(&h);
|
||||
|
||||
if (check_func(h.restore_rgb_planes, "restore_rgb_planes"))
|
||||
check_restore_rgb_planes();
|
||||
|
||||
report("restore_rgb_planes");
|
||||
|
||||
if (check_func(h.restore_rgb_planes10, "restore_rgb_planes10"))
|
||||
check_restore_rgb_planes10();
|
||||
|
||||
report("restore_rgb_planes10");
|
||||
}
|
77
externals/ffmpeg/tests/checkasm/v210dec.c
vendored
Executable file
77
externals/ffmpeg/tests/checkasm/v210dec.c
vendored
Executable file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2019 James Darnley
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/v210dec.h"
|
||||
|
||||
/*
 * Build one random 32-bit word holding three 10-bit fields at bit offsets
 * 0, 10 and 20 (in the order the random values are drawn). The top two
 * bits are left zero.
 */
static uint32_t get_v210(void)
{
    uint32_t word = 0;
    int shift;

    for (shift = 0; shift <= 20; shift += 10)
        word |= (uint32_t)(rnd() & 0x3ff) << shift;

    return word;
}
|
||||
|
||||
#define NUM_SAMPLES 2048
|
||||
|
||||
/*
 * Fill src0[0..len) and src1[0..len) with the same sequence of random
 * words from get_v210(), so both buffers end up identical.
 */
static void randomize_buffers(uint32_t *src0, uint32_t *src1, int len)
{
    int idx = 0;

    while (idx < len) {
        const uint32_t word = get_v210();

        src0[idx] = word;
        src1[idx] = word;
        idx++;
    }
}
|
||||
|
||||
void checkasm_check_v210dec(void)
|
||||
{
|
||||
V210DecContext h;
|
||||
|
||||
h.aligned_input = 0;
|
||||
ff_v210dec_init(&h);
|
||||
|
||||
if (check_func(h.unpack_frame, "v210_unpack")) {
|
||||
uint32_t src0[NUM_SAMPLES/3];
|
||||
uint32_t src1[NUM_SAMPLES/3];
|
||||
uint16_t y0[NUM_SAMPLES/2];
|
||||
uint16_t y1[NUM_SAMPLES/2];
|
||||
uint16_t u0[NUM_SAMPLES/4];
|
||||
uint16_t u1[NUM_SAMPLES/4];
|
||||
uint16_t v0[NUM_SAMPLES/4];
|
||||
uint16_t v1[NUM_SAMPLES/4];
|
||||
declare_func(void, const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
|
||||
const int pixels = NUM_SAMPLES / 2 / 6 * 6;
|
||||
|
||||
randomize_buffers(src0, src1, NUM_SAMPLES/3);
|
||||
call_ref(src0, y0, u0, v0, pixels);
|
||||
call_new(src1, y1, u1, v1, pixels);
|
||||
if (memcmp(src0, src1, NUM_SAMPLES/3 * sizeof src0[0])
|
||||
|| memcmp(y0, y1, pixels * sizeof y0[0])
|
||||
|| memcmp(u0, u1, pixels/2 * sizeof u0[0])
|
||||
|| memcmp(v0, v1, pixels/2 * sizeof v0[0]))
|
||||
fail();
|
||||
bench_new(src1, y1, u1, v1, pixels);
|
||||
}
|
||||
report("v210_unpack");
|
||||
}
|
94
externals/ffmpeg/tests/checkasm/v210enc.c
vendored
Executable file
94
externals/ffmpeg/tests/checkasm/v210enc.c
vendored
Executable file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Henrik Gramner
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/v210enc.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define BUF_SIZE 512
|
||||
|
||||
/*
 * Refill the caller's buffers with random data, 32 bits per write:
 *  - y0/y1 (BUF_SIZE elements) and u0/u1, v0/v1 (BUF_SIZE / 2 elements)
 *    receive rnd() & mask, with each pair getting identical contents;
 *  - the first width * 8 / 3 bytes of dst0/dst1 receive unmasked rnd().
 * The element size is taken from *y0. Relies on y0, y1, u0, u1, v0, v1,
 * dst0, dst1 and width being in scope at the point of use.
 */
#define randomize_buffers(mask)                             \
    do {                                                    \
        int pos, inc = 4 / (int)sizeof(*y0);                \
        uint32_t r;                                         \
        for (pos = 0; pos < BUF_SIZE; pos += inc) {         \
            r = rnd() & mask;                               \
            AV_WN32A(y0 + pos, r);                          \
            AV_WN32A(y1 + pos, r);                          \
        }                                                   \
        for (pos = 0; pos < BUF_SIZE / 2; pos += inc) {     \
            r = rnd() & mask;                               \
            AV_WN32A(u0 + pos, r);                          \
            AV_WN32A(u1 + pos, r);                          \
            r = rnd() & mask;                               \
            AV_WN32A(v0 + pos, r);                          \
            AV_WN32A(v1 + pos, r);                          \
        }                                                   \
        for (pos = 0; pos < width * 8 / 3; pos += 4) {      \
            r = rnd();                                      \
            AV_WN32A(dst0 + pos, r);                        \
            AV_WN32A(dst1 + pos, r);                        \
        }                                                   \
    } while (0)
|
||||
|
||||
/*
 * Check a pack_line function operating on samples of `type`.
 *
 * For every width that is a multiple of `step` below BUF_SIZE - 15, the
 * buffers are re-randomised (samples masked with `mask`), both
 * implementations are called at a random input offset, and the packed
 * output (width * 8 / 3 bytes) must match. The input buffers must also
 * still be identical afterwards; those comparisons cover the full buffers
 * in bytes (element count * sizeof(type)) so that inputs wider than one
 * byte per sample are checked completely.
 */
#define check_pack_line(type, mask)                                                \
    do {                                                                           \
        LOCAL_ALIGNED_16(type, y0, [BUF_SIZE]);                                    \
        LOCAL_ALIGNED_16(type, y1, [BUF_SIZE]);                                    \
        LOCAL_ALIGNED_16(type, u0, [BUF_SIZE / 2]);                                \
        LOCAL_ALIGNED_16(type, u1, [BUF_SIZE / 2]);                                \
        LOCAL_ALIGNED_16(type, v0, [BUF_SIZE / 2]);                                \
        LOCAL_ALIGNED_16(type, v1, [BUF_SIZE / 2]);                                \
        LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE * 8 / 3]);                       \
        LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE * 8 / 3]);                       \
                                                                                   \
        declare_func(void, const type * y, const type * u, const type * v,         \
                     uint8_t * dst, ptrdiff_t width);                              \
        ptrdiff_t width, step = 12 / sizeof(type);                                 \
                                                                                   \
        for (width = step; width < BUF_SIZE - 15; width += step) {                 \
            int y_offset  = rnd() & 15;                                            \
            int uv_offset = y_offset / 2;                                          \
            randomize_buffers(mask);                                               \
            call_ref(y0 + y_offset, u0 + uv_offset, v0 + uv_offset, dst0, width);  \
            call_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width);  \
            if (memcmp(y0, y1, BUF_SIZE * sizeof(type)) ||                         \
                memcmp(u0, u1, BUF_SIZE / 2 * sizeof(type)) ||                     \
                memcmp(v0, v1, BUF_SIZE / 2 * sizeof(type)) ||                     \
                memcmp(dst0, dst1, width * 8 / 3))                                 \
                fail();                                                            \
            bench_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width); \
        }                                                                          \
    } while (0)
|
||||
|
||||
void checkasm_check_v210enc(void)
|
||||
{
|
||||
V210EncContext h;
|
||||
|
||||
ff_v210enc_init(&h);
|
||||
|
||||
if (check_func(h.pack_line_8, "v210_planar_pack_8"))
|
||||
check_pack_line(uint8_t, 0xffffffff);
|
||||
|
||||
if (check_func(h.pack_line_10, "v210_planar_pack_10"))
|
||||
check_pack_line(uint16_t, 0x03ff03ff);
|
||||
|
||||
report("planar_pack");
|
||||
}
|
149
externals/ffmpeg/tests/checkasm/vf_blend.c
vendored
Executable file
149
externals/ffmpeg/tests/checkasm/vf_blend.c
vendored
Executable file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Tiancheng "Timothy" Gu
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/blend.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define WIDTH 256
|
||||
#define HEIGHT 256
|
||||
#define BUF_UNITS 3
|
||||
#define SIZE_PER_UNIT (WIDTH * HEIGHT)
|
||||
#define BUF_SIZE (BUF_UNITS * SIZE_PER_UNIT)
|
||||
|
||||
/*
 * Reset the six blend buffers. Expands against top1/top2, bot1/bot2 and
 * dst1/dst2 from the enclosing scope; each pair receives identical data.
 *
 *  - first unit of top:    every sample holds its row index
 *  - first unit of bottom: every sample holds its column index
 *  - everything else (all of dst, remaining units of top/bottom): random
 */
#define randomize_buffers()                                       \
    do {                                                          \
        int row, col, off;                                        \
        for (row = 0; row < HEIGHT; row++) {                      \
            for (col = 0; col < WIDTH; col++) {                   \
                const int px = row * WIDTH + col;                 \
                top1[px] = top2[px] = row;                        \
                bot1[px] = bot2[px] = col;                        \
            }                                                     \
        }                                                         \
        for (off = 0; off < SIZE_PER_UNIT; off += 4) {            \
            uint32_t r = rnd();                                   \
            AV_WN32A(dst1 + off, r);                              \
            AV_WN32A(dst2 + off, r);                              \
        }                                                         \
        /* Continue from where the previous loop stopped. */      \
        for (; off < BUF_SIZE; off += 4) {                        \
            uint32_t r = rnd();                                   \
            AV_WN32A(top1 + off, r);                              \
            AV_WN32A(top2 + off, r);                              \
            r = rnd();                                            \
            AV_WN32A(bot1 + off, r);                              \
            AV_WN32A(bot2 + off, r);                              \
            r = rnd();                                            \
            AV_WN32A(dst1 + off, r);                              \
            AV_WN32A(dst2 + off, r);                              \
        }                                                         \
    } while (0)
|
||||
|
||||
/*
 * Compare the reference and the tested blend function currently selected by
 * check_func(), then benchmark the tested one on a smaller area.
 *
 * depth: bytes per sample (1 or 2 at the call sites in this file).
 *
 * Expands against top1/top2, bot1/bot2, dst1/dst2 and `param` from the
 * enclosing scope. All three buffer pairs are compared over their full
 * size, so writes outside the intended destination area are detected too.
 */
#define check_blend_func(depth)                                                            \
    do {                                                                                   \
        int i, w;                                                                          \
        declare_func(void, const uint8_t *top, ptrdiff_t top_linesize,                     \
                     const uint8_t *bottom, ptrdiff_t bottom_linesize,                     \
                     uint8_t *dst, ptrdiff_t dst_linesize,                                 \
                     ptrdiff_t width, ptrdiff_t height,                                    \
                     struct FilterParams *param, double *values);                          \
        w = WIDTH / (depth);                                                               \
                                                                                           \
        for (i = 0; i < BUF_UNITS - 1; i++) {                                              \
            int src_offset = i * SIZE_PER_UNIT + (BUF_UNITS - 1 - i) * (depth); /* Test various alignments */ \
            int dst_offset = i * SIZE_PER_UNIT; /* dst must be aligned */                  \
            randomize_buffers();                                                           \
            call_ref(top1 + src_offset, w, bot1 + src_offset, w,                           \
                     dst1 + dst_offset, w, w, HEIGHT, &param, NULL);                       \
            call_new(top2 + src_offset, w, bot2 + src_offset, w,                           \
                     dst2 + dst_offset, w, w, HEIGHT, &param, NULL);                       \
            if (memcmp(top1, top2, BUF_SIZE) || memcmp(bot1, bot2, BUF_SIZE) ||            \
                memcmp(dst1, dst2, BUF_SIZE))                                              \
                fail();                                                                    \
        }                                                                                  \
        bench_new(top2, w / 4, bot2, w / 4, dst2, w / 4,                                   \
                  w / 4, HEIGHT / 4, &param, NULL);                                        \
    } while (0)
|
||||
|
||||
void checkasm_check_blend(void)
|
||||
{
|
||||
uint8_t *top1 = av_malloc(BUF_SIZE);
|
||||
uint8_t *top2 = av_malloc(BUF_SIZE);
|
||||
uint8_t *bot1 = av_malloc(BUF_SIZE);
|
||||
uint8_t *bot2 = av_malloc(BUF_SIZE);
|
||||
uint8_t *dst1 = av_malloc(BUF_SIZE);
|
||||
uint8_t *dst2 = av_malloc(BUF_SIZE);
|
||||
FilterParams param = {
|
||||
.opacity = 1.0,
|
||||
};
|
||||
|
||||
#define check_and_report(name, val, depth) \
|
||||
param.mode = val; \
|
||||
ff_blend_init(¶m, depth - 1); \
|
||||
if (check_func(param.blend, #name)) \
|
||||
check_blend_func(depth);
|
||||
|
||||
check_and_report(addition, BLEND_ADDITION, 1)
|
||||
check_and_report(grainmerge, BLEND_GRAINMERGE, 1)
|
||||
check_and_report(and, BLEND_AND, 1)
|
||||
check_and_report(average, BLEND_AVERAGE, 1)
|
||||
check_and_report(darken, BLEND_DARKEN, 1)
|
||||
check_and_report(grainextract, BLEND_GRAINEXTRACT, 1)
|
||||
check_and_report(hardmix, BLEND_HARDMIX, 1)
|
||||
check_and_report(lighten, BLEND_LIGHTEN, 1)
|
||||
check_and_report(multiply, BLEND_MULTIPLY, 1)
|
||||
check_and_report(or, BLEND_OR, 1)
|
||||
check_and_report(phoenix, BLEND_PHOENIX, 1)
|
||||
check_and_report(screen, BLEND_SCREEN, 1)
|
||||
check_and_report(subtract, BLEND_SUBTRACT, 1)
|
||||
check_and_report(xor, BLEND_XOR, 1)
|
||||
check_and_report(difference, BLEND_DIFFERENCE, 1)
|
||||
check_and_report(extremity, BLEND_EXTREMITY, 1)
|
||||
check_and_report(negation, BLEND_NEGATION, 1)
|
||||
|
||||
report("8bit");
|
||||
|
||||
check_and_report(addition_16, BLEND_ADDITION, 2)
|
||||
check_and_report(grainmerge_16, BLEND_GRAINMERGE, 2)
|
||||
check_and_report(and_16, BLEND_AND, 2)
|
||||
check_and_report(average_16, BLEND_AVERAGE, 2)
|
||||
check_and_report(darken_16, BLEND_DARKEN, 2)
|
||||
check_and_report(grainextract_16, BLEND_GRAINEXTRACT, 2)
|
||||
check_and_report(difference_16, BLEND_DIFFERENCE, 2)
|
||||
check_and_report(extremity_16, BLEND_EXTREMITY, 2)
|
||||
check_and_report(negation_16, BLEND_NEGATION, 2)
|
||||
check_and_report(lighten_16, BLEND_LIGHTEN, 2)
|
||||
check_and_report(or_16, BLEND_OR, 2)
|
||||
check_and_report(phoenix_16, BLEND_PHOENIX, 2)
|
||||
check_and_report(subtract_16, BLEND_SUBTRACT, 2)
|
||||
check_and_report(xor_16, BLEND_SUBTRACT, 2)
|
||||
|
||||
report("16bit");
|
||||
|
||||
av_freep(&top1);
|
||||
av_freep(&top2);
|
||||
av_freep(&bot1);
|
||||
av_freep(&bot2);
|
||||
av_freep(&dst1);
|
||||
av_freep(&dst2);
|
||||
}
|
314
externals/ffmpeg/tests/checkasm/vf_colorspace.c
vendored
Executable file
314
externals/ffmpeg/tests/checkasm/vf_colorspace.c
vendored
Executable file
@@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/colorspacedsp.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define W 64
|
||||
#define H 64
|
||||
|
||||
/*
 * Fill the three source planes with random samples for the current input
 * depth. Expands against src, idepth, ss_w and ss_h from the enclosing scope.
 *
 * Samples take one byte when idepth is 0 and two bytes otherwise; the two
 * chroma planes are shrunk by the combined subsampling shift.
 */
#define randomize_buffers()                                             \
    do {                                                                \
        const unsigned sample_mask = bpp_mask[idepth];                  \
        const int luma_bytes = W * H * (idepth ? 2 : 1);                \
        int plane, pos;                                                 \
        for (plane = 0; plane < 3; plane++) {                           \
            const int plane_bytes =                                     \
                plane ? luma_bytes >> (ss_w + ss_h) : luma_bytes;       \
            for (pos = 0; pos < plane_bytes; pos += 4) {                \
                unsigned r = rnd() & sample_mask;                       \
                AV_WN32A(&src[plane][pos], r);                          \
            }                                                           \
        }                                                               \
    } while (0)
|
||||
|
||||
/* Chroma layout names, indexed by the `fmt` loop variable of the checks. */
static const char *const format_string[] = {
    "444", "422", "420"
};

/*
 * Per-depth mask applied to each random 32-bit word of input samples,
 * indexed by the depth index (0, 1, 2) used by the checks.
 */
static const unsigned bpp_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
|
||||
|
||||
static void check_yuv2yuv(void)
|
||||
{
|
||||
declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
|
||||
uint8_t *src[3], ptrdiff_t src_stride[3],
|
||||
int w, int h, const int16_t coeff[3][3][8],
|
||||
const int16_t off[2][8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
int idepth, odepth, fmt, n;
|
||||
LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
|
||||
uint8_t *src[3] = { src_y, src_u, src_v };
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
|
||||
uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf;
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
offset[0][n] = offset[1][n] = 16;
|
||||
|
||||
coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
|
||||
coeff[0][1][n] = (1 << 7) - 1;
|
||||
coeff[0][2][n] = -(1 << 8);
|
||||
coeff[1][0][n] = coeff[2][0][n] = 0;
|
||||
coeff[1][1][n] = (1 << 14) + (1 << 7);
|
||||
coeff[1][2][n] = -(1 << 7);
|
||||
coeff[2][2][n] = (1 << 14) - (1 << 6);
|
||||
coeff[2][1][n] = 1 << 6;
|
||||
}
|
||||
for (idepth = 0; idepth < 3; idepth++) {
|
||||
for (odepth = 0; odepth < 3; odepth++) {
|
||||
for (fmt = 0; fmt < 3; fmt++) {
|
||||
if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
|
||||
"ff_colorspacedsp_yuv2yuv_%sp%dto%d",
|
||||
format_string[fmt],
|
||||
idepth * 2 + 8, odepth * 2 + 8)) {
|
||||
int ss_w = !!fmt, ss_h = fmt == 2;
|
||||
int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth;
|
||||
int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
|
||||
memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) ||
|
||||
memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("yuv2yuv");
|
||||
}
|
||||
|
||||
static void check_yuv2rgb(void)
|
||||
{
|
||||
declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
|
||||
uint8_t *src[3], ptrdiff_t src_stride[3],
|
||||
int w, int h, const int16_t coeff[3][3][8],
|
||||
const int16_t off[8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
int idepth, fmt, n;
|
||||
LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
|
||||
uint8_t *src[3] = { src_y, src_u, src_v };
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
|
||||
int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
LOCAL_ALIGNED_32(int16_t, offset, [8]);
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
offset[n] = 16;
|
||||
|
||||
coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
|
||||
coeff[0][1][n] = coeff[2][2][n] = 0;
|
||||
coeff[0][2][n] = 1 << 13;
|
||||
coeff[1][1][n] = -(1 << 12);
|
||||
coeff[1][2][n] = 1 << 12;
|
||||
coeff[2][1][n] = 1 << 11;
|
||||
}
|
||||
for (idepth = 0; idepth < 3; idepth++) {
|
||||
for (fmt = 0; fmt < 3; fmt++) {
|
||||
if (check_func(dsp.yuv2rgb[idepth][fmt],
|
||||
"ff_colorspacedsp_yuv2rgb_%sp%d",
|
||||
format_string[fmt], idepth * 2 + 8)) {
|
||||
int ss_w = !!fmt, ss_h = fmt == 2;
|
||||
int y_src_stride = W << !!idepth;
|
||||
int uv_src_stride = y_src_stride >> ss_w;
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(dst0, W, src,
|
||||
(ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
call_new(dst1, W, src,
|
||||
(ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
|
||||
memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
|
||||
memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("yuv2rgb");
|
||||
}
|
||||
|
||||
/*
 * From here on the source planes hold int16_t samples, so the byte-oriented
 * macro above is replaced. Expands against `src` from the enclosing scope.
 *
 * Every sample is a random 15-bit value lowered by (32768 - 28672) / 2.
 */
#undef randomize_buffers
#define randomize_buffers()                                              \
    do {                                                                 \
        int plane, pos;                                                  \
        for (plane = 0; plane < 3; plane++)                              \
            for (pos = 0; pos < W * H; pos++)                            \
                src[plane][pos] =                                        \
                    (int) (rnd() & 0x7fff) - ((32768 - 28672) >> 1);     \
    } while (0)
|
||||
|
||||
static void check_rgb2yuv(void)
|
||||
{
|
||||
declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
|
||||
int16_t *src[3], ptrdiff_t src_stride,
|
||||
int w, int h, const int16_t coeff[3][3][8],
|
||||
const int16_t off[8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
int odepth, fmt, n;
|
||||
LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
|
||||
int16_t *src[3] = { src_y, src_u, src_v };
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
|
||||
uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
LOCAL_ALIGNED_32(int16_t, offset, [8]);
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
offset[n] = 16;
|
||||
|
||||
// these somewhat resemble bt601/smpte170m coefficients
|
||||
coeff[0][0][n] = lrint(0.3 * (1 << 14));
|
||||
coeff[0][1][n] = lrint(0.6 * (1 << 14));
|
||||
coeff[0][2][n] = lrint(0.1 * (1 << 14));
|
||||
coeff[1][0][n] = lrint(-0.15 * (1 << 14));
|
||||
coeff[1][1][n] = lrint(-0.35 * (1 << 14));
|
||||
coeff[1][2][n] = lrint(0.5 * (1 << 14));
|
||||
coeff[2][0][n] = lrint(0.5 * (1 << 14));
|
||||
coeff[2][1][n] = lrint(-0.42 * (1 << 14));
|
||||
coeff[2][2][n] = lrint(-0.08 * (1 << 14));
|
||||
}
|
||||
for (odepth = 0; odepth < 3; odepth++) {
|
||||
for (fmt = 0; fmt < 3; fmt++) {
|
||||
if (check_func(dsp.rgb2yuv[odepth][fmt],
|
||||
"ff_colorspacedsp_rgb2yuv_%sp%d",
|
||||
format_string[fmt], odepth * 2 + 8)) {
|
||||
int ss_w = !!fmt, ss_h = fmt == 2;
|
||||
int y_dst_stride = W << !!odepth;
|
||||
int uv_dst_stride = y_dst_stride >> ss_w;
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, W, W, H, coeff, offset);
|
||||
call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, W, W, H, coeff, offset);
|
||||
if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
|
||||
memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) ||
|
||||
memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("rgb2yuv");
|
||||
}
|
||||
|
||||
static void check_multiply3x3(void)
|
||||
{
|
||||
declare_func(void, int16_t *data[3], ptrdiff_t stride,
|
||||
int w, int h, const int16_t coeff[3][3][8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
|
||||
int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
int16_t **src = dst0;
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
int n;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
coeff[0][0][n] = lrint(0.85 * (1 << 14));
|
||||
coeff[0][1][n] = lrint(0.10 * (1 << 14));
|
||||
coeff[0][2][n] = lrint(0.05 * (1 << 14));
|
||||
coeff[1][0][n] = lrint(-0.1 * (1 << 14));
|
||||
coeff[1][1][n] = lrint(0.95 * (1 << 14));
|
||||
coeff[1][2][n] = lrint(0.15 * (1 << 14));
|
||||
coeff[2][0][n] = lrint(-0.2 * (1 << 14));
|
||||
coeff[2][1][n] = lrint(0.30 * (1 << 14));
|
||||
coeff[2][2][n] = lrint(0.90 * (1 << 14));
|
||||
}
|
||||
if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
|
||||
randomize_buffers();
|
||||
memcpy(dst1_y, dst0_y, W * H * sizeof(*dst1_y));
|
||||
memcpy(dst1_u, dst0_u, W * H * sizeof(*dst1_u));
|
||||
memcpy(dst1_v, dst0_v, W * H * sizeof(*dst1_v));
|
||||
call_ref(dst0, W, W, H, coeff);
|
||||
call_new(dst1, W, W, H, coeff);
|
||||
if (memcmp(dst0[0], dst1[0], H * W * sizeof(*dst0_y)) ||
|
||||
memcmp(dst0[1], dst1[1], H * W * sizeof(*dst0_u)) ||
|
||||
memcmp(dst0[2], dst1[2], H * W * sizeof(*dst0_v))) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
|
||||
report("multiply3x3");
|
||||
}
|
||||
|
||||
/*
 * checkasm entry point for the colorspace DSP functions; each helper
 * issues its own report().
 */
void checkasm_check_colorspace(void)
{
    check_yuv2yuv();
    check_yuv2rgb();
    check_rgb2yuv();
    check_multiply3x3();
}
|
79
externals/ffmpeg/tests/checkasm/vf_eq.c
vendored
Executable file
79
externals/ffmpeg/tests/checkasm/vf_eq.c
vendored
Executable file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/avfilter.h"
|
||||
#include "libavfilter/vf_eq.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define WIDTH 256
|
||||
#define HEIGHT 256
|
||||
#define SRC_STRIDE 256
|
||||
#define PIXELS (WIDTH * HEIGHT)
|
||||
#define RANDOM_RANGE 80000
|
||||
#define SCALE 10000
|
||||
|
||||
/*
 * Fill the first `size` bytes of `buf` with random byte values.
 * Both arguments are parenthesized so that expressions such as `p + 1`
 * or `a + b` expand as intended.
 */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int j;                                  \
        uint8_t *tmp_buf = (uint8_t *)(buf);    \
        for (j = 0; j < (size); j++)            \
            tmp_buf[j] = rnd() & 0xFF;          \
    } while (0)
|
||||
|
||||
static void check_eq(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [PIXELS]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PIXELS]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_new, [PIXELS]);
|
||||
int w = WIDTH;
|
||||
int h = HEIGHT;
|
||||
int src_stride = SRC_STRIDE;
|
||||
int dst_stride = SRC_STRIDE;
|
||||
EQParameters pa;
|
||||
EQContext eq;
|
||||
declare_func(void, EQParameters *param, uint8_t *dst, int dst_stride,
|
||||
const uint8_t *src, int src_stride, int w, int h);
|
||||
|
||||
double rand_contrast = (int)(rnd() % (RANDOM_RANGE * 2) - RANDOM_RANGE) /
|
||||
(SCALE * 1.0);
|
||||
double rand_brightness = (int)(rnd() % (SCALE * 2) - SCALE) /
|
||||
(SCALE * 1.0);
|
||||
pa.contrast = rand_contrast;
|
||||
pa.brightness = rand_brightness;
|
||||
|
||||
memset(dst_ref, 0, PIXELS);
|
||||
memset(dst_new, 0, PIXELS);
|
||||
randomize_buffers(src, PIXELS);
|
||||
ff_eq_init(&eq);
|
||||
|
||||
if (check_func(eq.process, "process")) {
|
||||
call_ref(&pa, dst_ref, dst_stride, src, src_stride, w, h);
|
||||
call_new(&pa, dst_new, dst_stride, src, src_stride, w, h);
|
||||
if (memcmp(dst_ref, dst_new, PIXELS))
|
||||
fail();
|
||||
bench_new(&pa, dst_new, dst_stride, src, src_stride, w, h);
|
||||
}
|
||||
}
|
||||
|
||||
/* checkasm entry point for the eq filter; reported as "eq". */
void checkasm_check_vf_eq(void)
{
    check_eq();
    report("eq");
}
|
66
externals/ffmpeg/tests/checkasm/vf_gblur.c
vendored
Executable file
66
externals/ffmpeg/tests/checkasm/vf_gblur.c
vendored
Executable file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/gblur.h"
|
||||
|
||||
#define WIDTH 256
|
||||
#define HEIGHT 256
|
||||
#define PIXELS (WIDTH * HEIGHT)
|
||||
#define BUF_SIZE (PIXELS * 4)
|
||||
|
||||
/*
 * Fill the first `size` floats of `buf` with random whole values 0..255.
 * Both arguments are parenthesized so that compound expressions expand as
 * intended.
 */
#define randomize_buffers(buf, size)                \
    do {                                            \
        int j;                                      \
        float *tmp_buf = (float *)(buf);            \
        for (j = 0; j < (size); j++)                \
            tmp_buf[j] = (float)(rnd() & 0xFF);     \
    } while (0)
|
||||
|
||||
void checkasm_check_vf_gblur(void)
|
||||
{
|
||||
float *dst_ref = av_malloc(BUF_SIZE);
|
||||
float *dst_new = av_malloc(BUF_SIZE);
|
||||
int w = WIDTH;
|
||||
int h = HEIGHT;
|
||||
int steps = 2;
|
||||
float nu = 0.101f;
|
||||
float bscale = 1.112f;
|
||||
GBlurContext s;
|
||||
|
||||
declare_func(void, float *dst, int w, int h, int steps, float nu, float bscale);
|
||||
|
||||
randomize_buffers(dst_ref, PIXELS);
|
||||
memcpy(dst_new, dst_ref, BUF_SIZE);
|
||||
|
||||
ff_gblur_init(&s);
|
||||
|
||||
if (check_func(s.horiz_slice, "horiz_slice")) {
|
||||
call_ref(dst_ref, w, h, steps, nu, bscale);
|
||||
call_new(dst_new, w, h, steps, nu, bscale);
|
||||
|
||||
if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) {
|
||||
fail();
|
||||
}
|
||||
bench_new(dst_new, w, h, 1, nu, bscale);
|
||||
}
|
||||
report("horiz_slice");
|
||||
av_freep(&dst_ref);
|
||||
av_freep(&dst_new);
|
||||
}
|
76
externals/ffmpeg/tests/checkasm/vf_hflip.c
vendored
Executable file
76
externals/ffmpeg/tests/checkasm/vf_hflip.c
vendored
Executable file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/hflip.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#define WIDTH 256
/* Parenthesized so the sum stays intact inside larger expressions. */
#define WIDTH_PADDED (WIDTH + 32)
|
||||
|
||||
/*
 * Fill the first `size` bytes of `buf` with random byte values.
 * Both arguments are parenthesized so that expressions such as `p + 1`
 * or `a + b` expand as intended.
 */
#define randomize_buffers(buf, size)            \
    do {                                        \
        int j;                                  \
        uint8_t *tmp_buf = (uint8_t *)(buf);    \
        for (j = 0; j < (size); j++)            \
            tmp_buf[j] = rnd() & 0xFF;          \
    } while (0)
|
||||
|
||||
static void check_hflip(int step, const char * report_name){
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_ref, [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_new, [WIDTH_PADDED]);
|
||||
int w = WIDTH;
|
||||
int i;
|
||||
int step_array[4] = {1, 1, 1, 1};
|
||||
FlipContext s;
|
||||
|
||||
declare_func(void, const uint8_t *src, uint8_t *dst, int w);
|
||||
|
||||
memset(src, 0, WIDTH_PADDED);
|
||||
memset(dst_ref, 0, WIDTH_PADDED);
|
||||
memset(dst_new, 0, WIDTH_PADDED);
|
||||
randomize_buffers(src, WIDTH_PADDED);
|
||||
|
||||
if (step == 2) {
|
||||
w /= 2;
|
||||
for (i = 0; i < 4; i++)
|
||||
step_array[i] = step;
|
||||
}
|
||||
|
||||
ff_hflip_init(&s, step_array, 4);
|
||||
|
||||
if (check_func(s.flip_line[0], "hflip_%s", report_name)) {
|
||||
for (i = 1; i < w; i++) {
|
||||
call_ref(src + (w - 1) * step, dst_ref, i);
|
||||
call_new(src + (w - 1) * step, dst_new, i);
|
||||
if (memcmp(dst_ref, dst_new, i * step))
|
||||
fail();
|
||||
}
|
||||
bench_new(src + (w - 1) * step, dst_new, w);
|
||||
}
|
||||
}
|
||||
/*
 * checkasm entry point for the hflip filter: tests 1-byte and 2-byte
 * samples and reports each separately.
 */
void checkasm_check_vf_hflip(void)
{
    /* 1 byte per sample */
    check_hflip(1, "byte");
    report("hflip_byte");

    /* 2 bytes per sample */
    check_hflip(2, "short");
    report("hflip_short");
}
|
113
externals/ffmpeg/tests/checkasm/vf_nlmeans.c
vendored
Executable file
113
externals/ffmpeg/tests/checkasm/vf_nlmeans.c
vendored
Executable file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Clément Bœsch <u pkh me>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/vf_nlmeans.h"
|
||||
#include "libavutil/avassert.h"
|
||||
|
||||
/*
 * Fill `size` bytes of `buf` with random data, one 32-bit word at a time
 * (size / 4 words are written). Both arguments are parenthesized so that
 * compound expressions such as `a + b` expand as intended.
 */
#define randomize_buffer(buf, size) do {        \
    int i;                                      \
    for (i = 0; i < (size) / 4; i++)            \
        ((uint32_t *)(buf))[i] = rnd();         \
} while (0)
|
||||
|
||||
void checkasm_check_nlmeans(void)
|
||||
{
|
||||
NLMeansDSPContext dsp = {0};
|
||||
|
||||
const int w = 123; // source width
|
||||
const int h = 45; // source height
|
||||
const int p = 3; // patch half size
|
||||
const int r = 2; // research window half size
|
||||
|
||||
ff_nlmeans_init(&dsp);
|
||||
|
||||
/* See the filter's code for the explanations on the variables */
|
||||
if (check_func(dsp.compute_safe_ssd_integral_image, "ssd_integral_image")) {
|
||||
int offx, offy;
|
||||
const int e = p + r;
|
||||
const int ii_w = w + e*2;
|
||||
const int ii_h = h + e*2;
|
||||
const int ii_lz_32 = FFALIGN(ii_w + 1, 4);
|
||||
uint32_t *ii_orig_ref = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_ref));
|
||||
uint32_t *ii_ref = ii_orig_ref + ii_lz_32 + 1;
|
||||
uint32_t *ii_orig_new = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_new));
|
||||
uint32_t *ii_new = ii_orig_new + ii_lz_32 + 1;
|
||||
const int src_lz = FFALIGN(w, 16);
|
||||
uint8_t *src = av_mallocz_array(h, src_lz);
|
||||
|
||||
declare_func(void, uint32_t *dst, ptrdiff_t dst_linesize_32,
|
||||
const uint8_t *s1, ptrdiff_t linesize1,
|
||||
const uint8_t *s2, ptrdiff_t linesize2,
|
||||
int w, int h);
|
||||
|
||||
randomize_buffer(src, h * src_lz);
|
||||
|
||||
for (offy = -r; offy <= r; offy++) {
|
||||
for (offx = -r; offx <= r; offx++) {
|
||||
if (offx || offy) {
|
||||
const int s1x = e;
|
||||
const int s1y = e;
|
||||
const int s2x = e + offx;
|
||||
const int s2y = e + offy;
|
||||
const int startx_safe = FFMAX(s1x, s2x);
|
||||
const int starty_safe = FFMAX(s1y, s2y);
|
||||
const int u_endx_safe = FFMIN(s1x + w, s2x + w);
|
||||
const int endy_safe = FFMIN(s1y + h, s2y + h);
|
||||
const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
|
||||
const int safe_ph = endy_safe - starty_safe;
|
||||
|
||||
av_assert0(safe_pw && safe_ph);
|
||||
av_assert0(startx_safe - s1x >= 0); av_assert0(startx_safe - s1x < w);
|
||||
av_assert0(starty_safe - s1y >= 0); av_assert0(starty_safe - s1y < h);
|
||||
av_assert0(startx_safe - s2x >= 0); av_assert0(startx_safe - s2x < w);
|
||||
av_assert0(starty_safe - s2y >= 0); av_assert0(starty_safe - s2y < h);
|
||||
|
||||
memset(ii_ref, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref));
|
||||
memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
|
||||
|
||||
call_ref(ii_ref + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
|
||||
src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
|
||||
src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
|
||||
safe_pw, safe_ph);
|
||||
call_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
|
||||
src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
|
||||
src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
|
||||
safe_pw, safe_ph);
|
||||
|
||||
if (memcmp(ii_ref, ii_new, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref)))
|
||||
fail();
|
||||
|
||||
memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
|
||||
bench_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
|
||||
src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
|
||||
src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
|
||||
safe_pw, safe_ph);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
av_freep(&ii_orig_ref);
|
||||
av_freep(&ii_orig_new);
|
||||
av_freep(&src);
|
||||
}
|
||||
|
||||
report("dsp");
|
||||
}
|
85
externals/ffmpeg/tests/checkasm/vf_threshold.c
vendored
Executable file
85
externals/ffmpeg/tests/checkasm/vf_threshold.c
vendored
Executable file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/threshold.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/* Number of payload bytes processed per row. */
#define WIDTH 256
/*
 * Allocated row size: payload plus 32 bytes of padding.  Parenthesized so the
 * macro stays a single operand when it is used inside a larger expression.
 */
#define WIDTH_PADDED (WIDTH + 32)
|
||||
|
||||
/*
 * Fill the first `size` bytes of `buf` with the low byte of successive rnd()
 * values.  Both arguments are parenthesized so that expressions such as
 * `p + 1` or `cond ? a : b` can be passed safely.
 */
#define randomize_buffers(buf, size)              \
    do {                                          \
        int j;                                    \
        uint8_t *tmp_buf = (uint8_t *)(buf);      \
        for (j = 0; j < (size); j++)              \
            tmp_buf[j] = rnd() & 0xFF;            \
    } while (0)
|
||||
|
||||
static void check_threshold(int depth){
|
||||
LOCAL_ALIGNED_32(uint8_t, in , [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, threshold, [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, min , [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, max , [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, out_ref , [WIDTH_PADDED]);
|
||||
LOCAL_ALIGNED_32(uint8_t, out_new , [WIDTH_PADDED]);
|
||||
ptrdiff_t line_size = WIDTH_PADDED;
|
||||
int w = WIDTH;
|
||||
|
||||
declare_func(void, const uint8_t *in, const uint8_t *threshold,
|
||||
const uint8_t *min, const uint8_t *max, uint8_t *out,
|
||||
ptrdiff_t ilinesize, ptrdiff_t tlinesize,
|
||||
ptrdiff_t flinesize, ptrdiff_t slinesize,
|
||||
ptrdiff_t olinesize, int w, int h);
|
||||
|
||||
ThresholdContext s;
|
||||
s.depth = depth;
|
||||
ff_threshold_init(&s);
|
||||
|
||||
memset(in, 0, WIDTH_PADDED);
|
||||
memset(threshold, 0, WIDTH_PADDED);
|
||||
memset(min, 0, WIDTH_PADDED);
|
||||
memset(max, 0, WIDTH_PADDED);
|
||||
memset(out_ref, 0, WIDTH_PADDED);
|
||||
memset(out_new, 0, WIDTH_PADDED);
|
||||
randomize_buffers(in, WIDTH);
|
||||
randomize_buffers(threshold, WIDTH);
|
||||
randomize_buffers(min, WIDTH);
|
||||
randomize_buffers(max, WIDTH);
|
||||
|
||||
if (depth == 16)
|
||||
w /= 2;
|
||||
|
||||
if (check_func(s.threshold, "threshold%d", depth)) {
|
||||
call_ref(in, threshold, min, max, out_ref, line_size, line_size, line_size, line_size, line_size, w, 1);
|
||||
call_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
|
||||
if (memcmp(out_ref, out_new, WIDTH))
|
||||
fail();
|
||||
bench_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test entry point for the threshold filter kernels: runs check_threshold()
 * once for depth 8 and once for depth 16, reporting each run under its own
 * name.
 */
void checkasm_check_vf_threshold(void)
|
||||
{
|
||||
check_threshold(8);
|
||||
report("threshold8");
|
||||
|
||||
check_threshold(16);
|
||||
report("threshold16");
|
||||
}
|
89
externals/ffmpeg/tests/checkasm/videodsp.c
vendored
Executable file
89
externals/ffmpeg/tests/checkasm/videodsp.c
vendored
Executable file
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/videodsp.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/*
 * Fill the w x h elements of src0 (taken from the expanding scope) with
 * random data, 32 bits per store.  The arguments are parenthesized so that
 * compound expressions yield the intended element count.
 */
#define randomize_buffers(w, h)                                  \
    do {                                                         \
        int i;                                                   \
        for (i = 0; i < (w) * (h) * sizeof(*src0); i += 4)       \
            AV_WN32A(((uint8_t *) src0) + i, rnd());             \
    } while (0)
|
||||
|
||||
/*
 * Sweep `var` over [var_start, var_end] while `fix` is held at fix_val,
 * calling the reference and the candidate function for every position and
 * failing when their bw x bh outputs differ.  Uses src0/src1, dst0/dst1,
 * bw/bh, pw/ph, x and y from the expanding scope.
 */
#define iter_1d(type, fix, fix_val, var, var_start, var_end)                \
    for (fix = (fix_val), var = (var_start); var <= (var_end); var++) {     \
        call_ref((type *) dst0, (const type *) (src0 + y * pw + x),         \
                 bw * sizeof(type), pw * sizeof(type),                      \
                 bw, bh, x, y, pw, ph);                                     \
        call_new((type *) dst1, (const type *) (src1 + y * pw + x),         \
                 bw * sizeof(type), pw * sizeof(type),                      \
                 bw, bh, x, y, pw, ph);                                     \
        if (memcmp(dst0, dst1, bw * bh * sizeof(type)))                     \
            fail();                                                         \
        bench_new((type *) dst1, (const type *) (src1 + y * pw + x),        \
                  bw * sizeof(type), pw * sizeof(type),                     \
                  bw, bh, x, y, pw, ph);                                    \
    }

/*
 * Test one source-size / block-size combination: two identical random source
 * planes are created and the block origin is walked along four lines, one for
 * each extreme value of x and of y.  The size arguments are copied into
 * pw/ph/bw/bh once and those locals are used from then on.
 */
#define check_emu_edge_size(type, src_w, src_h, dst_w, dst_h)               \
    do {                                                                    \
        LOCAL_ALIGNED_16(type, src0, [(src_w) * (src_h)]);                  \
        LOCAL_ALIGNED_16(type, src1, [(src_w) * (src_h)]);                  \
        int bw = (dst_w), bh = (dst_h);                                     \
        int pw = (src_w), ph = (src_h);                                     \
        int y, x;                                                           \
        randomize_buffers(pw, ph);                                          \
        memcpy(src1, src0, pw * ph * sizeof(type));                         \
        iter_1d(type, y, -ph, x, -pw, pw);                                  \
        iter_1d(type, x,  pw, y, -ph, ph);                                  \
        iter_1d(type, y,  ph, x, -pw, pw);                                  \
        iter_1d(type, x, -pw, y, -ph, ph);                                  \
    } while (0)

/*
 * Declare the function signature under test and the two 64x64 destination
 * buffers, then run the sweep for 16x1, 16x16 and 64x64 sources.
 */
#define check_emu_edge(type)                                                \
    do {                                                                    \
        LOCAL_ALIGNED_16(type, dst0, [64 * 64]);                            \
        LOCAL_ALIGNED_16(type, dst1, [64 * 64]);                            \
        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,             \
                          void, type *dst, const type *src,                 \
                          ptrdiff_t dst_linesize,                           \
                          ptrdiff_t src_linesize,                           \
                          int block_w, int block_h,                         \
                          int src_x, int src_y,                             \
                          int src_w, int src_h);                            \
        check_emu_edge_size(type, 16,  1, 64, 64);                          \
        check_emu_edge_size(type, 16, 16, 64, 64);                          \
        check_emu_edge_size(type, 64, 64, 64, 64);                          \
    } while (0)
|
||||
|
||||
void checkasm_check_videodsp(void)
|
||||
{
|
||||
VideoDSPContext vdsp;
|
||||
|
||||
ff_videodsp_init(&vdsp, 8);
|
||||
if (check_func(vdsp.emulated_edge_mc, "emulated_edge_mc_8"))
|
||||
check_emu_edge(uint8_t);
|
||||
|
||||
report("emulated_edge_mc");
|
||||
}
|
519
externals/ffmpeg/tests/checkasm/vp8dsp.c
vendored
Executable file
519
externals/ffmpeg/tests/checkasm/vp8dsp.c
vendored
Executable file
@@ -0,0 +1,519 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Martin Storsjo
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/vp8dsp.h"
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
#define PIXEL_STRIDE 16
|
||||
|
||||
/*
 * Randomize a 4x4 block in both src and dst (one 32-bit store per row, src
 * first) and store the per-pixel difference src - dst into coef, row-major.
 */
#define randomize_buffers(src, dst, stride, coef)                          \
    do {                                                                   \
        int row, col;                                                      \
        for (row = 0; row < 4; row++) {                                    \
            AV_WN32A((src) + row * (stride), rnd());                       \
            AV_WN32A((dst) + row * (stride), rnd());                       \
            for (col = 0; col < 4; col++)                                  \
                (coef)[row * 4 + col] = (src)[row * (stride) + col] -      \
                                        (dst)[row * (stride) + col];       \
        }                                                                  \
    } while (0)
|
||||
|
||||
/*
 * In-place 4x4 transform of 16 row-major coefficients: a pass over each row
 * followed by a pass over each column, using fixed-point multipliers.
 */
static void dct4x4(int16_t *coef)
{
    int n;

    /* Row pass: inputs are scaled by 8 before the butterflies. */
    for (n = 0; n < 4; n++) {
        int16_t *row = coef + 4 * n;
        const int sum03  = (row[0] + row[3]) * 8;
        const int sum12  = (row[1] + row[2]) * 8;
        const int diff12 = (row[1] - row[2]) * 8;
        const int diff03 = (row[0] - row[3]) * 8;

        row[0] = sum03 + sum12;
        row[1] = (diff12 * 2217 + diff03 * 5352 + 14500) >> 12;
        row[2] = sum03 - sum12;
        row[3] = (diff03 * 2217 - diff12 * 5352 +  7500) >> 12;
    }

    /* Column pass: results are rounded and scaled back down. */
    for (n = 0; n < 4; n++) {
        int16_t *col = coef + n;
        const int sum03  = col[0] + col[12];
        const int sum12  = col[4] + col[8];
        const int diff12 = col[4] - col[8];
        const int diff03 = col[0] - col[12];

        col[0]  = (sum03 + sum12 + 7) >> 4;
        col[4]  = ((diff12 * 2217 + diff03 * 5352 + 12000) >> 16) + !!diff03;
        col[8]  = (sum03 - sum12 + 7) >> 4;
        col[12] = (diff03 * 2217 - diff12 * 5352 + 51000) >> 16;
    }
}
|
||||
|
||||
/*
 * One 4-point butterfly over v[0], v[step], v[2*step], v[3*step]; every
 * result is multiplied by `scale` before being stored back.
 */
static void wht4_butterfly(int16_t *v, int step, int scale)
{
    int a = v[0 * step];
    int b = v[1 * step];
    int c = v[2 * step];
    int d = v[3 * step];
    int e;

    a += b;
    d -= c;
    e  = (a - d) >> 1;
    b  = e - b;
    c  = e - c;
    a -= c;
    d += b;

    /* Outputs are stored in the permuted order a, c, d, b. */
    v[0 * step] = a * scale;
    v[1 * step] = c * scale;
    v[2 * step] = d * scale;
    v[3 * step] = b * scale;
}

/*
 * In-place 4x4 transform of 16 row-major coefficients: the butterfly is
 * applied down each column first, then across each row with the results
 * doubled.
 */
static void wht4x4(int16_t *coef)
{
    int n;

    for (n = 0; n < 4; n++)
        wht4_butterfly(coef + n, 4, 1);
    for (n = 0; n < 4; n++)
        wht4_butterfly(coef + 4 * n, 1, 2);
}
|
||||
|
||||
static void check_idct(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, src, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
|
||||
VP8DSPContext d;
|
||||
int dc;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);
|
||||
|
||||
ff_vp8dsp_init(&d);
|
||||
randomize_buffers(src, dst, 4, coef);
|
||||
|
||||
dct4x4(coef);
|
||||
|
||||
for (dc = 0; dc <= 1; dc++) {
|
||||
void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d.vp8_idct_dc_add : d.vp8_idct_add;
|
||||
|
||||
if (check_func(idct, "vp8_idct_%sadd", dc ? "dc_" : "")) {
|
||||
if (dc) {
|
||||
memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
|
||||
subcoef0[0] = coef[0];
|
||||
} else {
|
||||
memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
|
||||
}
|
||||
memcpy(dst0, dst, 4 * 4);
|
||||
memcpy(dst1, dst, 4 * 4);
|
||||
memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
|
||||
// Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a
|
||||
// multiple of 16. If optimizations want to take advantage of that, this test needs to be
|
||||
// updated to make it more like the h264dsp tests.
|
||||
call_ref(dst0, subcoef0, 4);
|
||||
call_new(dst1, subcoef1, 4);
|
||||
if (memcmp(dst0, dst1, 4 * 4) ||
|
||||
memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
|
||||
fail();
|
||||
|
||||
bench_new(dst1, subcoef1, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void check_idct_dc4(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, src, [4 * 4 * 4]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4 * 4]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
|
||||
VP8DSPContext d;
|
||||
int i, chroma;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
|
||||
|
||||
ff_vp8dsp_init(&d);
|
||||
|
||||
for (chroma = 0; chroma <= 1; chroma++) {
|
||||
void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d.vp8_idct_dc_add4uv : d.vp8_idct_dc_add4y;
|
||||
if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
|
||||
ptrdiff_t stride = chroma ? 8 : 16;
|
||||
int w = chroma ? 2 : 4;
|
||||
for (i = 0; i < 4; i++) {
|
||||
int blockx = 4 * (i % w);
|
||||
int blocky = 4 * (i / w);
|
||||
randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
|
||||
dct4x4(coef[i]);
|
||||
memset(&coef[i][1], 0, 15 * sizeof(int16_t));
|
||||
}
|
||||
|
||||
memcpy(dst0, dst, 4 * 4 * 4);
|
||||
memcpy(dst1, dst, 4 * 4 * 4);
|
||||
memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
|
||||
memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
|
||||
call_ref(dst0, subcoef0, stride);
|
||||
call_new(dst1, subcoef1, stride);
|
||||
if (memcmp(dst0, dst1, 4 * 4 * 4) ||
|
||||
memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
|
||||
fail();
|
||||
bench_new(dst1, subcoef1, stride);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void check_luma_dc_wht(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
|
||||
LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
|
||||
int16_t block[4][4][16];
|
||||
LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
|
||||
LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
|
||||
VP8DSPContext d;
|
||||
int dc_only;
|
||||
int blockx, blocky;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);
|
||||
|
||||
ff_vp8dsp_init(&d);
|
||||
|
||||
for (blocky = 0; blocky < 4; blocky++) {
|
||||
for (blockx = 0; blockx < 4; blockx++) {
|
||||
uint8_t src[16], dst[16];
|
||||
randomize_buffers(src, dst, 4, block[blocky][blockx]);
|
||||
|
||||
dct4x4(block[blocky][blockx]);
|
||||
dc[blocky * 4 + blockx] = block[blocky][blockx][0];
|
||||
block[blocky][blockx][0] = rnd();
|
||||
}
|
||||
}
|
||||
wht4x4(dc);
|
||||
|
||||
for (dc_only = 0; dc_only <= 1; dc_only++) {
|
||||
void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d.vp8_luma_dc_wht_dc : d.vp8_luma_dc_wht;
|
||||
|
||||
if (check_func(idct, "vp8_luma_dc_wht%s", dc_only ? "_dc" : "")) {
|
||||
if (dc_only) {
|
||||
memset(dc0, 0, 16 * sizeof(int16_t));
|
||||
dc0[0] = dc[0];
|
||||
} else {
|
||||
memcpy(dc0, dc, 16 * sizeof(int16_t));
|
||||
}
|
||||
memcpy(dc1, dc0, 16 * sizeof(int16_t));
|
||||
memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
|
||||
memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
|
||||
call_ref(block0, dc0);
|
||||
call_new(block1, dc1);
|
||||
if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
|
||||
memcmp(dc0, dc1, 16 * sizeof(int16_t)))
|
||||
fail();
|
||||
bench_new(block1, dc1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define SRC_BUF_STRIDE 32
|
||||
#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
|
||||
// The mc subpixel interpolation filter needs the 2 previous pixels in either
|
||||
// direction, the +1 is to make sure the actual load addresses always are
|
||||
// unaligned.
|
||||
#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)
|
||||
|
||||
#undef randomize_buffers
|
||||
#define randomize_buffers() \
|
||||
do { \
|
||||
int k; \
|
||||
for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
|
||||
AV_WN32A(buf + k, rnd()); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void check_mc(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
|
||||
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
|
||||
VP8DSPContext d;
|
||||
int type, k, dx, dy;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int);
|
||||
|
||||
ff_vp78dsp_init(&d);
|
||||
|
||||
for (type = 0; type < 2; type++) {
|
||||
vp8_mc_func (*tab)[3][3] = type ? d.put_vp8_bilinear_pixels_tab : d.put_vp8_epel_pixels_tab;
|
||||
for (k = 1; k < 8; k++) {
|
||||
int hsize = k / 3;
|
||||
int size = 16 >> hsize;
|
||||
int height = (size << 1) >> (k % 3);
|
||||
for (dy = 0; dy < 3; dy++) {
|
||||
for (dx = 0; dx < 3; dx++) {
|
||||
char str[100];
|
||||
if (dx || dy) {
|
||||
if (type == 0) {
|
||||
static const char *dx_names[] = { "", "h4", "h6" };
|
||||
static const char *dy_names[] = { "", "v4", "v6" };
|
||||
snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
|
||||
} else {
|
||||
snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
|
||||
}
|
||||
} else {
|
||||
snprintf(str, sizeof(str), "pixels%d", size);
|
||||
}
|
||||
if (check_func(tab[hsize][dy][dx], "vp8_put_%s", str)) {
|
||||
int mx, my;
|
||||
int i;
|
||||
if (type == 0) {
|
||||
mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
|
||||
my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
|
||||
} else {
|
||||
mx = dx ? 1 + (rnd() % 7) : 0;
|
||||
my = dy ? 1 + (rnd() % 7) : 0;
|
||||
}
|
||||
randomize_buffers();
|
||||
for (i = -2; i <= 3; i++) {
|
||||
int val = (i == -1 || i == 2) ? 0 : 0xff;
|
||||
// Set pixels in the first row and column to the maximum pattern,
|
||||
// to test for potential overflows in the filter.
|
||||
src[i ] = val;
|
||||
src[i * SRC_BUF_STRIDE] = val;
|
||||
}
|
||||
call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
|
||||
call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
|
||||
if (memcmp(dst0, dst1, size * height))
|
||||
fail();
|
||||
bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef randomize_buffers
|
||||
|
||||
/*
 * Pixel setters for randomize_loopfilter_buffers(); they use buf and jstride
 * from the expanding scope.  The value argument `c` is deliberately left
 * unparenthesized: callers pass an assignment such as `q1 = q0`, so the
 * randomized value built around it is also stored in the named variable.
 */
#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c +/- [0,d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c +/- [d,d+e] (making sure it won't be clipped)
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

/*
 * Write test pixels around one edge in buf.  Four lines (every second one of
 * eight, starting at lineoff) get eight pixels each, four on either side of
 * the edge; dir selects whether lines advance by one byte and pixels by str,
 * or the other way round.
 */
static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    const uint32_t mask = 0xff;
    int off, istride, jstride;
    int i;

    if (dir) {
        off     = lineoff;
        istride = 1;
        jstride = str;
    } else {
        off     = lineoff * str;
        istride = str;
        jstride = 1;
    }

    for (i = 0; i < 8; i += 2) {
        // Row 0 will trigger hev for q0/q1, row 2 will trigger hev for p0/p1,
        // rows 4 and 6 will not trigger hev.
        // force_hev 1 will make sure all rows trigger hev, while force_hev -1
        // makes none of them trigger it.
        const int hev_q = force_hev > 0 || (force_hev >= 0 && i == 0);
        const int hev_p = force_hev > 0 || (force_hev >= 0 && i == 2);
        int idx = off + i * istride;
        int p2, p1, p0, q0, q1, q2;

        setpx(idx, 0, q0 = rnd() & mask);
        if (hev_q) {
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, 1, q1 = q0, hev_thresh);
        }
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);

        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if (hev_p) {
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, -2, p1 = p0, hev_thresh);
        }
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}
|
||||
|
||||
// Fill a w x h rectangle of buf (rows `stride` bytes apart) with random
// pixels, row by row.
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        const ptrdiff_t base = row * stride;
        int col;

        for (col = 0; col < w; col++) {
            buf[base + col] = rnd() & 0xff;
        }
    }
}
|
||||
|
||||
/*
 * Shorthand for randomize_loopfilter_buffers(): dir, flim_E, flim_I and
 * hev_thresh are picked up from the scope in which the macro is expanded.
 */
#define randomize_buffers(buf, lineoff, str, force_hev) \
|
||||
randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)
|
||||
|
||||
static void check_loopfilter_16y(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
|
||||
LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
|
||||
VP8DSPContext d;
|
||||
int dir, edge, force_hev;
|
||||
int flim_E = 20, flim_I = 10, hev_thresh = 7;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int, int, int);
|
||||
|
||||
ff_vp8dsp_init(&d);
|
||||
|
||||
for (dir = 0; dir < 2; dir++) {
|
||||
int midoff = dir ? 4 * 16 : 4;
|
||||
int midoff_aligned = dir ? 4 * 16 : 16;
|
||||
uint8_t *buf0 = base0 + midoff_aligned;
|
||||
uint8_t *buf1 = base1 + midoff_aligned;
|
||||
for (edge = 0; edge < 2; edge++) {
|
||||
void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
|
||||
switch (dir << 1 | edge) {
|
||||
case (0 << 1) | 0: func = d.vp8_h_loop_filter16y; break;
|
||||
case (1 << 1) | 0: func = d.vp8_v_loop_filter16y; break;
|
||||
case (0 << 1) | 1: func = d.vp8_h_loop_filter16y_inner; break;
|
||||
case (1 << 1) | 1: func = d.vp8_v_loop_filter16y_inner; break;
|
||||
}
|
||||
if (check_func(func, "vp8_loop_filter16y%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
|
||||
for (force_hev = -1; force_hev <= 1; force_hev++) {
|
||||
fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
|
||||
randomize_buffers(buf0, 0, 16, force_hev);
|
||||
randomize_buffers(buf0, 8, 16, force_hev);
|
||||
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
|
||||
call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
|
||||
call_new(buf1, 16, flim_E, flim_I, hev_thresh);
|
||||
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
|
||||
fail();
|
||||
}
|
||||
fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
|
||||
randomize_buffers(buf0, 0, 16, 0);
|
||||
randomize_buffers(buf0, 8, 16, 0);
|
||||
bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void check_loopfilter_8uv(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
|
||||
LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
|
||||
LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
|
||||
LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
|
||||
VP8DSPContext d;
|
||||
int dir, edge, force_hev;
|
||||
int flim_E = 20, flim_I = 10, hev_thresh = 7;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);
|
||||
|
||||
ff_vp8dsp_init(&d);
|
||||
|
||||
for (dir = 0; dir < 2; dir++) {
|
||||
int midoff = dir ? 4 * 16 : 4;
|
||||
int midoff_aligned = dir ? 4 * 16 : 16;
|
||||
uint8_t *buf0u = base0u + midoff_aligned;
|
||||
uint8_t *buf0v = base0v + midoff_aligned;
|
||||
uint8_t *buf1u = base1u + midoff_aligned;
|
||||
uint8_t *buf1v = base1v + midoff_aligned;
|
||||
for (edge = 0; edge < 2; edge++) {
|
||||
void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
|
||||
switch (dir << 1 | edge) {
|
||||
case (0 << 1) | 0: func = d.vp8_h_loop_filter8uv; break;
|
||||
case (1 << 1) | 0: func = d.vp8_v_loop_filter8uv; break;
|
||||
case (0 << 1) | 1: func = d.vp8_h_loop_filter8uv_inner; break;
|
||||
case (1 << 1) | 1: func = d.vp8_v_loop_filter8uv_inner; break;
|
||||
}
|
||||
if (check_func(func, "vp8_loop_filter8uv%s_%s", edge ? "_inner" : "", dir ? "v" : "h")) {
|
||||
for (force_hev = -1; force_hev <= 1; force_hev++) {
|
||||
fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
|
||||
fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
|
||||
randomize_buffers(buf0u, 0, 16, force_hev);
|
||||
randomize_buffers(buf0v, 0, 16, force_hev);
|
||||
memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
|
||||
memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
|
||||
|
||||
call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
|
||||
call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
|
||||
if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
|
||||
memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
|
||||
fail();
|
||||
}
|
||||
fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
|
||||
fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
|
||||
randomize_buffers(buf0u, 0, 16, 0);
|
||||
randomize_buffers(buf0v, 0, 16, 0);
|
||||
bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void check_loopfilter_simple(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
|
||||
LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
|
||||
VP8DSPContext d;
|
||||
int dir;
|
||||
int flim_E = 20, flim_I = 30, hev_thresh = 0;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
|
||||
|
||||
ff_vp8dsp_init(&d);
|
||||
|
||||
for (dir = 0; dir < 2; dir++) {
|
||||
int midoff = dir ? 4 * 16 : 4;
|
||||
int midoff_aligned = dir ? 4 * 16 : 16;
|
||||
uint8_t *buf0 = base0 + midoff_aligned;
|
||||
uint8_t *buf1 = base1 + midoff_aligned;
|
||||
void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d.vp8_v_loop_filter_simple : d.vp8_h_loop_filter_simple;
|
||||
if (check_func(func, "vp8_loop_filter_simple_%s", dir ? "v" : "h")) {
|
||||
fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
|
||||
randomize_buffers(buf0, 0, 16, -1);
|
||||
randomize_buffers(buf0, 8, 16, -1);
|
||||
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
|
||||
call_ref(buf0, 16, flim_E);
|
||||
call_new(buf1, 16, flim_E);
|
||||
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
|
||||
fail();
|
||||
bench_new(buf0, 16, flim_E);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test entry point for the VP8 DSP functions: runs the checks in three
 * groups and reports each group under its own name ("idct", "mc",
 * "loopfilter").
 */
void checkasm_check_vp8dsp(void)
|
||||
{
|
||||
check_idct();
|
||||
check_idct_dc4();
|
||||
check_luma_dc_wht();
|
||||
report("idct");
|
||||
check_mc();
|
||||
report("mc");
|
||||
check_loopfilter_16y();
|
||||
check_loopfilter_8uv();
|
||||
check_loopfilter_simple();
|
||||
report("loopfilter");
|
||||
}
|
631
externals/ffmpeg/tests/checkasm/vp9dsp.c
vendored
Executable file
631
externals/ffmpeg/tests/checkasm/vp9dsp.c
vendored
Executable file
@@ -0,0 +1,631 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavcodec/vp9data.h"
|
||||
#include "libavcodec/vp9.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mathematics.h"
|
||||
|
||||
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
|
||||
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
|
||||
|
||||
#define randomize_buffers() \
|
||||
do { \
|
||||
uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
|
||||
int k; \
|
||||
for (k = -4; k < SIZEOF_PIXEL * FFMAX(8, size); k += 4) { \
|
||||
uint32_t r = rnd() & mask; \
|
||||
AV_WN32A(a + k, r); \
|
||||
} \
|
||||
for (k = 0; k < size * SIZEOF_PIXEL; k += 4) { \
|
||||
uint32_t r = rnd() & mask; \
|
||||
AV_WN32A(l + k, r); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void check_ipred(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint8_t, a_buf, [64 * 2]);
|
||||
uint8_t *a = &a_buf[32 * 2];
|
||||
LOCAL_ALIGNED_32(uint8_t, l, [32 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
|
||||
VP9DSPContext dsp;
|
||||
int tx, mode, bit_depth;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *left, const uint8_t *top);
|
||||
static const char *const mode_names[N_INTRA_PRED_MODES] = {
|
||||
[VERT_PRED] = "vert",
|
||||
[HOR_PRED] = "hor",
|
||||
[DC_PRED] = "dc",
|
||||
[DIAG_DOWN_LEFT_PRED] = "diag_downleft",
|
||||
[DIAG_DOWN_RIGHT_PRED] = "diag_downright",
|
||||
[VERT_RIGHT_PRED] = "vert_right",
|
||||
[HOR_DOWN_PRED] = "hor_down",
|
||||
[VERT_LEFT_PRED] = "vert_left",
|
||||
[HOR_UP_PRED] = "hor_up",
|
||||
[TM_VP8_PRED] = "tm",
|
||||
[LEFT_DC_PRED] = "dc_left",
|
||||
[TOP_DC_PRED] = "dc_top",
|
||||
[DC_128_PRED] = "dc_128",
|
||||
[DC_127_PRED] = "dc_127",
|
||||
[DC_129_PRED] = "dc_129",
|
||||
};
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
ff_vp9dsp_init(&dsp, bit_depth, 0);
|
||||
for (tx = 0; tx < 4; tx++) {
|
||||
int size = 4 << tx;
|
||||
|
||||
for (mode = 0; mode < N_INTRA_PRED_MODES; mode++) {
|
||||
if (check_func(dsp.intra_pred[tx][mode], "vp9_%s_%dx%d_%dbpp",
|
||||
mode_names[mode], size, size, bit_depth)) {
|
||||
randomize_buffers();
|
||||
call_ref(dst0, size * SIZEOF_PIXEL, l, a);
|
||||
call_new(dst1, size * SIZEOF_PIXEL, l, a);
|
||||
if (memcmp(dst0, dst1, size * size * SIZEOF_PIXEL))
|
||||
fail();
|
||||
bench_new(dst1, size * SIZEOF_PIXEL,l, a);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
report("ipred");
|
||||
}
|
||||
|
||||
#undef randomize_buffers
|
||||
|
||||
/*
 * Randomize sz x sz pixels in dst and src (masked to the current bit depth,
 * dst written first) and store the per-pixel difference src - dst in coef:
 * as int16_t for 8-bit data, as int32_t otherwise.  Uses x, y, sz, bit_depth,
 * src, dst and coef from the expanding scope.
 */
#define randomize_buffers()                                                 \
    do {                                                                    \
        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];                   \
        for (y = 0; y < sz; y++) {                                          \
            for (x = 0; x < sz * SIZEOF_PIXEL; x += 4) {                    \
                uint32_t r = rnd() & mask;                                  \
                AV_WN32A(dst + y * sz * SIZEOF_PIXEL + x, r);               \
                AV_WN32A(src + y * sz * SIZEOF_PIXEL + x, rnd() & mask);    \
            }                                                               \
            if (bit_depth == 8) {                                           \
                for (x = 0; x < sz; x++)                                    \
                    coef[y * sz + x] = src[y * sz + x] - dst[y * sz + x];   \
            } else {                                                        \
                for (x = 0; x < sz; x++)                                    \
                    ((int32_t *) coef)[y * sz + x] =                        \
                        ((uint16_t *) src)[y * sz + x] -                    \
                        ((uint16_t *) dst)[y * sz + x];                     \
            }                                                               \
        }                                                                   \
    } while(0)
|
||||
|
||||
// wht function copied from libvpx
|
||||
static void fwht_1d(double *out, const double *in, int sz)
|
||||
{
|
||||
double t0 = in[0] + in[1];
|
||||
double t3 = in[3] - in[2];
|
||||
double t4 = trunc((t0 - t3) * 0.5);
|
||||
double t1 = t4 - in[1];
|
||||
double t2 = t4 - in[2];
|
||||
|
||||
out[0] = t0 - t2;
|
||||
out[1] = t2;
|
||||
out[2] = t3 + t1;
|
||||
out[3] = t1;
|
||||
}
|
||||
|
||||
// standard DCT-II
|
||||
static void fdct_1d(double *out, const double *in, int sz)
|
||||
{
|
||||
int k, n;
|
||||
|
||||
for (k = 0; k < sz; k++) {
|
||||
out[k] = 0.0;
|
||||
for (n = 0; n < sz; n++)
|
||||
out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (sz * 2.0));
|
||||
}
|
||||
out[0] *= M_SQRT1_2;
|
||||
}
|
||||
|
||||
// see "Towards jointly optimal spatial prediction and adaptive transform in
|
||||
// video/image coding", by J. Han, A. Saxena, and K. Rose
|
||||
// IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
|
||||
static void fadst4_1d(double *out, const double *in, int sz)
|
||||
{
|
||||
int k, n;
|
||||
|
||||
for (k = 0; k < sz; k++) {
|
||||
out[k] = 0.0;
|
||||
for (n = 0; n < sz; n++)
|
||||
out[k] += in[n] * sin(M_PI * (n + 1) * (2 * k + 1) / (sz * 2.0 + 1.0));
|
||||
}
|
||||
}
|
||||
|
||||
// see "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
|
||||
// by Jingning Han, Yaowu Xu, and Debargha Mukherjee
|
||||
// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41418.pdf
|
||||
static void fadst_1d(double *out, const double *in, int sz)
|
||||
{
|
||||
int k, n;
|
||||
|
||||
for (k = 0; k < sz; k++) {
|
||||
out[k] = 0.0;
|
||||
for (n = 0; n < sz; n++)
|
||||
out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (sz * 4.0));
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*ftx1d_fn)(double *out, const double *in, int sz);
|
||||
static void ftx_2d(double *out, const double *in, enum TxfmMode tx,
|
||||
enum TxfmType txtp, int sz)
|
||||
{
|
||||
static const double scaling_factors[5][4] = {
|
||||
{ 4.0, 16.0 * M_SQRT1_2 / 3.0, 16.0 * M_SQRT1_2 / 3.0, 32.0 / 9.0 },
|
||||
{ 2.0, 2.0, 2.0, 2.0 },
|
||||
{ 1.0, 1.0, 1.0, 1.0 },
|
||||
{ 0.25 },
|
||||
{ 4.0 }
|
||||
};
|
||||
static const ftx1d_fn ftx1d_tbl[5][4][2] = {
|
||||
{
|
||||
{ fdct_1d, fdct_1d },
|
||||
{ fadst4_1d, fdct_1d },
|
||||
{ fdct_1d, fadst4_1d },
|
||||
{ fadst4_1d, fadst4_1d },
|
||||
}, {
|
||||
{ fdct_1d, fdct_1d },
|
||||
{ fadst_1d, fdct_1d },
|
||||
{ fdct_1d, fadst_1d },
|
||||
{ fadst_1d, fadst_1d },
|
||||
}, {
|
||||
{ fdct_1d, fdct_1d },
|
||||
{ fadst_1d, fdct_1d },
|
||||
{ fdct_1d, fadst_1d },
|
||||
{ fadst_1d, fadst_1d },
|
||||
}, {
|
||||
{ fdct_1d, fdct_1d },
|
||||
}, {
|
||||
{ fwht_1d, fwht_1d },
|
||||
},
|
||||
};
|
||||
double temp[1024];
|
||||
double scaling_factor = scaling_factors[tx][txtp];
|
||||
int i, j;
|
||||
|
||||
// cols
|
||||
for (i = 0; i < sz; ++i) {
|
||||
double temp_out[32];
|
||||
|
||||
ftx1d_tbl[tx][txtp][0](temp_out, &in[i * sz], sz);
|
||||
// scale and transpose
|
||||
for (j = 0; j < sz; ++j)
|
||||
temp[j * sz + i] = temp_out[j] * scaling_factor;
|
||||
}
|
||||
|
||||
// rows
|
||||
for (i = 0; i < sz; i++)
|
||||
ftx1d_tbl[tx][txtp][1](&out[i * sz], &temp[i * sz], sz);
|
||||
}
|
||||
|
||||
static void ftx(int16_t *buf, enum TxfmMode tx,
|
||||
enum TxfmType txtp, int sz, int bit_depth)
|
||||
{
|
||||
double ind[1024], outd[1024];
|
||||
int n;
|
||||
|
||||
emms_c();
|
||||
for (n = 0; n < sz * sz; n++) {
|
||||
if (bit_depth == 8)
|
||||
ind[n] = buf[n];
|
||||
else
|
||||
ind[n] = ((int32_t *) buf)[n];
|
||||
}
|
||||
ftx_2d(outd, ind, tx, txtp, sz);
|
||||
for (n = 0; n < sz * sz; n++) {
|
||||
if (bit_depth == 8)
|
||||
buf[n] = lrint(outd[n]);
|
||||
else
|
||||
((int32_t *) buf)[n] = lrint(outd[n]);
|
||||
}
|
||||
}
|
||||
|
||||
static int copy_subcoefs(int16_t *out, const int16_t *in, enum TxfmMode tx,
|
||||
enum TxfmType txtp, int sz, int sub, int bit_depth)
|
||||
{
|
||||
// copy the topleft coefficients such that the return value (being the
|
||||
// coefficient scantable index for the eob token) guarantees that only
|
||||
// the topleft $sub out of $sz (where $sz >= $sub) coefficients in both
|
||||
// dimensions are non-zero. This leads to braching to specific optimized
|
||||
// simd versions (e.g. dc-only) so that we get full asm coverage in this
|
||||
// test
|
||||
|
||||
int n;
|
||||
const int16_t *scan = ff_vp9_scans[tx][txtp];
|
||||
int eob;
|
||||
|
||||
for (n = 0; n < sz * sz; n++) {
|
||||
int rc = scan[n], rcx = rc % sz, rcy = rc / sz;
|
||||
|
||||
// find eob for this sub-idct
|
||||
if (rcx >= sub || rcy >= sub)
|
||||
break;
|
||||
|
||||
// copy coef
|
||||
if (bit_depth == 8) {
|
||||
out[rc] = in[rc];
|
||||
} else {
|
||||
AV_COPY32(&out[rc * 2], &in[rc * 2]);
|
||||
}
|
||||
}
|
||||
|
||||
eob = n;
|
||||
|
||||
for (; n < sz * sz; n++) {
|
||||
int rc = scan[n];
|
||||
|
||||
// zero
|
||||
if (bit_depth == 8) {
|
||||
out[rc] = 0;
|
||||
} else {
|
||||
AV_ZERO32(&out[rc * 2]);
|
||||
}
|
||||
}
|
||||
|
||||
return eob;
|
||||
}
|
||||
|
||||
// Return 1 if the first sz bytes at c are all zero, 0 otherwise.
// The buffer is examined 32 bits (two int16_t elements) at a time.
static int iszero(const int16_t *c, int sz)
{
    const size_t count = sz / sizeof(int16_t);
    size_t pos = 0;

    while (pos < count) {
        if (AV_RN32A(&c[pos]))
            return 0;
        pos += 2;
    }

    return 1;
}
|
||||
|
||||
#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8))
|
||||
|
||||
static void check_itxfm(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]);
|
||||
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
|
||||
VP9DSPContext dsp;
|
||||
int y, x, tx, txtp, bit_depth, sub;
|
||||
static const char *const txtp_types[N_TXFM_TYPES] = {
|
||||
[DCT_DCT] = "dct_dct", [DCT_ADST] = "adst_dct",
|
||||
[ADST_DCT] = "dct_adst", [ADST_ADST] = "adst_adst"
|
||||
};
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
ff_vp9dsp_init(&dsp, bit_depth, 0);
|
||||
|
||||
for (tx = TX_4X4; tx <= N_TXFM_SIZES /* 4 = lossless */; tx++) {
|
||||
int sz = 4 << (tx & 3);
|
||||
int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
|
||||
|
||||
for (txtp = 0; txtp < n_txtps; txtp++) {
|
||||
// skip testing sub-IDCTs for WHT or ADST since they don't
|
||||
// implement it in any of the SIMD functions. If they do,
|
||||
// consider changing this to ensure we have complete test
|
||||
// coverage. Test sub=1 for dc-only, then 2, 4, 8, 12, etc,
|
||||
// since the arm version can distinguish them at that level.
|
||||
for (sub = (txtp == 0 && tx < 4) ? 1 : sz; sub <= sz;
|
||||
sub < 4 ? (sub <<= 1) : (sub += 4)) {
|
||||
if (check_func(dsp.itxfm_add[tx][txtp],
|
||||
"vp9_inv_%s_%dx%d_sub%d_add_%d",
|
||||
tx == 4 ? "wht_wht" : txtp_types[txtp],
|
||||
sz, sz, sub, bit_depth)) {
|
||||
int eob;
|
||||
|
||||
randomize_buffers();
|
||||
ftx(coef, tx, txtp, sz, bit_depth);
|
||||
|
||||
if (sub < sz) {
|
||||
eob = copy_subcoefs(subcoef0, coef, tx, txtp,
|
||||
sz, sub, bit_depth);
|
||||
} else {
|
||||
eob = sz * sz;
|
||||
memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
|
||||
}
|
||||
|
||||
memcpy(dst0, dst, sz * sz * SIZEOF_PIXEL);
|
||||
memcpy(dst1, dst, sz * sz * SIZEOF_PIXEL);
|
||||
memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
|
||||
call_ref(dst0, sz * SIZEOF_PIXEL, subcoef0, eob);
|
||||
call_new(dst1, sz * SIZEOF_PIXEL, subcoef1, eob);
|
||||
if (memcmp(dst0, dst1, sz * sz * SIZEOF_PIXEL) ||
|
||||
!iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
|
||||
!iszero(subcoef1, sz * sz * SIZEOF_COEF))
|
||||
fail();
|
||||
|
||||
bench_new(dst, sz * SIZEOF_PIXEL, coef, eob);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
report("itxfm");
|
||||
}
|
||||
|
||||
#undef randomize_buffers
|
||||
|
||||
#define setpx(a,b,c) \
|
||||
do { \
|
||||
if (SIZEOF_PIXEL == 1) { \
|
||||
buf0[(a) + (b) * jstride] = av_clip_uint8(c); \
|
||||
} else { \
|
||||
((uint16_t *)buf0)[(a) + (b) * jstride] = av_clip_uintp2(c, bit_depth); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// c can be an assignment and must not be put under ()
|
||||
#define setdx(a,b,c,d) setpx(a,b,c-(d)+(rnd()%((d)*2+1)))
|
||||
#define setsx(a,b,c,d) setdx(a,b,c,(d) << (bit_depth - 8))
|
||||
static void randomize_loopfilter_buffers(int bidx, int lineoff, int str,
|
||||
int bit_depth, int dir, const int *E,
|
||||
const int *F, const int *H, const int *I,
|
||||
uint8_t *buf0, uint8_t *buf1)
|
||||
{
|
||||
uint32_t mask = (1 << bit_depth) - 1;
|
||||
int off = dir ? lineoff : lineoff * 16;
|
||||
int istride = dir ? 1 : 16;
|
||||
int jstride = dir ? str : 1;
|
||||
int i, j;
|
||||
for (i = 0; i < 2; i++) /* flat16 */ {
|
||||
int idx = off + i * istride, p0, q0;
|
||||
setpx(idx, 0, q0 = rnd() & mask);
|
||||
setsx(idx, -1, p0 = q0, E[bidx] >> 2);
|
||||
for (j = 1; j < 8; j++) {
|
||||
setsx(idx, -1 - j, p0, F[bidx]);
|
||||
setsx(idx, j, q0, F[bidx]);
|
||||
}
|
||||
}
|
||||
for (i = 2; i < 4; i++) /* flat8 */ {
|
||||
int idx = off + i * istride, p0, q0;
|
||||
setpx(idx, 0, q0 = rnd() & mask);
|
||||
setsx(idx, -1, p0 = q0, E[bidx] >> 2);
|
||||
for (j = 1; j < 4; j++) {
|
||||
setsx(idx, -1 - j, p0, F[bidx]);
|
||||
setsx(idx, j, q0, F[bidx]);
|
||||
}
|
||||
for (j = 4; j < 8; j++) {
|
||||
setpx(idx, -1 - j, rnd() & mask);
|
||||
setpx(idx, j, rnd() & mask);
|
||||
}
|
||||
}
|
||||
for (i = 4; i < 6; i++) /* regular */ {
|
||||
int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
|
||||
setpx(idx, 0, q0 = rnd() & mask);
|
||||
setsx(idx, 1, q1 = q0, I[bidx]);
|
||||
setsx(idx, 2, q2 = q1, I[bidx]);
|
||||
setsx(idx, 3, q2, I[bidx]);
|
||||
setsx(idx, -1, p0 = q0, E[bidx] >> 2);
|
||||
setsx(idx, -2, p1 = p0, I[bidx]);
|
||||
setsx(idx, -3, p2 = p1, I[bidx]);
|
||||
setsx(idx, -4, p2, I[bidx]);
|
||||
for (j = 4; j < 8; j++) {
|
||||
setpx(idx, -1 - j, rnd() & mask);
|
||||
setpx(idx, j, rnd() & mask);
|
||||
}
|
||||
}
|
||||
for (i = 6; i < 8; i++) /* off */ {
|
||||
int idx = off + i * istride;
|
||||
for (j = 0; j < 8; j++) {
|
||||
setpx(idx, -1 - j, rnd() & mask);
|
||||
setpx(idx, j, rnd() & mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
#define randomize_buffers(bidx, lineoff, str) \
|
||||
randomize_loopfilter_buffers(bidx, lineoff, str, bit_depth, dir, \
|
||||
E, F, H, I, buf0, buf1)
|
||||
|
||||
static void check_loopfilter(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint8_t, base0, [32 + 16 * 16 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, base1, [32 + 16 * 16 * 2]);
|
||||
VP9DSPContext dsp;
|
||||
int dir, wd, wd2, bit_depth;
|
||||
static const char *const dir_name[2] = { "h", "v" };
|
||||
static const int E[2] = { 20, 28 }, I[2] = { 10, 16 };
|
||||
static const int H[2] = { 7, 11 }, F[2] = { 1, 1 };
|
||||
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
|
||||
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
ff_vp9dsp_init(&dsp, bit_depth, 0);
|
||||
|
||||
for (dir = 0; dir < 2; dir++) {
|
||||
int midoff = (dir ? 8 * 8 : 8) * SIZEOF_PIXEL;
|
||||
int midoff_aligned = (dir ? 8 * 8 : 16) * SIZEOF_PIXEL;
|
||||
uint8_t *buf0 = base0 + midoff_aligned;
|
||||
uint8_t *buf1 = base1 + midoff_aligned;
|
||||
|
||||
for (wd = 0; wd < 3; wd++) {
|
||||
// 4/8/16wd_8px
|
||||
if (check_func(dsp.loop_filter_8[wd][dir],
|
||||
"vp9_loop_filter_%s_%d_8_%dbpp",
|
||||
dir_name[dir], 4 << wd, bit_depth)) {
|
||||
randomize_buffers(0, 0, 8);
|
||||
memcpy(buf1 - midoff, buf0 - midoff,
|
||||
16 * 8 * SIZEOF_PIXEL);
|
||||
call_ref(buf0, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
|
||||
call_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
|
||||
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 8 * SIZEOF_PIXEL))
|
||||
fail();
|
||||
bench_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
|
||||
}
|
||||
}
|
||||
|
||||
midoff = (dir ? 16 * 8 : 8) * SIZEOF_PIXEL;
|
||||
midoff_aligned = (dir ? 16 * 8 : 16) * SIZEOF_PIXEL;
|
||||
|
||||
buf0 = base0 + midoff_aligned;
|
||||
buf1 = base1 + midoff_aligned;
|
||||
|
||||
// 16wd_16px loopfilter
|
||||
if (check_func(dsp.loop_filter_16[dir],
|
||||
"vp9_loop_filter_%s_16_16_%dbpp",
|
||||
dir_name[dir], bit_depth)) {
|
||||
randomize_buffers(0, 0, 16);
|
||||
randomize_buffers(0, 8, 16);
|
||||
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
|
||||
call_ref(buf0, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
|
||||
call_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
|
||||
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
|
||||
fail();
|
||||
bench_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
|
||||
}
|
||||
|
||||
for (wd = 0; wd < 2; wd++) {
|
||||
for (wd2 = 0; wd2 < 2; wd2++) {
|
||||
// mix2 loopfilter
|
||||
if (check_func(dsp.loop_filter_mix2[wd][wd2][dir],
|
||||
"vp9_loop_filter_mix2_%s_%d%d_16_%dbpp",
|
||||
dir_name[dir], 4 << wd, 4 << wd2, bit_depth)) {
|
||||
randomize_buffers(0, 0, 16);
|
||||
randomize_buffers(1, 8, 16);
|
||||
memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
|
||||
#define M(a) (((a)[1] << 8) | (a)[0])
|
||||
call_ref(buf0, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
|
||||
call_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
|
||||
if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
|
||||
fail();
|
||||
bench_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
|
||||
#undef M
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
report("loopfilter");
|
||||
}
|
||||
|
||||
#undef setsx
|
||||
#undef setpx
|
||||
#undef setdx
|
||||
#undef randomize_buffers
|
||||
|
||||
#define DST_BUF_SIZE (size * size * SIZEOF_PIXEL)
|
||||
#define SRC_BUF_STRIDE 72
|
||||
#define SRC_BUF_SIZE ((size + 7) * SRC_BUF_STRIDE * SIZEOF_PIXEL)
|
||||
#define src (buf + 3 * SIZEOF_PIXEL * (SRC_BUF_STRIDE + 1))
|
||||
|
||||
#define randomize_buffers() \
|
||||
do { \
|
||||
uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
|
||||
int k; \
|
||||
for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
|
||||
uint32_t r = rnd() & mask; \
|
||||
AV_WN32A(buf + k, r); \
|
||||
} \
|
||||
if (op == 1) { \
|
||||
for (k = 0; k < DST_BUF_SIZE; k += 4) { \
|
||||
uint32_t r = rnd() & mask; \
|
||||
AV_WN32A(dst0 + k, r); \
|
||||
AV_WN32A(dst1 + k, r); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void check_mc(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint8_t, buf, [72 * 72 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0, [64 * 64 * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2]);
|
||||
VP9DSPContext dsp;
|
||||
int op, hsize, bit_depth, filter, dx, dy;
|
||||
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||
int h, int mx, int my);
|
||||
static const char *const filter_names[4] = {
|
||||
"8tap_smooth", "8tap_regular", "8tap_sharp", "bilin"
|
||||
};
|
||||
static const char *const subpel_names[2][2] = { { "", "h" }, { "v", "hv" } };
|
||||
static const char *const op_names[2] = { "put", "avg" };
|
||||
char str[256];
|
||||
|
||||
for (op = 0; op < 2; op++) {
|
||||
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
ff_vp9dsp_init(&dsp, bit_depth, 0);
|
||||
for (hsize = 0; hsize < 5; hsize++) {
|
||||
int size = 64 >> hsize;
|
||||
|
||||
for (filter = 0; filter < 4; filter++) {
|
||||
for (dx = 0; dx < 2; dx++) {
|
||||
for (dy = 0; dy < 2; dy++) {
|
||||
if (dx || dy) {
|
||||
snprintf(str, sizeof(str),
|
||||
"%s_%s_%d%s", op_names[op],
|
||||
filter_names[filter], size,
|
||||
subpel_names[dy][dx]);
|
||||
} else {
|
||||
snprintf(str, sizeof(str),
|
||||
"%s%d", op_names[op], size);
|
||||
}
|
||||
if (check_func(dsp.mc[hsize][filter][op][dx][dy],
|
||||
"vp9_%s_%dbpp", str, bit_depth)) {
|
||||
int mx = dx ? 1 + (rnd() % 14) : 0;
|
||||
int my = dy ? 1 + (rnd() % 14) : 0;
|
||||
randomize_buffers();
|
||||
call_ref(dst0, size * SIZEOF_PIXEL,
|
||||
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
|
||||
size, mx, my);
|
||||
call_new(dst1, size * SIZEOF_PIXEL,
|
||||
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
|
||||
size, mx, my);
|
||||
if (memcmp(dst0, dst1, DST_BUF_SIZE))
|
||||
fail();
|
||||
|
||||
// simd implementations for each filter of subpel
|
||||
// functions are identical
|
||||
if (filter >= 1 && filter <= 2) continue;
|
||||
// 10/12 bpp for bilin are identical
|
||||
if (bit_depth == 12 && filter == 3) continue;
|
||||
|
||||
bench_new(dst1, size * SIZEOF_PIXEL,
|
||||
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
|
||||
size, mx, my);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
report("mc");
|
||||
}
|
||||
|
||||
// Entry point for the VP9 DSP tests: runs the intra-prediction, inverse
// transform, loop-filter and motion-compensation checks, in that order.
void checkasm_check_vp9dsp(void)
|
||||
{
|
||||
check_ipred();
|
||||
check_itxfm();
|
||||
check_loopfilter();
|
||||
check_mc();
|
||||
}
|
244
externals/ffmpeg/tests/checkasm/x86/checkasm.asm
vendored
Executable file
244
externals/ffmpeg/tests/checkasm/x86/checkasm.asm
vendored
Executable file
@@ -0,0 +1,244 @@
|
||||
;*****************************************************************************
|
||||
;* Assembly testing and benchmarking tool
|
||||
;* Copyright (c) 2008 Loren Merritt
|
||||
;* Copyright (c) 2012 Henrik Gramner
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or modify
|
||||
;* it under the terms of the GNU General Public License as published by
|
||||
;* the Free Software Foundation; either version 2 of the License, or
|
||||
;* (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;* GNU General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU General Public License
|
||||
;* along with this program; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
;*****************************************************************************
|
||||
|
||||
%define private_prefix checkasm
|
||||
%include "libavutil/x86/x86inc.asm"
|
||||
|
||||
; Read-only data: failure messages and the sentinel values that the
; checked_call functions load into registers before calling the function
; under test and compare against afterwards.
SECTION_RODATA
|
||||
|
||||
; NUL-terminated messages whose address is passed to fail_func by report_fail
error_message: db "failed to preserve register", 0
|
||||
error_message_emms: db "failed to issue emms", 0
|
||||
|
||||
%if ARCH_X86_64
|
||||
; just random numbers to reduce the chance of incidental match
|
||||
ALIGN 16
|
||||
; x6..x15: 16-byte sentinels, one per vector register m6..m15 (WIN64 path)
x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
|
||||
x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
|
||||
x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
|
||||
x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
|
||||
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
|
||||
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
|
||||
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
|
||||
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
|
||||
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
|
||||
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
|
||||
; n7..n14: 8-byte sentinels for general-purpose registers r7..r14; the
; checked_call code uses the top (15-free_regs) of them, counting down from n14
n7: dq 0x21f86d66c8ca00ce
|
||||
n8: dq 0x75b6ba21077c48ad
|
||||
n9: dq 0xed56bb2dcb3c7736
|
||||
n10: dq 0x8bda43d3fd1a7e06
|
||||
n11: dq 0xb64a9c9e5d318408
|
||||
n12: dq 0xdf9a54b303f1d3a3
|
||||
n13: dq 0x4a75479abd64e097
|
||||
n14: dq 0x249214109d5d1c88
|
||||
%endif
|
||||
|
||||
SECTION .text
|
||||
|
||||
cextern fail_func
|
||||
|
||||
; max number of args used by any asm function.
|
||||
; (max_args % 4) must equal 3 for stack alignment
|
||||
%define max_args 15
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; int checkasm_stack_clobber(uint64_t clobber, ...)
|
||||
;-----------------------------------------------------------------------------
|
||||
; Overwrites the (max_args+6)*8 bytes directly below the incoming stack
; pointer with the value of the first argument (r0), then returns.
cglobal stack_clobber, 1,2
|
||||
; Clobber the stack with junk below the stack pointer
|
||||
%define argsize (max_args+6)*8
|
||||
SUB rsp, argsize
|
||||
; r1 = byte offset of the highest 8-byte slot; the loop walks down to offset 0
mov r1, argsize-8
|
||||
.loop:
|
||||
mov [rsp+r1], r0
|
||||
sub r1, 8
|
||||
; the flags come from the sub above: keep looping while r1 is still >= 0
jge .loop
|
||||
ADD rsp, argsize
|
||||
RET
|
||||
|
||||
; free_regs sets how many sentinel registers checked_call uses: it loads and
; verifies (15-free_regs) registers, counting down from r14.
; DECLARE_REG_TMP selects the register behind t0, which checked_call uses to
; hold the pointer to the function under test.
; NOTE(review): the exact register mapping comes from x86inc.asm - confirm there.
%if WIN64
|
||||
%assign free_regs 7
|
||||
DECLARE_REG_TMP 4
|
||||
%else
|
||||
%assign free_regs 9
|
||||
DECLARE_REG_TMP 7
|
||||
%endif
|
||||
|
||||
; report_fail <message label>
; Calls fail_func with the address of the given message as its first argument,
; keeping rax and rdx (the tested function's return value) intact around the
; call.
%macro report_fail 1
|
||||
; stash the return value in r9/r10 (x86inc names); presumably these map to
; registers that fail_func preserves - TODO confirm against x86inc.asm
mov r9, rax
|
||||
mov r10, rdx
|
||||
lea r0, [%1]
|
||||
; eax is cleared before the call; presumably because fail_func is variadic
; and takes no vector-register arguments - confirm against its prototype
xor eax, eax
|
||||
call fail_func
|
||||
mov rdx, r10
|
||||
mov rax, r9
|
||||
%endmacro
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void checkasm_checked_call(void *func, ...)
|
||||
;-----------------------------------------------------------------------------
|
||||
; x86-64 variant. The optional macro argument is a name suffix: "_emms" makes
; the wrapper execute emms itself after the call, "_float" skips the x87 state
; check, and no suffix performs the check and reports a failure if it trips.
; The wrapper moves the stacked arguments into place, loads sentinel values
; into registers, calls the function, and reports a failure if any sentinel
; register changed.
INIT_XMM
|
||||
%macro CHECKED_CALL 0-1
|
||||
cglobal checked_call%1, 2,15,16,max_args*8+8
|
||||
; keep the function pointer in t0; r0 is about to be reused for an argument
mov t0, r0
|
||||
|
||||
; All arguments have been pushed on the stack instead of registers in order to
|
||||
; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
|
||||
mov r0, r6mp
|
||||
mov r1, r7mp
|
||||
mov r2, r8mp
|
||||
mov r3, r9mp
|
||||
%if UNIX64
|
||||
mov r4, r10mp
|
||||
mov r5, r11mp
|
||||
; copy the remaining max_args-6 arguments from the caller's stack area into
; this function's outgoing stack area, using r9 as scratch
%assign i 6
|
||||
%rep max_args-6
|
||||
mov r9, [rsp+stack_offset+(i+1)*8]
|
||||
mov [rsp+(i-6)*8], r9
|
||||
%assign i i+1
|
||||
%endrep
|
||||
%else ; WIN64
|
||||
; copy the remaining max_args-4 arguments; on this path the outgoing slots
; start at index 4
%assign i 4
|
||||
%rep max_args-4
|
||||
mov r9, [rsp+stack_offset+(i+7)*8]
|
||||
mov [rsp+i*8], r9
|
||||
%assign i i+1
|
||||
%endrep
|
||||
|
||||
; Move possible floating-point arguments to the correct registers
|
||||
movq m0, r0
|
||||
movq m1, r1
|
||||
movq m2, r2
|
||||
movq m3, r3
|
||||
|
||||
; load the sentinels x6..x15 into m6..m15
%assign i 6
|
||||
%rep 16-6
|
||||
mova m %+ i, [x %+ i]
|
||||
%assign i i+1
|
||||
%endrep
|
||||
%endif
|
||||
|
||||
; load the sentinels into r14 and downwards, (15-free_regs) registers in total
%assign i 14
|
||||
%rep 15-free_regs
|
||||
mov r %+ i, [n %+ i]
|
||||
%assign i i-1
|
||||
%endrep
|
||||
call t0
|
||||
; XOR each sentinel register with its expected value and accumulate the
; results in r14: r14 ends up non-zero if any of them was changed
%assign i 14
|
||||
%rep 15-free_regs
|
||||
xor r %+ i, [n %+ i]
|
||||
or r14, r %+ i
|
||||
%assign i i-1
|
||||
%endrep
|
||||
|
||||
%if WIN64
|
||||
; same check for m6..m15, accumulated in m6, packed down and folded into r14
%assign i 6
|
||||
%rep 16-6
|
||||
pxor m %+ i, [x %+ i]
|
||||
por m6, m %+ i
|
||||
%assign i i+1
|
||||
%endrep
|
||||
packsswb m6, m6
|
||||
movq r5, m6
|
||||
or r14, r5
|
||||
%endif
|
||||
|
||||
; Call fail_func() with a descriptive message to mark it as a failure
|
||||
; if the called function didn't preserve all callee-saved registers.
|
||||
; Save the return value located in rdx:rax first to prevent clobbering.
|
||||
; the zero flag tested here was set by the last "or r14, ..." above
jz .clobber_ok
|
||||
report_fail error_message
|
||||
.clobber_ok:
|
||||
%ifidn %1, _emms
|
||||
emms
|
||||
%elifnidn %1, _float
|
||||
; store the x87 environment and compare the word at offset 8 with 0xffff;
; any other value is reported as a missing emms, and emms is then issued here.
; NOTE(review): offset 8 is taken to be the x87 tag word of the 32-bit FSTENV
; layout (0xffff = all registers empty) - confirm against the Intel SDM
fstenv [rsp]
|
||||
cmp word [rsp + 8], 0xffff
|
||||
je .emms_ok
|
||||
report_fail error_message_emms
|
||||
emms
|
||||
.emms_ok:
|
||||
%endif
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
%else
|
||||
|
||||
; just random numbers to reduce the chance of incidental match
|
||||
; sentinels for r3..r6 in the 32-bit checked_call
%define n3 dword 0x6549315c
|
||||
%define n4 dword 0xe02f3e23
|
||||
%define n5 dword 0xb78d0d1d
|
||||
%define n6 dword 0x33627ba7
|
||||
|
||||
; report_fail <message label>  (32-bit variant)
; Calls fail_func with the address of the given message stored at [esp],
; keeping eax and edx (the tested function's return value) intact around the
; call.
%macro report_fail 1
|
||||
; stash the return value in r3/r4 (x86inc names); presumably these map to
; registers that fail_func preserves - TODO confirm against x86inc.asm
mov r3, eax
|
||||
mov r4, edx
|
||||
lea r0, [%1]
|
||||
; the message pointer overwrites the dword at the top of the stack
mov [esp], r0
|
||||
call fail_func
|
||||
mov edx, r4
|
||||
mov eax, r3
|
||||
%endmacro
|
||||
|
||||
; 32-bit variant. The optional macro argument is a name suffix with the same
; meaning as in the 64-bit macro: "_emms" issues emms after the call,
; "_float" skips the x87 state check, no suffix performs the check.
%macro CHECKED_CALL 0-1
|
||||
;-----------------------------------------------------------------------------
|
||||
; void checkasm_checked_call(void *func, ...)
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal checked_call%1, 1,7
|
||||
; load the sentinels into r3..r6
mov r3, n3
|
||||
mov r4, n4
|
||||
mov r5, n5
|
||||
mov r6, n6
|
||||
; re-push max_args argument dwords for the callee; each PUSH lowers esp by 4,
; so the fixed displacement steps through the caller's arguments from the
; highest address downwards
%rep max_args
|
||||
PUSH dword [esp+20+max_args*4]
|
||||
%endrep
|
||||
call r0
|
||||
; XOR each sentinel register with its expected value and OR the results
; together in r3: non-zero means at least one register was changed
xor r3, n3
|
||||
xor r4, n4
|
||||
xor r5, n5
|
||||
xor r6, n6
|
||||
or r3, r4
|
||||
or r5, r6
|
||||
or r3, r5
|
||||
jz .clobber_ok
|
||||
report_fail error_message
|
||||
.clobber_ok:
|
||||
%ifidn %1, _emms
|
||||
emms
|
||||
%elifnidn %1, _float
|
||||
; store the x87 environment and compare the word at offset 8 with 0xffff;
; any other value is reported as a missing emms, and emms is then issued here.
; NOTE(review): offset 8 is taken to be the x87 tag word of the 32-bit FSTENV
; layout (0xffff = all registers empty) - confirm against the Intel SDM
fstenv [esp]
|
||||
cmp word [esp + 8], 0xffff
|
||||
je .emms_ok
|
||||
report_fail error_message_emms
|
||||
emms
|
||||
.emms_ok:
|
||||
%endif
|
||||
; drop the max_args dwords pushed before the call
add esp, max_args*4
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
%endif ; ARCH_X86_64
|
||||
|
||||
CHECKED_CALL
|
||||
CHECKED_CALL _emms
|
||||
CHECKED_CALL _float
|
Reference in New Issue
Block a user