early-access version 1432
This commit is contained in:
4
externals/ffmpeg/libavutil/aarch64/Makefile
vendored
Executable file
4
externals/ffmpeg/libavutil/aarch64/Makefile
vendored
Executable file
@@ -0,0 +1,4 @@
|
||||
# C objects: CPU flag reporting (cpu.c) and float DSP function-pointer setup
# (float_dsp_init.c).
OBJS += aarch64/cpu.o                                                   \
        aarch64/float_dsp_init.o                                        \

# Hand-written NEON assembly; presumably only linked when the parent build
# enables NEON (the handling of NEON-OBJS is not visible here).
NEON-OBJS += aarch64/float_dsp_neon.o
|
117
externals/ffmpeg/libavutil/aarch64/asm.S
vendored
Executable file
117
externals/ffmpeg/libavutil/aarch64/asm.S
vendored
Executable file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/*
 * ELF and FUNC are prefixes placed in front of assembler directives.
 * Each one expands to nothing when the corresponding feature is present, so
 * the directive is emitted as written, and to "#" otherwise (presumably so
 * that the assembler treats the remainder of the line as a comment).
 */
#if defined(__ELF__)
#   define ELF
#else
#   define ELF #
#endif

#if HAVE_AS_FUNC
#   define FUNC
#else
#   define FUNC #
#endif

/* Preprocessors without __has_feature report every feature as absent. */
#if !defined(__has_feature)
#   define __has_feature(x) 0
#endif
|
||||
|
||||
/*
 * function name, export=0, align=2
 *
 * Opens a function in .text, aligned with ".align align".
 * With export set, the label is EXTERN_ASM followed by name and is made
 * global; otherwise the label is the bare name and stays local.
 * A one-shot "endfunc" macro is defined alongside: it emits the symbol size
 * (ELF only), closes the .func scope (only when FUNC is active) and then
 * purges itself so that the next "function" can define it again.
 */
.macro  function name, export=0, align=2
    .macro endfunc
ELF     .size   \name, . - \name
FUNC    .endfunc
        .purgem endfunc
    .endm
        .text
        .align          \align
    .if \export
        .global EXTERN_ASM\name
ELF     .type   EXTERN_ASM\name, %function
FUNC    .func   EXTERN_ASM\name
EXTERN_ASM\name:
    .else
ELF     .type   \name, %function
FUNC    .func   \name
\name:
    .endif
.endm
|
||||
|
||||
/*
 * const name, align=2, relocate=0
 *
 * Opens a read-only data object labelled name, aligned with ".align align".
 * Section selection:
 *   - HAVE_SECTION_DATA_REL_RO: .data.rel.ro when relocate is non-zero,
 *     .rodata otherwise;
 *   - _WIN32:                   .rdata;
 *   - anything else but Mach-O: .rodata;
 *   - Mach-O:                   .const_data.
 * relocate is only consulted in the first case.
 * A one-shot "endconst" macro is defined alongside: it emits the symbol size
 * (ELF only) and purges itself.
 */
.macro  const   name, align=2, relocate=0
    .macro endconst
ELF     .size   \name, . - \name
        .purgem endconst
    .endm
#if HAVE_SECTION_DATA_REL_RO
    .if \relocate
        .section        .data.rel.ro
    .else
        .section        .rodata
    .endif
#elif defined(_WIN32)
        .section        .rdata
#elif !defined(__MACH__)
        .section        .rodata
#else
        .const_data
#endif
        .align          \align
\name:
.endm
|
||||
|
||||
/*
 * movrel rd, val, offset=0
 *
 * Loads the address of symbol val plus offset into register rd.
 *   - PIC on Apple:   adrp/add with @PAGE and @PAGEOFF relocations.
 *   - PIC on Windows: adrp/add with a :lo12: relocation.
 *     In both of these cases a negative offset is not folded into the
 *     relocation; the symbol address is formed first and the magnitude of the
 *     offset is subtracted afterwards.
 *   - other PIC:      adrp/add with :lo12:, using the :pg_hi21_nc: page
 *     relocation when building with hwaddress_sanitizer.
 *   - non-PIC:        a literal-pool load (ldr rd, =val+offset).
 */
.macro  movrel rd, val, offset=0
#if CONFIG_PIC && defined(__APPLE__)
    .if \offset < 0
        adrp            \rd, \val@PAGE
        add             \rd, \rd, \val@PAGEOFF
        sub             \rd, \rd, -(\offset)
    .else
        adrp            \rd, \val+(\offset)@PAGE
        add             \rd, \rd, \val+(\offset)@PAGEOFF
    .endif
#elif CONFIG_PIC && defined(_WIN32)
    .if \offset < 0
        adrp            \rd, \val
        add             \rd, \rd, :lo12:\val
        sub             \rd, \rd, -(\offset)
    .else
        adrp            \rd, \val+(\offset)
        add             \rd, \rd, :lo12:\val+(\offset)
    .endif
#elif CONFIG_PIC
#   if __has_feature(hwaddress_sanitizer)
        adrp            \rd, :pg_hi21_nc:\val+(\offset)
#   else
        adrp            \rd, \val+(\offset)
#   endif
        add             \rd, \rd, :lo12:\val+(\offset)
#else
        ldr             \rd, =\val+\offset
#endif
.endm
|
||||
|
||||
/*
 * X(s) pastes EXTERN_ASM in front of s. The JOIN/GLUE indirection makes sure
 * EXTERN_ASM is macro-expanded before the tokens are pasted.
 */
#define GLUE(a, b) a ## b
#define JOIN(a, b) GLUE(a, b)
#define X(s)       JOIN(EXTERN_ASM, s)

/*
 * Rename x18/w18 to identifiers that are not registers, so that any use of
 * them in assembly stops assembling.
 * NOTE(review): presumably because x18 is reserved as the platform register
 * on some ABIs - confirm.
 */
#define x18 do_not_use_x18
#define w18 do_not_use_w18
|
50
externals/ffmpeg/libavutil/aarch64/bswap.h
vendored
Executable file
50
externals/ffmpeg/libavutil/aarch64/bswap.h
vendored
Executable file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_AARCH64_BSWAP_H
|
||||
#define AVUTIL_AARCH64_BSWAP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#define av_bswap16 av_bswap16
|
||||
static av_always_inline av_const unsigned av_bswap16(unsigned x)
|
||||
{
|
||||
__asm__("rev16 %w0, %w0" : "+r"(x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_bswap32 av_bswap32
|
||||
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
__asm__("rev %w0, %w0" : "+r"(x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define av_bswap64 av_bswap64
|
||||
static av_always_inline av_const uint64_t av_bswap64(uint64_t x)
|
||||
{
|
||||
__asm__("rev %0, %0" : "+r"(x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
#endif /* AVUTIL_AARCH64_BSWAP_H */
|
38
externals/ffmpeg/libavutil/aarch64/cpu.c
vendored
Executable file
38
externals/ffmpeg/libavutil/aarch64/cpu.c
vendored
Executable file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
#include "config.h"
|
||||
|
||||
int ff_get_cpu_flags_aarch64(void)
|
||||
{
|
||||
return AV_CPU_FLAG_ARMV8 * HAVE_ARMV8 |
|
||||
AV_CPU_FLAG_NEON * HAVE_NEON |
|
||||
AV_CPU_FLAG_VFP * HAVE_VFP;
|
||||
}
|
||||
|
||||
size_t ff_get_cpu_max_align_aarch64(void)
|
||||
{
|
||||
int flags = av_get_cpu_flags();
|
||||
|
||||
if (flags & AV_CPU_FLAG_NEON)
|
||||
return 16;
|
||||
|
||||
return 8;
|
||||
}
|
29
externals/ffmpeg/libavutil/aarch64/cpu.h
vendored
Executable file
29
externals/ffmpeg/libavutil/aarch64/cpu.h
vendored
Executable file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_AARCH64_CPU_H
|
||||
#define AVUTIL_AARCH64_CPU_H
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
/*
 * Feature predicates for a CPU flag word. Each one forwards to CPUEXT() with
 * the matching extension name; CPUEXT itself is defined outside this header
 * (presumably in libavutil/cpu_internal.h).
 */
#define have_armv8(flags) CPUEXT(flags, ARMV8)
#define have_neon(flags)  CPUEXT(flags, NEON)
#define have_vfp(flags)   CPUEXT(flags, VFP)
|
||||
|
||||
#endif /* AVUTIL_AARCH64_CPU_H */
|
69
externals/ffmpeg/libavutil/aarch64/float_dsp_init.c
vendored
Executable file
69
externals/ffmpeg/libavutil/aarch64/float_dsp_init.c
vendored
Executable file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* ARM NEON optimised Float DSP functions
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "cpu.h"
|
||||
|
||||
/*
 * Prototypes of the NEON routines that ff_float_dsp_init_aarch64() installs
 * into AVFloatDSPContext. The implementations are in assembly
 * (float_dsp_neon.S).
 */

/* Element-wise product of two float vectors. */
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1,
                         int len);

/* Multiply a float vector by a scalar and accumulate into dst. */
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
                                int len);

/* Multiply a float vector by a scalar. */
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
                                int len);

/* Multiply a double vector by a scalar. */
void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul,
                                int len);

/* Windowed overlap of src0 and src1 using the window win. */
void ff_vector_fmul_window_neon(float *dst, const float *src0,
                                const float *src1, const float *win, int len);

/* Element-wise product of src0 and src1, plus src2. */
void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
                             const float *src2, int len);

/* Element-wise product of src0 with src1 read back to front. */
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
                                 const float *src1, int len);

/* In-place sum/difference of two float vectors. */
void ff_butterflies_float_neon(float *v1, float *v2, int len);

/* Dot product of two float vectors. */
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
||||
|
||||
/**
 * Install the AArch64 NEON implementations into a float DSP context.
 *
 * When the CPU flags do not report NEON, the context is left untouched.
 *
 * @param fdsp context whose function pointers are overridden
 */
av_cold void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp)
{
    const int flags = av_get_cpu_flags();

    if (have_neon(flags)) {
        /* element-wise vector products */
        fdsp->vector_fmul         = ff_vector_fmul_neon;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_neon;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
        fdsp->vector_fmul_window  = ff_vector_fmul_window_neon;

        /* vector-by-scalar operations */
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_neon;
        fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_neon;
        fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_neon;

        /* butterflies and dot product */
        fdsp->butterflies_float   = ff_butterflies_float_neon;
        fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
    }
}
|
202
externals/ffmpeg/libavutil/aarch64/float_dsp_neon.S
vendored
Executable file
202
externals/ffmpeg/libavutil/aarch64/float_dsp_neon.S
vendored
Executable file
@@ -0,0 +1,202 @@
|
||||
/*
|
||||
* ARM NEON optimised Float DSP functions
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "asm.S"
|
||||
|
||||
/*
 * void ff_vector_fmul_neon(float *dst, const float *src0,
 *                          const float *src1, int len)
 * x0 = dst, x1 = src0, x2 = src1, w3 = len
 *
 * dst[i] = src0[i] * src1[i], 16 floats per iteration.
 * The loop only exits when the counter hits exactly zero, so len has to be a
 * positive multiple of 16.
 */
function ff_vector_fmul_neon, export=1
1:      subs            w3,  w3,  #16                   // flags consumed by b.ne below
        ld1             {v0.4s, v1.4s}, [x1], #32       // src0[0..7]
        ld1             {v2.4s, v3.4s}, [x1], #32       // src0[8..15]
        ld1             {v4.4s, v5.4s}, [x2], #32       // src1[0..7]
        ld1             {v6.4s, v7.4s}, [x2], #32       // src1[8..15]
        fmul            v16.4s, v0.4s,  v4.4s
        fmul            v17.4s, v1.4s,  v5.4s
        fmul            v18.4s, v2.4s,  v6.4s
        fmul            v19.4s, v3.4s,  v7.4s
        st1             {v16.4s, v17.4s}, [x0], #32
        st1             {v18.4s, v19.4s}, [x0], #32
        b.ne            1b
        ret
endfunc
|
||||
|
||||
/*
 * void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
 *                                 int len)
 * x0 = dst, x1 = src, s0 = mul, w2 = len
 *
 * dst[i] += src[i] * mul, 16 floats per iteration.
 * The loop only exits when the counter hits exactly zero, so len has to be a
 * positive multiple of 16.
 */
function ff_vector_fmac_scalar_neon, export=1
        mov             x3,  #-32                       // rewind step for the dst pointer
1:      subs            w2,  w2,  #16                   // flags consumed by b.ne below
        ld1             {v16.4s, v17.4s}, [x0], #32     // dst[0..7]
        ld1             {v18.4s, v19.4s}, [x0], x3      // dst[8..15], x0 back at dst[0]
        ld1             {v4.4s,  v5.4s},  [x1], #32     // src[0..7]
        ld1             {v6.4s,  v7.4s},  [x1], #32     // src[8..15]
        fmla            v16.4s, v4.4s,  v0.s[0]
        fmla            v17.4s, v5.4s,  v0.s[0]
        fmla            v18.4s, v6.4s,  v0.s[0]
        fmla            v19.4s, v7.4s,  v0.s[0]
        st1             {v16.4s, v17.4s}, [x0], #32
        st1             {v18.4s, v19.4s}, [x0], #32
        b.ne            1b
        ret
endfunc
|
||||
|
||||
/*
 * void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
 *                                 int len)
 * x0 = dst, x1 = src, s0 = mul, w2 = len
 *
 * dst[i] = src[i] * mul.
 * The part of len that is a multiple of 16 is handled by a software-pipelined
 * loop (labels 1 and 2) that leaves before prefetching past its last block;
 * the remaining len & 15 elements are handled four at a time (label 3).
 */
function ff_vector_fmul_scalar_neon, export=1
        mov             w4,  #15
        bics            w3,  w2,  w4                    // w3 = len & ~15, sets flags
        dup             v16.4s, v0.s[0]                 // broadcast mul
        b.eq            3f                              // fewer than 16: tail loop only
        ld1             {v0.4s, v1.4s}, [x1], #32       // prime the pipeline
1:      subs            w3,  w3,  #16
        fmul            v0.4s,  v0.4s,  v16.4s
        ld1             {v2.4s, v3.4s}, [x1], #32
        fmul            v1.4s,  v1.4s,  v16.4s
        fmul            v2.4s,  v2.4s,  v16.4s
        st1             {v0.4s, v1.4s}, [x0], #32
        fmul            v3.4s,  v3.4s,  v16.4s
        b.eq            2f                              // last block: skip the prefetch
        ld1             {v0.4s, v1.4s}, [x1], #32       // prefetch the next block
        st1             {v2.4s, v3.4s}, [x0], #32
        b               1b
2:      ands            w2,  w2,  #15                   // leftover element count
        st1             {v2.4s, v3.4s}, [x0], #32
        b.eq            4f
3:      ld1             {v0.4s}, [x1], #16
        fmul            v0.4s,  v0.4s,  v16.4s
        st1             {v0.4s}, [x0], #16
        subs            w2,  w2,  #4
        b.gt            3b
4:      ret
endfunc
|
||||
|
||||
/*
 * void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul,
 *                                 int len)
 * x0 = dst, x1 = src, d0 = mul, w2 = len
 *
 * dst[i] = src[i] * mul, 8 doubles per iteration (software pipelined).
 * At least one block of 8 is always processed; the loop continues while the
 * remaining count is positive.
 *
 * The exit test is taken before the prefetch of the following block, so the
 * final iteration no longer reads 32 bytes beyond src + len. SIMD loads,
 * stores and fmul do not modify the condition flags, so the flags set by
 * subs are still valid at the branch.
 */
function ff_vector_dmul_scalar_neon, export=1
        dup             v16.2d, v0.d[0]                 // broadcast mul
        ld1             {v0.2d, v1.2d}, [x1], #32       // prime the pipeline
1:      subs            w2,  w2,  #8
        fmul            v0.2d,  v0.2d,  v16.2d
        ld1             {v2.2d, v3.2d}, [x1], #32
        fmul            v1.2d,  v1.2d,  v16.2d
        fmul            v2.2d,  v2.2d,  v16.2d
        st1             {v0.2d, v1.2d}, [x0], #32
        fmul            v3.2d,  v3.2d,  v16.2d
        b.le            2f                              // last block: skip the prefetch
        ld1             {v0.2d, v1.2d}, [x1], #32       // prefetch the next block
        st1             {v2.2d, v3.2d}, [x0], #32
        b               1b
2:      st1             {v2.2d, v3.2d}, [x0], #32
        ret
endfunc
|
||||
|
||||
/*
 * void ff_vector_fmul_window_neon(float *dst, const float *src0,
 *                                 const float *src1, const float *win,
 *                                 int len)
 * x0 = dst, x1 = src0, x2 = src1, x3 = win, w4 = len
 *
 * Processes four elements from the front and four from the back per
 * iteration: src0, win and dst are walked forwards through x1, x3 and x0,
 * while src1, win and dst are walked backwards through x2, x6 and x5.
 * rev64 followed by ext #8 reverses the four floats of a vector.
 * The loop leaves through label 2 only when the counter hits exactly zero,
 * so len has to be a positive multiple of 4.
 */
function ff_vector_fmul_window_neon, export=1
        sxtw            x4,  w4                         // len
        sub             x2,  x2,  #8
        sub             x5,  x4,  #2
        add             x2,  x2,  x5, lsl #2            // src1 + 4 * (len - 4)
        add             x6,  x3,  x5, lsl #3            // win  + 8 * (len - 2)
        add             x5,  x0,  x5, lsl #3            // dst  + 8 * (len - 2)
        mov             x7,  #-16                       // backward step
        ld1             {v0.4s},  [x1], #16             // s0
        ld1             {v2.4s},  [x3], #16             // wi
        ld1             {v1.4s},  [x2], x7              // s1
1:      ld1             {v3.4s},  [x6], x7              // wj
        subs            x4,  x4,  #4
        fmul            v17.4s, v0.4s,  v2.4s           // s0 * wi
        rev64           v4.4s,  v1.4s
        rev64           v5.4s,  v3.4s
        rev64           v17.4s, v17.4s
        ext             v4.16b,  v4.16b,  v4.16b,  #8   // s1_r
        ext             v5.16b,  v5.16b,  v5.16b,  #8   // wj_r
        ext             v17.16b, v17.16b, v17.16b, #8   // (s0 * wi)_rev
        fmul            v16.4s, v0.4s,  v5.4s           // s0 * wj_r
        fmla            v17.4s, v1.4s,  v3.4s           // (s0 * wi)_rev + s1 * wj
        b.eq            2f                              // last block: no further loads
        ld1             {v0.4s},  [x1], #16
        fmls            v16.4s, v4.4s,  v2.4s           // s0 * wj_r - s1_r * wi
        st1             {v17.4s}, [x5], x7
        ld1             {v2.4s},  [x3], #16
        ld1             {v1.4s},  [x2], x7
        st1             {v16.4s}, [x0], #16
        b               1b
2:
        fmls            v16.4s, v4.4s,  v2.4s           // s0 * wj_r - s1_r * wi
        st1             {v17.4s}, [x5], x7
        st1             {v16.4s}, [x0], #16
        ret
endfunc
|
||||
|
||||
/*
 * void ff_vector_fmul_add_neon(float *dst, const float *src0,
 *                              const float *src1, const float *src2, int len)
 * x0 = dst, x1 = src0, x2 = src1, x3 = src2, w4 = len
 *
 * dst[i] = src0[i] * src1[i] + src2[i], 8 floats per iteration.
 * The first block is loaded ahead of the loop and each following block is
 * loaded only after the exit test. The loop leaves when the counter hits
 * exactly zero, so len has to be a positive multiple of 8.
 */
function ff_vector_fmul_add_neon, export=1
        ld1             {v0.4s, v1.4s}, [x1], #32       // src0
        ld1             {v2.4s, v3.4s}, [x2], #32       // src1
        ld1             {v4.4s, v5.4s}, [x3], #32       // src2, used as accumulator
1:      subs            w4,  w4,  #8
        fmla            v4.4s,  v0.4s,  v2.4s
        fmla            v5.4s,  v1.4s,  v3.4s
        b.eq            2f                              // last block: no further loads
        ld1             {v0.4s, v1.4s}, [x1], #32
        ld1             {v2.4s, v3.4s}, [x2], #32
        st1             {v4.4s, v5.4s}, [x0], #32
        ld1             {v4.4s, v5.4s}, [x3], #32
        b               1b
2:      st1             {v4.4s, v5.4s}, [x0], #32
        ret
endfunc
|
||||
|
||||
/*
 * void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
 *                                  const float *src1, int len)
 * x0 = dst, x1 = src0, x2 = src1, w3 = len
 *
 * dst[i] = src0[i] * src1[len - 1 - i], 8 floats per iteration.
 * src1 is read backwards starting at its last 8 floats; rev64 followed by
 * ext #8 reverses the four floats of each vector. The loop leaves when the
 * counter hits exactly zero, so len has to be a positive multiple of 8.
 */
function ff_vector_fmul_reverse_neon, export=1
        sxtw            x3,  w3
        add             x2,  x2,  x3, lsl #2            // src1 + 4 * len
        sub             x2,  x2,  #32                   // last 8 floats of src1
        mov             x4,  #-32                       // backward step
        ld1             {v2.4s, v3.4s}, [x2], x4
        ld1             {v0.4s, v1.4s}, [x1], #32
1:      subs            x3,  x3,  #8
        rev64           v3.4s,  v3.4s
        rev64           v2.4s,  v2.4s
        ext             v3.16b, v3.16b, v3.16b, #8      // highest four, reversed
        ext             v2.16b, v2.16b, v2.16b, #8      // next four, reversed
        fmul            v16.4s, v0.4s,  v3.4s
        fmul            v17.4s, v1.4s,  v2.4s
        b.eq            2f                              // last block: no further loads
        ld1             {v2.4s, v3.4s}, [x2], x4
        ld1             {v0.4s, v1.4s}, [x1], #32
        st1             {v16.4s, v17.4s}, [x0], #32
        b               1b
2:      st1             {v16.4s, v17.4s}, [x0], #32
        ret
endfunc
|
||||
|
||||
/*
 * void ff_butterflies_float_neon(float *v1, float *v2, int len)
 * x0 = v1, x1 = v2, w2 = len
 *
 * In place, four floats per iteration:
 *     v1[i] = v1[i] + v2[i]
 *     v2[i] = v1[i] - v2[i]      (computed from the old v1[i])
 * At least one group of four is always processed; the loop continues while
 * the remaining count is positive.
 */
function ff_butterflies_float_neon, export=1
1:      ld1             {v0.4s}, [x0]                   // v1, pointer advanced by the store
        ld1             {v1.4s}, [x1]                   // v2, pointer advanced by the store
        subs            w2,  w2,  #4
        fsub            v2.4s,  v0.4s,  v1.4s
        fadd            v3.4s,  v0.4s,  v1.4s
        st1             {v2.4s}, [x1], #16
        st1             {v3.4s}, [x0], #16
        b.gt            1b
        ret
endfunc
|
||||
|
||||
/*
 * float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len)
 * x0 = v1, x1 = v2, w2 = len, result in s0
 *
 * Accumulates v1[i] * v2[i] in four parallel lanes, four floats per
 * iteration, then folds the four lanes into one sum with two pairwise adds.
 * At least one group of four is always processed; the loop continues while
 * the remaining count is positive.
 */
function ff_scalarproduct_float_neon, export=1
        movi            v2.4s,  #0                      // clear the accumulator
1:      ld1             {v0.4s}, [x0], #16
        ld1             {v1.4s}, [x1], #16
        subs            w2,  w2,  #4
        fmla            v2.4s,  v0.4s,  v1.4s
        b.gt            1b
        faddp           v0.4s,  v2.4s,  v2.4s           // four lanes to two partial sums
        faddp           s0,  v0.2s                      // two partial sums to the result
        ret
endfunc
|
70
externals/ffmpeg/libavutil/aarch64/neontest.h
vendored
Executable file
70
externals/ffmpeg/libavutil/aarch64/neontest.h
vendored
Executable file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* check NEON registers for clobbering
|
||||
* Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com>
|
||||
* Copyright (c) 2013 Martin Storsjo
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_AARCH64_NEONTEST_H
|
||||
#define AVUTIL_AARCH64_NEONTEST_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/bswap.h"
|
||||
|
||||
/*
 * Store d8-d15 to the 64 bytes starting at mem, as four register pairs.
 * These are the low halves of v8-v15, which AAPCS64 requires a callee to
 * preserve. The "memory" clobber tells the compiler the buffer was written.
 */
#define storeneonregs(mem)                      \
    __asm__ volatile(                           \
        "stp d8, d9, [%0]\n\t"                  \
        "stp d10, d11, [%0, #16]\n\t"           \
        "stp d12, d13, [%0, #32]\n\t"           \
        "stp d14, d15, [%0, #48]\n\t"           \
        :                                       \
        : "r"(mem)                              \
        : "memory")
|
||||
|
||||
/*
 * Body of a wrapper around func: snapshot d8-d15, call __real_<func> with
 * ctx and the remaining arguments, snapshot again and compare. On a mismatch
 * every differing register is logged (old value, then new value) through
 * av_log() and the process is aborted; otherwise the result of the real
 * call is returned.
 *
 * The expansion declares the locals "neon" and "ret" and ends in a return
 * statement without a semicolon, so it must be the last statement of an
 * int-returning function and be followed by ';'.
 */
#define testneonclobbers(func, ctx, ...)                        \
    uint64_t neon[2][8];                                        \
    int ret;                                                    \
    storeneonregs(neon[0]);                                     \
    ret = __real_ ## func(ctx, __VA_ARGS__);                    \
    storeneonregs(neon[1]);                                     \
    if (memcmp(neon[0], neon[1], sizeof(neon[0])) != 0) {       \
        int i;                                                  \
        av_log(ctx, AV_LOG_ERROR,                               \
               "NEON REGS CLOBBERED IN %s!\n", #func);          \
        for (i = 0; i < 8; i++) {                               \
            if (neon[0][i] == neon[1][i])                       \
                continue;                                       \
            av_log(ctx, AV_LOG_ERROR,                           \
                   "d%-2d = %016"PRIx64"\n",                    \
                   8 + i, av_bswap64(neon[0][i]));              \
            av_log(ctx, AV_LOG_ERROR,                           \
                   " -> %016"PRIx64"\n",                        \
                   av_bswap64(neon[1][i]));                     \
        }                                                       \
        abort();                                                \
    }                                                           \
    return ret
|
||||
|
||||
/*
 * wrap(name(params)) expands to prototypes of __real_name(params) and
 * __wrap_name(params), followed by the head of the __wrap_ definition, so the
 * invocation is followed directly by the function body.
 * NOTE(review): the __real_/__wrap_ naming presumably matches the linker's
 * --wrap option - confirm against the build flags.
 */
#define wrap(func)              \
    int __real_ ## func;        \
    int __wrap_ ## func;        \
    int __wrap_ ## func
|
||||
|
||||
#endif /* AVUTIL_AARCH64_NEONTEST_H */
|
44
externals/ffmpeg/libavutil/aarch64/timer.h
vendored
Executable file
44
externals/ffmpeg/libavutil/aarch64/timer.h
vendored
Executable file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_AARCH64_TIMER_H
|
||||
#define AVUTIL_AARCH64_TIMER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
/* Timer hook: AV_READ_TIME resolves to the function defined below. */
#define AV_READ_TIME read_time

/**
 * Read the PMCCNTR_EL0 system register (the performance monitors cycle
 * counter) after an instruction synchronization barrier.
 *
 * NOTE(review): reading this register from user space normally has to be
 * enabled by the kernel first - confirm for the target platform.
 *
 * @return the current counter value
 */
static inline uint64_t read_time(void)
{
    uint64_t ticks;

    __asm__ volatile(
        "isb \t\n"
        "mrs %0, pmccntr_el0 "
        : "=r"(ticks)
        :
        : "memory");

    return ticks;
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#endif /* AVUTIL_AARCH64_TIMER_H */
|
Reference in New Issue
Block a user