early-access version 1432

This commit is contained in:
pineappleEA
2021-02-09 04:25:58 +01:00
parent de64eab4b4
commit 3d5a9d908a
7336 changed files with 1773492 additions and 111 deletions

4
externals/ffmpeg/libavutil/aarch64/Makefile vendored Executable file
View File

@@ -0,0 +1,4 @@
# Objects added to the regular object list for this directory.
OBJS += aarch64/cpu.o \
        aarch64/float_dsp_init.o

# NEON assembly objects. This must be its own assignment: a trailing backslash
# on the last OBJS entry would splice this line into the OBJS value instead.
NEON-OBJS += aarch64/float_dsp_neon.o

117
externals/ffmpeg/libavutil/aarch64/asm.S vendored Executable file
View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
/*
 * ELF is a line prefix for directives that are only meaningful for ELF output
 * (the .size and .type lines in the macros of this file). It expands to
 * nothing when __ELF__ is defined and to a lone # character otherwise.
 * NOTE(review): the lone # presumably makes the assembler treat the rest of
 * the line as a comment - confirm for the target assembler.
 */
#ifdef __ELF__
# define ELF
#else
# define ELF #
#endif
/*
 * FUNC is the same kind of prefix for the .func and .endfunc directives. It is
 * controlled by HAVE_AS_FUNC, which is not defined in this file (presumably it
 * comes from config.h - confirm).
 */
#if HAVE_AS_FUNC
# define FUNC
#else
# define FUNC #
#endif
/*
 * Fallback for preprocessors without __has_feature, so that the
 * __has_feature(hwaddress_sanitizer) test in movrel evaluates to 0 instead of
 * being a preprocessing error.
 */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/*
 * function name, export=0, align=2
 *
 * Opens a function: switches to .text, applies .align with the given align
 * argument and emits the entry label. With a non-zero export argument the
 * .global, .type, .func and label all use EXTERN_ASM pasted in front of the
 * name; otherwise the plain name is used and no .global is emitted.
 * EXTERN_ASM is not defined in this file (presumably a symbol prefix from
 * config.h - confirm).
 *
 * As a side effect it defines a one-shot endfunc macro that emits the ELF
 * .size directive and the matching .endfunc, then purges itself. Each use of
 * function is therefore expected to be closed by exactly one endfunc.
 *
 * NOTE(review): .size always names the unprefixed symbol, even when the label
 * that was emitted carries the EXTERN_ASM prefix - confirm this is intended
 * for configurations where EXTERN_ASM is not empty.
 */
.macro function name, export=0, align=2
.macro endfunc
ELF .size \name, . - \name
FUNC .endfunc
.purgem endfunc
.endm
.text
.align \align
.if \export
.global EXTERN_ASM\name
ELF .type EXTERN_ASM\name, %function
FUNC .func EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF .type \name, %function
FUNC .func \name
\name:
.endif
.endm
/*
 * const name, align=2, relocate=0
 *
 * Opens a read-only data object: selects a section, applies .align with the
 * given align argument and emits the label. Section choice:
 *   - HAVE_SECTION_DATA_REL_RO set: .data.rel.ro when relocate is non-zero,
 *     .rodata otherwise;
 *   - _WIN32: .rdata;
 *   - any other target that is not __MACH__: .rodata;
 *   - __MACH__: the .const_data directive.
 * The relocate argument is only consulted in the first case.
 *
 * It also defines a one-shot endconst macro that emits the ELF .size
 * directive for the object and then purges itself.
 */
.macro const name, align=2, relocate=0
.macro endconst
ELF .size \name, . - \name
.purgem endconst
.endm
#if HAVE_SECTION_DATA_REL_RO
.if \relocate
.section .data.rel.ro
.else
.section .rodata
.endif
#elif defined(_WIN32)
.section .rdata
#elif !defined(__MACH__)
.section .rodata
#else
.const_data
#endif
.align \align
\name:
.endm
/*
 * movrel rd, val, offset=0
 *
 * Loads the address of symbol val plus offset into register rd. The sequence
 * depends on the build configuration:
 *   - CONFIG_PIC on __APPLE__: adrp/add with the @PAGE and @PAGEOFF operators;
 *   - CONFIG_PIC on _WIN32: adrp/add with the :lo12: operator;
 *   - other CONFIG_PIC builds: adrp/add with :lo12:, where the adrp uses
 *     :pg_hi21_nc: when building with hwaddress_sanitizer;
 *   - non-PIC builds: a literal load (ldr rd, =val+offset).
 *
 * In the Apple and Windows variants a negative offset is not folded into the
 * relocation: the address of val is formed first and -(offset), which is
 * positive, is subtracted afterwards, giving the same val + offset result.
 * NOTE(review): presumably those object formats cannot express a negative
 * addend in these relocations - confirm.
 * NOTE(review): the :pg_hi21_nc: form is presumably the no-overflow-check
 * page relocation needed for tagged addresses - confirm against the
 * toolchain documentation.
 */
.macro movrel rd, val, offset=0
#if CONFIG_PIC && defined(__APPLE__)
.if \offset < 0
adrp \rd, \val@PAGE
add \rd, \rd, \val@PAGEOFF
sub \rd, \rd, -(\offset)
.else
adrp \rd, \val+(\offset)@PAGE
add \rd, \rd, \val+(\offset)@PAGEOFF
.endif
#elif CONFIG_PIC && defined(_WIN32)
.if \offset < 0
adrp \rd, \val
add \rd, \rd, :lo12:\val
sub \rd, \rd, -(\offset)
.else
adrp \rd, \val+(\offset)
add \rd, \rd, :lo12:\val+(\offset)
.endif
#elif CONFIG_PIC
# if __has_feature(hwaddress_sanitizer)
adrp \rd, :pg_hi21_nc:\val+(\offset)
# else
adrp \rd, \val+(\offset)
# endif
add \rd, \rd, :lo12:\val+(\offset)
#else
ldr \rd, =\val+\offset
#endif
.endm
/*
 * X(s) pastes EXTERN_ASM in front of s. The paste goes through JOIN and GLUE
 * so that EXTERN_ASM is macro-expanded before the ## operator is applied.
 */
#define GLUE(a, b) a ## b
#define JOIN(a, b) GLUE(a, b)
#define X(s) JOIN(EXTERN_ASM, s)
/*
 * Rename x18 and w18 to identifiers that are not register names, so any use
 * of them in assembly that includes this file is rejected by the assembler.
 * NOTE(review): presumably because x18 is reserved as a platform register on
 * some operating systems - confirm against the ABI documentation.
 */
#define x18 do_not_use_x18
#define w18 do_not_use_w18

50
externals/ffmpeg/libavutil/aarch64/bswap.h vendored Executable file
View File

@@ -0,0 +1,50 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_AARCH64_BSWAP_H
#define AVUTIL_AARCH64_BSWAP_H
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#if HAVE_INLINE_ASM
#define av_bswap16 av_bswap16
/**
 * Byte-swap a 16-bit quantity with the AArch64 REV16 instruction.
 *
 * REV16 on a 32-bit register reverses the two bytes inside each 16-bit
 * halfword, so bits 16..31 of @p x are swapped within their own halfword
 * rather than cleared.
 *
 * The self-referential define above makes the name visible to the
 * preprocessor (presumably so generic code can test for an arch-specific
 * version with ifdef - confirm against libavutil/bswap.h).
 *
 * @param x value whose low halfword is to be byte-swapped
 * @return  @p x with the bytes of each halfword exchanged
 */
static av_always_inline av_const unsigned av_bswap16(unsigned x)
{
    unsigned swapped;

    __asm__("rev16 %w0, %w1" : "=r"(swapped) : "r"(x));
    return swapped;
}
#define av_bswap32 av_bswap32
/**
 * Reverse the byte order of a 32-bit value with the AArch64 REV instruction
 * operating on a W register.
 *
 * @param x value to byte-swap
 * @return  @p x with its four bytes in reverse order
 */
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
{
    uint32_t swapped;

    __asm__("rev %w0, %w1" : "=r"(swapped) : "r"(x));
    return swapped;
}
#define av_bswap64 av_bswap64
/**
 * Reverse the byte order of a 64-bit value with the AArch64 REV instruction
 * operating on an X register.
 *
 * @param x value to byte-swap
 * @return  @p x with its eight bytes in reverse order
 */
static av_always_inline av_const uint64_t av_bswap64(uint64_t x)
{
    uint64_t swapped;

    __asm__("rev %0, %1" : "=r"(swapped) : "r"(x));
    return swapped;
}
#endif /* HAVE_INLINE_ASM */
#endif /* AVUTIL_AARCH64_BSWAP_H */

38
externals/ffmpeg/libavutil/aarch64/cpu.c vendored Executable file
View File

@@ -0,0 +1,38 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavutil/cpu_internal.h"
#include "config.h"
/**
 * Build the AArch64 CPU flag mask from compile-time configuration.
 *
 * Each AV_CPU_FLAG_* bit is multiplied by its HAVE_* configuration macro, so
 * a feature contributes its bit only when that macro is non-zero. No runtime
 * detection is performed here.
 *
 * @return bitwise OR of the enabled AV_CPU_FLAG_ARMV8, AV_CPU_FLAG_NEON and
 *         AV_CPU_FLAG_VFP flags
 */
int ff_get_cpu_flags_aarch64(void)
{
    int mask = 0;

    mask |= AV_CPU_FLAG_ARMV8 * HAVE_ARMV8;
    mask |= AV_CPU_FLAG_NEON * HAVE_NEON;
    mask |= AV_CPU_FLAG_VFP * HAVE_VFP;

    return mask;
}
/**
 * Report the alignment value associated with the current CPU flags.
 *
 * @return 16 when av_get_cpu_flags() reports AV_CPU_FLAG_NEON, 8 otherwise
 */
size_t ff_get_cpu_max_align_aarch64(void)
{
    const int cpu_caps = av_get_cpu_flags();

    return (cpu_caps & AV_CPU_FLAG_NEON) ? 16 : 8;
}

29
externals/ffmpeg/libavutil/aarch64/cpu.h vendored Executable file
View File

@@ -0,0 +1,29 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_AARCH64_CPU_H
#define AVUTIL_AARCH64_CPU_H
#include "libavutil/cpu.h"
#include "libavutil/cpu_internal.h"
/*
 * Per-feature predicates over a CPU flag word. Each one forwards the flag
 * word and a feature name to CPUEXT, which is not defined in this header
 * (presumably provided by libavutil/cpu_internal.h - confirm).
 */
#define have_armv8(flags) CPUEXT(flags, ARMV8)
#define have_neon(flags) CPUEXT(flags, NEON)
#define have_vfp(flags) CPUEXT(flags, VFP)
#endif /* AVUTIL_AARCH64_CPU_H */

View File

@@ -0,0 +1,69 @@
/*
* ARM NEON optimised Float DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/float_dsp.h"
#include "cpu.h"
/*
 * Prototypes of the NEON routines that ff_float_dsp_init_aarch64() installs
 * into AVFloatDSPContext. They are only declared in this translation unit;
 * the definitions live elsewhere (presumably the hand-written assembly in
 * float_dsp_neon.S - confirm).
 */
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1,
int len);
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul,
int len);
void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win, int len);
void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
const float *src2, int len);
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
const float *src1, int len);
void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
/**
 * Install the AArch64 NEON implementations into a float DSP context.
 *
 * When have_neon() is false for the flags returned by av_get_cpu_flags(),
 * the context is left untouched. Otherwise nine function pointers are
 * overwritten with their ff_*_neon counterparts.
 *
 * @param fdsp context whose function pointers are updated in place
 */
av_cold void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp)
{
    const int caps = av_get_cpu_flags();

    /* Nothing to install without NEON. */
    if (!(have_neon(caps)))
        return;

    /* Element-wise vector products. */
    fdsp->vector_fmul         = ff_vector_fmul_neon;
    fdsp->vector_fmul_add     = ff_vector_fmul_add_neon;
    fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
    fdsp->vector_fmul_window  = ff_vector_fmul_window_neon;

    /* Vector-by-scalar operations. */
    fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_neon;
    fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_neon;
    fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_neon;

    /* Butterflies and scalar product. */
    fdsp->butterflies_float   = ff_butterflies_float_neon;
    fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
}

View File

@@ -0,0 +1,202 @@
/*
* ARM NEON optimised Float DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "asm.S"
// void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len)
// Element-wise product: dst[i] = src0[i] * src1[i].
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src0, x2 = src1, w3 = len.
// Each pass handles 16 floats. The loop only exits when the counter reaches
// exactly zero (b.ne), so len is expected to be a non-zero multiple of 16.
function ff_vector_fmul_neon, export=1
1: subs w3, w3, #16 // count down by 16; these flags feed the b.ne below
ld1 {v0.4S, v1.4S}, [x1], #32
ld1 {v2.4S, v3.4S}, [x1], #32
ld1 {v4.4S, v5.4S}, [x2], #32
ld1 {v6.4S, v7.4S}, [x2], #32
fmul v16.4S, v0.4S, v4.4S
fmul v17.4S, v1.4S, v5.4S
fmul v18.4S, v2.4S, v6.4S
fmul v19.4S, v3.4S, v7.4S
st1 {v16.4S, v17.4S}, [x0], #32
st1 {v18.4S, v19.4S}, [x0], #32
b.ne 1b
ret
endfunc
// void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, int len)
// Multiply-accumulate with a scalar: dst[i] += src[i] * mul.
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src, v0.S[0] = mul, w2 = len.
// Each pass handles 16 floats. The loop only exits when the counter reaches
// exactly zero (b.ne), so len is expected to be a non-zero multiple of 16.
function ff_vector_fmac_scalar_neon, export=1
mov x3, #-32 // post-index used to step x0 back after the second dst load
1: subs w2, w2, #16
ld1 {v16.4S, v17.4S}, [x0], #32
ld1 {v18.4S, v19.4S}, [x0], x3 // x0 is back at the start of this 16-float group
ld1 {v4.4S, v5.4S}, [x1], #32
ld1 {v6.4S, v7.4S}, [x1], #32
fmla v16.4S, v4.4S, v0.S[0]
fmla v17.4S, v5.4S, v0.S[0]
fmla v18.4S, v6.4S, v0.S[0]
fmla v19.4S, v7.4S, v0.S[0]
st1 {v16.4S, v17.4S}, [x0], #32 // write back over the elements just read
st1 {v18.4S, v19.4S}, [x0], #32
b.ne 1b
ret
endfunc
// void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, int len)
// Scale a vector: dst[i] = src[i] * mul.
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src, v0.S[0] = mul, w2 = len.
// Structure:
//   - w3 = len with the low four bits cleared; that many elements go through
//     the 16-float main loop (labels 1 and 2), which loads the next group
//     before storing the previous one;
//   - the remaining len & 15 elements go through the 4-float loop at label 3.
// The loop at label 3 tests its counter after the body, so once reached it
// always processes at least 4 floats; it is also entered directly when len is
// below 16.
function ff_vector_fmul_scalar_neon, export=1
mov w4, #15
bics w3, w2, w4 // w3 = len & ~15, flags set for the b.eq below
dup v16.4S, v0.S[0] // broadcast mul before v0 is reused for data
b.eq 3f
ld1 {v0.4S, v1.4S}, [x1], #32
1: subs w3, w3, #16
fmul v0.4S, v0.4S, v16.4S
ld1 {v2.4S, v3.4S}, [x1], #32
fmul v1.4S, v1.4S, v16.4S
fmul v2.4S, v2.4S, v16.4S
st1 {v0.4S, v1.4S}, [x0], #32
fmul v3.4S, v3.4S, v16.4S
b.eq 2f // last 16-float group: skip the preload of the next one
ld1 {v0.4S, v1.4S}, [x1], #32
st1 {v2.4S, v3.4S}, [x0], #32
b 1b
2: ands w2, w2, #15 // w2 = number of leftover elements
st1 {v2.4S, v3.4S}, [x0], #32
b.eq 4f
3: ld1 {v0.4S}, [x1], #16
fmul v0.4S, v0.4S, v16.4S
st1 {v0.4S}, [x0], #16
subs w2, w2, #4
b.gt 3b
4: ret
endfunc
// void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul, int len)
// Scale a vector of doubles: dst[i] = src[i] * mul.
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src, v0.D[0] = mul, w2 = len.
// Each pass handles 8 doubles and runs while the remaining count is above
// zero; the body executes before the test, so at least 8 doubles are always
// processed.
// NOTE(review): the preload at the bottom of the loop is unconditional, so
// the final pass reads 32 bytes of src beyond the elements that are actually
// multiplied and stored. Confirm that callers guarantee readable padding.
function ff_vector_dmul_scalar_neon, export=1
dup v16.2D, v0.D[0] // broadcast mul before v0 is reused for data
ld1 {v0.2D, v1.2D}, [x1], #32
1: subs w2, w2, #8
fmul v0.2D, v0.2D, v16.2D
ld1 {v2.2D, v3.2D}, [x1], #32
fmul v1.2D, v1.2D, v16.2D
fmul v2.2D, v2.2D, v16.2D
st1 {v0.2D, v1.2D}, [x0], #32
fmul v3.2D, v3.2D, v16.2D
ld1 {v0.2D, v1.2D}, [x1], #32 // preload for the next pass, also on the last one
st1 {v2.2D, v3.2D}, [x0], #32
b.gt 1b
ret
endfunc
// void ff_vector_fmul_window_neon(float *dst, const float *src0,
//                                 const float *src1, const float *win, int len)
// Windowed overlap of src0 and src1.
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src0, x2 = src1, x3 = win, w4 = len.
// Two streams are processed per pass, 4 floats each:
//   - forward:  x1 (s0), x3 (wi) and the output pointer x0 move upwards;
//   - backward: x2 (s1), x6 (wj) and the output pointer x5 move downwards,
//     starting from the last 4 floats of src1 (len floats long) and of
//     win and dst (addressed as 2 * len floats long).
// The forward output receives s0 * wj_r - s1_r * wi and the backward output
// receives (s0 * wi)_rev + s1 * wj, where _r and _rev denote the 4 lanes in
// reversed order (rev64 followed by ext #8).
// The loop only exits when the counter reaches exactly zero (b.eq), so len is
// expected to be a non-zero multiple of 4.
function ff_vector_fmul_window_neon, export=1
sxtw x4, w4 // len
sub x2, x2, #8
sub x5, x4, #2
add x2, x2, x5, lsl #2 // src1 + 4 * (len - 4)
add x6, x3, x5, lsl #3 // win + 8 * (len - 2)
add x5, x0, x5, lsl #3 // dst + 8 * (len - 2)
mov x7, #-16
ld1 {v0.4S}, [x1], #16 // s0
ld1 {v2.4S}, [x3], #16 // wi
ld1 {v1.4S}, [x2], x7 // s1
1: ld1 {v3.4S}, [x6], x7 // wj
subs x4, x4, #4
fmul v17.4S, v0.4S, v2.4S // s0 * wi
rev64 v4.4S, v1.4S
rev64 v5.4S, v3.4S
rev64 v17.4S, v17.4S
ext v4.16B, v4.16B, v4.16B, #8 // s1_r
ext v5.16B, v5.16B, v5.16B, #8 // wj_r
ext v17.16B, v17.16B, v17.16B, #8 // (s0 * wi)_rev
fmul v16.4S, v0.4S, v5.4S // s0 * wj_r
fmla v17.4S, v1.4S, v3.4S // (s0 * wi)_rev + s1 * wj
b.eq 2f
ld1 {v0.4S}, [x1], #16
fmls v16.4S, v4.4S, v2.4S // s0 * wj_r - s1_r * wi
st1 {v17.4S}, [x5], x7
ld1 {v2.4S}, [x3], #16
ld1 {v1.4S}, [x2], x7
st1 {v16.4S}, [x0], #16
b 1b
2:
fmls v16.4S, v4.4S, v2.4S // s0 * wj_r - s1_r * wi
st1 {v17.4S}, [x5], x7
st1 {v16.4S}, [x0], #16
ret
endfunc
// void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
//                              const float *src2, int len)
// Multiply-add: dst[i] = src0[i] * src1[i] + src2[i].
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src0, x2 = src1, x3 = src2, w4 = len.
// Each pass handles 8 floats; the inputs of the next pass are loaded before
// the current result is stored. The loop only exits when the counter reaches
// exactly zero (b.eq), so len is expected to be a non-zero multiple of 8.
function ff_vector_fmul_add_neon, export=1
ld1 {v0.4S, v1.4S}, [x1], #32
ld1 {v2.4S, v3.4S}, [x2], #32
ld1 {v4.4S, v5.4S}, [x3], #32 // src2 doubles as the accumulator
1: subs w4, w4, #8
fmla v4.4S, v0.4S, v2.4S
fmla v5.4S, v1.4S, v3.4S
b.eq 2f // last group: store without preloading
ld1 {v0.4S, v1.4S}, [x1], #32
ld1 {v2.4S, v3.4S}, [x2], #32
st1 {v4.4S, v5.4S}, [x0], #32
ld1 {v4.4S, v5.4S}, [x3], #32
b 1b
2: st1 {v4.4S, v5.4S}, [x0], #32
ret
endfunc
// void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
//                                  const float *src1, int len)
// Product with the second operand read backwards:
//   dst[i] = src0[i] * src1[len - 1 - i].
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = dst, x1 = src0, x2 = src1, w3 = len.
// x2 is first moved to the last 8 floats of src1 and then steps down by 32
// bytes per pass (x4 = -32). rev64 followed by ext #8 reverses the 4 lanes of
// a vector, and the two loaded vectors are used in swapped order.
// Each pass handles 8 floats. The loop only exits when the counter reaches
// exactly zero (b.eq), so len is expected to be a non-zero multiple of 8.
function ff_vector_fmul_reverse_neon, export=1
sxtw x3, w3
add x2, x2, x3, lsl #2
sub x2, x2, #32 // x2 = src1 + len - 8 floats
mov x4, #-32
ld1 {v2.4S, v3.4S}, [x2], x4
ld1 {v0.4S, v1.4S}, [x1], #32
1: subs x3, x3, #8
rev64 v3.4S, v3.4S
rev64 v2.4S, v2.4S
ext v3.16B, v3.16B, v3.16B, #8
ext v2.16B, v2.16B, v2.16B, #8
fmul v16.4S, v0.4S, v3.4S
fmul v17.4S, v1.4S, v2.4S
b.eq 2f // last group: store without preloading
ld1 {v2.4S, v3.4S}, [x2], x4
ld1 {v0.4S, v1.4S}, [x1], #32
st1 {v16.4S, v17.4S}, [x0], #32
b 1b
2: st1 {v16.4S, v17.4S}, [x0], #32
ret
endfunc
// void ff_butterflies_float_neon(float *v1, float *v2, int len)
// In-place butterfly on two vectors. With a = v1[i] and b = v2[i] as read:
//   v1[i] = a + b, v2[i] = a - b.
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = v1, x1 = v2, w2 = len.
// Each pass handles 4 floats and runs while the remaining count is above
// zero; the body executes before the test, so at least 4 floats are always
// processed.
function ff_butterflies_float_neon, export=1
1: ld1 {v0.4S}, [x0]
ld1 {v1.4S}, [x1]
subs w2, w2, #4
fsub v2.4S, v0.4S, v1.4S
fadd v3.4S, v0.4S, v1.4S
st1 {v2.4S}, [x1], #16 // difference goes to the second vector
st1 {v3.4S}, [x0], #16 // sum goes to the first vector
b.gt 1b
ret
endfunc
// float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len)
// Dot product: returns the sum of v1[i] * v2[i].
// Registers, assuming the standard AAPCS64 argument mapping:
//   x0 = v1, x1 = v2, w2 = len, result in s0.
// Four partial sums are kept in the lanes of v2 and combined after the loop.
// Each pass handles 4 floats and runs while the remaining count is above
// zero; the body executes before the test, so at least 4 floats are always
// processed.
function ff_scalarproduct_float_neon, export=1
movi v2.4S, #0 // clear the four lane accumulators
1: ld1 {v0.4S}, [x0], #16
ld1 {v1.4S}, [x1], #16
subs w2, w2, #4
fmla v2.4S, v0.4S, v1.4S
b.gt 1b
faddp v0.4S, v2.4S, v2.4S // pairwise add: lanes 0+1 and 2+3
faddp s0, v0.2S // add the two pair sums into the scalar result
ret
endfunc

70
externals/ffmpeg/libavutil/aarch64/neontest.h vendored Executable file
View File

@@ -0,0 +1,70 @@
/*
* check NEON registers for clobbering
* Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com>
* Copyright (c) 2013 Martin Storsjo
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_AARCH64_NEONTEST_H
#define AVUTIL_AARCH64_NEONTEST_H
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "libavutil/bswap.h"
/*
 * Store registers d8 through d15 to the 64 bytes starting at mem, in register
 * order, using four stp instructions. The asm is volatile and declares a
 * memory clobber so the compiler neither removes it nor caches the buffer
 * contents across it.
 * NOTE(review): d8-d15 are presumably chosen because they are the
 * callee-saved SIMD registers under AAPCS64 - confirm against the ABI.
 */
#define storeneonregs(mem) \
__asm__ volatile( \
"stp d8, d9, [%0]\n\t" \
"stp d10, d11, [%0, #16]\n\t" \
"stp d12, d13, [%0, #32]\n\t" \
"stp d14, d15, [%0, #48]\n\t" \
:: "r"(mem) : "memory")
/*
 * Body of a wrapper that checks whether a call changes d8-d15.
 *
 * Expands to a statement sequence that declares the locals neon and ret,
 * snapshots d8-d15, calls __real_<func>(ctx, __VA_ARGS__), snapshots the
 * registers again and compares the two snapshots. On a mismatch it logs the
 * function name and, for every differing register, the value before and
 * after the call, then calls abort(). Otherwise it ends in "return ret"
 * without a trailing semicolon, which the user of the macro supplies.
 *
 * Requirements visible from the expansion:
 *   - it must be used inside a function that returns int;
 *   - at least one argument must follow ctx, since __VA_ARGS__ is preceded
 *     by a comma;
 *   - av_log and AV_LOG_ERROR must be visible where it is used.
 *     NOTE(review): none of the headers included directly in this file
 *     declares them - confirm they arrive through the including file.
 *
 * NOTE(review): the logged values go through av_bswap64, presumably so they
 * print in memory byte order on little-endian targets - confirm.
 */
#define testneonclobbers(func, ctx, ...) \
uint64_t neon[2][8]; \
int ret; \
storeneonregs(neon[0]); \
ret = __real_ ## func(ctx, __VA_ARGS__); \
storeneonregs(neon[1]); \
if (memcmp(neon[0], neon[1], sizeof(neon[0]))) { \
int i; \
av_log(ctx, AV_LOG_ERROR, \
"NEON REGS CLOBBERED IN %s!\n", #func); \
for (i = 0; i < 8; i ++) \
if (neon[0][i] != neon[1][i]) { \
av_log(ctx, AV_LOG_ERROR, \
"d%-2d = %016"PRIx64"\n", \
8 + i, av_bswap64(neon[0][i])); \
av_log(ctx, AV_LOG_ERROR, \
" -> %016"PRIx64"\n", \
av_bswap64(neon[1][i])); \
} \
abort(); \
} \
return ret
/*
 * Emit three int declarations for func: one with the __real_ prefix and two
 * with the __wrap_ prefix. The last one has no terminating semicolon, so the
 * text following wrap(...) completes it.
 * NOTE(review): func is presumably a function name together with its
 * parameter list, making the first two expansions prototypes and the third
 * the start of the wrapper definition - confirm against the users of this
 * macro. The __real_ and __wrap_ names presumably pair with the linker
 * --wrap option - confirm in the build system.
 */
#define wrap(func) \
int __real_ ## func; \
int __wrap_ ## func; \
int __wrap_ ## func
#endif /* AVUTIL_AARCH64_NEONTEST_H */

44
externals/ffmpeg/libavutil/aarch64/timer.h vendored Executable file
View File

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_AARCH64_TIMER_H
#define AVUTIL_AARCH64_TIMER_H
#include <stdint.h>
#include "config.h"
#if HAVE_INLINE_ASM
#define AV_READ_TIME read_time
/**
 * Read the AArch64 system register pmccntr_el0.
 *
 * An ISB is issued first and the asm is volatile with a memory clobber, so
 * the compiler neither removes the read nor moves memory accesses across it.
 * NOTE(review): reading pmccntr_el0 from user space presumably requires the
 * operating system to have enabled access to it - confirm for the target
 * platform.
 *
 * @return the 64-bit value read from pmccntr_el0
 */
static inline uint64_t read_time(void)
{
    uint64_t ticks;

    __asm__ volatile("isb \t\n"
                     "mrs %0, pmccntr_el0 "
                     : "=r"(ticks)
                     :
                     : "memory");
    return ticks;
}
#endif /* HAVE_INLINE_ASM */
#endif /* AVUTIL_AARCH64_TIMER_H */