early-access version 1432
This commit is contained in:
29
externals/ffmpeg/libswscale/Makefile
vendored
Executable file
29
externals/ffmpeg/libswscale/Makefile
vendored
Executable file
@@ -0,0 +1,29 @@
|
||||
NAME = swscale
|
||||
DESC = FFmpeg image rescaling library
|
||||
|
||||
HEADERS = swscale.h \
|
||||
version.h \
|
||||
|
||||
OBJS = alphablend.o \
|
||||
hscale.o \
|
||||
hscale_fast_bilinear.o \
|
||||
gamma.o \
|
||||
input.o \
|
||||
options.o \
|
||||
output.o \
|
||||
rgb2rgb.o \
|
||||
slice.o \
|
||||
swscale.o \
|
||||
swscale_unscaled.o \
|
||||
utils.o \
|
||||
yuv2rgb.o \
|
||||
vscale.o \
|
||||
|
||||
OBJS-$(CONFIG_SHARED) += log2_tab.o
|
||||
|
||||
# Windows resource file
|
||||
SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o
|
||||
|
||||
TESTPROGS = colorspace \
|
||||
pixdesc_query \
|
||||
swscale \
|
||||
8
externals/ffmpeg/libswscale/aarch64/Makefile
vendored
Executable file
8
externals/ffmpeg/libswscale/aarch64/Makefile
vendored
Executable file
@@ -0,0 +1,8 @@
|
||||
OBJS += aarch64/rgb2rgb.o \
|
||||
aarch64/swscale.o \
|
||||
aarch64/swscale_unscaled.o \
|
||||
|
||||
NEON-OBJS += aarch64/hscale.o \
|
||||
aarch64/output.o \
|
||||
aarch64/rgb2rgb_neon.o \
|
||||
aarch64/yuv2rgb_neon.o \
|
||||
80
externals/ffmpeg/libswscale/aarch64/hscale.S
vendored
Executable file
80
externals/ffmpeg/libswscale/aarch64/hscale.S
vendored
Executable file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Clément Bœsch <clement stupeflix.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_hscale_8_to_15_neon, export=1
|
||||
sbfiz x7, x6, #1, #32 // filterSize*2 (*2 because int16)
|
||||
1: ldr w8, [x5], #4 // filterPos[idx]
|
||||
ldr w0, [x5], #4 // filterPos[idx + 1]
|
||||
ldr w11, [x5], #4 // filterPos[idx + 2]
|
||||
ldr w9, [x5], #4 // filterPos[idx + 3]
|
||||
mov x16, x4 // filter0 = filter
|
||||
add x12, x16, x7 // filter1 = filter0 + filterSize*2
|
||||
add x13, x12, x7 // filter2 = filter1 + filterSize*2
|
||||
add x4, x13, x7 // filter3 = filter2 + filterSize*2
|
||||
movi v0.2D, #0 // val sum part 1 (for dst[0])
|
||||
movi v1.2D, #0 // val sum part 2 (for dst[1])
|
||||
movi v2.2D, #0 // val sum part 3 (for dst[2])
|
||||
movi v3.2D, #0 // val sum part 4 (for dst[3])
|
||||
add x17, x3, w8, UXTW // srcp + filterPos[0]
|
||||
add x8, x3, w0, UXTW // srcp + filterPos[1]
|
||||
add x0, x3, w11, UXTW // srcp + filterPos[2]
|
||||
add x11, x3, w9, UXTW // srcp + filterPos[3]
|
||||
mov w15, w6 // filterSize counter
|
||||
2: ld1 {v4.8B}, [x17], #8 // srcp[filterPos[0] + {0..7}]
|
||||
ld1 {v5.8H}, [x16], #16 // load 8x16-bit filter values, part 1
|
||||
ld1 {v6.8B}, [x8], #8 // srcp[filterPos[1] + {0..7}]
|
||||
ld1 {v7.8H}, [x12], #16 // load 8x16-bit at filter+filterSize
|
||||
uxtl v4.8H, v4.8B // unpack part 1 to 16-bit
|
||||
smlal v0.4S, v4.4H, v5.4H // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}]
|
||||
smlal2 v0.4S, v4.8H, v5.8H // v0 accumulates srcp[filterPos[0] + {4..7}] * filter[{4..7}]
|
||||
ld1 {v16.8B}, [x0], #8 // srcp[filterPos[2] + {0..7}]
|
||||
ld1 {v17.8H}, [x13], #16 // load 8x16-bit at filter+2*filterSize
|
||||
uxtl v6.8H, v6.8B // unpack part 2 to 16-bit
|
||||
smlal v1.4S, v6.4H, v7.4H // v1 accumulates srcp[filterPos[1] + {0..3}] * filter[{0..3}]
|
||||
uxtl v16.8H, v16.8B // unpack part 3 to 16-bit
|
||||
smlal v2.4S, v16.4H, v17.4H // v2 accumulates srcp[filterPos[2] + {0..3}] * filter[{0..3}]
|
||||
smlal2 v2.4S, v16.8H, v17.8H // v2 accumulates srcp[filterPos[2] + {4..7}] * filter[{4..7}]
|
||||
ld1 {v18.8B}, [x11], #8 // srcp[filterPos[3] + {0..7}]
|
||||
smlal2 v1.4S, v6.8H, v7.8H // v1 accumulates srcp[filterPos[1] + {4..7}] * filter[{4..7}]
|
||||
ld1 {v19.8H}, [x4], #16 // load 8x16-bit at filter+3*filterSize
|
||||
subs w15, w15, #8 // j -= 8: processed 8/filterSize
|
||||
uxtl v18.8H, v18.8B // unpack part 4 to 16-bit
|
||||
smlal v3.4S, v18.4H, v19.4H // v3 accumulates srcp[filterPos[3] + {0..3}] * filter[{0..3}]
|
||||
smlal2 v3.4S, v18.8H, v19.8H // v3 accumulates srcp[filterPos[3] + {4..7}] * filter[{4..7}]
|
||||
b.gt 2b // inner loop if filterSize not consumed completely
|
||||
addp v0.4S, v0.4S, v0.4S // part0 horizontal pair adding
|
||||
addp v1.4S, v1.4S, v1.4S // part1 horizontal pair adding
|
||||
addp v2.4S, v2.4S, v2.4S // part2 horizontal pair adding
|
||||
addp v3.4S, v3.4S, v3.4S // part3 horizontal pair adding
|
||||
addp v0.4S, v0.4S, v0.4S // part0 horizontal pair adding
|
||||
addp v1.4S, v1.4S, v1.4S // part1 horizontal pair adding
|
||||
addp v2.4S, v2.4S, v2.4S // part2 horizontal pair adding
|
||||
addp v3.4S, v3.4S, v3.4S // part3 horizontal pair adding
|
||||
zip1 v0.4S, v0.4S, v1.4S // part01 = zip values from part0 and part1
|
||||
zip1 v2.4S, v2.4S, v3.4S // part23 = zip values from part2 and part3
|
||||
mov v0.d[1], v2.d[0] // part0123 = zip values from part01 and part23
|
||||
subs w2, w2, #4 // dstW -= 4
|
||||
sqshrn v0.4H, v0.4S, #7 // shift and clip the 2x16-bit final values
|
||||
st1 {v0.4H}, [x1], #8 // write to destination part0123
|
||||
b.gt 1b // loop until end of line
|
||||
ret
|
||||
endfunc
|
||||
58
externals/ffmpeg/libswscale/aarch64/output.S
vendored
Executable file
58
externals/ffmpeg/libswscale/aarch64/output.S
vendored
Executable file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Clément Bœsch <clement stupeflix.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_yuv2planeX_8_neon, export=1
|
||||
ld1 {v0.8B}, [x5] // load 8x8-bit dither
|
||||
cbz w6, 1f // check if offsetting present
|
||||
ext v0.8B, v0.8B, v0.8B, #3 // honor offsetting which can be 0 or 3 only
|
||||
1: uxtl v0.8H, v0.8B // extend dither to 16-bit
|
||||
ushll v1.4S, v0.4H, #12 // extend dither to 32-bit with left shift by 12 (part 1)
|
||||
ushll2 v2.4S, v0.8H, #12 // extend dither to 32-bit with left shift by 12 (part 2)
|
||||
mov x7, #0 // i = 0
|
||||
2: mov v3.16B, v1.16B // initialize accumulator part 1 with dithering value
|
||||
mov v4.16B, v2.16B // initialize accumulator part 2 with dithering value
|
||||
mov w8, w1 // tmpfilterSize = filterSize
|
||||
mov x9, x2 // srcp = src
|
||||
mov x10, x0 // filterp = filter
|
||||
3: ldp x11, x12, [x9], #16 // get 2 pointers: src[j] and src[j+1]
|
||||
add x11, x11, x7, lsl #1 // &src[j ][i]
|
||||
add x12, x12, x7, lsl #1 // &src[j+1][i]
|
||||
ld1 {v5.8H}, [x11] // read 8x16-bit @ src[j ][i + {0..7}]: A,B,C,D,E,F,G,H
|
||||
ld1 {v6.8H}, [x12] // read 8x16-bit @ src[j+1][i + {0..7}]: I,J,K,L,M,N,O,P
|
||||
ld1r {v7.8H}, [x10], #2 // read 1x16-bit coeff X at filter[j ] and duplicate across lanes
|
||||
ld1r {v16.8H}, [x10], #2 // read 1x16-bit coeff Y at filter[j+1] and duplicate across lanes
|
||||
smlal v3.4S, v5.4H, v7.4H // val0 += {A,B,C,D} * X
|
||||
smlal2 v4.4S, v5.8H, v7.8H // val1 += {E,F,G,H} * X
|
||||
smlal v3.4S, v6.4H, v16.4H // val0 += {I,J,K,L} * Y
|
||||
smlal2 v4.4S, v6.8H, v16.8H // val1 += {M,N,O,P} * Y
|
||||
subs w8, w8, #2 // tmpfilterSize -= 2
|
||||
b.gt 3b // loop until filterSize consumed
|
||||
|
||||
sqshrun v3.4h, v3.4s, #16 // clip16(val0>>16)
|
||||
sqshrun2 v3.8h, v4.4s, #16 // clip16(val1>>16)
|
||||
uqshrn v3.8b, v3.8h, #3 // clip8(val>>19)
|
||||
st1 {v3.8b}, [x3], #8 // write to destination
|
||||
subs w4, w4, #8 // dstW -= 8
|
||||
add x7, x7, #8 // i += 8
|
||||
b.gt 2b // loop until width consumed
|
||||
ret
|
||||
endfunc
|
||||
41
externals/ffmpeg/libswscale/aarch64/rgb2rgb.c
vendored
Executable file
41
externals/ffmpeg/libswscale/aarch64/rgb2rgb.c
vendored
Executable file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/aarch64/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/bswap.h"
|
||||
#include "libswscale/rgb2rgb.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
|
||||
uint8_t *dest, int width, int height,
|
||||
int src1Stride, int src2Stride, int dstStride);
|
||||
|
||||
av_cold void rgb2rgb_init_aarch64(void)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (have_neon(cpu_flags)) {
|
||||
interleaveBytes = ff_interleave_bytes_neon;
|
||||
}
|
||||
}
|
||||
79
externals/ffmpeg/libswscale/aarch64/rgb2rgb_neon.S
vendored
Executable file
79
externals/ffmpeg/libswscale/aarch64/rgb2rgb_neon.S
vendored
Executable file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2020 Martin Storsjo
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
// void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
|
||||
// uint8_t *dest, int width, int height,
|
||||
// int src1Stride, int src2Stride, int dstStride);
|
||||
function ff_interleave_bytes_neon, export=1
|
||||
sub w5, w5, w3
|
||||
sub w6, w6, w3
|
||||
sub w7, w7, w3, lsl #1
|
||||
1:
|
||||
ands w8, w3, #0xfffffff0 // & ~15
|
||||
b.eq 3f
|
||||
2:
|
||||
ld1 {v0.16b}, [x0], #16
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
subs w8, w8, #16
|
||||
st2 {v0.16b, v1.16b}, [x2], #32
|
||||
b.gt 2b
|
||||
|
||||
tst w3, #15
|
||||
b.eq 9f
|
||||
|
||||
3:
|
||||
tst w3, #8
|
||||
b.eq 4f
|
||||
ld1 {v0.8b}, [x0], #8
|
||||
ld1 {v1.8b}, [x1], #8
|
||||
st2 {v0.8b, v1.8b}, [x2], #16
|
||||
4:
|
||||
tst w3, #4
|
||||
b.eq 5f
|
||||
|
||||
ld1 {v0.s}[0], [x0], #4
|
||||
ld1 {v1.s}[0], [x1], #4
|
||||
zip1 v0.8b, v0.8b, v1.8b
|
||||
st1 {v0.8b}, [x2], #8
|
||||
|
||||
5:
|
||||
ands w8, w3, #3
|
||||
b.eq 9f
|
||||
6:
|
||||
ldrb w9, [x0], #1
|
||||
ldrb w10, [x1], #1
|
||||
subs w8, w8, #1
|
||||
bfi w9, w10, #8, #8
|
||||
strh w9, [x2], #2
|
||||
b.gt 6b
|
||||
|
||||
9:
|
||||
subs w4, w4, #1
|
||||
b.eq 0f
|
||||
add x0, x0, w5, sxtw
|
||||
add x1, x1, w6, sxtw
|
||||
add x2, x2, w7, sxtw
|
||||
b 1b
|
||||
|
||||
0:
|
||||
ret
|
||||
endfunc
|
||||
47
externals/ffmpeg/libswscale/aarch64/swscale.c
vendored
Executable file
47
externals/ffmpeg/libswscale/aarch64/swscale.c
vendored
Executable file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/aarch64/cpu.h"
|
||||
|
||||
void ff_hscale_8_to_15_neon(SwsContext *c, int16_t *dst, int dstW,
|
||||
const uint8_t *src, const int16_t *filter,
|
||||
const int32_t *filterPos, int filterSize);
|
||||
|
||||
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
|
||||
const int16_t **src, uint8_t *dest, int dstW,
|
||||
const uint8_t *dither, int offset);
|
||||
|
||||
av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (have_neon(cpu_flags)) {
|
||||
if (c->srcBpc == 8 && c->dstBpc <= 14 &&
|
||||
(c->hLumFilterSize % 8) == 0 &&
|
||||
(c->hChrFilterSize % 8) == 0)
|
||||
{
|
||||
c->hyScale = c->hcScale = ff_hscale_8_to_15_neon;
|
||||
}
|
||||
if (c->dstBpc == 8) {
|
||||
c->yuv2planeX = ff_yuv2planeX_8_neon;
|
||||
}
|
||||
}
|
||||
}
|
||||
132
externals/ffmpeg/libswscale/aarch64/swscale_unscaled.c
vendored
Executable file
132
externals/ffmpeg/libswscale/aarch64/swscale_unscaled.c
vendored
Executable file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/aarch64/cpu.h"
|
||||
|
||||
#define YUV_TO_RGB_TABLE \
|
||||
c->yuv2rgb_v2r_coeff, \
|
||||
c->yuv2rgb_u2g_coeff, \
|
||||
c->yuv2rgb_v2g_coeff, \
|
||||
c->yuv2rgb_u2b_coeff, \
|
||||
|
||||
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) \
|
||||
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
|
||||
uint8_t *dst, int linesize, \
|
||||
const uint8_t *srcY, int linesizeY, \
|
||||
const uint8_t *srcU, int linesizeU, \
|
||||
const uint8_t *srcV, int linesizeV, \
|
||||
const int16_t *table, \
|
||||
int y_offset, \
|
||||
int y_coeff); \
|
||||
\
|
||||
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
|
||||
int srcStride[], int srcSliceY, int srcSliceH, \
|
||||
uint8_t *dst[], int dstStride[]) { \
|
||||
const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
|
||||
\
|
||||
ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
|
||||
dst[0] + srcSliceY * dstStride[0], dstStride[0], \
|
||||
src[0], srcStride[0], \
|
||||
src[1], srcStride[1], \
|
||||
src[2], srcStride[2], \
|
||||
yuv2rgb_table, \
|
||||
c->yuv2rgb_y_offset >> 6, \
|
||||
c->yuv2rgb_y_coeff); \
|
||||
return 0; \
|
||||
} \
|
||||
|
||||
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra) \
|
||||
|
||||
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv420p)
|
||||
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv422p)
|
||||
|
||||
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \
|
||||
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
|
||||
uint8_t *dst, int linesize, \
|
||||
const uint8_t *srcY, int linesizeY, \
|
||||
const uint8_t *srcC, int linesizeC, \
|
||||
const int16_t *table, \
|
||||
int y_offset, \
|
||||
int y_coeff); \
|
||||
\
|
||||
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
|
||||
int srcStride[], int srcSliceY, int srcSliceH, \
|
||||
uint8_t *dst[], int dstStride[]) { \
|
||||
const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
|
||||
\
|
||||
ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
|
||||
dst[0] + srcSliceY * dstStride[0], dstStride[0], \
|
||||
src[0], srcStride[0], src[1], srcStride[1], \
|
||||
yuv2rgb_table, \
|
||||
c->yuv2rgb_y_offset >> 6, \
|
||||
c->yuv2rgb_y_coeff); \
|
||||
\
|
||||
return 0; \
|
||||
} \
|
||||
|
||||
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra) \
|
||||
|
||||
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv12)
|
||||
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv21)
|
||||
|
||||
/* We need a 16 pixel width alignment. This constraint can easily be removed
|
||||
* for input reading but for the output which is 4-bytes per pixel (RGBA) the
|
||||
* assembly might be writing as much as 4*15=60 extra bytes at the end of the
|
||||
* line, which won't fit the 32-bytes buffer alignment. */
|
||||
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do { \
|
||||
if (c->srcFormat == AV_PIX_FMT_##IFMT \
|
||||
&& c->dstFormat == AV_PIX_FMT_##OFMT \
|
||||
&& !(c->srcH & 1) \
|
||||
&& !(c->srcW & 15) \
|
||||
&& !accurate_rnd) \
|
||||
c->swscale = ifmt##_to_##ofmt##_neon_wrapper; \
|
||||
} while (0)
|
||||
|
||||
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do { \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd); \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd); \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd); \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd); \
|
||||
} while (0)
|
||||
|
||||
static void get_unscaled_swscale_neon(SwsContext *c) {
|
||||
int accurate_rnd = c->flags & SWS_ACCURATE_RND;
|
||||
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
|
||||
}
|
||||
|
||||
void ff_get_unscaled_swscale_aarch64(SwsContext *c)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
if (have_neon(cpu_flags))
|
||||
get_unscaled_swscale_neon(c);
|
||||
}
|
||||
210
externals/ffmpeg/libswscale/aarch64/yuv2rgb_neon.S
vendored
Executable file
210
externals/ffmpeg/libswscale/aarch64/yuv2rgb_neon.S
vendored
Executable file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
|
||||
* Copyright (c) 2016 Clément Bœsch <clement stupeflix.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
.macro load_yoff_ycoeff yoff ycoeff
|
||||
#if defined(__APPLE__)
|
||||
ldp w9, w10, [sp, #\yoff]
|
||||
#else
|
||||
ldr w9, [sp, #\yoff]
|
||||
ldr w10, [sp, #\ycoeff]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro load_args_nv12
|
||||
ldr x8, [sp] // table
|
||||
load_yoff_ycoeff 8, 16 // y_offset, y_coeff
|
||||
ld1 {v1.1D}, [x8]
|
||||
dup v0.8H, w10
|
||||
dup v3.8H, w9
|
||||
sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding)
|
||||
sub w5, w5, w0 // w5 = linesizeY - width (paddingY)
|
||||
sub w7, w7, w0 // w7 = linesizeC - width (paddingC)
|
||||
neg w11, w0
|
||||
.endm
|
||||
|
||||
.macro load_args_nv21
|
||||
load_args_nv12
|
||||
.endm
|
||||
|
||||
.macro load_args_yuv420p
|
||||
ldr x13, [sp] // srcV
|
||||
ldr w14, [sp, #8] // linesizeV
|
||||
ldr x8, [sp, #16] // table
|
||||
load_yoff_ycoeff 24, 32 // y_offset, y_coeff
|
||||
ld1 {v1.1D}, [x8]
|
||||
dup v0.8H, w10
|
||||
dup v3.8H, w9
|
||||
sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding)
|
||||
sub w5, w5, w0 // w5 = linesizeY - width (paddingY)
|
||||
sub w7, w7, w0, lsr #1 // w7 = linesizeU - width / 2 (paddingU)
|
||||
sub w14, w14, w0, lsr #1 // w14 = linesizeV - width / 2 (paddingV)
|
||||
lsr w11, w0, #1
|
||||
neg w11, w11
|
||||
.endm
|
||||
|
||||
.macro load_args_yuv422p
|
||||
ldr x13, [sp] // srcV
|
||||
ldr w14, [sp, #8] // linesizeV
|
||||
ldr x8, [sp, #16] // table
|
||||
load_yoff_ycoeff 24, 32 // y_offset, y_coeff
|
||||
ld1 {v1.1D}, [x8]
|
||||
dup v0.8H, w10
|
||||
dup v3.8H, w9
|
||||
sub w3, w3, w0, lsl #2 // w3 = linesize - width * 4 (padding)
|
||||
sub w5, w5, w0 // w5 = linesizeY - width (paddingY)
|
||||
sub w7, w7, w0, lsr #1 // w7 = linesizeU - width / 2 (paddingU)
|
||||
sub w14, w14, w0, lsr #1 // w14 = linesizeV - width / 2 (paddingV)
|
||||
.endm
|
||||
|
||||
.macro load_chroma_nv12
|
||||
ld2 {v16.8B, v17.8B}, [x6], #16
|
||||
ushll v18.8H, v16.8B, #3
|
||||
ushll v19.8H, v17.8B, #3
|
||||
.endm
|
||||
|
||||
.macro load_chroma_nv21
|
||||
ld2 {v16.8B, v17.8B}, [x6], #16
|
||||
ushll v19.8H, v16.8B, #3
|
||||
ushll v18.8H, v17.8B, #3
|
||||
.endm
|
||||
|
||||
.macro load_chroma_yuv420p
|
||||
ld1 {v16.8B}, [ x6], #8
|
||||
ld1 {v17.8B}, [x13], #8
|
||||
ushll v18.8H, v16.8B, #3
|
||||
ushll v19.8H, v17.8B, #3
|
||||
.endm
|
||||
|
||||
.macro load_chroma_yuv422p
|
||||
load_chroma_yuv420p
|
||||
.endm
|
||||
|
||||
.macro increment_nv12
|
||||
ands w15, w1, #1
|
||||
csel w16, w7, w11, ne // incC = (h & 1) ? paddincC : -width
|
||||
add x6, x6, w16, SXTW // srcC += incC
|
||||
.endm
|
||||
|
||||
.macro increment_nv21
|
||||
increment_nv12
|
||||
.endm
|
||||
|
||||
.macro increment_yuv420p
|
||||
ands w15, w1, #1
|
||||
csel w16, w7, w11, ne // incU = (h & 1) ? paddincU : -width/2
|
||||
csel w17, w14, w11, ne // incV = (h & 1) ? paddincV : -width/2
|
||||
add x6, x6, w16, SXTW // srcU += incU
|
||||
add x13, x13, w17, SXTW // srcV += incV
|
||||
.endm
|
||||
|
||||
.macro increment_yuv422p
|
||||
add x6, x6, w7, UXTW // srcU += incU
|
||||
add x13, x13, w14, UXTW // srcV += incV
|
||||
.endm
|
||||
|
||||
.macro compute_rgba r1 g1 b1 a1 r2 g2 b2 a2
|
||||
add v20.8H, v26.8H, v20.8H // Y1 + R1
|
||||
add v21.8H, v27.8H, v21.8H // Y2 + R2
|
||||
add v22.8H, v26.8H, v22.8H // Y1 + G1
|
||||
add v23.8H, v27.8H, v23.8H // Y2 + G2
|
||||
add v24.8H, v26.8H, v24.8H // Y1 + B1
|
||||
add v25.8H, v27.8H, v25.8H // Y2 + B2
|
||||
sqrshrun \r1, v20.8H, #1 // clip_u8((Y1 + R1) >> 1)
|
||||
sqrshrun \r2, v21.8H, #1 // clip_u8((Y2 + R1) >> 1)
|
||||
sqrshrun \g1, v22.8H, #1 // clip_u8((Y1 + G1) >> 1)
|
||||
sqrshrun \g2, v23.8H, #1 // clip_u8((Y2 + G1) >> 1)
|
||||
sqrshrun \b1, v24.8H, #1 // clip_u8((Y1 + B1) >> 1)
|
||||
sqrshrun \b2, v25.8H, #1 // clip_u8((Y2 + B1) >> 1)
|
||||
movi \a1, #255
|
||||
movi \a2, #255
|
||||
.endm
|
||||
|
||||
.macro declare_func ifmt ofmt
|
||||
function ff_\ifmt\()_to_\ofmt\()_neon, export=1
|
||||
load_args_\ifmt
|
||||
1:
|
||||
mov w8, w0 // w8 = width
|
||||
2:
|
||||
movi v5.8H, #4, lsl #8 // 128 * (1<<3)
|
||||
load_chroma_\ifmt
|
||||
sub v18.8H, v18.8H, v5.8H // U*(1<<3) - 128*(1<<3)
|
||||
sub v19.8H, v19.8H, v5.8H // V*(1<<3) - 128*(1<<3)
|
||||
sqdmulh v20.8H, v19.8H, v1.H[0] // V * v2r (R)
|
||||
sqdmulh v22.8H, v18.8H, v1.H[1] // U * u2g
|
||||
sqdmulh v19.8H, v19.8H, v1.H[2] // V * v2g
|
||||
add v22.8H, v22.8H, v19.8H // U * u2g + V * v2g (G)
|
||||
sqdmulh v24.8H, v18.8H, v1.H[3] // U * u2b (B)
|
||||
zip2 v21.8H, v20.8H, v20.8H // R2
|
||||
zip1 v20.8H, v20.8H, v20.8H // R1
|
||||
zip2 v23.8H, v22.8H, v22.8H // G2
|
||||
zip1 v22.8H, v22.8H, v22.8H // G1
|
||||
zip2 v25.8H, v24.8H, v24.8H // B2
|
||||
zip1 v24.8H, v24.8H, v24.8H // B1
|
||||
ld1 {v2.16B}, [x4], #16 // load luma
|
||||
ushll v26.8H, v2.8B, #3 // Y1*(1<<3)
|
||||
ushll2 v27.8H, v2.16B, #3 // Y2*(1<<3)
|
||||
sub v26.8H, v26.8H, v3.8H // Y1*(1<<3) - y_offset
|
||||
sub v27.8H, v27.8H, v3.8H // Y2*(1<<3) - y_offset
|
||||
sqdmulh v26.8H, v26.8H, v0.8H // ((Y1*(1<<3) - y_offset) * y_coeff) >> 15
|
||||
sqdmulh v27.8H, v27.8H, v0.8H // ((Y2*(1<<3) - y_offset) * y_coeff) >> 15
|
||||
|
||||
.ifc \ofmt,argb // 1 2 3 0
|
||||
compute_rgba v5.8B,v6.8B,v7.8B,v4.8B, v17.8B,v18.8B,v19.8B,v16.8B
|
||||
.endif
|
||||
|
||||
.ifc \ofmt,rgba // 0 1 2 3
|
||||
compute_rgba v4.8B,v5.8B,v6.8B,v7.8B, v16.8B,v17.8B,v18.8B,v19.8B
|
||||
.endif
|
||||
|
||||
.ifc \ofmt,abgr // 3 2 1 0
|
||||
compute_rgba v7.8B,v6.8B,v5.8B,v4.8B, v19.8B,v18.8B,v17.8B,v16.8B
|
||||
.endif
|
||||
|
||||
.ifc \ofmt,bgra // 2 1 0 3
|
||||
compute_rgba v6.8B,v5.8B,v4.8B,v7.8B, v18.8B,v17.8B,v16.8B,v19.8B
|
||||
.endif
|
||||
|
||||
st4 { v4.8B, v5.8B, v6.8B, v7.8B}, [x2], #32
|
||||
st4 {v16.8B,v17.8B,v18.8B,v19.8B}, [x2], #32
|
||||
subs w8, w8, #16 // width -= 16
|
||||
b.gt 2b
|
||||
add x2, x2, w3, UXTW // dst += padding
|
||||
add x4, x4, w5, UXTW // srcY += paddingY
|
||||
increment_\ifmt
|
||||
subs w1, w1, #1 // height -= 1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.macro declare_rgb_funcs ifmt
|
||||
declare_func \ifmt, argb
|
||||
declare_func \ifmt, rgba
|
||||
declare_func \ifmt, abgr
|
||||
declare_func \ifmt, bgra
|
||||
.endm
|
||||
|
||||
declare_rgb_funcs nv12
|
||||
declare_rgb_funcs nv21
|
||||
declare_rgb_funcs yuv420p
|
||||
declare_rgb_funcs yuv422p
|
||||
169
externals/ffmpeg/libswscale/alphablend.c
vendored
Executable file
169
externals/ffmpeg/libswscale/alphablend.c
vendored
Executable file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "swscale_internal.h"
|
||||
|
||||
/**
 * Composite the alpha channel of a source picture against an opaque
 * background and write the flattened result to dst.
 *
 * The background is either uniform or a 32x32-pixel checkerboard,
 * selected by c->alphablend (the ((x^y)>>5)&1 index below picks the
 * checkerboard phase).  Handles planar and packed layouts, 8-bit and
 * 9..16-bit samples, both endiannesses, and chroma subsampling.
 *
 * @param c          sws context; provides srcFormat, geometry and blend mode
 * @param src        source planes; the alpha plane is the last component
 * @param srcStride  per-plane source strides in bytes
 * @param srcSliceY  first row of the slice
 * @param srcSliceH  slice height
 *                   NOTE(review): loop bounds compare y against srcSliceH,
 *                   not srcSliceY + srcSliceH — callers appear to pass
 *                   full-frame slices; confirm.
 * @param dst        destination planes (alpha is dropped)
 * @param dstStride  per-plane destination strides in bytes
 * @return 0 on success
 */
int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[],
                          int srcStride[], int srcSliceY, int srcSliceH,
                          uint8_t *dst[], int dstStride[])
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
    int nb_components = desc->nb_components;
    int plane, x, y;
    /* Gray formats have a single data plane; everything else has three. */
    int plane_count = isGray(c->srcFormat) ? 1 : 3;
    /* Depths above 8 bits are stored as 16-bit words per sample. */
    int sixteen_bits = desc->comp[0].depth >= 9;
    /* Rounding offset, normalization shift and clip limit at native depth. */
    unsigned off = 1<<(desc->comp[0].depth - 1);
    unsigned shift = desc->comp[0].depth;
    unsigned max = (1<<shift) - 1;
    /* Background value per plane for the two checkerboard phases;
     * both rows are identical when the background is uniform. */
    int target_table[2][3];

    for (plane = 0; plane < plane_count; plane++) {
        int a = 0, b = 0;
        if (c->alphablend == SWS_ALPHA_BLEND_CHECKERBOARD) {
            /* Two gray levels at 1/4 and 3/4 of full scale. */
            a = (1<<(desc->comp[0].depth - 1))/2;
            b = 3*(1<<(desc->comp[0].depth-1))/2;
        }
        /* Chroma planes of YUV formats blend towards the neutral midpoint;
         * luma and RGB planes blend towards the chosen background level. */
        target_table[0][plane] = plane && !(desc->flags & AV_PIX_FMT_FLAG_RGB) ? 1<<(desc->comp[0].depth - 1) : a;
        target_table[1][plane] = plane && !(desc->flags & AV_PIX_FMT_FLAG_RGB) ? 1<<(desc->comp[0].depth - 1) : b;
    }

    /* Alpha must be exactly the one component beyond the data planes. */
    av_assert0(plane_count == nb_components - 1);
    if (desc->flags & AV_PIX_FMT_FLAG_PLANAR) {
        for (plane = 0; plane < plane_count; plane++) {
            int w = plane ? c->chrSrcW : c->srcW;
            int x_subsample = plane ? desc->log2_chroma_w: 0;
            int y_subsample = plane ? desc->log2_chroma_h: 0;
            for (y = srcSliceY >> y_subsample; y < AV_CEIL_RSHIFT(srcSliceH, y_subsample); y++) {
                if (x_subsample || y_subsample) {
                    /* Subsampled plane: average the 2 (or 4) full-resolution
                     * alpha samples that cover each chroma sample. */
                    int alpha;
                    unsigned u;
                    if (sixteen_bits) {
                        ptrdiff_t alpha_step = srcStride[plane_count] >> 1;  /* stride in uint16_t units */
                        const uint16_t *s = (const uint16_t *)(src[plane      ] + srcStride[plane      ] * y);
                        const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * y << y_subsample));
                              uint16_t *d = (      uint16_t *)(dst[plane      ] + dstStride[plane      ] * y);
                        if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) {
                            /* Sample endianness matches the host: direct loads. */
                            for (x = 0; x < w; x++) {
                                if (y_subsample) {
                                    alpha = (a[2*x] + a[2*x + 1] + 2 +
                                             a[2*x + alpha_step] + a[2*x + alpha_step + 1]) >> 2;
                                } else
                                    alpha = (a[2*x] + a[2*x + 1]) >> 1;
                                u = s[x]*alpha + target_table[((x^y)>>5)&1][plane]*(max-alpha) + off;
                                /* (u + (u >> shift)) >> shift approximates u / max. */
                                d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
                            }
                        } else {
                            /* Foreign endianness: byte-swap every sample on load. */
                            for (x = 0; x < w; x++) {
                                if (y_subsample) {
                                    alpha = (av_bswap16(a[2*x]) + av_bswap16(a[2*x + 1]) + 2 +
                                             av_bswap16(a[2*x + alpha_step]) + av_bswap16(a[2*x + alpha_step + 1])) >> 2;
                                } else
                                    alpha = (av_bswap16(a[2*x]) + av_bswap16(a[2*x + 1])) >> 1;
                                u = av_bswap16(s[x])*alpha + target_table[((x^y)>>5)&1][plane]*(max-alpha) + off;
                                d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
                            }
                        }
                    } else {
                        ptrdiff_t alpha_step = srcStride[plane_count];
                        const uint8_t *s = src[plane ] + srcStride[plane] * y;
                        const uint8_t *a = src[plane_count] + (srcStride[plane_count] * y << y_subsample);
                        uint8_t *d = dst[plane ] + dstStride[plane] * y;
                        for (x = 0; x < w; x++) {
                            if (y_subsample) {
                                alpha = (a[2*x] + a[2*x + 1] + 2 +
                                         a[2*x + alpha_step] + a[2*x + alpha_step + 1]) >> 2;
                            } else
                                alpha = (a[2*x] + a[2*x + 1]) >> 1;
                            u = s[x]*alpha + target_table[((x^y)>>5)&1][plane]*(255-alpha) + 128;
                            /* (257*u) >> 16 is an exact rounded division by 255
                             * for the value range produced above. */
                            d[x] = (257*u) >> 16;
                        }
                    }
                } else {
                    /* Full-resolution plane: one alpha sample per pixel. */
                    if (sixteen_bits) {
                        const uint16_t *s = (const uint16_t *)(src[plane      ] + srcStride[plane      ] * y);
                        const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * y);
                              uint16_t *d = (      uint16_t *)(dst[plane      ] + dstStride[plane      ] * y);
                        if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) {
                            for (x = 0; x < w; x++) {
                                unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(max-a[x]) + off;
                                d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
                            }
                        } else {
                            for (x = 0; x < w; x++) {
                                unsigned aswap =av_bswap16(a[x]);
                                unsigned u = av_bswap16(s[x])*aswap + target_table[((x^y)>>5)&1][plane]*(max-aswap) + off;
                                d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
                            }
                        }
                    } else {
                        const uint8_t *s = src[plane ] + srcStride[plane] * y;
                        const uint8_t *a = src[plane_count] + srcStride[plane_count] * y;
                        uint8_t *d = dst[plane ] + dstStride[plane] * y;
                        for (x = 0; x < w; x++) {
                            unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(255-a[x]) + 128;
                            d[x] = (257*u) >> 16;
                        }
                    }
                }
            }
        }
    } else {
        /* Packed format: all components interleaved in plane 0;
         * alpha_pos is the byte offset of alpha within one pixel. */
        int alpha_pos = desc->comp[plane_count].offset;
        int w = c->srcW;
        for (y = srcSliceY; y < srcSliceH; y++) {
            if (sixteen_bits) {
                /* When alpha leads the pixel (alpha_pos == 0), color data
                 * starts one 16-bit word later. */
                const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * y + 2*!alpha_pos);
                const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * y + alpha_pos);
                      uint16_t *d = (      uint16_t *)(dst[0] + dstStride[0] * y);
                if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) {
                    for (x = 0; x < w; x++) {
                        for (plane = 0; plane < plane_count; plane++) {
                            int x_index = (plane_count + 1) * x;  /* stride in samples incl. alpha */
                            unsigned u = s[x_index + plane]*a[x_index] + target_table[((x^y)>>5)&1][plane]*(max-a[x_index]) + off;
                            d[plane_count*x + plane] = av_clip((u + (u >> shift)) >> shift, 0, max);
                        }
                    }
                } else {
                    for (x = 0; x < w; x++) {
                        for (plane = 0; plane < plane_count; plane++) {
                            int x_index = (plane_count + 1) * x;
                            unsigned aswap =av_bswap16(a[x_index]);
                            unsigned u = av_bswap16(s[x_index + plane])*aswap + target_table[((x^y)>>5)&1][plane]*(max-aswap) + off;
                            d[plane_count*x + plane] = av_clip((u + (u >> shift)) >> shift, 0, max);
                        }
                    }
                }
            } else {
                const uint8_t *s = src[0] + srcStride[0] * y + !alpha_pos;
                const uint8_t *a = src[0] + srcStride[0] * y + alpha_pos;
                uint8_t *d = dst[0] + dstStride[0] * y;
                for (x = 0; x < w; x++) {
                    for (plane = 0; plane < plane_count; plane++) {
                        int x_index = (plane_count + 1) * x;
                        unsigned u = s[x_index + plane]*a[x_index] + target_table[((x^y)>>5)&1][plane]*(255-a[x_index]) + 128;
                        d[plane_count*x + plane] = (257*u) >> 16;
                    }
                }
            }
        }
    }

    return 0;
}
|
||||
8
externals/ffmpeg/libswscale/arm/Makefile
vendored
Executable file
8
externals/ffmpeg/libswscale/arm/Makefile
vendored
Executable file
@@ -0,0 +1,8 @@
|
||||
OBJS += arm/swscale.o \
|
||||
arm/swscale_unscaled.o \
|
||||
|
||||
NEON-OBJS += arm/rgb2yuv_neon_32.o
|
||||
NEON-OBJS += arm/rgb2yuv_neon_16.o
|
||||
NEON-OBJS += arm/hscale.o \
|
||||
arm/output.o \
|
||||
arm/yuv2rgb_neon.o \
|
||||
70
externals/ffmpeg/libswscale/arm/hscale.S
vendored
Executable file
70
externals/ffmpeg/libswscale/arm/hscale.S
vendored
Executable file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Clément Bœsch <clement stupeflix.com>
|
||||
* Copyright (c) 2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
/*
 * void ff_hscale_8_to_15_neon(SwsContext *c, int16_t *dst, int dstW,
 *                             const uint8_t *src, const int16_t *filter,
 *                             const int32_t *filterPos, int filterSize);
 *
 * Horizontal scaler: convolves 8-bit source pixels with signed 16-bit
 * filter coefficients and stores clipped 15-bit results (>>7 at the end).
 * Processes two output pixels per outer iteration and 8 taps per inner
 * iteration, so filterSize must be a multiple of 8 (matches the C-side
 * dispatch check) and dstW is assumed even — TODO confirm callers.
 *
 * In:  r0 = c (unused), r1 = dst, r2 = dstW, r3 = src
 *      stack args (offsets include the 40-byte push + 64-byte vpush):
 *      [sp+104] = filter, [sp+108] = filterPos, [sp+112] = filterSize
 */
function ff_hscale_8_to_15_neon, export=1
    push                {r4-r12, lr}
    vpush               {q4-q7}
    ldr                 r4, [sp, #104]                                 @ filter
    ldr                 r5, [sp, #108]                                 @ filterPos
    ldr                 r6, [sp, #112]                                 @ filterSize
    add                 r10, r4, r6, lsl #1                            @ filter2 = filter + filterSize * 2
1:  ldr                 r8, [r5], #4                                   @ filterPos[0]
    ldr                 r9, [r5], #4                                   @ filterPos[1]
    vmov.s32            q4, #0                                         @ val accumulator
    vmov.s32            q5, #0                                         @ val accumulator
    mov                 r7, r6                                         @ tmpfilterSize = filterSize
    mov                 r0, r3                                         @ srcp
2:  add                 r11, r0, r8                                    @ srcp + filterPos[0]
    add                 r12, r0, r9                                    @ srcp + filterPos[1]
    vld1.8              d0, [r11]                                      @ srcp[filterPos[0] + {0..7}]
    vld1.8              d2, [r12]                                      @ srcp[filterPos[1] + {0..7}]
    vld1.16             {q2}, [r4]!                                    @ load 8x16-bit filter values
    vld1.16             {q3}, [r10]!                                   @ load 8x16-bit filter values
    vmovl.u8            q0, d0                                         @ unpack src values to 16-bit
    vmovl.u8            q1, d2                                         @ unpack src values to 16-bit
    vmull.s16           q8, d0, d4                                     @ srcp[filterPos[0] + {0..7}] * filter[{0..7}] (part 1)
    vmull.s16           q9, d1, d5                                     @ srcp[filterPos[0] + {0..7}] * filter[{0..7}] (part 2)
    vmull.s16           q10, d2, d6                                    @ srcp[filterPos[1] + {0..7}] * filter[{0..7}] (part 1)
    vmull.s16           q11, d3, d7                                    @ srcp[filterPos[1] + {0..7}] * filter[{0..7}] (part 2)
    vpadd.s32           d16, d16, d17                                  @ horizontal pair adding of the 8x32-bit multiplied values into 4x32-bit (part 1)
    vpadd.s32           d17, d18, d19                                  @ horizontal pair adding of the 8x32-bit multiplied values into 4x32-bit (part 2)
    vpadd.s32           d20, d20, d21                                  @ horizontal pair adding of the 8x32-bit multiplied values into 4x32-bit (part 1)
    vpadd.s32           d21, d22, d23                                  @ horizontal pair adding of the 8x32-bit multiplied values into 4x32-bit (part 2)
    vadd.s32            q4, q8                                         @ update val accumulator
    vadd.s32            q5, q10                                        @ update val accumulator
    add                 r0, #8                                         @ srcp += 8
    subs                r7, #8                                         @ tmpfilterSize -= 8
    bgt                 2b                                             @ loop until tmpfilterSize is consumed
    mov                 r4, r10                                        @ filter = filter2
    add                 r10, r10, r6, lsl #1                           @ filter2 += filterSize * 2
    vpadd.s32           d8, d8, d9                                     @ horizontal pair adding of the 8x32-bit sums into 4x32-bit (part 1)
    vpadd.s32           d9, d10, d11                                   @ horizontal pair adding of the 8x32-bit sums into 4x32-bit (part 2)
    vpadd.s32           d8, d8, d9                                     @ horizontal pair adding of the 4x32-bit sums into 2x32-bit
    vqshrn.s32          d8, q4, #7                                     @ shift and clip the 2x16-bit final values
    vst1.32             {d8[0]},[r1]!                                  @ write destination
    subs                r2, #2                                         @ dstW -= 2
    bgt                 1b                                             @ loop until end of line
    vpop                {q4-q7}
    pop                 {r4-r12, lr}
    mov                 pc, lr                                         @ return (lr restored by the pop above)
endfunc
|
||||
78
externals/ffmpeg/libswscale/arm/output.S
vendored
Executable file
78
externals/ffmpeg/libswscale/arm/output.S
vendored
Executable file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Clément Bœsch <clement stupeflix.com>
|
||||
* Copyright (c) 2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
/*
 * void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
 *                           const int16_t **src, uint8_t *dest, int dstW,
 *                           const uint8_t *dither, int offset);
 *
 * Vertical multi-tap output stage: combines filterSize 16-bit source rows
 * into one dithered 8-bit destination row.  Processes 8 output pixels per
 * outer iteration and 2 filter taps per inner iteration, so filterSize is
 * assumed even and dstW a multiple of 8 — TODO confirm callers.
 *
 * In:  r0 = filter, r1 = filterSize, r2 = src (array of row pointers),
 *      r3 = dest
 *      stack args (offsets include the 40-byte push + 64-byte vpush):
 *      [sp+104] = dstW, [sp+108] = dither, [sp+112] = offset
 */
function ff_yuv2planeX_8_neon, export=1
    push                {r4-r12, lr}
    vpush               {q4-q7}
    ldr                 r4, [sp, #104]                                 @ dstW
    ldr                 r5, [sp, #108]                                 @ dither
    ldr                 r6, [sp, #112]                                 @ offset
    vld1.8              {d0}, [r5]                                     @ load 8x8-bit dither values
    cmp                 r6, #0                                         @ check offsetting which can be 0 or 3 only
    beq                 1f
    vext.u8             d0, d0, d0, #3                                 @ honor offseting which can be 3 only
1:  vmovl.u8            q0, d0                                         @ extend dither to 16-bit
    vshll.u16           q1, d0, #12                                    @ extend dither to 32-bit with left shift by 12 (part 1)
    vshll.u16           q2, d1, #12                                    @ extend dither to 32-bit with left shift by 12 (part 2)
    mov                 r7, #0                                         @ i = 0
2:  vmov.u8             q3, q1                                         @ initialize accumulator with dithering values (part 1)
    vmov.u8             q4, q2                                         @ initialize accumulator with dithering values (part 2)
    mov                 r8, r1                                         @ tmpFilterSize = filterSize
    mov                 r9, r2                                         @ srcp
    mov                 r10, r0                                        @ filterp
3:  ldr                 r11, [r9], #4                                  @ get pointer @ src[j]
    ldr                 r12, [r9], #4                                  @ get pointer @ src[j+1]
    add                 r11, r11, r7, lsl #1                           @ &src[j][i]
    add                 r12, r12, r7, lsl #1                           @ &src[j+1][i]
    vld1.16             {q5}, [r11]                                    @ read 8x16-bit @ src[j  ][i + {0..7}]: A,B,C,D,E,F,G,H
    vld1.16             {q6}, [r12]                                    @ read 8x16-bit @ src[j+1][i + {0..7}]: I,J,K,L,M,N,O,P
    ldr                 r11, [r10], #4                                 @ read 2x16-bit coeffs (X, Y) at (filter[j], filter[j+1])
    vmov.16             q7, q5                                         @ copy 8x16-bit @ src[j  ][i + {0..7}] for following inplace zip instruction
    vmov.16             q8, q6                                         @ copy 8x16-bit @ src[j+1][i + {0..7}] for following inplace zip instruction
    vzip.16             q7, q8                                         @ A,I,B,J,C,K,D,L,E,M,F,N,G,O,H,P
    vdup.32             q15, r11                                       @ X,Y,X,Y,X,Y,X,Y
    vmull.s16           q9, d14, d30                                   @ A*X,I*Y,B*X,J*Y
    vmull.s16           q10, d15, d31                                  @ C*X,K*Y,D*X,L*Y
    vmull.s16           q11, d16, d30                                  @ E*X,M*Y,F*X,N*Y
    vmull.s16           q12, d17, d31                                  @ G*X,O*Y,H*X,P*Y
    vpadd.s32           d10, d18, d19                                  @ A*X+I*Y,B*X+J*Y
    vpadd.s32           d11, d20, d21                                  @ C*X+K*Y,D*X+L*Y
    vpadd.s32           d12, d22, d23                                  @ E*X+M*Y,F*X+N*Y
    vpadd.s32           d13, d24, d25                                  @ G*X+O*Y,H*X+P*Y
    vadd.s32            q3, q5                                         @ update val accumulator (part 1)
    vadd.s32            q4, q6                                         @ update val accumulator (part 2)
    subs                r8, #2                                         @ tmpFilterSize -= 2
    bgt                 3b                                             @ loop until filterSize is consumed
    vshr.s32            q3, q3, #19                                    @ val>>19 (part 1)
    vshr.s32            q4, q4, #19                                    @ val>>19 (part 2)
    vqmovun.s32         d6, q3                                         @ clip16(val>>19) (part 1)
    vqmovun.s32         d7, q4                                         @ clip16(val>>19) (part 2)
    vqmovn.u16          d6, q3                                         @ merge part 1 and part 2
    vst1.8              {d6}, [r3]!                                    @ write destination
    add                 r7, #8                                         @ i += 8
    subs                r4, r4, #8                                     @ dstW -= 8
    bgt                 2b                                             @ loop until width is consumed
    vpop                {q4-q7}
    pop                 {r4-r12, lr}
    mov                 pc, lr                                         @ return (lr restored by the pop above)
endfunc
|
||||
83
externals/ffmpeg/libswscale/arm/rgb2yuv_neon_16.S
vendored
Executable file
83
externals/ffmpeg/libswscale/arm/rgb2yuv_neon_16.S
vendored
Executable file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
/* The whole file needs the GAS ".dn" register-naming directive. */
#if HAVE_AS_DN_DIRECTIVE
#include "rgb2yuv_neon_common.S"

/*
 * RGBX -> NV12 conversion kernel, 16-bit intermediate precision variant:
 * coefficients are narrowed to 16 bits (>>7 on load) and the per-pixel
 * products are accumulated with plain 16-bit vmul/vmla.
 */

/* downsampled R16G16B16 x8 */
alias_qw    r16x8,  q7
alias_qw    g16x8,  q8
alias_qw    b16x8,  q9

/* scratch: widened source samples */
alias       n16x16_l,   q11
alias       n16x16_h,   q12

/* luma accumulators */
alias       y16x16_l,   q13
alias       y16x16_h,   q14

alias_qw    y8x16,  q15

/* Load the 9-entry s32 coefficient table from \src, round-narrow the
 * coefficients to 16 bits and set the Y/UV bias registers. */
.macro init     src
    vld3.i32    {q13_l, q14_l, q15_l},          [\src]!
    vld3.i32    {q13_h[0], q14_h[0], q15_h[0]}, [\src]
    vrshrn.i32  CO_R,   q13, #7
    vrshrn.i32  CO_G,   q14, #7
    vrshrn.i32  CO_B,   q15, #7

    vmov.u8     BIAS_Y, #16
    vmov.u8     BIAS_U, #128
.endm

/* Widen 16 8-bit samples and apply \action (vmul/vmla) with \coeff. */
.macro compute_y_16x1_step  action, s8x16, coeff
    vmovl.u8    n16x16_l,   \s8x16\()_l
    vmovl.u8    n16x16_h,   \s8x16\()_h

    \action     y16x16_l,   n16x16_l,   \coeff
    \action     y16x16_h,   n16x16_h,   \coeff
.endm

/* y = clip8((r*CO_RY + g*CO_GY + b*CO_BY + rounding) >> 8) + BIAS_Y
 * for 16 pixels; result in y8x16. */
.macro compute_y_16x1
    compute_y_16x1_step vmul, r8x16, CO_RY
    compute_y_16x1_step vmla, g8x16, CO_GY
    compute_y_16x1_step vmla, b8x16, CO_BY

    vrshrn.i16  y8x16_l, y16x16_l, #8
    vrshrn.i16  y8x16_h, y16x16_h, #8

    vadd.u8     y8x16, y8x16, BIAS_Y
.endm

alias       c16x8,  q15
alias_qw    c8x8x2, q10

/* One chroma component (\c = u/v, \C = U/V) from 8 downsampled
 * R16G16B16 samples; result in \c\()8x8. */
.macro compute_chroma_8x1   c, C
    vmul        c16x8,  r16x8,  CO_R\C
    vmla        c16x8,  g16x8,  CO_G\C
    vmla        c16x8,  b16x8,  CO_B\C

    vrshrn.i16  \c\()8x8, c16x8, #8
    vadd.u8     \c\()8x8, \c\()8x8, BIAS_\C
.endm

/* Emit rgbx_to_nv12_neon_16 using the shared 420sp loop driver. */
    loop_420sp  rgbx, nv12, init, kernel_420_16x2, 16
#endif
|
||||
122
externals/ffmpeg/libswscale/arm/rgb2yuv_neon_32.S
vendored
Executable file
122
externals/ffmpeg/libswscale/arm/rgb2yuv_neon_32.S
vendored
Executable file
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
/* The whole file needs the GAS ".dn" register-naming directive. */
#if HAVE_AS_DN_DIRECTIVE
#include "rgb2yuv_neon_common.S"

/*
 * RGBX -> NV12 conversion kernel, 32-bit intermediate precision variant:
 * coefficients keep 16 significant bits (vmovn on load) and per-pixel
 * products are accumulated in 32 bits with vmull/vmlal, narrowed by >>15.
 */

/* downsampled R16G16B16 x8 */
alias_qw    r16x8,  q7
alias_qw    g16x8,  q8
alias_qw    b16x8,  q9

/* odd-lane samples split out by vtrn in compute_y_16x1_step */
alias       n16x16_o,   q11
alias       n16x16_ol,  q11_l
alias       n16x16_oh,  q11_h

/* 32-bit luma accumulators, even/odd lanes, low/high halves */
alias       y32x16_el,  q12
alias       y32x16_eh,  q13
alias       y32x16_ol,  q14
alias       y32x16_oh,  q15

/* the same registers viewed as 16-bit after narrowing */
alias       y16x16_e,   q12
alias       y16x16_el,  q12_l
alias       y16x16_eh,  q12_h
alias       y16x16_o,   q13
alias       y16x16_ol,  q13_l
alias       y16x16_oh,  q13_h


alias       y8x16,  y16x16_e


/* Load the 9-entry s32 coefficient table from \src, narrow the
 * coefficients to 16 bits and set the Y/UV bias registers. */
.macro init     src
    // load s32x3x3, narrow to s16x3x3
    vld3.i32    {q13_l, q14_l, q15_l},          [\src]!
    vld3.i32    {q13_h[0], q14_h[0], q15_h[0]}, [\src]

    vmovn.i32   CO_R,   q13
    vmovn.i32   CO_G,   q14
    vmovn.i32   CO_B,   q15

    vmov.u8     BIAS_Y, #16
    vmov.u8     BIAS_U, #128
.endm

/* Split \s8x16 into even/odd byte lanes (vtrn against zero) and apply
 * \action (vmull/vmlal) with \coeff on both halves of each lane set. */
.macro compute_y_16x1_step  action, s8x16, coeff
    vmov.u8     n16x16_o,   #0
    vtrn.u8     \s8x16, n16x16_o

    \action     y32x16_el,  \s8x16\()_l,    \coeff
    \action     y32x16_eh,  \s8x16\()_h,    \coeff
    \action     y32x16_ol,  n16x16_ol,      \coeff
    \action     y32x16_oh,  n16x16_oh,      \coeff
.endm

/*
 * in:      r8x16, g8x16, b8x16
 * out:     y8x16
 * clobber: q11-q15, r8x16, g8x16, b8x16
 */
.macro compute_y_16x1
    compute_y_16x1_step vmull, r8x16, CO_RY
    compute_y_16x1_step vmlal, g8x16, CO_GY
    compute_y_16x1_step vmlal, b8x16, CO_BY

    vrshrn.i32  y16x16_el,  y32x16_el,  #15
    vrshrn.i32  y16x16_eh,  y32x16_eh,  #15
    vrshrn.i32  y16x16_ol,  y32x16_ol,  #15
    vrshrn.i32  y16x16_oh,  y32x16_oh,  #15

    vtrn.8      y16x16_e,   y16x16_o    /* re-interleave even/odd lanes */
    vadd.u8     y8x16,  y8x16,  BIAS_Y
.endm

alias       c32x8_l,    q14
alias       c32x8_h,    q15

alias_qw    c16x8,  q13
alias_qw    c8x8x2, q10

/* Apply \action (vmull/vmlal) with \coeff to both halves of \s16x8. */
.macro compute_chroma_8x1_step  action, s16x8, coeff
    \action     c32x8_l,    \s16x8\()_l,    \coeff
    \action     c32x8_h,    \s16x8\()_h,    \coeff
.endm

/*
 * in:      r16x8, g16x8, b16x8
 * out:     c8x8
 * clobber: q14-q15
 */
.macro compute_chroma_8x1   c, C
    compute_chroma_8x1_step vmull, r16x8, CO_R\C
    compute_chroma_8x1_step vmlal, g16x8, CO_G\C
    compute_chroma_8x1_step vmlal, b16x8, CO_B\C

    vrshrn.i32  c16x8_l,    c32x8_l,    #15
    vrshrn.i32  c16x8_h,    c32x8_h,    #15
    vmovn.i16   \c\()8x8,   c16x8
    vadd.u8     \c\()8x8,   \c\()8x8,   BIAS_\C
.endm


/* Emit rgbx_to_nv12_neon_32 using the shared 420sp loop driver. */
    loop_420sp  rgbx, nv12, init, kernel_420_16x2, 32
#endif
|
||||
291
externals/ffmpeg/libswscale/arm/rgb2yuv_neon_common.S
vendored
Executable file
291
externals/ffmpeg/libswscale/arm/rgb2yuv_neon_common.S
vendored
Executable file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/arm/asm.S"

/* Define (\set != 0) or remove (\set == 0) a register alias via .req/.unreq. */
.macro alias name, tgt, set=1
.if \set != 0
\name .req \tgt
.else
.unreq \name
.endif
.endm

.altmacro

/* Recursively alias qN_l/qN_h to the low/high d registers of each q
 * register, for q0..q15 (d0/d1 up to d30/d31). */
.macro alias_dw_all qw, dw_l, dw_h
alias q\qw\()_l, d\dw_l
alias q\qw\()_h, d\dw_h
.if \qw < 15
alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
.endif
.endm

alias_dw_all 0, 0, 1

.noaltmacro

/* Alias a q register together with its _l/_h half views under \name. */
.macro alias_qw name, qw, set=1
alias \name\(), \qw, \set
alias \name\()_l, \qw\()_l, \set
alias \name\()_h, \qw\()_h, \set
.endm
|
||||
|
||||
/* Save all callee-saved core and NEON registers (AAPCS). */
.macro prologue
    push            {r4-r12, lr}
    vpush           {q4-q7}
.endm

/* Restore registers and return (pops directly into pc). */
.macro epilogue
    vpop            {q4-q7}
    pop             {r4-r12, pc}
.endm

/* Load the \ix-th (0-based) function argument into \reg.  Arguments 4+
 * live on the stack; the offset accounts for the 10*4-byte push and the
 * 4*16-byte vpush done by prologue. */
.macro load_arg reg, ix
    ldr             \reg, [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
.endm


/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 */
||||
/* Register roles for loop_420sp; call with \set=0 to undo the aliases.
 * Several names deliberately share a register (e.g. header/width,
 * y1/coeff_table) because their live ranges do not overlap. */
.macro alias_loop_420sp set=1
    alias   src,        r0, \set
    alias   src0,       src, \set
    alias   y,          r1, \set
    alias   y0,         y, \set
    alias   chroma,     r2, \set
    alias   width,      r3, \set
    alias   header,     width, \set         /* leftover pixels (width % 16) */

    alias   height,     r4, \set
    alias   y_stride,   r5, \set
    alias   c_stride,   r6, \set
    alias   c_padding,  c_stride, \set
    alias   src_stride, r7, \set

    alias   y0_end,     r8, \set            /* end pointer of current luma row */

    alias   src_padding,r9, \set
    alias   y_padding,  r10, \set

    alias   src1,       r11, \set           /* second row of the 16x2 tile */
    alias   y1,         r12, \set

    alias   coeff_table,r12, \set           /* only live before the loop starts */
.endm
|
||||
|
||||
|
||||
/*
 * Emit a complete \s_fmt\()_to_\d_fmt\()_neon_\precision function that
 * walks the image two rows at a time (4:2:0 chroma is shared between row
 * pairs) and applies \kernel to 16x2-pixel tiles.  \init loads the
 * coefficient table; \precision selects the 16- or 32-bit kernel variant.
 * Height is assumed even; a partial tile of width %% 16 ("header") is
 * handled once per row pair before the full tiles.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    load_arg        height, 4
    load_arg        y_stride, 5
    load_arg        c_stride, 6
    load_arg        src_stride, 7
    load_arg        coeff_table, 8

    \init           coeff_table

    /* Per-row advance beyond the pixels actually written/read. */
    sub             y_padding, y_stride, width
    sub             c_padding, c_stride, width
    sub             src_padding, src_stride, width, LSL #2

    add             y0_end, y0, width
    and             header, width, #15                      @ partial-tile width

    add             y1, y0, y_stride
    add             src1, src0, src_stride

0:
    cmp             header, #0
    beq             1f

    /* Partial tile first, so the remainder of the row is 16-aligned. */
    \kernel         \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel         \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    cmp             y0, y0_end
    blt             1b
2:
    /* Advance all pointers past the just-finished row pair. */
    add             y0, y1, y_padding
    add             y0_end, y1, y_stride
    add             chroma, chroma, c_padding
    add             src0, src1, src_padding

    add             y1, y0, y_stride
    add             src1, src0, src_stride

    subs            height, height, #2

    bgt             0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm
|
||||
|
||||
/* Pairwise-add 16 8-bit samples per channel into 8 16-bit sums
 * (horizontal 2:1 downsample of the first row of a tile). */
.macro downsample
    vpaddl.u8   r16x8, r8x16
    vpaddl.u8   g16x8, g8x16
    vpaddl.u8   b16x8, b8x16
.endm


/* acculumate and right shift by 2 */
/* Second row of the tile: pairwise-add-accumulate onto the first row's
 * sums, then round-shift by 2 — a full 2x2 box average. */
.macro downsample_ars2
    vpadal.u8   r16x8, r8x16
    vpadal.u8   g16x8, g8x16
    vpadal.u8   b16x8, b8x16

    vrshr.u16   r16x8, r16x8, #2
    vrshr.u16   g16x8, g16x8, #2
    vrshr.u16   b16x8, b16x8, #2
.endm

/* Store 16 luma bytes; with \count, advance \dst by \count instead of 16
 * (partial-tile path re-processes overlapping pixels). */
.macro store_y8_16x1 dst, count
.ifc "\count",""
    vstmia      \dst!, {y8x16}
.else
    vstmia      \dst, {y8x16}
    add         \dst, \dst, \count
.endif
.endm

/* Store 8 interleaved U,V pairs (NV12 ordering). */
.macro store_chroma_nv12_8x1 dst, count
.ifc "\count",""
    vst2.i8     {u8x8, v8x8}, [\dst]!
.else
    vst2.i8     {u8x8, v8x8}, [\dst], \count
.endif
.endm

/* Store 8 interleaved V,U pairs (NV21 ordering). */
.macro store_chroma_nv21_8x1 dst, count
.ifc "\count",""
    vst2.i8     {v8x8, u8x8}, [\dst]!
.else
    vst2.i8     {v8x8, u8x8}, [\dst], \count
.endif
.endm
|
||||
|
||||
/* De-interleave 16 packed 4-byte pixels into the four component
 * registers \a,\b,\c,\d.  With \count, only advance \src by \count
 * pixels (4 bytes each) for the partial-tile path. */
.macro load_8888_16x1 a, b, c, d, src, count
.ifc "\count",""
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.else
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
    sub         \src, \src, #32
    add         \src, \src, \count, LSL #2
.endif
.endm

/* Component order R,G,B,X in memory. */
.macro load_rgbx_16x1 src, count
    load_8888_16x1 r, g, b, x, \src, \count
.endm

/* Component order B,G,R,X in memory. */
.macro load_bgrx_16x1 src, count
    load_8888_16x1 b, g, r, x, \src, \count
.endm

.macro alias_src_rgbx set=1
    alias_src_8888 r, g, b, x, \set
.endm

.macro alias_src_bgrx set=1
    alias_src_8888 b, g, r, x, \set
.endm

/* NV12: U in the low half of the chroma pair register, V in the high. */
.macro alias_dst_nv12 set=1
    alias u8x8, c8x8x2_l, \set
    alias v8x8, c8x8x2_h, \set
.endm

/* NV21: swapped chroma order. */
.macro alias_dst_nv21 set=1
    alias v8x8, c8x8x2_l, \set
    alias u8x8, c8x8x2_h, \set
.endm
|
||||
|
||||
|
||||
// common aliases

/* Coefficient lanes: d0..d2 each hold the Y/U/V coefficient for one
 * input component as s16 lanes 0..2. */
alias   CO_R        d0
CO_RY   .dn         d0.s16[0]
CO_RU   .dn         d0.s16[1]
CO_RV   .dn         d0.s16[2]

alias   CO_G        d1
CO_GY   .dn         d1.s16[0]
CO_GU   .dn         d1.s16[1]
CO_GV   .dn         d1.s16[2]

alias   CO_B        d2
CO_BY   .dn         d2.s16[0]
CO_BU   .dn         d2.s16[1]
CO_BV   .dn         d2.s16[2]

/* U and V share one bias value (128); Y bias (16) needs a full q reg
 * because it is added to 16 bytes at once. */
alias   BIAS_U,     d3
alias   BIAS_V,     BIAS_U

alias   BIAS_Y,     q2


/* q3-q6 R8G8B8X8 x16 */

/* Name the four 16-pixel component registers after the memory order
 * \a,\b,\c,\d (e.g. r/g/b/x or b/g/r/x). */
.macro alias_src_8888 a, b, c, d, set
    alias_qw \a\()8x16, q3, \set
    alias_qw \b\()8x16, q4, \set
    alias_qw \c\()8x16, q5, \set
    alias_qw \d\()8x16, q6, \set
.endm
|
||||
|
||||
/*
 * Convert one 16x2-pixel tile: two luma rows of 16 pixels each plus one
 * 8-sample row of 2x2-averaged chroma.  \rgb0/\rgb1 and \y0/\y1 are the
 * two source/destination row pointers; \count, when given, limits the
 * tile to a partial width (see the load/store macros).
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    alias_src_\rgb_fmt
    alias_dst_\yuv_fmt

    /* Row 0: luma out, chroma partial sums kept in r/g/b16x8. */
    load_\rgb_fmt\()_16x1 \rgb0, \count

    downsample
    compute_y_16x1
    store_y8_16x1 \y0, \count


    /* Row 1: luma out, then finish the 2x2 chroma average. */
    load_\rgb_fmt\()_16x1 \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1 \y1, \count

    compute_chroma_8x1 u, U
    compute_chroma_8x1 v, V

    store_chroma_\yuv_fmt\()_8x1 \chroma, \count

    alias_dst_\yuv_fmt 0
    alias_src_\rgb_fmt 0
.endm
|
||||
47
externals/ffmpeg/libswscale/arm/swscale.c
vendored
Executable file
47
externals/ffmpeg/libswscale/arm/swscale.c
vendored
Executable file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/arm/cpu.h"
|
||||
|
||||
void ff_hscale_8_to_15_neon(SwsContext *c, int16_t *dst, int dstW,
|
||||
const uint8_t *src, const int16_t *filter,
|
||||
const int32_t *filterPos, int filterSize);
|
||||
|
||||
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
|
||||
const int16_t **src, uint8_t *dest, int dstW,
|
||||
const uint8_t *dither, int offset);
|
||||
|
||||
/**
 * Install ARM NEON fast paths on the scaler context when the CPU
 * supports NEON and the configured conversion matches a kernel's
 * constraints.  Leaves the context untouched otherwise.
 */
av_cold void ff_sws_init_swscale_arm(SwsContext *c)
{
    const int cpu_flags = av_get_cpu_flags();

    if (!have_neon(cpu_flags))
        return;

    /* Horizontal scaler: 8-bit input, up to 14-bit output, and both
     * filter sizes a multiple of 8 (the asm consumes 8 taps per step). */
    if (c->srcBpc == 8 && c->dstBpc <= 14 &&
        !(c->hLumFilterSize % 8) && !(c->hChrFilterSize % 8)) {
        c->hyScale = ff_hscale_8_to_15_neon;
        c->hcScale = ff_hscale_8_to_15_neon;
    }

    /* Vertical multi-tap output stage for 8-bit destinations. */
    if (c->dstBpc == 8)
        c->yuv2planeX = ff_yuv2planeX_8_neon;
}
|
||||
186
externals/ffmpeg/libswscale/arm/swscale_unscaled.c
vendored
Executable file
186
externals/ffmpeg/libswscale/arm/swscale_unscaled.c
vendored
Executable file
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/arm/cpu.h"
|
||||
|
||||
#if HAVE_AS_DN_DIRECTIVE
|
||||
extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
|
||||
int width, int height,
|
||||
int y_stride, int c_stride, int src_stride,
|
||||
int32_t coeff_tbl[9]);
|
||||
|
||||
extern void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma,
|
||||
int width, int height,
|
||||
int y_stride, int c_stride, int src_stride,
|
||||
int32_t coeff_tbl[9]);
|
||||
|
||||
/*
 * SwsFunc wrapper around the 32-bit-precision NEON RGBX->NV12 kernel.
 * Offsets the luma/source planes by srcSliceY and the interleaved chroma
 * plane by srcSliceY / 2 (NV12 chroma is vertically halved; presumably
 * srcSliceY is even here — TODO confirm callers guarantee this).
 * Always returns 0.
 */
static int rgbx_to_nv12_neon_32_wrapper(SwsContext *context, const uint8_t *src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t *dst[], int dstStride[]) {

    rgbx_to_nv12_neon_32(src[0] + srcSliceY * srcStride[0],
            dst[0] + srcSliceY * dstStride[0],
            dst[1] + (srcSliceY / 2) * dstStride[1],
            context->srcW, srcSliceH,
            dstStride[0], dstStride[1], srcStride[0],
            context->input_rgb2yuv_table);

    return 0;
}
|
||||
|
||||
/*
 * SwsFunc wrapper around the 16-bit-precision NEON RGBX->NV12 kernel.
 * Identical plane/offset handling to rgbx_to_nv12_neon_32_wrapper, only
 * the kernel precision differs.  Always returns 0.
 */
static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t *dst[], int dstStride[]) {

    rgbx_to_nv12_neon_16(src[0] + srcSliceY * srcStride[0],
            dst[0] + srcSliceY * dstStride[0],
            dst[1] + (srcSliceY / 2) * dstStride[1],
            context->srcW, srcSliceH,
            dstStride[0], dstStride[1], srcStride[0],
            context->input_rgb2yuv_table);

    return 0;
}
|
||||
|
||||
#define YUV_TO_RGB_TABLE \
|
||||
c->yuv2rgb_v2r_coeff, \
|
||||
c->yuv2rgb_u2g_coeff, \
|
||||
c->yuv2rgb_v2g_coeff, \
|
||||
c->yuv2rgb_u2b_coeff, \
|
||||
|
||||
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) \
|
||||
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
|
||||
uint8_t *dst, int linesize, \
|
||||
const uint8_t *srcY, int linesizeY, \
|
||||
const uint8_t *srcU, int linesizeU, \
|
||||
const uint8_t *srcV, int linesizeV, \
|
||||
const int16_t *table, \
|
||||
int y_offset, \
|
||||
int y_coeff); \
|
||||
\
|
||||
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
|
||||
int srcStride[], int srcSliceY, int srcSliceH, \
|
||||
uint8_t *dst[], int dstStride[]) { \
|
||||
const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
|
||||
\
|
||||
ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
|
||||
dst[0] + srcSliceY * dstStride[0], dstStride[0], \
|
||||
src[0], srcStride[0], \
|
||||
src[1], srcStride[1], \
|
||||
src[2], srcStride[2], \
|
||||
yuv2rgb_table, \
|
||||
c->yuv2rgb_y_offset >> 6, \
|
||||
c->yuv2rgb_y_coeff); \
|
||||
\
|
||||
return 0; \
|
||||
} \
|
||||
|
||||
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr) \
|
||||
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra) \
|
||||
|
||||
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv420p)
|
||||
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv422p)
|
||||
|
||||
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \
|
||||
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
|
||||
uint8_t *dst, int linesize, \
|
||||
const uint8_t *srcY, int linesizeY, \
|
||||
const uint8_t *srcC, int linesizeC, \
|
||||
const int16_t *table, \
|
||||
int y_offset, \
|
||||
int y_coeff); \
|
||||
\
|
||||
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
|
||||
int srcStride[], int srcSliceY, int srcSliceH, \
|
||||
uint8_t *dst[], int dstStride[]) { \
|
||||
const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
|
||||
\
|
||||
ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
|
||||
dst[0] + srcSliceY * dstStride[0], dstStride[0], \
|
||||
src[0], srcStride[0], src[1], srcStride[1], \
|
||||
yuv2rgb_table, \
|
||||
c->yuv2rgb_y_offset >> 6, \
|
||||
c->yuv2rgb_y_coeff); \
|
||||
\
|
||||
return 0; \
|
||||
} \
|
||||
|
||||
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr) \
|
||||
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra) \
|
||||
|
||||
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv12)
|
||||
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv21)
|
||||
|
||||
/* We need a 16 pixel width alignment. This constraint can easily be removed
|
||||
* for input reading but for the output which is 4-bytes per pixel (RGBA) the
|
||||
* assembly might be writing as much as 4*15=60 extra bytes at the end of the
|
||||
* line, which won't fit the 32-bytes buffer alignment. */
|
||||
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do { \
|
||||
if (c->srcFormat == AV_PIX_FMT_##IFMT \
|
||||
&& c->dstFormat == AV_PIX_FMT_##OFMT \
|
||||
&& !(c->srcH & 1) \
|
||||
&& !(c->srcW & 15) \
|
||||
&& !accurate_rnd) { \
|
||||
c->swscale = ifmt##_to_##ofmt##_neon_wrapper; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do { \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd); \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd); \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd); \
|
||||
SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd); \
|
||||
} while (0)
|
||||
|
||||
static void get_unscaled_swscale_neon(SwsContext *c) {
|
||||
int accurate_rnd = c->flags & SWS_ACCURATE_RND;
|
||||
if (c->srcFormat == AV_PIX_FMT_RGBA
|
||||
&& c->dstFormat == AV_PIX_FMT_NV12
|
||||
&& (c->srcW >= 16)) {
|
||||
c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
|
||||
: rgbx_to_nv12_neon_16_wrapper;
|
||||
}
|
||||
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
|
||||
SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
|
||||
}
|
||||
|
||||
void ff_get_unscaled_swscale_arm(SwsContext *c)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
if (have_neon(cpu_flags))
|
||||
get_unscaled_swscale_neon(c);
|
||||
}
|
||||
#else
|
||||
void ff_get_unscaled_swscale_arm(SwsContext *c)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
280
externals/ffmpeg/libswscale/arm/yuv2rgb_neon.S
vendored
Executable file
280
externals/ffmpeg/libswscale/arm/yuv2rgb_neon.S
vendored
Executable file
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Matthieu Bouron <matthieu.bouron stupeflix.com>
|
||||
* Copyright (c) 2015 Clément Bœsch <clement stupeflix.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
|
||||
.macro compute_premult
|
||||
vsub.u16 q14,q11 @ q14 = U * (1 << 3) - 128 * (1 << 3)
|
||||
vsub.u16 q15,q11 @ q15 = V * (1 << 3) - 128 * (1 << 3)
|
||||
vqdmulh.s16 q8, q15, d1[0] @ q8 = V * v2r
|
||||
vqdmulh.s16 q9, q14, d1[1] @ q9 = U * u2g
|
||||
vqdmulh.s16 q5, q15, d1[2] @ q5 = V * v2g
|
||||
vadd.s16 q9, q5 @ q9 = U * u2g + V * v2g
|
||||
vqdmulh.s16 q10,q14, d1[3] @ q10 = U * u2b
|
||||
.endm
|
||||
|
||||
.macro compute_color dst_comp1 dst_comp2 pre
|
||||
vadd.s16 q1, q14, \pre
|
||||
vadd.s16 q2, q15, \pre
|
||||
vqrshrun.s16 \dst_comp1, q1, #1
|
||||
vqrshrun.s16 \dst_comp2, q2, #1
|
||||
.endm
|
||||
|
||||
.macro compute_rgba r1 g1 b1 a1 r2 g2 b2 a2
|
||||
compute_color \r1, \r2, q8
|
||||
compute_color \g1, \g2, q9
|
||||
compute_color \b1, \b2, q10
|
||||
vmov.u8 \a1, #255
|
||||
vmov.u8 \a2, #255
|
||||
.endm
|
||||
|
||||
.macro compute dst ofmt
|
||||
vshll.u8 q14, d14, #3 @ q14 = Y * (1 << 3)
|
||||
vshll.u8 q15, d15, #3 @ q15 = Y * (1 << 3)
|
||||
vsub.s16 q14, q12 @ q14 = (Y - y_offset)
|
||||
vsub.s16 q15, q12 @ q15 = (Y - y_offset)
|
||||
vqdmulh.s16 q14, q13 @ q14 = (Y - y_offset) * y_coeff
|
||||
vqdmulh.s16 q15, q13 @ q15 = (Y - y_offset) * y_coeff
|
||||
|
||||
.ifc \ofmt,argb
|
||||
compute_rgba d7, d8, d9, d6, d11, d12, d13, d10
|
||||
.endif
|
||||
|
||||
.ifc \ofmt,rgba
|
||||
compute_rgba d6, d7, d8, d9, d10, d11, d12, d13
|
||||
.endif
|
||||
|
||||
.ifc \ofmt,abgr
|
||||
compute_rgba d9, d8, d7, d6, d13, d12, d11, d10
|
||||
.endif
|
||||
|
||||
.ifc \ofmt,bgra
|
||||
compute_rgba d8, d7, d6, d9, d12, d11, d10, d13
|
||||
.endif
|
||||
|
||||
vzip.8 d6, d10 @ d6 = R1R2R3R4R5R6R7R8 d10 = R9R10R11R12R13R14R15R16
|
||||
vzip.8 d7, d11 @ d7 = G1G2G3G4G5G6G7G8 d11 = G9G10G11G12G13G14G15G16
|
||||
vzip.8 d8, d12 @ d8 = B1B2B3B4B5B6B7B8 d12 = B9B10B11B12B13B14B15B16
|
||||
vzip.8 d9, d13 @ d9 = A1A2A3A4A5A6A7A8 d13 = A9A10A11A12A13A14A15A16
|
||||
vst4.8 {q3, q4}, [\dst,:128]!
|
||||
vst4.8 {q5, q6}, [\dst,:128]!
|
||||
.endm
|
||||
|
||||
.macro process_1l_internal dst src ofmt
|
||||
vld2.8 {d14, d15}, [\src]! @ q7 = Y (interleaved)
|
||||
compute \dst, \ofmt
|
||||
.endm
|
||||
|
||||
.macro process_1l ofmt
|
||||
compute_premult
|
||||
process_1l_internal r2, r4, \ofmt
|
||||
.endm
|
||||
|
||||
.macro process_2l ofmt
|
||||
compute_premult
|
||||
process_1l_internal r2, r4, \ofmt
|
||||
process_1l_internal r11,r12,\ofmt
|
||||
.endm
|
||||
|
||||
.macro load_args_nv12
|
||||
push {r4-r12, lr}
|
||||
vpush {q4-q7}
|
||||
ldr r4, [sp, #104] @ r4 = srcY
|
||||
ldr r5, [sp, #108] @ r5 = linesizeY
|
||||
ldr r6, [sp, #112] @ r6 = srcC
|
||||
ldr r7, [sp, #116] @ r7 = linesizeC
|
||||
ldr r8, [sp, #120] @ r8 = table
|
||||
ldr r9, [sp, #124] @ r9 = y_offset
|
||||
ldr r10,[sp, #128] @ r10 = y_coeff
|
||||
vdup.16 d0, r10 @ d0 = y_coeff
|
||||
vld1.16 {d1}, [r8] @ d1 = *table
|
||||
add r11, r2, r3 @ r11 = dst + linesize (dst2)
|
||||
add r12, r4, r5 @ r12 = srcY + linesizeY (srcY2)
|
||||
lsl r3, r3, #1
|
||||
lsl r5, r5, #1
|
||||
sub r3, r3, r0, lsl #2 @ r3 = linesize * 2 - width * 4 (padding)
|
||||
sub r5, r5, r0 @ r5 = linesizeY * 2 - width (paddingY)
|
||||
sub r7, r7, r0 @ r7 = linesizeC - width (paddingC)
|
||||
.endm
|
||||
|
||||
.macro load_args_nv21
|
||||
load_args_nv12
|
||||
.endm
|
||||
|
||||
.macro load_args_yuv420p
|
||||
push {r4-r12, lr}
|
||||
vpush {q4-q7}
|
||||
ldr r4, [sp, #104] @ r4 = srcY
|
||||
ldr r5, [sp, #108] @ r5 = linesizeY
|
||||
ldr r6, [sp, #112] @ r6 = srcU
|
||||
ldr r8, [sp, #128] @ r8 = table
|
||||
ldr r9, [sp, #132] @ r9 = y_offset
|
||||
ldr r10,[sp, #136] @ r10 = y_coeff
|
||||
vdup.16 d0, r10 @ d0 = y_coeff
|
||||
vld1.16 {d1}, [r8] @ d1 = *table
|
||||
add r11, r2, r3 @ r11 = dst + linesize (dst2)
|
||||
add r12, r4, r5 @ r12 = srcY + linesizeY (srcY2)
|
||||
lsl r3, r3, #1
|
||||
lsl r5, r5, #1
|
||||
sub r3, r3, r0, lsl #2 @ r3 = linesize * 2 - width * 4 (padding)
|
||||
sub r5, r5, r0 @ r5 = linesizeY * 2 - width (paddingY)
|
||||
ldr r10,[sp, #120] @ r10 = srcV
|
||||
.endm
|
||||
|
||||
.macro load_args_yuv422p
|
||||
push {r4-r12, lr}
|
||||
vpush {q4-q7}
|
||||
ldr r4, [sp, #104] @ r4 = srcY
|
||||
ldr r5, [sp, #108] @ r5 = linesizeY
|
||||
ldr r6, [sp, #112] @ r6 = srcU
|
||||
ldr r7, [sp, #116] @ r7 = linesizeU
|
||||
ldr r12,[sp, #124] @ r12 = linesizeV
|
||||
ldr r8, [sp, #128] @ r8 = table
|
||||
ldr r9, [sp, #132] @ r9 = y_offset
|
||||
ldr r10,[sp, #136] @ r10 = y_coeff
|
||||
vdup.16 d0, r10 @ d0 = y_coeff
|
||||
vld1.16 {d1}, [r8] @ d1 = *table
|
||||
sub r3, r3, r0, lsl #2 @ r3 = linesize - width * 4 (padding)
|
||||
sub r5, r5, r0 @ r5 = linesizeY - width (paddingY)
|
||||
sub r7, r7, r0, lsr #1 @ r7 = linesizeU - width / 2 (paddingU)
|
||||
sub r12,r12,r0, lsr #1 @ r12 = linesizeV - width / 2 (paddingV)
|
||||
ldr r10,[sp, #120] @ r10 = srcV
|
||||
.endm
|
||||
|
||||
.macro load_chroma_nv12
|
||||
pld [r12, #64*3]
|
||||
|
||||
vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line
|
||||
vshll.u8 q14, d2, #3 @ q14 = U * (1 << 3)
|
||||
vshll.u8 q15, d3, #3 @ q15 = V * (1 << 3)
|
||||
.endm
|
||||
|
||||
.macro load_chroma_nv21
|
||||
pld [r12, #64*3]
|
||||
|
||||
vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line
|
||||
vshll.u8 q14, d3, #3 @ q14 = U * (1 << 3)
|
||||
vshll.u8 q15, d2, #3 @ q15 = V * (1 << 3)
|
||||
.endm
|
||||
|
||||
.macro load_chroma_yuv420p
|
||||
pld [r10, #64*3]
|
||||
pld [r12, #64*3]
|
||||
|
||||
vld1.8 d2, [r6]! @ d2: chroma red line
|
||||
vld1.8 d3, [r10]! @ d3: chroma blue line
|
||||
vshll.u8 q14, d2, #3 @ q14 = U * (1 << 3)
|
||||
vshll.u8 q15, d3, #3 @ q15 = V * (1 << 3)
|
||||
.endm
|
||||
|
||||
.macro load_chroma_yuv422p
|
||||
pld [r10, #64*3]
|
||||
|
||||
vld1.8 d2, [r6]! @ d2: chroma red line
|
||||
vld1.8 d3, [r10]! @ d3: chroma blue line
|
||||
vshll.u8 q14, d2, #3 @ q14 = U * (1 << 3)
|
||||
vshll.u8 q15, d3, #3 @ q15 = V * (1 << 3)
|
||||
.endm
|
||||
|
||||
.macro increment_and_test_nv12
|
||||
add r11, r11, r3 @ dst2 += padding
|
||||
add r12, r12, r5 @ srcY2 += paddingY
|
||||
add r6, r6, r7 @ srcC += paddingC
|
||||
subs r1, r1, #2 @ height -= 2
|
||||
.endm
|
||||
|
||||
.macro increment_and_test_nv21
|
||||
increment_and_test_nv12
|
||||
.endm
|
||||
|
||||
.macro increment_and_test_yuv420p
|
||||
add r11, r11, r3 @ dst2 += padding
|
||||
add r12, r12, r5 @ srcY2 += paddingY
|
||||
ldr r7, [sp, #116] @ r7 = linesizeU
|
||||
sub r7, r7, r0, lsr #1 @ r7 = linesizeU - width / 2 (paddingU)
|
||||
add r6, r6, r7 @ srcU += paddingU
|
||||
ldr r7, [sp, #124] @ r7 = linesizeV
|
||||
sub r7, r7, r0, lsr #1 @ r7 = linesizeV - width / 2 (paddingV)
|
||||
add r10, r10, r7 @ srcV += paddingV
|
||||
subs r1, r1, #2 @ height -= 2
|
||||
.endm
|
||||
|
||||
.macro increment_and_test_yuv422p
|
||||
add r6, r6, r7 @ srcU += paddingU
|
||||
add r10,r10,r12 @ srcV += paddingV
|
||||
subs r1, r1, #1 @ height -= 1
|
||||
.endm
|
||||
|
||||
.macro process_nv12 ofmt
|
||||
process_2l \ofmt
|
||||
.endm
|
||||
|
||||
.macro process_nv21 ofmt
|
||||
process_2l \ofmt
|
||||
.endm
|
||||
|
||||
.macro process_yuv420p ofmt
|
||||
process_2l \ofmt
|
||||
.endm
|
||||
|
||||
.macro process_yuv422p ofmt
|
||||
process_1l \ofmt
|
||||
.endm
|
||||
|
||||
.macro declare_func ifmt ofmt
|
||||
function ff_\ifmt\()_to_\ofmt\()_neon, export=1
|
||||
load_args_\ifmt
|
||||
vmov.u16 q11, #1024 @ q11 = 128 * (1 << 3)
|
||||
vdup.16 q12, r9 @ q12 = y_offset
|
||||
vmov d26, d0 @ q13 = y_coeff
|
||||
vmov d27, d0 @ q13 = y_coeff
|
||||
1:
|
||||
mov r8, r0 @ r8 = width
|
||||
2:
|
||||
pld [r6, #64*3]
|
||||
pld [r4, #64*3]
|
||||
vmov.i8 d10, #128
|
||||
load_chroma_\ifmt
|
||||
process_\ifmt \ofmt
|
||||
subs r8, r8, #16 @ width -= 16
|
||||
bgt 2b
|
||||
add r2, r2, r3 @ dst += padding
|
||||
add r4, r4, r5 @ srcY += paddingY
|
||||
increment_and_test_\ifmt
|
||||
bgt 1b
|
||||
vpop {q4-q7}
|
||||
pop {r4-r12, lr}
|
||||
mov pc, lr
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.macro declare_rgb_funcs ifmt
|
||||
declare_func \ifmt, argb
|
||||
declare_func \ifmt, rgba
|
||||
declare_func \ifmt, abgr
|
||||
declare_func \ifmt, bgra
|
||||
.endm
|
||||
|
||||
declare_rgb_funcs nv12
|
||||
declare_rgb_funcs nv21
|
||||
declare_rgb_funcs yuv420p
|
||||
declare_rgb_funcs yuv422p
|
||||
236
externals/ffmpeg/libswscale/bayer_template.c
vendored
Executable file
236
externals/ffmpeg/libswscale/bayer_template.c
vendored
Executable file
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
* Bayer-to-RGB/YV12 template
|
||||
* Copyright (c) 2011-2014 Peter Ross <pross@xvid.org>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#if defined(BAYER_BGGR) || defined(BAYER_GBRG)
|
||||
#define BAYER_R 0
|
||||
#define BAYER_G 1
|
||||
#define BAYER_B 2
|
||||
#endif
|
||||
#if defined(BAYER_RGGB) || defined(BAYER_GRBG)
|
||||
#define BAYER_R 2
|
||||
#define BAYER_G 1
|
||||
#define BAYER_B 0
|
||||
#endif
|
||||
|
||||
#if defined(BAYER_8)
|
||||
#define BAYER_READ(x) (x)
|
||||
#define BAYER_SIZEOF 1
|
||||
#define BAYER_SHIFT 0
|
||||
#endif
|
||||
#if defined(BAYER_16LE)
|
||||
#define BAYER_READ(x) AV_RL16(&(x))
|
||||
#define BAYER_SIZEOF 2
|
||||
#define BAYER_SHIFT 8
|
||||
#endif
|
||||
#if defined(BAYER_16BE)
|
||||
#define BAYER_READ(x) AV_RB16(&(x))
|
||||
#define BAYER_SIZEOF 2
|
||||
#define BAYER_SHIFT 8
|
||||
#endif
|
||||
|
||||
#define S(y, x) BAYER_READ(src[(y)*src_stride + BAYER_SIZEOF*(x)])
|
||||
#define T(y, x) (unsigned int)S(y, x)
|
||||
#define R(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_R]
|
||||
#define G(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_G]
|
||||
#define B(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_B]
|
||||
|
||||
#if defined(BAYER_BGGR) || defined(BAYER_RGGB)
|
||||
#define BAYER_TO_RGB24_COPY \
|
||||
R(0, 0) = \
|
||||
R(0, 1) = \
|
||||
R(1, 1) = \
|
||||
R(1, 0) = S(1, 1) >> BAYER_SHIFT; \
|
||||
\
|
||||
G(0, 1) = S(0, 1) >> BAYER_SHIFT; \
|
||||
G(0, 0) = \
|
||||
G(1, 1) = (T(0, 1) + T(1, 0)) >> (1 + BAYER_SHIFT); \
|
||||
G(1, 0) = S(1, 0) >> BAYER_SHIFT; \
|
||||
\
|
||||
B(1, 1) = \
|
||||
B(0, 0) = \
|
||||
B(0, 1) = \
|
||||
B(1, 0) = S(0, 0) >> BAYER_SHIFT;
|
||||
#define BAYER_TO_RGB24_INTERPOLATE \
|
||||
R(0, 0) = (T(-1, -1) + T(-1, 1) + T(1, -1) + T(1, 1)) >> (2 + BAYER_SHIFT); \
|
||||
G(0, 0) = (T(-1, 0) + T( 0, -1) + T(0, 1) + T(1, 0)) >> (2 + BAYER_SHIFT); \
|
||||
B(0, 0) = S(0, 0) >> BAYER_SHIFT; \
|
||||
\
|
||||
R(0, 1) = (T(-1, 1) + T(1, 1)) >> (1 + BAYER_SHIFT); \
|
||||
G(0, 1) = S(0, 1) >> BAYER_SHIFT; \
|
||||
B(0, 1) = (T(0, 0) + T(0, 2)) >> (1 + BAYER_SHIFT); \
|
||||
\
|
||||
R(1, 0) = (T(1, -1) + T(1, 1)) >> (1 + BAYER_SHIFT); \
|
||||
G(1, 0) = S(1, 0) >> BAYER_SHIFT; \
|
||||
B(1, 0) = (T(0, 0) + T(2, 0)) >> (1 + BAYER_SHIFT); \
|
||||
\
|
||||
R(1, 1) = S(1, 1) >> BAYER_SHIFT; \
|
||||
G(1, 1) = (T(0, 1) + T(1, 0) + T(1, 2) + T(2, 1)) >> (2 + BAYER_SHIFT); \
|
||||
B(1, 1) = (T(0, 0) + T(0, 2) + T(2, 0) + T(2, 2)) >> (2 + BAYER_SHIFT);
|
||||
#else
|
||||
#define BAYER_TO_RGB24_COPY \
|
||||
R(0, 0) = \
|
||||
R(0, 1) = \
|
||||
R(1, 1) = \
|
||||
R(1, 0) = S(1, 0) >> BAYER_SHIFT; \
|
||||
\
|
||||
G(0, 0) = S(0, 0) >> BAYER_SHIFT; \
|
||||
G(1, 1) = S(1, 1) >> BAYER_SHIFT; \
|
||||
G(0, 1) = \
|
||||
G(1, 0) = (T(0, 0) + T(1, 1)) >> (1 + BAYER_SHIFT); \
|
||||
\
|
||||
B(1, 1) = \
|
||||
B(0, 0) = \
|
||||
B(0, 1) = \
|
||||
B(1, 0) = S(0, 1) >> BAYER_SHIFT;
|
||||
#define BAYER_TO_RGB24_INTERPOLATE \
|
||||
R(0, 0) = (T(-1, 0) + T(1, 0)) >> (1 + BAYER_SHIFT); \
|
||||
G(0, 0) = S(0, 0) >> BAYER_SHIFT; \
|
||||
B(0, 0) = (T(0, -1) + T(0, 1)) >> (1 + BAYER_SHIFT); \
|
||||
\
|
||||
R(0, 1) = (T(-1, 0) + T(-1, 2) + T(1, 0) + T(1, 2)) >> (2 + BAYER_SHIFT); \
|
||||
G(0, 1) = (T(-1, 1) + T(0, 0) + T(0, 2) + T(1, 1)) >> (2 + BAYER_SHIFT); \
|
||||
B(0, 1) = S(0, 1) >> BAYER_SHIFT; \
|
||||
\
|
||||
R(1, 0) = S(1, 0) >> BAYER_SHIFT; \
|
||||
G(1, 0) = (T(0, 0) + T(1, -1) + T(1, 1) + T(2, 0)) >> (2 + BAYER_SHIFT); \
|
||||
B(1, 0) = (T(0, -1) + T(0, 1) + T(2, -1) + T(2, 1)) >> (2 + BAYER_SHIFT); \
|
||||
\
|
||||
R(1, 1) = (T(1, 0) + T(1, 2)) >> (1 + BAYER_SHIFT); \
|
||||
G(1, 1) = S(1, 1) >> BAYER_SHIFT; \
|
||||
B(1, 1) = (T(0, 1) + T(2, 1)) >> (1 + BAYER_SHIFT);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* invoke ff_rgb24toyv12 for 2x2 pixels
|
||||
*/
|
||||
#define rgb24toyv12_2x2(src, dstY, dstU, dstV, luma_stride, src_stride, rgb2yuv) \
|
||||
ff_rgb24toyv12(src, dstY, dstV, dstU, 2, 2, luma_stride, 0, src_stride, rgb2yuv)
|
||||
|
||||
static void BAYER_RENAME(rgb24_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width)
|
||||
{
|
||||
int i;
|
||||
for (i = 0 ; i < width; i+= 2) {
|
||||
BAYER_TO_RGB24_COPY
|
||||
src += 2 * BAYER_SIZEOF;
|
||||
dst += 6;
|
||||
}
|
||||
}
|
||||
|
||||
static void BAYER_RENAME(rgb24_interpolate)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width)
|
||||
{
|
||||
int i;
|
||||
|
||||
BAYER_TO_RGB24_COPY
|
||||
src += 2 * BAYER_SIZEOF;
|
||||
dst += 6;
|
||||
|
||||
for (i = 2 ; i < width - 2; i+= 2) {
|
||||
BAYER_TO_RGB24_INTERPOLATE
|
||||
src += 2 * BAYER_SIZEOF;
|
||||
dst += 6;
|
||||
}
|
||||
|
||||
if (width > 2) {
|
||||
BAYER_TO_RGB24_COPY
|
||||
}
|
||||
}
|
||||
|
||||
static void BAYER_RENAME(yv12_copy)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv)
|
||||
{
|
||||
uint8_t dst[12];
|
||||
const int dst_stride = 6;
|
||||
int i;
|
||||
for (i = 0 ; i < width; i+= 2) {
|
||||
BAYER_TO_RGB24_COPY
|
||||
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
|
||||
src += 2 * BAYER_SIZEOF;
|
||||
dstY += 2;
|
||||
dstU++;
|
||||
dstV++;
|
||||
}
|
||||
}
|
||||
|
||||
static void BAYER_RENAME(yv12_interpolate)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv)
|
||||
{
|
||||
uint8_t dst[12];
|
||||
const int dst_stride = 6;
|
||||
int i;
|
||||
|
||||
BAYER_TO_RGB24_COPY
|
||||
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
|
||||
src += 2 * BAYER_SIZEOF;
|
||||
dstY += 2;
|
||||
dstU++;
|
||||
dstV++;
|
||||
|
||||
for (i = 2 ; i < width - 2; i+= 2) {
|
||||
BAYER_TO_RGB24_INTERPOLATE
|
||||
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
|
||||
src += 2 * BAYER_SIZEOF;
|
||||
dstY += 2;
|
||||
dstU++;
|
||||
dstV++;
|
||||
}
|
||||
|
||||
if (width > 2) {
|
||||
BAYER_TO_RGB24_COPY
|
||||
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
|
||||
}
|
||||
}
|
||||
|
||||
#undef S
|
||||
#undef T
|
||||
#undef R
|
||||
#undef G
|
||||
#undef B
|
||||
#undef BAYER_TO_RGB24_COPY
|
||||
#undef BAYER_TO_RGB24_INTERPOLATE
|
||||
|
||||
#undef BAYER_RENAME
|
||||
|
||||
#undef BAYER_R
|
||||
#undef BAYER_G
|
||||
#undef BAYER_B
|
||||
#undef BAYER_READ
|
||||
#undef BAYER_SIZEOF
|
||||
#undef BAYER_SHIFT
|
||||
|
||||
#if defined(BAYER_BGGR)
|
||||
#undef BAYER_BGGR
|
||||
#endif
|
||||
#if defined(BAYER_RGGB)
|
||||
#undef BAYER_RGGB
|
||||
#endif
|
||||
#if defined(BAYER_GBRG)
|
||||
#undef BAYER_GBRG
|
||||
#endif
|
||||
#if defined(BAYER_GRBG)
|
||||
#undef BAYER_GRBG
|
||||
#endif
|
||||
#if defined(BAYER_8)
|
||||
#undef BAYER_8
|
||||
#endif
|
||||
#if defined(BAYER_16LE)
|
||||
#undef BAYER_16LE
|
||||
#endif
|
||||
#if defined(BAYER_16BE)
|
||||
#undef BAYER_16BE
|
||||
#endif
|
||||
72
externals/ffmpeg/libswscale/gamma.c
vendored
Executable file
72
externals/ffmpeg/libswscale/gamma.c
vendored
Executable file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "swscale_internal.h"
|
||||
|
||||
typedef struct GammaContext
|
||||
{
|
||||
uint16_t *table;
|
||||
} GammaContext;
|
||||
|
||||
// gamma_convert expects 16 bit rgb format
|
||||
// it writes directly in src slice thus it must be modifiable (done through cascade context)
|
||||
static int gamma_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
|
||||
{
|
||||
GammaContext *instance = desc->instance;
|
||||
uint16_t *table = instance->table;
|
||||
int srcW = desc->src->width;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < sliceH; ++i) {
|
||||
uint8_t ** src = desc->src->plane[0].line;
|
||||
int src_pos = sliceY+i - desc->src->plane[0].sliceY;
|
||||
|
||||
uint16_t *src1 = (uint16_t*)*(src+src_pos);
|
||||
int j;
|
||||
for (j = 0; j < srcW; ++j) {
|
||||
uint16_t r = AV_RL16(src1 + j*4 + 0);
|
||||
uint16_t g = AV_RL16(src1 + j*4 + 1);
|
||||
uint16_t b = AV_RL16(src1 + j*4 + 2);
|
||||
|
||||
AV_WL16(src1 + j*4 + 0, table[r]);
|
||||
AV_WL16(src1 + j*4 + 1, table[g]);
|
||||
AV_WL16(src1 + j*4 + 2, table[b]);
|
||||
}
|
||||
|
||||
}
|
||||
return sliceH;
|
||||
}
|
||||
|
||||
|
||||
int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice * src, uint16_t *table)
|
||||
{
|
||||
GammaContext *li = av_malloc(sizeof(GammaContext));
|
||||
if (!li)
|
||||
return AVERROR(ENOMEM);
|
||||
li->table = table;
|
||||
|
||||
desc->instance = li;
|
||||
desc->src = src;
|
||||
desc->dst = NULL;
|
||||
desc->process = &gamma_convert;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
289
externals/ffmpeg/libswscale/hscale.c
vendored
Executable file
289
externals/ffmpeg/libswscale/hscale.c
vendored
Executable file
@@ -0,0 +1,289 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "swscale_internal.h"
|
||||
|
||||
/// Scaler instance data
|
||||
typedef struct FilterContext
|
||||
{
|
||||
uint16_t *filter;
|
||||
int *filter_pos;
|
||||
int filter_size;
|
||||
int xInc;
|
||||
} FilterContext;
|
||||
|
||||
/// Color conversion instance data
|
||||
typedef struct ColorContext
|
||||
{
|
||||
uint32_t *pal;
|
||||
} ColorContext;
|
||||
|
||||
static int lum_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
|
||||
{
|
||||
FilterContext *instance = desc->instance;
|
||||
int srcW = desc->src->width;
|
||||
int dstW = desc->dst->width;
|
||||
int xInc = instance->xInc;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < sliceH; ++i) {
|
||||
uint8_t ** src = desc->src->plane[0].line;
|
||||
uint8_t ** dst = desc->dst->plane[0].line;
|
||||
int src_pos = sliceY+i - desc->src->plane[0].sliceY;
|
||||
int dst_pos = sliceY+i - desc->dst->plane[0].sliceY;
|
||||
|
||||
|
||||
if (c->hyscale_fast) {
|
||||
c->hyscale_fast(c, (int16_t*)dst[dst_pos], dstW, src[src_pos], srcW, xInc);
|
||||
} else {
|
||||
c->hyScale(c, (int16_t*)dst[dst_pos], dstW, (const uint8_t *)src[src_pos], instance->filter,
|
||||
instance->filter_pos, instance->filter_size);
|
||||
}
|
||||
|
||||
if (c->lumConvertRange)
|
||||
c->lumConvertRange((int16_t*)dst[dst_pos], dstW);
|
||||
|
||||
desc->dst->plane[0].sliceH += 1;
|
||||
|
||||
if (desc->alpha) {
|
||||
src = desc->src->plane[3].line;
|
||||
dst = desc->dst->plane[3].line;
|
||||
|
||||
src_pos = sliceY+i - desc->src->plane[3].sliceY;
|
||||
dst_pos = sliceY+i - desc->dst->plane[3].sliceY;
|
||||
|
||||
desc->dst->plane[3].sliceH += 1;
|
||||
|
||||
if (c->hyscale_fast) {
|
||||
c->hyscale_fast(c, (int16_t*)dst[dst_pos], dstW, src[src_pos], srcW, xInc);
|
||||
} else {
|
||||
c->hyScale(c, (int16_t*)dst[dst_pos], dstW, (const uint8_t *)src[src_pos], instance->filter,
|
||||
instance->filter_pos, instance->filter_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sliceH;
|
||||
}
|
||||
|
||||
static int lum_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
|
||||
{
|
||||
int srcW = desc->src->width;
|
||||
ColorContext * instance = desc->instance;
|
||||
uint32_t * pal = instance->pal;
|
||||
int i;
|
||||
|
||||
desc->dst->plane[0].sliceY = sliceY;
|
||||
desc->dst->plane[0].sliceH = sliceH;
|
||||
desc->dst->plane[3].sliceY = sliceY;
|
||||
desc->dst->plane[3].sliceH = sliceH;
|
||||
|
||||
for (i = 0; i < sliceH; ++i) {
|
||||
int sp0 = sliceY+i - desc->src->plane[0].sliceY;
|
||||
int sp1 = ((sliceY+i) >> desc->src->v_chr_sub_sample) - desc->src->plane[1].sliceY;
|
||||
const uint8_t * src[4] = { desc->src->plane[0].line[sp0],
|
||||
desc->src->plane[1].line[sp1],
|
||||
desc->src->plane[2].line[sp1],
|
||||
desc->src->plane[3].line[sp0]};
|
||||
uint8_t * dst = desc->dst->plane[0].line[i];
|
||||
|
||||
if (c->lumToYV12) {
|
||||
c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal);
|
||||
} else if (c->readLumPlanar) {
|
||||
c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table);
|
||||
}
|
||||
|
||||
|
||||
if (desc->alpha) {
|
||||
dst = desc->dst->plane[3].line[i];
|
||||
if (c->alpToYV12) {
|
||||
c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal);
|
||||
} else if (c->readAlpPlanar) {
|
||||
c->readAlpPlanar(dst, src, srcW, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sliceH;
|
||||
}
|
||||
|
||||
int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal)
|
||||
{
|
||||
ColorContext * li = av_malloc(sizeof(ColorContext));
|
||||
if (!li)
|
||||
return AVERROR(ENOMEM);
|
||||
li->pal = pal;
|
||||
desc->instance = li;
|
||||
|
||||
desc->alpha = isALPHA(src->fmt) && isALPHA(dst->fmt);
|
||||
desc->src =src;
|
||||
desc->dst = dst;
|
||||
desc->process = &lum_convert;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc)
|
||||
{
|
||||
FilterContext *li = av_malloc(sizeof(FilterContext));
|
||||
if (!li)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
li->filter = filter;
|
||||
li->filter_pos = filter_pos;
|
||||
li->filter_size = filter_size;
|
||||
li->xInc = xInc;
|
||||
|
||||
desc->instance = li;
|
||||
|
||||
desc->alpha = isALPHA(src->fmt) && isALPHA(dst->fmt);
|
||||
desc->src = src;
|
||||
desc->dst = dst;
|
||||
|
||||
desc->process = &lum_h_scale;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chr_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
|
||||
{
|
||||
FilterContext *instance = desc->instance;
|
||||
int srcW = AV_CEIL_RSHIFT(desc->src->width, desc->src->h_chr_sub_sample);
|
||||
int dstW = AV_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample);
|
||||
int xInc = instance->xInc;
|
||||
|
||||
uint8_t ** src1 = desc->src->plane[1].line;
|
||||
uint8_t ** dst1 = desc->dst->plane[1].line;
|
||||
uint8_t ** src2 = desc->src->plane[2].line;
|
||||
uint8_t ** dst2 = desc->dst->plane[2].line;
|
||||
|
||||
int src_pos1 = sliceY - desc->src->plane[1].sliceY;
|
||||
int dst_pos1 = sliceY - desc->dst->plane[1].sliceY;
|
||||
|
||||
int src_pos2 = sliceY - desc->src->plane[2].sliceY;
|
||||
int dst_pos2 = sliceY - desc->dst->plane[2].sliceY;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < sliceH; ++i) {
|
||||
if (c->hcscale_fast) {
|
||||
c->hcscale_fast(c, (uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW, src1[src_pos1+i], src2[src_pos2+i], srcW, xInc);
|
||||
} else {
|
||||
c->hcScale(c, (uint16_t*)dst1[dst_pos1+i], dstW, src1[src_pos1+i], instance->filter, instance->filter_pos, instance->filter_size);
|
||||
c->hcScale(c, (uint16_t*)dst2[dst_pos2+i], dstW, src2[src_pos2+i], instance->filter, instance->filter_pos, instance->filter_size);
|
||||
}
|
||||
|
||||
if (c->chrConvertRange)
|
||||
c->chrConvertRange((uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW);
|
||||
|
||||
desc->dst->plane[1].sliceH += 1;
|
||||
desc->dst->plane[2].sliceH += 1;
|
||||
}
|
||||
return sliceH;
|
||||
}
|
||||
|
||||
/* Convert the chroma of one input slice to the internal planar (YV12-style)
 * representation, via either the packed/paletted chrToYV12 converter or the
 * planar RGB readChrPlanar converter.
 * Returns the number of lines processed (sliceH). */
static int chr_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    int srcW = AV_CEIL_RSHIFT(desc->src->width, desc->src->h_chr_sub_sample);
    ColorContext * instance = desc->instance;
    uint32_t * pal = instance->pal;   /* palette for paletted input formats */

    /* line offsets inside the source buffers: planes 0/3 are at luma
     * resolution (hence the shift round-trip), planes 1/2 at chroma
     * resolution */
    int sp0 = (sliceY - (desc->src->plane[0].sliceY >> desc->src->v_chr_sub_sample)) << desc->src->v_chr_sub_sample;
    int sp1 = sliceY - desc->src->plane[1].sliceY;

    int i;

    /* the converted chroma slice starts at sliceY and covers sliceH lines */
    desc->dst->plane[1].sliceY = sliceY;
    desc->dst->plane[1].sliceH = sliceH;
    desc->dst->plane[2].sliceY = sliceY;
    desc->dst->plane[2].sliceH = sliceH;

    for (i = 0; i < sliceH; ++i) {
        const uint8_t * src[4] = { desc->src->plane[0].line[sp0+i],
                        desc->src->plane[1].line[sp1+i],
                        desc->src->plane[2].line[sp1+i],
                        desc->src->plane[3].line[sp0+i]};

        uint8_t * dst1 = desc->dst->plane[1].line[i];
        uint8_t * dst2 = desc->dst->plane[2].line[i];
        if (c->chrToYV12) {
            /* packed / paletted input */
            c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal);
        } else if (c->readChrPlanar) {
            /* planar RGB-like input */
            c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table);
        }
    }
    return sliceH;
}
|
||||
|
||||
/* Initialize a filter descriptor that performs chroma pixel-format
 * conversion (chr_convert) from src into dst, using pal as the palette
 * for paletted input formats.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure. */
int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal)
{
    ColorContext *ctx = av_malloc(sizeof(*ctx));

    if (!ctx)
        return AVERROR(ENOMEM);
    ctx->pal = pal;

    desc->instance = ctx;
    desc->src      = src;
    desc->dst      = dst;
    desc->process  = &chr_convert;

    return 0;
}
|
||||
|
||||
/* Initialize a filter descriptor for horizontal chroma scaling
 * (chr_h_scale) from src into dst, with the given filter coefficients,
 * per-pixel source positions, tap count and 16.16 fixed-point increment.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure. */
int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc)
{
    FilterContext *ctx = av_malloc(sizeof(*ctx));

    if (!ctx)
        return AVERROR(ENOMEM);

    ctx->filter      = filter;
    ctx->filter_pos  = filter_pos;
    ctx->filter_size = filter_size;
    ctx->xInc        = xInc;

    desc->instance = ctx;
    /* alpha is only processed when both ends actually carry it */
    desc->alpha    = isALPHA(src->fmt) && isALPHA(dst->fmt);
    desc->src      = src;
    desc->dst      = dst;
    desc->process  = &chr_h_scale;

    return 0;
}
|
||||
|
||||
/* No-op chroma stage used when chroma needs no scaling: simply mark the
 * last available_lines of each chroma plane as the current slice window.
 * Always returns 0 (no lines produced by this stage). */
static int no_chr_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    int p;

    for (p = 1; p <= 2; ++p) {
        desc->dst->plane[p].sliceY = sliceY + sliceH - desc->dst->plane[p].available_lines;
        desc->dst->plane[p].sliceH = desc->dst->plane[p].available_lines;
    }
    return 0;
}
|
||||
|
||||
/* Initialize a pass-through chroma descriptor (no scaling, no alpha,
 * no per-descriptor state).  Always returns 0. */
int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst)
{
    desc->instance = NULL;             /* stateless stage */
    desc->alpha    = 0;
    desc->process  = &no_chr_scale;
    desc->src      = src;
    desc->dst      = dst;
    return 0;
}
|
||||
55
externals/ffmpeg/libswscale/hscale_fast_bilinear.c
vendored
Executable file
55
externals/ffmpeg/libswscale/hscale_fast_bilinear.c
vendored
Executable file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "swscale_internal.h"
|
||||
|
||||
/* C reference for fast horizontal luma/alpha scaling: 16.16 fixed-point
 * bilinear interpolation of srcW 8-bit samples into dstWidth 7.7-ish
 * fixed-point results (values scaled by 128). */
void ff_hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                       const uint8_t *src, int srcW, int xInc)
{
    unsigned int pos = 0;   /* 16.16 fixed-point source position */
    int i = 0;

    while (i < dstWidth) {
        const unsigned int idx  = pos >> 16;            /* integer sample index */
        const unsigned int frac = (pos & 0xFFFF) >> 9;  /* 7-bit blend weight */
        dst[i] = (src[idx] << 7) + (src[idx + 1] - src[idx]) * frac;
        pos += xInc;
        i++;
    }
    /* Positions that land on/after the last source sample read past the end
     * above; overwrite them with the replicated last sample (times 128). */
    for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--)
        dst[i] = src[srcW - 1] * 128;
}
|
||||
|
||||
/* C reference for fast horizontal chroma scaling: bilinear interpolation of
 * the U (src1/dst1) and V (src2/dst2) planes in a single pass, 16.16
 * fixed-point stepping, 7-bit blend weights. */
void ff_hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                       int dstWidth, const uint8_t *src1,
                       const uint8_t *src2, int srcW, int xInc)
{
    unsigned int pos = 0;   /* 16.16 fixed-point source position */
    int i;

    for (i = 0; i < dstWidth; i++) {
        const unsigned int idx  = pos >> 16;
        const unsigned int frac = (pos & 0xFFFF) >> 9;
        /* frac is 0..127, so (frac ^ 127) == 127 - frac */
        dst1[i] = src1[idx] * (frac ^ 127) + src1[idx + 1] * frac;
        dst2[i] = src2[idx] * (frac ^ 127) + src2[idx + 1] * frac;
        pos += xInc;
    }
    /* replicate the last source sample where the loop above overran it */
    for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--) {
        dst1[i] = src1[srcW - 1] * 128;
        dst2[i] = src2[srcW - 1] * 128;
    }
}
|
||||
1740
externals/ffmpeg/libswscale/input.c
vendored
Executable file
1740
externals/ffmpeg/libswscale/input.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
7
externals/ffmpeg/libswscale/libswscale.v
vendored
Executable file
7
externals/ffmpeg/libswscale/libswscale.v
vendored
Executable file
@@ -0,0 +1,7 @@
|
||||
LIBSWSCALE_MAJOR {
|
||||
global:
|
||||
swscale_*;
|
||||
sws_*;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
1
externals/ffmpeg/libswscale/log2_tab.c
vendored
Executable file
1
externals/ffmpeg/libswscale/log2_tab.c
vendored
Executable file
@@ -0,0 +1 @@
|
||||
#include "libavutil/log2_tab.c"
|
||||
98
externals/ffmpeg/libswscale/options.c
vendored
Executable file
98
externals/ffmpeg/libswscale/options.c
vendored
Executable file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/avutil.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/pixfmt.h"
|
||||
#include "swscale.h"
|
||||
#include "swscale_internal.h"
|
||||
|
||||
/* AVClass item_name callback: every SwsContext logs under "swscaler". */
static const char *sws_context_to_name(void *ptr)
{
    return "swscaler";
}
|
||||
|
||||
#define OFFSET(x) offsetof(SwsContext, x)
|
||||
#define DEFAULT 0
|
||||
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
|
||||
|
||||
/* AVOption table for SwsContext; exposed via ff_sws_context_class.
 * Named constants (AV_OPT_TYPE_CONST) attach to the option whose name
 * appears in their unit string (last column). */
static const AVOption swscale_options[] = {
    /* scaler algorithm and behaviour flags */
    { "sws_flags",       "scaler flags",                  OFFSET(flags),     AV_OPT_TYPE_FLAGS,  { .i64 = SWS_BICUBIC        }, 0,       UINT_MAX,       VE, "sws_flags" },
        { "fast_bilinear",   "fast bilinear",                 0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_FAST_BILINEAR  }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "bilinear",        "bilinear",                      0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_BILINEAR       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "bicubic",         "bicubic",                       0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_BICUBIC        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "experimental",    "experimental",                  0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_X              }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "neighbor",        "nearest neighbor",              0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_POINT          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "area",            "averaging area",                0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_AREA           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "bicublin",        "luma bicubic, chroma bilinear", 0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_BICUBLIN       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "gauss",           "Gaussian",                      0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_GAUSS          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "sinc",            "sinc",                          0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_SINC           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "lanczos",         "Lanczos",                       0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_LANCZOS        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "spline",          "natural bicubic spline",        0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_SPLINE         }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "print_info",      "print info",                    0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_PRINT_INFO     }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "accurate_rnd",    "accurate rounding",             0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_ACCURATE_RND   }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "full_chroma_int", "full chroma interpolation",     0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "full_chroma_inp", "full chroma input",             0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "bitexact",        "",                              0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_BITEXACT       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
        { "error_diffusion", "error diffusion dither",        0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_ERROR_DIFFUSION}, INT_MIN, INT_MAX,        VE, "sws_flags" },

    /* geometry and pixel formats */
    { "srcw",            "source width",                  OFFSET(srcW),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
    { "srch",            "source height",                 OFFSET(srcH),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
    { "dstw",            "destination width",             OFFSET(dstW),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
    { "dsth",            "destination height",            OFFSET(dstH),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
    { "src_format",      "source format",                 OFFSET(srcFormat), AV_OPT_TYPE_PIXEL_FMT,{ .i64 = DEFAULT          }, 0,       INT_MAX,        VE },
    { "dst_format",      "destination format",            OFFSET(dstFormat), AV_OPT_TYPE_PIXEL_FMT,{ .i64 = DEFAULT          }, 0,       INT_MAX,        VE },
    { "src_range",       "source is full range",          OFFSET(srcRange),  AV_OPT_TYPE_BOOL,   { .i64 = DEFAULT            }, 0,       1,              VE },
    { "dst_range",       "destination is full range",     OFFSET(dstRange),  AV_OPT_TYPE_BOOL,   { .i64 = DEFAULT            }, 0,       1,              VE },
    { "param0",          "scaler param 0",                OFFSET(param[0]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
    { "param1",          "scaler param 1",                OFFSET(param[1]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },

    /* chroma sample positions, in 1/256th of the luma grid; -513 = unset */
    { "src_v_chr_pos",   "source vertical chroma position in luma grid/256" ,      OFFSET(src_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE },
    { "src_h_chr_pos",   "source horizontal chroma position in luma grid/256",     OFFSET(src_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE },
    { "dst_v_chr_pos",   "destination vertical chroma position in luma grid/256" , OFFSET(dst_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE },
    { "dst_h_chr_pos",   "destination horizontal chroma position in luma grid/256",OFFSET(dst_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE },

    /* dithering */
    { "sws_dither",      "set dithering algorithm",       OFFSET(dither),    AV_OPT_TYPE_INT,    { .i64 = SWS_DITHER_AUTO    }, 0,       NB_SWS_DITHER,  VE, "sws_dither" },
        { "auto",            "leave choice to sws",           0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_DITHER_AUTO    }, INT_MIN, INT_MAX,        VE, "sws_dither" },
        { "bayer",           "bayer dither",                  0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_DITHER_BAYER   }, INT_MIN, INT_MAX,        VE, "sws_dither" },
        { "ed",              "error diffusion",               0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_DITHER_ED      }, INT_MIN, INT_MAX,        VE, "sws_dither" },
        { "a_dither",        "arithmetic addition dither",    0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_DITHER_A_DITHER}, INT_MIN, INT_MAX,        VE, "sws_dither" },
        { "x_dither",        "arithmetic xor dither",         0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_DITHER_X_DITHER}, INT_MIN, INT_MAX,        VE, "sws_dither" },
    { "gamma",           "gamma correct scaling",         OFFSET(gamma_flag),AV_OPT_TYPE_BOOL,   { .i64 = 0                  }, 0,       1,              VE },

    /* alpha blending mode for alpha -> non-alpha conversions */
    { "alphablend",      "mode for alpha -> non alpha",   OFFSET(alphablend),AV_OPT_TYPE_INT,    { .i64 = SWS_ALPHA_BLEND_NONE}, 0,      SWS_ALPHA_BLEND_NB-1, VE, "alphablend" },
        { "none",            "ignore alpha",                  0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_ALPHA_BLEND_NONE}, INT_MIN, INT_MAX,       VE, "alphablend" },
        { "uniform_color",   "blend onto a uniform color",    0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_ALPHA_BLEND_UNIFORM},INT_MIN, INT_MAX,     VE, "alphablend" },
        { "checkerboard",    "blend onto a checkerboard",     0,                 AV_OPT_TYPE_CONST,  { .i64 = SWS_ALPHA_BLEND_CHECKERBOARD},INT_MIN, INT_MAX, VE, "alphablend" },

    { NULL }
};
|
||||
|
||||
/* AVClass describing SwsContext for the AVOption/logging machinery. */
const AVClass ff_sws_context_class = {
    .class_name = "SWScaler",
    .item_name  = sws_context_to_name,
    .option     = swscale_options,
    .category   = AV_CLASS_CATEGORY_SWSCALER,
    .version    = LIBAVUTIL_VERSION_INT,
};
|
||||
|
||||
/* Public accessor for the SwsContext AVClass (see sws_get_class() docs
 * in swscale.h). */
const AVClass *sws_get_class(void)
{
    return &ff_sws_context_class;
}
|
||||
3024
externals/ffmpeg/libswscale/output.c
vendored
Executable file
3024
externals/ffmpeg/libswscale/output.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
4
externals/ffmpeg/libswscale/ppc/Makefile
vendored
Executable file
4
externals/ffmpeg/libswscale/ppc/Makefile
vendored
Executable file
@@ -0,0 +1,4 @@
|
||||
OBJS += ppc/swscale_altivec.o \
|
||||
ppc/yuv2rgb_altivec.o \
|
||||
ppc/yuv2yuv_altivec.o \
|
||||
ppc/swscale_vsx.o
|
||||
290
externals/ffmpeg/libswscale/ppc/swscale_altivec.c
vendored
Executable file
290
externals/ffmpeg/libswscale/ppc/swscale_altivec.c
vendored
Executable file
@@ -0,0 +1,290 @@
|
||||
/*
|
||||
* AltiVec-enhanced yuv2yuvX
|
||||
*
|
||||
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
||||
* based on the equivalent C code in swscale.c
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "yuv2rgb_altivec.h"
|
||||
#include "libavutil/ppc/util_altivec.h"
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
#if HAVE_BIGENDIAN
|
||||
/* all-zero vector used throughout the AltiVec kernels */
#define vzero vec_splat_s32(0)

/* Load the next aligned 16 bytes of source line s at element offset b and
 * permute with the previous vector a (mask c) to obtain the unaligned
 * window in ls; rolls a forward for the next iteration. */
#define GET_LS(a,b,c,s) {\
    vector signed short l2  = vec_ld(((b) << 1) + 16, s);\
    ls  = vec_perm(a, l2, c);\
    a = l2;\
}

/* Multiply-accumulate one filter tap over 8 pixels of line l1 into the
 * two 32-bit accumulators d1/d2 (even/odd products merged back in order). */
#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {\
    vector signed short ls;\
    vector signed int   vf1, vf2, i1, i2;\
    GET_LS(l1, x, perm, src);\
    i1  = vec_mule(filter, ls);\
    i2  = vec_mulo(filter, ls);\
    vf1 = vec_mergeh(i1, i2);\
    vf2 = vec_mergel(i1, i2);\
    d1 = vec_add(d1, vf1);\
    d2 = vec_add(d2, vf2);\
} while (0)

/* Load one possibly-unaligned filter vector at byte offset joffset
 * (joffset must be in scope at the expansion site). */
#define LOAD_FILTER(vf,f) {\
    vector unsigned char perm0 = vec_lvsl(joffset, f);\
    vf = vec_ld(joffset, f);\
    vf = vec_perm(vf, vf, perm0);\
}
/* First aligned load of a source line plus its alignment permute mask
 * (xoffset must be in scope at the expansion site). */
#define LOAD_L1(ll1,s,p){\
    p = vec_lvsl(xoffset, s);\
    ll1 = vec_ld(xoffset, s);\
}
|
||||
|
||||
// The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
|
||||
|
||||
// The neat trick: We only care for half the elements,
|
||||
// high or low depending on (i<<3)%16 (it's 0 or 8 here),
|
||||
// and we're going to use vec_mule, so we choose
|
||||
// carefully how to "unpack" the elements into the even slots.
|
||||
/* Load the 4 filter taps for output pixel a and place them in the even
 * 16-bit slots (for vec_mule); which half they land in depends on whether
 * (a << 3) crosses an 8-byte boundary within the 16-byte load. */
#define GET_VF4(a, vf, f) {\
    vf = vec_ld(a << 3, f);\
    if ((a << 3) % 16)\
        vf = vec_mergel(vf, (vector signed short)vzero);\
    else\
        vf = vec_mergeh(vf, (vector signed short)vzero);\
}
/* Initial aligned load of a stream plus its alignment permute mask. */
#define FIRST_LOAD(sv, pos, s, per) {\
    sv  = vec_ld(pos, s);\
    per = vec_lvsl(pos, s);\
}
/* Roll two register pairs forward for the next loop iteration. */
#define UPDATE_PTR(s0, d0, s1, d1) {\
    d0 = s0;\
    d1 = s1;\
}
/* Load the next 16 unaligned source bytes: fetch the following aligned
 * vector into v1 and permute it with the previous one (v0) into vf. */
#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\
    v1 = vec_ld(pos + a + 16, s);\
    vf = vec_perm(v0, v1, per);\
}
|
||||
/* Load 8 unaligned source bytes: v0 already holds the first aligned vector
 * (from FIRST_LOAD); a second aligned load into v1 is only needed when the
 * 8 requested bytes straddle a 16-byte boundary.
 * Fixed to reference the macro parameter v1 rather than the call-site
 * spelling src_v1, so the macro is hygienic; both existing call sites pass
 * src_v1 as v1, so expansion is unchanged. */
#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) {\
    if ((((uintptr_t)s + pos) % 16) > 8) {\
        v1 = vec_ld(pos + a + 16, s);\
    }\
    vf = vec_perm(v0, v1, per);\
}
|
||||
/* Load the next unaligned filter vector for output pixel a, tap group b:
 * fetch the following aligned vector into vf1 and permute with the
 * previous one (vf0) using mask per (filterSize is in scope at the
 * expansion site). */
#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\
    vf1 = vec_ld((a * 2 * filterSize) + (b * 2) + 16 + off, f);\
    vf  = vec_perm(vf0, vf1, per);\
}

/* Instantiate the big-endian AltiVec kernels from the shared template
 * with an _altivec suffix. */
#define FUNC(name) name ## _altivec
#include "swscale_ppc_template.c"
#undef FUNC

#undef vzero
|
||||
|
||||
#endif /* HAVE_BIGENDIAN */
|
||||
|
||||
#define output_pixel(pos, val, bias, signedness) \
|
||||
if (big_endian) { \
|
||||
AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
|
||||
} else { \
|
||||
AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
|
||||
}
|
||||
|
||||
static void
|
||||
yuv2plane1_float_u(const int32_t *src, float *dest, int dstW, int start)
|
||||
{
|
||||
static const int big_endian = HAVE_BIGENDIAN;
|
||||
static const int shift = 3;
|
||||
static const float float_mult = 1.0f / 65535.0f;
|
||||
int i, val;
|
||||
uint16_t val_uint;
|
||||
|
||||
for (i = start; i < dstW; ++i){
|
||||
val = src[i] + (1 << (shift - 1));
|
||||
output_pixel(&val_uint, val, 0, uint);
|
||||
dest[i] = float_mult * (float)val_uint;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
yuv2plane1_float_bswap_u(const int32_t *src, uint32_t *dest, int dstW, int start)
|
||||
{
|
||||
static const int big_endian = HAVE_BIGENDIAN;
|
||||
static const int shift = 3;
|
||||
static const float float_mult = 1.0f / 65535.0f;
|
||||
int i, val;
|
||||
uint16_t val_uint;
|
||||
|
||||
for (i = start; i < dstW; ++i){
|
||||
val = src[i] + (1 << (shift - 1));
|
||||
output_pixel(&val_uint, val, 0, uint);
|
||||
dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
|
||||
}
|
||||
}
|
||||
|
||||
/* AltiVec path: convert int32 samples to normalized floats, 4 at a time.
 * Scalar helper handles the unaligned head (up to the first 16-byte
 * aligned dest) and the tail. */
static void yuv2plane1_float_altivec(const int32_t *src, float *dest, int dstW)
{
    const int dst_u = -(uintptr_t)dest & 3;   /* elements until dest is 16-byte aligned */
    const int shift = 3;
    const int add   = (1 << (shift - 1));     /* rounding bias */
    const int clip  = (1 << 16) - 1;          /* uint16 ceiling */
    const float fmult = 1.0f / 65535.0f;      /* normalize to [0,1] */
    const vec_u32 vadd     = (vec_u32) {add, add, add, add};
    const vec_u32 vshift   = (vec_u32) vec_splat_u32(shift);
    const vec_u32 vlargest = (vec_u32) {clip, clip, clip, clip};
    const vec_f   vmul     = (vec_f) {fmult, fmult, fmult, fmult};
    const vec_f   vzero    = (vec_f) {0, 0, 0, 0};
    vec_u32 v;
    vec_f   vd;
    int i;

    /* unaligned head, scalar */
    yuv2plane1_float_u(src, dest, dst_u, 0);

    for (i = dst_u; i < dstW - 3; i += 4) {
        v = vec_ld(0, (const uint32_t *) &src[i]);
        v = vec_add(v, vadd);          /* round */
        v = vec_sr(v, vshift);         /* >> shift */
        v = vec_min(v, vlargest);      /* clip to uint16 */

        vd = vec_ctf(v, 0);            /* int -> float */
        vd = vec_madd(vd, vmul, vzero);/* scale by 1/65535 */

        vec_st(vd, 0, &dest[i]);
    }

    /* remaining tail, scalar */
    yuv2plane1_float_u(src, dest, dstW, i);
}
|
||||
|
||||
/* AltiVec path with byte-swapped float output: identical arithmetic to
 * yuv2plane1_float_altivec, then a 32-bit byte swap done as a pair of
 * rotates (rotate words by 16, then halfwords by 8). */
static void yuv2plane1_float_bswap_altivec(const int32_t *src, uint32_t *dest, int dstW)
{
    const int dst_u = -(uintptr_t)dest & 3;   /* elements until dest is 16-byte aligned */
    const int shift = 3;
    const int add   = (1 << (shift - 1));     /* rounding bias */
    const int clip  = (1 << 16) - 1;          /* uint16 ceiling */
    const float fmult = 1.0f / 65535.0f;      /* normalize to [0,1] */
    const vec_u32 vadd     = (vec_u32) {add, add, add, add};
    const vec_u32 vshift   = (vec_u32) vec_splat_u32(shift);
    const vec_u32 vlargest = (vec_u32) {clip, clip, clip, clip};
    const vec_f   vmul     = (vec_f) {fmult, fmult, fmult, fmult};
    const vec_f   vzero    = (vec_f) {0, 0, 0, 0};
    const vec_u32 vswapbig   = (vec_u32) {16, 16, 16, 16}; /* word rotate */
    const vec_u16 vswapsmall = vec_splat_u16(8);           /* halfword rotate */
    vec_u32 v;
    vec_f   vd;
    int i;

    /* unaligned head, scalar */
    yuv2plane1_float_bswap_u(src, dest, dst_u, 0);

    for (i = dst_u; i < dstW - 3; i += 4) {
        v = vec_ld(0, (const uint32_t *) &src[i]);
        v = vec_add(v, vadd);          /* round */
        v = vec_sr(v, vshift);         /* >> shift */
        v = vec_min(v, vlargest);      /* clip to uint16 */

        vd = vec_ctf(v, 0);            /* int -> float */
        vd = vec_madd(vd, vmul, vzero);/* scale by 1/65535 */

        /* byteswap each 32-bit lane via two rotates */
        vd = (vec_f) vec_rl((vec_u32) vd, vswapbig);
        vd = (vec_f) vec_rl((vec_u16) vd, vswapsmall);

        vec_st(vd, 0, (float *) &dest[i]);
    }

    /* remaining tail, scalar */
    yuv2plane1_float_bswap_u(src, dest, dstW, i);
}
|
||||
|
||||
/* Glue: adapt the float kernels to the generic yuv2planar1 signature
 * (dither/offset are unused for float output). */
#define yuv2plane1_float(template, dest_type, BE_LE) \
static void yuv2plane1_float ## BE_LE ## _altivec(const int16_t *src, uint8_t *dest, \
                                                  int dstW, \
                                                  const uint8_t *dither, int offset) \
{ \
    template((const int32_t *)src, (dest_type *)dest, dstW); \
}

/* native byte order gets the direct kernel, the other order the
 * byte-swapping one */
#if HAVE_BIGENDIAN
yuv2plane1_float(yuv2plane1_float_altivec,       float,    BE)
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, LE)
#else
yuv2plane1_float(yuv2plane1_float_altivec,       float,    LE)
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, BE)
#endif
|
||||
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
|
||||
/* Install PPC (AltiVec/VSX) accelerated function pointers into the
 * scaler context where the runtime CPU and the requested formats allow;
 * falls through to the VSX initializer at the end. */
av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
{
#if HAVE_ALTIVEC
    enum AVPixelFormat dstFormat = c->dstFormat;

    /* runtime check: only use AltiVec when the CPU actually has it */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

#if HAVE_BIGENDIAN
    /* horizontal scaling kernel (8-bit input, <=14-bit intermediates) */
    if (c->srcBpc == 8 && c->dstBpc <= 14) {
        c->hyScale = c->hcScale = hScale_real_altivec;
    }
    /* vertical multi-tap output, 8-bit planar destinations only */
    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) &&
        dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
        !c->needAlpha) {
        c->yuv2planeX = yuv2planeX_altivec;
    }
#endif

    /* single-tap float gray output (both endiannesses) */
    if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
        c->yuv2plane1 = yuv2plane1_floatBE_altivec;
    } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
        c->yuv2plane1 = yuv2plane1_floatLE_altivec;
    }

    /* The following list of supported dstFormat values should
     * match what's found in the body of ff_yuv2packedX_altivec() */
    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->needAlpha) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_ABGR:
            c->yuv2packedX = ff_yuv2abgr_X_altivec;
            break;
        case AV_PIX_FMT_BGRA:
            c->yuv2packedX = ff_yuv2bgra_X_altivec;
            break;
        case AV_PIX_FMT_ARGB:
            c->yuv2packedX = ff_yuv2argb_X_altivec;
            break;
        case AV_PIX_FMT_RGBA:
            c->yuv2packedX = ff_yuv2rgba_X_altivec;
            break;
        case AV_PIX_FMT_BGR24:
            c->yuv2packedX = ff_yuv2bgr24_X_altivec;
            break;
        case AV_PIX_FMT_RGB24:
            c->yuv2packedX = ff_yuv2rgb24_X_altivec;
            break;
        }
    }
#endif /* HAVE_ALTIVEC */

    ff_sws_init_swscale_vsx(c);
}
|
||||
219
externals/ffmpeg/libswscale/ppc/swscale_ppc_template.c
vendored
Executable file
219
externals/ffmpeg/libswscale/ppc/swscale_ppc_template.c
vendored
Executable file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* AltiVec-enhanced yuv2yuvX
|
||||
*
|
||||
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
||||
* based on the equivalent C code in swscale.c
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Vectorized vertical scaling of 16 output pixels at dest (must be
 * 16-byte aligned): accumulate filterSize taps over the source lines in
 * 32-bit precision, starting from the dither bias, then shift down by 19
 * and pack with unsigned saturation to 8 bits. */
static void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize,
                                  const int16_t **src, uint8_t *dest,
                                  const uint8_t *dither, int offset, int x)
{
    register int i, j;
    LOCAL_ALIGNED(16, int, val, [16]);
    vector signed int vo1, vo2, vo3, vo4;     /* 4x4 32-bit accumulators */
    vector unsigned short vs1, vs2;
    vector unsigned char vf;
    /* 19 = 10 + 9, built from two splat-able immediates */
    vector unsigned int altivec_vectorShiftInt19 =
        vec_add(vec_splat_u32(10), vec_splat_u32(9));

    /* seed accumulators with the dithered rounding bias (<<12) */
    for (i = 0; i < 16; i++)
        val[i] = dither[(x + i + offset) & 7] << 12;

    vo1 = vec_ld(0,  val);
    vo2 = vec_ld(16, val);
    vo3 = vec_ld(32, val);
    vo4 = vec_ld(48, val);

    /* one multiply-accumulate pass per filter tap */
    for (j = 0; j < filterSize; j++) {
        unsigned int joffset = j << 1;   /* byte offset into filter (int16) */
        unsigned int xoffset = x << 1;   /* byte offset into source line */
        vector unsigned char av_unused perm;
        vector signed short l1, vLumFilter;
        LOAD_FILTER(vLumFilter, filter);
        vLumFilter = vec_splat(vLumFilter, 0);  /* broadcast tap j */
        LOAD_L1(l1, src[j], perm);
        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);
        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
    }

    /* >>19 then saturate-pack 32 -> 16 -> 8 bits */
    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
    vs1 = vec_packsu(vo1, vo2);
    vs2 = vec_packsu(vo3, vo4);
    vf  = vec_packsu(vs1, vs2);
    VEC_ST(vf, 0, dest);
}
|
||||
|
||||
|
||||
/* Scalar fallback for the unaligned head and tail of the vertical scaler:
 * process pixels [x, dstW) with the dithered bias, all filter taps, and
 * a >>19 clip to 8 bits. */
static inline void yuv2planeX_u(const int16_t *filter, int filterSize,
                                const int16_t **src, uint8_t *dest, int dstW,
                                const uint8_t *dither, int offset, int x)
{
    int px, tap;

    for (px = x; px < dstW; px++) {
        int acc = dither[(px + offset) & 7] << 12;  /* dithered rounding bias */
        for (tap = 0; tap < filterSize; tap++)
            acc += src[tap][px] * filter[tap];
        dest[px] = av_clip_uint8(acc >> 19);
    }
}
|
||||
|
||||
/* Vertical scaling entry point: scalar until dest is 16-byte aligned,
 * then 16 pixels at a time with the vector kernel, then a scalar tail. */
static void FUNC(yuv2planeX)(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    int dst_u = -(uintptr_t)dest & 15;  /* pixels until dest is 16-byte aligned */
    int i;

    /* unaligned head, scalar */
    yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0);

    /* aligned body, 16 pixels per iteration */
    for (i = dst_u; i < dstW - 15; i += 16)
        FUNC(yuv2planeX_8_16)(filter, filterSize, src, dest + i, dither,
                              offset, i);

    /* remaining tail, scalar */
    yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i);
}
|
||||
|
||||
/* Horizontal scaling kernel: for every output pixel, the dot product of
 * filterSize 8-bit source samples (starting at filterPos[i]) with 16-bit
 * filter coefficients, then >>7 and clip to int16 max.
 * Specialized vector paths for filterSize 4/8/16 and multiples of 16;
 * odd tap counts fall back to plain scalar code. */
static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW,
                              const uint8_t *src, const int16_t *filter,
                              const int32_t *filterPos, int filterSize)
{
    register int i;
    LOCAL_ALIGNED(16, int, tempo, [4]);   /* landing pad for vec_sums results */

    if (filterSize % 4) {
        /* scalar fallback: tap count not a multiple of 4 */
        for (i = 0; i < dstW; i++) {
            register int j;
            register int srcPos = filterPos[i];
            register int val    = 0;
            for (j = 0; j < filterSize; j++)
                val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
            dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
        }
    } else
        switch (filterSize) {
        case 4:
            for (i = 0; i < dstW; i++) {
                register int srcPos = filterPos[i];

                vector unsigned char src_vF = unaligned_load(srcPos, src);
                vector signed short src_v, filter_v;
                vector signed int val_vEven, val_s;
                src_v = // vec_unpackh sign-extends...
                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
                // now put our elements in the even slots
                src_v = vec_mergeh(src_v, (vector signed short)vzero);
                GET_VF4(i, filter_v, filter);
                val_vEven = vec_mule(src_v, filter_v);     /* 4 products */
                val_s     = vec_sums(val_vEven, vzero);    /* horizontal sum */
                vec_st(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;
        case 8:
            for (i = 0; i < dstW; i++) {
                register int srcPos = filterPos[i];
                vector unsigned char src_vF, av_unused src_v0, av_unused src_v1;
                vector unsigned char av_unused permS;
                vector signed short src_v, filter_v;
                vector signed int val_v, val_s;
                FIRST_LOAD(src_v0, srcPos, src, permS);
                LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
                src_v = // vec_unpackh sign-extends...
                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
                /* filter rows are 16 bytes apart for filterSize == 8 */
                filter_v = vec_ld(i << 4, filter);
                val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
                val_s = vec_sums(val_v, vzero);
                vec_st(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;

        case 16:
            for (i = 0; i < dstW; i++) {
                register int srcPos = filterPos[i];

                vector unsigned char src_vF = unaligned_load(srcPos, src);
                vector signed short src_vA = // vec_unpackh sign-extends...
                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
                vector signed short src_vB = // vec_unpackh sign-extends...
                        (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
                /* filter rows are 32 bytes apart for filterSize == 16 */
                vector signed short filter_v0 = vec_ld(i << 5, filter);
                vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);

                vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
                vector signed int val_v   = vec_msums(src_vB, filter_v1, val_acc);

                vector signed int val_s = vec_sums(val_v, vzero);

                VEC_ST(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;

        default:
            /* generic multiple-of-4 tap count: 16 taps per vector pass,
             * then one optional 8-tap pass for the remainder */
            for (i = 0; i < dstW; i++) {
                register int j, av_unused offset = i * 2 * filterSize;
                register int srcPos = filterPos[i];

                vector signed int val_s, val_v = (vector signed int)vzero;
                vector signed short av_unused filter_v0R;
                vector unsigned char av_unused permF, av_unused src_v0, av_unused permS;
                FIRST_LOAD(filter_v0R, offset, filter, permF);
                FIRST_LOAD(src_v0, srcPos, src, permS);

                for (j = 0; j < filterSize - 15; j += 16) {
                    vector unsigned char av_unused src_v1, src_vF;
                    vector signed short av_unused filter_v1R, av_unused filter_v2R,
                                        filter_v0, filter_v1, src_vA, src_vB;
                    vector signed int val_acc;
                    LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF);
                    src_vA = // vec_unpackh sign-extends...
                             (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
                    src_vB = // vec_unpackh sign-extends...
                             (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
                    GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0);
                    GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16);

                    val_acc = vec_msums(src_vA, filter_v0, val_v);
                    val_v   = vec_msums(src_vB, filter_v1, val_acc);
                    UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0);
                }

                if (j < filterSize - 7) {
                    // loading src_v0 is useless, it's already done above
                    vector unsigned char av_unused src_v1, src_vF;
                    vector signed short src_v, av_unused filter_v1R, filter_v;
                    LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF);
                    src_v = // vec_unpackh sign-extends...
                            (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
                    GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0);
                    val_v = vec_msums(src_v, filter_v, val_v);
                }
                val_s = vec_sums(val_v, vzero);

                VEC_ST(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
        }
}
|
||||
2267
externals/ffmpeg/libswscale/ppc/swscale_vsx.c
vendored
Executable file
2267
externals/ffmpeg/libswscale/ppc/swscale_vsx.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
855
externals/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c
vendored
Executable file
855
externals/ffmpeg/libswscale/ppc/yuv2rgb_altivec.c
vendored
Executable file
@@ -0,0 +1,855 @@
|
||||
/*
|
||||
* AltiVec acceleration for colorspace conversion
|
||||
*
|
||||
* copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert I420 YV12 to RGB in various formats,
|
||||
* it rejects images that are not in 420 formats,
|
||||
* it rejects images that don't have widths of multiples of 16,
|
||||
* it rejects images that don't have heights of multiples of 2.
|
||||
* Reject defers to C simulation code.
|
||||
*
|
||||
* Lots of optimizations to be done here.
|
||||
*
|
||||
* 1. Need to fix saturation code. I just couldn't get it to fly with packs
|
||||
* and adds, so we currently use max/min to clip.
|
||||
*
|
||||
* 2. The inefficient use of chroma loading needs a bit of brushing up.
|
||||
*
|
||||
* 3. Analysis of pipeline stalls needs to be done. Use shark to identify
|
||||
* pipeline stalls.
|
||||
*
|
||||
*
|
||||
* MODIFIED to calculate coeffs from currently selected color space.
|
||||
* MODIFIED core to be a macro where you specify the output format.
|
||||
* ADDED UYVY conversion which is never called due to some thing in swscale.
|
||||
* CORRECTED algorithim selection to be strict on input formats.
|
||||
* ADDED runtime detection of AltiVec.
|
||||
*
|
||||
* ADDED altivec_yuv2packedX vertical scl + RGB converter
|
||||
*
|
||||
* March 27,2004
|
||||
* PERFORMANCE ANALYSIS
|
||||
*
|
||||
* The C version uses 25% of the processor or ~250Mips for D1 video rawvideo
|
||||
* used as test.
|
||||
* The AltiVec version uses 10% of the processor or ~100Mips for D1 video
|
||||
* same sequence.
|
||||
*
|
||||
* 720 * 480 * 30 ~10MPS
|
||||
*
|
||||
* so we have roughly 10 clocks per pixel. This is too high, something has
|
||||
* to be wrong.
|
||||
*
|
||||
* OPTIMIZED clip codes to utilize vec_max and vec_packs removing the
|
||||
* need for vec_min.
|
||||
*
|
||||
* OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to
|
||||
* have the input video frame, it was just decompressed so it probably resides
|
||||
* in L1 caches. However, we are creating the output video stream. This needs
|
||||
* to use the DSTST instruction to optimize for the cache. We couple this with
|
||||
* the fact that we are not going to be visiting the input buffer again so we
|
||||
* mark it Least Recently Used. This shaves 25% of the processor cycles off.
|
||||
*
|
||||
* Now memcpy is the largest mips consumer in the system, probably due
|
||||
* to the inefficient X11 stuff.
|
||||
*
|
||||
* GL libraries seem to be very slow on this machine 1.33Ghz PB running
|
||||
* Jaguar, this is not the case for my 1Ghz PB. I thought it might be
|
||||
* a versioning issue, however I have libGL.1.2.dylib for both
|
||||
* machines. (We need to figure this out now.)
|
||||
*
|
||||
* GL2 libraries work now with patch for RGB32.
|
||||
*
|
||||
* NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor.
|
||||
*
|
||||
* Integrated luma prescaling adjustment for saturation/contrast/brightness
|
||||
* adjustment.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/rgb2rgb.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "yuv2rgb_altivec.h"
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
|
||||
#undef PROFILE_THE_BEAST
|
||||
#undef INC_SCALING
|
||||
|
||||
typedef unsigned char ubyte;
|
||||
typedef signed char sbyte;
|
||||
|
||||
/* RGB interleaver, 16 planar pels 8-bit samples per channel in
|
||||
* homogeneous vector registers x0,x1,x2 are interleaved with the
|
||||
* following technique:
|
||||
*
|
||||
* o0 = vec_mergeh(x0, x1);
|
||||
* o1 = vec_perm(o0, x2, perm_rgb_0);
|
||||
* o2 = vec_perm(o0, x2, perm_rgb_1);
|
||||
* o3 = vec_mergel(x0, x1);
|
||||
* o4 = vec_perm(o3, o2, perm_rgb_2);
|
||||
* o5 = vec_perm(o3, o2, perm_rgb_3);
|
||||
*
|
||||
* perm_rgb_0: o0(RG).h v1(B) --> o1*
|
||||
* 0 1 2 3 4
|
||||
* rgbr|gbrg|brgb|rgbr
|
||||
* 0010 0100 1001 0010
|
||||
* 0102 3145 2673 894A
|
||||
*
|
||||
* perm_rgb_1: o0(RG).h v1(B) --> o2
|
||||
* 0 1 2 3 4
|
||||
* gbrg|brgb|bbbb|bbbb
|
||||
* 0100 1001 1111 1111
|
||||
* B5CD 6EF7 89AB CDEF
|
||||
*
|
||||
* perm_rgb_2: o3(RG).l o2(rgbB.l) --> o4*
|
||||
* 0 1 2 3 4
|
||||
* gbrg|brgb|rgbr|gbrg
|
||||
* 1111 1111 0010 0100
|
||||
* 89AB CDEF 0182 3945
|
||||
*
|
||||
* perm_rgb_2: o3(RG).l o2(rgbB.l) ---> o5*
|
||||
* 0 1 2 3 4
|
||||
* brgb|rgbr|gbrg|brgb
|
||||
* 1001 0010 0100 1001
|
||||
* a67b 89cA BdCD eEFf
|
||||
*/
|
||||
static const vector unsigned char
|
||||
perm_rgb_0 = { 0x00, 0x01, 0x10, 0x02, 0x03, 0x11, 0x04, 0x05,
|
||||
0x12, 0x06, 0x07, 0x13, 0x08, 0x09, 0x14, 0x0a },
|
||||
perm_rgb_1 = { 0x0b, 0x15, 0x0c, 0x0d, 0x16, 0x0e, 0x0f, 0x17,
|
||||
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
|
||||
perm_rgb_2 = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x00, 0x01, 0x18, 0x02, 0x03, 0x19, 0x04, 0x05 },
|
||||
perm_rgb_3 = { 0x1a, 0x06, 0x07, 0x1b, 0x08, 0x09, 0x1c, 0x0a,
|
||||
0x0b, 0x1d, 0x0c, 0x0d, 0x1e, 0x0e, 0x0f, 0x1f };
|
||||
|
||||
#define vec_merge3(x2, x1, x0, y0, y1, y2) \
|
||||
do { \
|
||||
__typeof__(x0) o0, o2, o3; \
|
||||
o0 = vec_mergeh(x0, x1); \
|
||||
y0 = vec_perm(o0, x2, perm_rgb_0); \
|
||||
o2 = vec_perm(o0, x2, perm_rgb_1); \
|
||||
o3 = vec_mergel(x0, x1); \
|
||||
y1 = vec_perm(o3, o2, perm_rgb_2); \
|
||||
y2 = vec_perm(o3, o2, perm_rgb_3); \
|
||||
} while (0)
|
||||
|
||||
#define vec_mstbgr24(x0, x1, x2, ptr) \
|
||||
do { \
|
||||
__typeof__(x0) _0, _1, _2; \
|
||||
vec_merge3(x0, x1, x2, _0, _1, _2); \
|
||||
vec_st(_0, 0, ptr++); \
|
||||
vec_st(_1, 0, ptr++); \
|
||||
vec_st(_2, 0, ptr++); \
|
||||
} while (0)
|
||||
|
||||
#define vec_mstrgb24(x0, x1, x2, ptr) \
|
||||
do { \
|
||||
__typeof__(x0) _0, _1, _2; \
|
||||
vec_merge3(x2, x1, x0, _0, _1, _2); \
|
||||
vec_st(_0, 0, ptr++); \
|
||||
vec_st(_1, 0, ptr++); \
|
||||
vec_st(_2, 0, ptr++); \
|
||||
} while (0)
|
||||
|
||||
/* pack the pixels in rgb0 format
|
||||
* msb R
|
||||
* lsb 0
|
||||
*/
|
||||
#define vec_mstrgb32(T, x0, x1, x2, x3, ptr) \
|
||||
do { \
|
||||
T _0, _1, _2, _3; \
|
||||
_0 = vec_mergeh(x0, x1); \
|
||||
_1 = vec_mergeh(x2, x3); \
|
||||
_2 = (T) vec_mergeh((vector unsigned short) _0, \
|
||||
(vector unsigned short) _1); \
|
||||
_3 = (T) vec_mergel((vector unsigned short) _0, \
|
||||
(vector unsigned short) _1); \
|
||||
vec_st(_2, 0 * 16, (T *) ptr); \
|
||||
vec_st(_3, 1 * 16, (T *) ptr); \
|
||||
_0 = vec_mergel(x0, x1); \
|
||||
_1 = vec_mergel(x2, x3); \
|
||||
_2 = (T) vec_mergeh((vector unsigned short) _0, \
|
||||
(vector unsigned short) _1); \
|
||||
_3 = (T) vec_mergel((vector unsigned short) _0, \
|
||||
(vector unsigned short) _1); \
|
||||
vec_st(_2, 2 * 16, (T *) ptr); \
|
||||
vec_st(_3, 3 * 16, (T *) ptr); \
|
||||
ptr += 4; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* 1 0 1.4021 | | Y |
|
||||
* 1 -0.3441 -0.7142 |x| Cb|
|
||||
* 1 1.7718 0 | | Cr|
|
||||
*
|
||||
*
|
||||
* Y: [-128 127]
|
||||
* Cb/Cr : [-128 127]
|
||||
*
|
||||
* typical YUV conversion works on Y: 0-255 this version has been
|
||||
* optimized for JPEG decoding.
|
||||
*/
|
||||
|
||||
#if HAVE_BIGENDIAN
|
||||
#define vec_unh(x) \
|
||||
(vector signed short) \
|
||||
vec_perm(x, (__typeof__(x)) { 0 }, \
|
||||
((vector unsigned char) { \
|
||||
0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03, \
|
||||
0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07 }))
|
||||
|
||||
#define vec_unl(x) \
|
||||
(vector signed short) \
|
||||
vec_perm(x, (__typeof__(x)) { 0 }, \
|
||||
((vector unsigned char) { \
|
||||
0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B, \
|
||||
0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F }))
|
||||
#else
|
||||
#define vec_unh(x)(vector signed short) vec_mergeh(x,(__typeof__(x)) { 0 })
|
||||
#define vec_unl(x)(vector signed short) vec_mergel(x,(__typeof__(x)) { 0 })
|
||||
#endif
|
||||
|
||||
#define vec_clip_s16(x) \
|
||||
vec_max(vec_min(x, ((vector signed short) { \
|
||||
235, 235, 235, 235, 235, 235, 235, 235 })), \
|
||||
((vector signed short) { 16, 16, 16, 16, 16, 16, 16, 16 }))
|
||||
|
||||
#define vec_packclp(x, y) \
|
||||
(vector unsigned char) \
|
||||
vec_packs((vector unsigned short) \
|
||||
vec_max(x, ((vector signed short) { 0 })), \
|
||||
(vector unsigned short) \
|
||||
vec_max(y, ((vector signed short) { 0 })))
|
||||
|
||||
static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
|
||||
vector signed short U, vector signed short V,
|
||||
vector signed short *R, vector signed short *G,
|
||||
vector signed short *B)
|
||||
{
|
||||
vector signed short vx, ux, uvx;
|
||||
|
||||
Y = vec_mradds(Y, c->CY, c->OY);
|
||||
U = vec_sub(U, (vector signed short)
|
||||
vec_splat((vector signed short) { 128 }, 0));
|
||||
V = vec_sub(V, (vector signed short)
|
||||
vec_splat((vector signed short) { 128 }, 0));
|
||||
|
||||
// ux = (CBU * (u << c->CSHIFT) + 0x4000) >> 15;
|
||||
ux = vec_sl(U, c->CSHIFT);
|
||||
*B = vec_mradds(ux, c->CBU, Y);
|
||||
|
||||
// vx = (CRV * (v << c->CSHIFT) + 0x4000) >> 15;
|
||||
vx = vec_sl(V, c->CSHIFT);
|
||||
*R = vec_mradds(vx, c->CRV, Y);
|
||||
|
||||
// uvx = ((CGU * u) + (CGV * v)) >> 15;
|
||||
uvx = vec_mradds(U, c->CGU, Y);
|
||||
*G = vec_mradds(V, c->CGV, uvx);
|
||||
}
|
||||
|
||||
/*
|
||||
* ------------------------------------------------------------------------------
|
||||
* CS converters
|
||||
* ------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#define DEFCSP420_CVT(name, out_pixels) \
|
||||
static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
|
||||
int *instrides, int srcSliceY, int srcSliceH, \
|
||||
unsigned char **oplanes, int *outstrides) \
|
||||
{ \
|
||||
int w = c->srcW; \
|
||||
int h = srcSliceH; \
|
||||
int i, j; \
|
||||
int instrides_scl[3]; \
|
||||
vector unsigned char y0, y1; \
|
||||
\
|
||||
vector signed char u, v; \
|
||||
\
|
||||
vector signed short Y0, Y1, Y2, Y3; \
|
||||
vector signed short U, V; \
|
||||
vector signed short vx, ux, uvx; \
|
||||
vector signed short vx0, ux0, uvx0; \
|
||||
vector signed short vx1, ux1, uvx1; \
|
||||
vector signed short R0, G0, B0; \
|
||||
vector signed short R1, G1, B1; \
|
||||
vector unsigned char R, G, B; \
|
||||
\
|
||||
vector signed short lCY = c->CY; \
|
||||
vector signed short lOY = c->OY; \
|
||||
vector signed short lCRV = c->CRV; \
|
||||
vector signed short lCBU = c->CBU; \
|
||||
vector signed short lCGU = c->CGU; \
|
||||
vector signed short lCGV = c->CGV; \
|
||||
vector unsigned short lCSHIFT = c->CSHIFT; \
|
||||
\
|
||||
const ubyte *y1i = in[0]; \
|
||||
const ubyte *y2i = in[0] + instrides[0]; \
|
||||
const ubyte *ui = in[1]; \
|
||||
const ubyte *vi = in[2]; \
|
||||
\
|
||||
vector unsigned char *oute, *outo; \
|
||||
\
|
||||
/* loop moves y{1, 2}i by w */ \
|
||||
instrides_scl[0] = instrides[0] * 2 - w; \
|
||||
/* loop moves ui by w / 2 */ \
|
||||
instrides_scl[1] = instrides[1] - w / 2; \
|
||||
/* loop moves vi by w / 2 */ \
|
||||
instrides_scl[2] = instrides[2] - w / 2; \
|
||||
\
|
||||
for (i = 0; i < h / 2; i++) { \
|
||||
oute = (vector unsigned char *)(oplanes[0] + outstrides[0] * \
|
||||
(srcSliceY + i * 2)); \
|
||||
outo = oute + (outstrides[0] >> 4); \
|
||||
vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \
|
||||
vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); \
|
||||
\
|
||||
for (j = 0; j < w / 16; j++) { \
|
||||
y0 = vec_xl(0, y1i); \
|
||||
\
|
||||
y1 = vec_xl(0, y2i); \
|
||||
\
|
||||
u = (vector signed char) vec_xl(0, ui); \
|
||||
\
|
||||
v = (vector signed char) vec_xl(0, vi); \
|
||||
\
|
||||
u = (vector signed char) \
|
||||
vec_sub(u, \
|
||||
(vector signed char) \
|
||||
vec_splat((vector signed char) { 128 }, 0)); \
|
||||
v = (vector signed char) \
|
||||
vec_sub(v, \
|
||||
(vector signed char) \
|
||||
vec_splat((vector signed char) { 128 }, 0)); \
|
||||
\
|
||||
U = vec_unpackh(u); \
|
||||
V = vec_unpackh(v); \
|
||||
\
|
||||
Y0 = vec_unh(y0); \
|
||||
Y1 = vec_unl(y0); \
|
||||
Y2 = vec_unh(y1); \
|
||||
Y3 = vec_unl(y1); \
|
||||
\
|
||||
Y0 = vec_mradds(Y0, lCY, lOY); \
|
||||
Y1 = vec_mradds(Y1, lCY, lOY); \
|
||||
Y2 = vec_mradds(Y2, lCY, lOY); \
|
||||
Y3 = vec_mradds(Y3, lCY, lOY); \
|
||||
\
|
||||
/* ux = (CBU * (u << CSHIFT) + 0x4000) >> 15 */ \
|
||||
ux = vec_sl(U, lCSHIFT); \
|
||||
ux = vec_mradds(ux, lCBU, (vector signed short) { 0 }); \
|
||||
ux0 = vec_mergeh(ux, ux); \
|
||||
ux1 = vec_mergel(ux, ux); \
|
||||
\
|
||||
/* vx = (CRV * (v << CSHIFT) + 0x4000) >> 15; */ \
|
||||
vx = vec_sl(V, lCSHIFT); \
|
||||
vx = vec_mradds(vx, lCRV, (vector signed short) { 0 }); \
|
||||
vx0 = vec_mergeh(vx, vx); \
|
||||
vx1 = vec_mergel(vx, vx); \
|
||||
\
|
||||
/* uvx = ((CGU * u) + (CGV * v)) >> 15 */ \
|
||||
uvx = vec_mradds(U, lCGU, (vector signed short) { 0 }); \
|
||||
uvx = vec_mradds(V, lCGV, uvx); \
|
||||
uvx0 = vec_mergeh(uvx, uvx); \
|
||||
uvx1 = vec_mergel(uvx, uvx); \
|
||||
\
|
||||
R0 = vec_add(Y0, vx0); \
|
||||
G0 = vec_add(Y0, uvx0); \
|
||||
B0 = vec_add(Y0, ux0); \
|
||||
R1 = vec_add(Y1, vx1); \
|
||||
G1 = vec_add(Y1, uvx1); \
|
||||
B1 = vec_add(Y1, ux1); \
|
||||
\
|
||||
R = vec_packclp(R0, R1); \
|
||||
G = vec_packclp(G0, G1); \
|
||||
B = vec_packclp(B0, B1); \
|
||||
\
|
||||
out_pixels(R, G, B, oute); \
|
||||
\
|
||||
R0 = vec_add(Y2, vx0); \
|
||||
G0 = vec_add(Y2, uvx0); \
|
||||
B0 = vec_add(Y2, ux0); \
|
||||
R1 = vec_add(Y3, vx1); \
|
||||
G1 = vec_add(Y3, uvx1); \
|
||||
B1 = vec_add(Y3, ux1); \
|
||||
R = vec_packclp(R0, R1); \
|
||||
G = vec_packclp(G0, G1); \
|
||||
B = vec_packclp(B0, B1); \
|
||||
\
|
||||
\
|
||||
out_pixels(R, G, B, outo); \
|
||||
\
|
||||
y1i += 16; \
|
||||
y2i += 16; \
|
||||
ui += 8; \
|
||||
vi += 8; \
|
||||
} \
|
||||
\
|
||||
ui += instrides_scl[1]; \
|
||||
vi += instrides_scl[2]; \
|
||||
y1i += instrides_scl[0]; \
|
||||
y2i += instrides_scl[0]; \
|
||||
} \
|
||||
return srcSliceH; \
|
||||
}
|
||||
|
||||
#define out_abgr(a, b, c, ptr) \
|
||||
vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), c, b, a, ptr)
|
||||
#define out_bgra(a, b, c, ptr) \
|
||||
vec_mstrgb32(__typeof__(a), c, b, a, ((__typeof__(a)) { 255 }), ptr)
|
||||
#define out_rgba(a, b, c, ptr) \
|
||||
vec_mstrgb32(__typeof__(a), a, b, c, ((__typeof__(a)) { 255 }), ptr)
|
||||
#define out_argb(a, b, c, ptr) \
|
||||
vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, b, c, ptr)
|
||||
#define out_rgb24(a, b, c, ptr) vec_mstrgb24(a, b, c, ptr)
|
||||
#define out_bgr24(a, b, c, ptr) vec_mstbgr24(a, b, c, ptr)
|
||||
|
||||
DEFCSP420_CVT(yuv2_abgr, out_abgr)
|
||||
DEFCSP420_CVT(yuv2_bgra, out_bgra)
|
||||
DEFCSP420_CVT(yuv2_rgba, out_rgba)
|
||||
DEFCSP420_CVT(yuv2_argb, out_argb)
|
||||
DEFCSP420_CVT(yuv2_rgb24, out_rgb24)
|
||||
DEFCSP420_CVT(yuv2_bgr24, out_bgr24)
|
||||
|
||||
// uyvy|uyvy|uyvy|uyvy
|
||||
// 0123 4567 89ab cdef
|
||||
static const vector unsigned char
|
||||
demux_u = { 0x10, 0x00, 0x10, 0x00,
|
||||
0x10, 0x04, 0x10, 0x04,
|
||||
0x10, 0x08, 0x10, 0x08,
|
||||
0x10, 0x0c, 0x10, 0x0c },
|
||||
demux_v = { 0x10, 0x02, 0x10, 0x02,
|
||||
0x10, 0x06, 0x10, 0x06,
|
||||
0x10, 0x0A, 0x10, 0x0A,
|
||||
0x10, 0x0E, 0x10, 0x0E },
|
||||
demux_y = { 0x10, 0x01, 0x10, 0x03,
|
||||
0x10, 0x05, 0x10, 0x07,
|
||||
0x10, 0x09, 0x10, 0x0B,
|
||||
0x10, 0x0D, 0x10, 0x0F };
|
||||
|
||||
/*
|
||||
* this is so I can play live CCIR raw video
|
||||
*/
|
||||
static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in,
|
||||
int *instrides, int srcSliceY, int srcSliceH,
|
||||
unsigned char **oplanes, int *outstrides)
|
||||
{
|
||||
int w = c->srcW;
|
||||
int h = srcSliceH;
|
||||
int i, j;
|
||||
vector unsigned char uyvy;
|
||||
vector signed short Y, U, V;
|
||||
vector signed short R0, G0, B0, R1, G1, B1;
|
||||
vector unsigned char R, G, B;
|
||||
vector unsigned char *out;
|
||||
const ubyte *img;
|
||||
|
||||
img = in[0];
|
||||
out = (vector unsigned char *) (oplanes[0] + srcSliceY * outstrides[0]);
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
for (j = 0; j < w / 16; j++) {
|
||||
uyvy = vec_ld(0, img);
|
||||
|
||||
U = (vector signed short)
|
||||
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
|
||||
V = (vector signed short)
|
||||
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
|
||||
Y = (vector signed short)
|
||||
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
|
||||
|
||||
cvtyuvtoRGB(c, Y, U, V, &R0, &G0, &B0);
|
||||
|
||||
uyvy = vec_ld(16, img);
|
||||
|
||||
U = (vector signed short)
|
||||
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
|
||||
V = (vector signed short)
|
||||
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
|
||||
Y = (vector signed short)
|
||||
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
|
||||
|
||||
cvtyuvtoRGB(c, Y, U, V, &R1, &G1, &B1);
|
||||
|
||||
R = vec_packclp(R0, R1);
|
||||
G = vec_packclp(G0, G1);
|
||||
B = vec_packclp(B0, B1);
|
||||
|
||||
// vec_mstbgr24 (R,G,B, out);
|
||||
out_rgba(R, G, B, out);
|
||||
|
||||
img += 32;
|
||||
}
|
||||
return srcSliceH;
|
||||
}
|
||||
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
|
||||
/* Ok currently the acceleration routine only supports
|
||||
* inputs of widths a multiple of 16
|
||||
* and heights a multiple 2
|
||||
*
|
||||
* So we just fall back to the C codes for this.
|
||||
*/
|
||||
av_cold SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c)
|
||||
{
|
||||
#if HAVE_ALTIVEC
|
||||
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* and this seems not to matter too much I tried a bunch of
|
||||
* videos with abnormal widths and MPlayer crashes elsewhere.
|
||||
* mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
|
||||
* boom with X11 bad match.
|
||||
*
|
||||
*/
|
||||
if ((c->srcW & 0xf) != 0)
|
||||
return NULL;
|
||||
|
||||
switch (c->srcFormat) {
|
||||
case AV_PIX_FMT_YUV410P:
|
||||
case AV_PIX_FMT_YUV420P:
|
||||
/*case IMGFMT_CLPL: ??? */
|
||||
case AV_PIX_FMT_GRAY8:
|
||||
case AV_PIX_FMT_NV12:
|
||||
case AV_PIX_FMT_NV21:
|
||||
if ((c->srcH & 0x1) != 0)
|
||||
return NULL;
|
||||
|
||||
switch (c->dstFormat) {
|
||||
case AV_PIX_FMT_RGB24:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
|
||||
return altivec_yuv2_rgb24;
|
||||
case AV_PIX_FMT_BGR24:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
|
||||
return altivec_yuv2_bgr24;
|
||||
case AV_PIX_FMT_ARGB:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
|
||||
return altivec_yuv2_argb;
|
||||
case AV_PIX_FMT_ABGR:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
|
||||
return altivec_yuv2_abgr;
|
||||
case AV_PIX_FMT_RGBA:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
|
||||
return altivec_yuv2_rgba;
|
||||
case AV_PIX_FMT_BGRA:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
|
||||
return altivec_yuv2_bgra;
|
||||
default: return NULL;
|
||||
}
|
||||
break;
|
||||
|
||||
case AV_PIX_FMT_UYVY422:
|
||||
switch (c->dstFormat) {
|
||||
case AV_PIX_FMT_BGR32:
|
||||
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
|
||||
return altivec_uyvy_rgb32;
|
||||
default: return NULL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
av_cold void ff_yuv2rgb_init_tables_ppc(SwsContext *c,
|
||||
const int inv_table[4],
|
||||
int brightness,
|
||||
int contrast,
|
||||
int saturation)
|
||||
{
|
||||
#if HAVE_ALTIVEC
|
||||
union {
|
||||
DECLARE_ALIGNED(16, signed short, tmp)[8];
|
||||
vector signed short vec;
|
||||
} buf;
|
||||
|
||||
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
|
||||
return;
|
||||
|
||||
buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9; // cy
|
||||
buf.tmp[1] = -256 * brightness; // oy
|
||||
buf.tmp[2] = (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16); // crv
|
||||
buf.tmp[3] = (inv_table[1] >> 3) * (contrast >> 16) * (saturation >> 16); // cbu
|
||||
buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgu
|
||||
buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgv
|
||||
|
||||
c->CSHIFT = (vector unsigned short) vec_splat_u16(2);
|
||||
c->CY = vec_splat((vector signed short) buf.vec, 0);
|
||||
c->OY = vec_splat((vector signed short) buf.vec, 1);
|
||||
c->CRV = vec_splat((vector signed short) buf.vec, 2);
|
||||
c->CBU = vec_splat((vector signed short) buf.vec, 3);
|
||||
c->CGU = vec_splat((vector signed short) buf.vec, 4);
|
||||
c->CGV = vec_splat((vector signed short) buf.vec, 5);
|
||||
return;
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
}
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
|
||||
static av_always_inline void yuv2packedX_altivec(SwsContext *c,
|
||||
const int16_t *lumFilter,
|
||||
const int16_t **lumSrc,
|
||||
int lumFilterSize,
|
||||
const int16_t *chrFilter,
|
||||
const int16_t **chrUSrc,
|
||||
const int16_t **chrVSrc,
|
||||
int chrFilterSize,
|
||||
const int16_t **alpSrc,
|
||||
uint8_t *dest,
|
||||
int dstW, int dstY,
|
||||
enum AVPixelFormat target)
|
||||
{
|
||||
int i, j;
|
||||
vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V;
|
||||
vector signed short R0, G0, B0, R1, G1, B1;
|
||||
|
||||
vector unsigned char R, G, B;
|
||||
vector unsigned char *out, *nout;
|
||||
|
||||
vector signed short RND = vec_splat_s16(1 << 3);
|
||||
vector unsigned short SCL = vec_splat_u16(4);
|
||||
DECLARE_ALIGNED(16, unsigned int, scratch)[16];
|
||||
|
||||
vector signed short *YCoeffs, *CCoeffs;
|
||||
|
||||
YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize;
|
||||
CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize;
|
||||
|
||||
out = (vector unsigned char *) dest;
|
||||
|
||||
for (i = 0; i < dstW; i += 16) {
|
||||
Y0 = RND;
|
||||
Y1 = RND;
|
||||
/* extract 16 coeffs from lumSrc */
|
||||
for (j = 0; j < lumFilterSize; j++) {
|
||||
X0 = vec_ld(0, &lumSrc[j][i]);
|
||||
X1 = vec_ld(16, &lumSrc[j][i]);
|
||||
Y0 = vec_mradds(X0, YCoeffs[j], Y0);
|
||||
Y1 = vec_mradds(X1, YCoeffs[j], Y1);
|
||||
}
|
||||
|
||||
U = RND;
|
||||
V = RND;
|
||||
/* extract 8 coeffs from U,V */
|
||||
for (j = 0; j < chrFilterSize; j++) {
|
||||
X = vec_ld(0, &chrUSrc[j][i / 2]);
|
||||
U = vec_mradds(X, CCoeffs[j], U);
|
||||
X = vec_ld(0, &chrVSrc[j][i / 2]);
|
||||
V = vec_mradds(X, CCoeffs[j], V);
|
||||
}
|
||||
|
||||
/* scale and clip signals */
|
||||
Y0 = vec_sra(Y0, SCL);
|
||||
Y1 = vec_sra(Y1, SCL);
|
||||
U = vec_sra(U, SCL);
|
||||
V = vec_sra(V, SCL);
|
||||
|
||||
Y0 = vec_clip_s16(Y0);
|
||||
Y1 = vec_clip_s16(Y1);
|
||||
U = vec_clip_s16(U);
|
||||
V = vec_clip_s16(V);
|
||||
|
||||
/* now we have
|
||||
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15
|
||||
* U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7
|
||||
*
|
||||
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15
|
||||
* U0 = u0 u0 u1 u1 u2 u2 u3 u3 U1 = u4 u4 u5 u5 u6 u6 u7 u7
|
||||
* V0 = v0 v0 v1 v1 v2 v2 v3 v3 V1 = v4 v4 v5 v5 v6 v6 v7 v7
|
||||
*/
|
||||
|
||||
U0 = vec_mergeh(U, U);
|
||||
V0 = vec_mergeh(V, V);
|
||||
|
||||
U1 = vec_mergel(U, U);
|
||||
V1 = vec_mergel(V, V);
|
||||
|
||||
cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
|
||||
cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
|
||||
|
||||
R = vec_packclp(R0, R1);
|
||||
G = vec_packclp(G0, G1);
|
||||
B = vec_packclp(B0, B1);
|
||||
|
||||
switch (target) {
|
||||
case AV_PIX_FMT_ABGR:
|
||||
out_abgr(R, G, B, out);
|
||||
break;
|
||||
case AV_PIX_FMT_BGRA:
|
||||
out_bgra(R, G, B, out);
|
||||
break;
|
||||
case AV_PIX_FMT_RGBA:
|
||||
out_rgba(R, G, B, out);
|
||||
break;
|
||||
case AV_PIX_FMT_ARGB:
|
||||
out_argb(R, G, B, out);
|
||||
break;
|
||||
case AV_PIX_FMT_RGB24:
|
||||
out_rgb24(R, G, B, out);
|
||||
break;
|
||||
case AV_PIX_FMT_BGR24:
|
||||
out_bgr24(R, G, B, out);
|
||||
break;
|
||||
default:
|
||||
{
|
||||
/* If this is reached, the caller should have called yuv2packedXinC
|
||||
* instead. */
|
||||
static int printed_error_message;
|
||||
if (!printed_error_message) {
|
||||
av_log(c, AV_LOG_ERROR,
|
||||
"altivec_yuv2packedX doesn't support %s output\n",
|
||||
av_get_pix_fmt_name(c->dstFormat));
|
||||
printed_error_message = 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i < dstW) {
|
||||
i -= 16;
|
||||
|
||||
Y0 = RND;
|
||||
Y1 = RND;
|
||||
/* extract 16 coeffs from lumSrc */
|
||||
for (j = 0; j < lumFilterSize; j++) {
|
||||
X0 = vec_ld(0, &lumSrc[j][i]);
|
||||
X1 = vec_ld(16, &lumSrc[j][i]);
|
||||
Y0 = vec_mradds(X0, YCoeffs[j], Y0);
|
||||
Y1 = vec_mradds(X1, YCoeffs[j], Y1);
|
||||
}
|
||||
|
||||
U = RND;
|
||||
V = RND;
|
||||
/* extract 8 coeffs from U,V */
|
||||
for (j = 0; j < chrFilterSize; j++) {
|
||||
X = vec_ld(0, &chrUSrc[j][i / 2]);
|
||||
U = vec_mradds(X, CCoeffs[j], U);
|
||||
X = vec_ld(0, &chrVSrc[j][i / 2]);
|
||||
V = vec_mradds(X, CCoeffs[j], V);
|
||||
}
|
||||
|
||||
/* scale and clip signals */
|
||||
Y0 = vec_sra(Y0, SCL);
|
||||
Y1 = vec_sra(Y1, SCL);
|
||||
U = vec_sra(U, SCL);
|
||||
V = vec_sra(V, SCL);
|
||||
|
||||
Y0 = vec_clip_s16(Y0);
|
||||
Y1 = vec_clip_s16(Y1);
|
||||
U = vec_clip_s16(U);
|
||||
V = vec_clip_s16(V);
|
||||
|
||||
/* now we have
|
||||
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15
|
||||
* U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7
|
||||
*
|
||||
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15
|
||||
* U0 = u0 u0 u1 u1 u2 u2 u3 u3 U1 = u4 u4 u5 u5 u6 u6 u7 u7
|
||||
* V0 = v0 v0 v1 v1 v2 v2 v3 v3 V1 = v4 v4 v5 v5 v6 v6 v7 v7
|
||||
*/
|
||||
|
||||
U0 = vec_mergeh(U, U);
|
||||
V0 = vec_mergeh(V, V);
|
||||
|
||||
U1 = vec_mergel(U, U);
|
||||
V1 = vec_mergel(V, V);
|
||||
|
||||
cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
|
||||
cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
|
||||
|
||||
R = vec_packclp(R0, R1);
|
||||
G = vec_packclp(G0, G1);
|
||||
B = vec_packclp(B0, B1);
|
||||
|
||||
nout = (vector unsigned char *) scratch;
|
||||
switch (target) {
|
||||
case AV_PIX_FMT_ABGR:
|
||||
out_abgr(R, G, B, nout);
|
||||
break;
|
||||
case AV_PIX_FMT_BGRA:
|
||||
out_bgra(R, G, B, nout);
|
||||
break;
|
||||
case AV_PIX_FMT_RGBA:
|
||||
out_rgba(R, G, B, nout);
|
||||
break;
|
||||
case AV_PIX_FMT_ARGB:
|
||||
out_argb(R, G, B, nout);
|
||||
break;
|
||||
case AV_PIX_FMT_RGB24:
|
||||
out_rgb24(R, G, B, nout);
|
||||
break;
|
||||
case AV_PIX_FMT_BGR24:
|
||||
out_bgr24(R, G, B, nout);
|
||||
break;
|
||||
default:
|
||||
/* Unreachable, I think. */
|
||||
av_log(c, AV_LOG_ERROR,
|
||||
"altivec_yuv2packedX doesn't support %s output\n",
|
||||
av_get_pix_fmt_name(c->dstFormat));
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(&((uint32_t *) dest)[i], scratch, (dstW - i) / 4);
|
||||
}
|
||||
}
|
||||
|
||||
#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
|
||||
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \
|
||||
const int16_t *lumFilter, \
|
||||
const int16_t **lumSrc, \
|
||||
int lumFilterSize, \
|
||||
const int16_t *chrFilter, \
|
||||
const int16_t **chrUSrc, \
|
||||
const int16_t **chrVSrc, \
|
||||
int chrFilterSize, \
|
||||
const int16_t **alpSrc, \
|
||||
uint8_t *dest, int dstW, int dstY) \
|
||||
{ \
|
||||
yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
|
||||
chrFilter, chrUSrc, chrVSrc, \
|
||||
chrFilterSize, alpSrc, \
|
||||
dest, dstW, dstY, pixfmt); \
|
||||
}
|
||||
|
||||
YUV2PACKEDX_WRAPPER(abgr, AV_PIX_FMT_ABGR);
|
||||
YUV2PACKEDX_WRAPPER(bgra, AV_PIX_FMT_BGRA);
|
||||
YUV2PACKEDX_WRAPPER(argb, AV_PIX_FMT_ARGB);
|
||||
YUV2PACKEDX_WRAPPER(rgba, AV_PIX_FMT_RGBA);
|
||||
YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24);
|
||||
YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24);
|
||||
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
51
externals/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h
vendored
Executable file
51
externals/ffmpeg/libswscale/ppc/yuv2rgb_altivec.h
vendored
Executable file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* AltiVec-enhanced yuv2yuvX
|
||||
*
|
||||
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
||||
* based on the equivalent C code in swscale.c
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H
|
||||
#define SWSCALE_PPC_YUV2RGB_ALTIVEC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
#define YUV2PACKEDX_HEADER(suffix) \
|
||||
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \
|
||||
const int16_t *lumFilter, \
|
||||
const int16_t **lumSrc, \
|
||||
int lumFilterSize, \
|
||||
const int16_t *chrFilter, \
|
||||
const int16_t **chrUSrc, \
|
||||
const int16_t **chrVSrc, \
|
||||
int chrFilterSize, \
|
||||
const int16_t **alpSrc, \
|
||||
uint8_t *dest, \
|
||||
int dstW, int dstY);
|
||||
|
||||
YUV2PACKEDX_HEADER(abgr);
|
||||
YUV2PACKEDX_HEADER(bgra);
|
||||
YUV2PACKEDX_HEADER(argb);
|
||||
YUV2PACKEDX_HEADER(rgba);
|
||||
YUV2PACKEDX_HEADER(rgb24);
|
||||
YUV2PACKEDX_HEADER(bgr24);
|
||||
|
||||
#endif /* SWSCALE_PPC_YUV2RGB_ALTIVEC_H */
|
||||
204
externals/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c
vendored
Executable file
204
externals/ffmpeg/libswscale/ppc/yuv2yuv_altivec.c
vendored
Executable file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* AltiVec-enhanced yuv-to-yuv conversion routines.
|
||||
*
|
||||
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
||||
* based on the equivalent C code in swscale.c
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
|
||||
static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
|
||||
int srcStride[], int srcSliceY,
|
||||
int srcSliceH, uint8_t *dstParam[],
|
||||
int dstStride_a[])
|
||||
{
|
||||
uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
|
||||
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
|
||||
// srcStride[0], srcStride[1], dstStride[0]);
|
||||
const uint8_t *ysrc = src[0];
|
||||
const uint8_t *usrc = src[1];
|
||||
const uint8_t *vsrc = src[2];
|
||||
const int width = c->srcW;
|
||||
const int height = srcSliceH;
|
||||
const int lumStride = srcStride[0];
|
||||
const int chromStride = srcStride[1];
|
||||
const int dstStride = dstStride_a[0];
|
||||
const vector unsigned char yperm = vec_lvsl(0, ysrc);
|
||||
const int vertLumPerChroma = 2;
|
||||
register unsigned int y;
|
||||
|
||||
/* This code assumes:
|
||||
*
|
||||
* 1) dst is 16 bytes-aligned
|
||||
* 2) dstStride is a multiple of 16
|
||||
* 3) width is a multiple of 16
|
||||
* 4) lum & chrom stride are multiples of 8
|
||||
*/
|
||||
|
||||
for (y = 0; y < height; y++) {
|
||||
int i;
|
||||
for (i = 0; i < width - 31; i += 32) {
|
||||
const unsigned int j = i >> 1;
|
||||
vector unsigned char v_yA = vec_ld(i, ysrc);
|
||||
vector unsigned char v_yB = vec_ld(i + 16, ysrc);
|
||||
vector unsigned char v_yC = vec_ld(i + 32, ysrc);
|
||||
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
|
||||
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
|
||||
vector unsigned char v_uA = vec_ld(j, usrc);
|
||||
vector unsigned char v_uB = vec_ld(j + 16, usrc);
|
||||
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
|
||||
vector unsigned char v_vA = vec_ld(j, vsrc);
|
||||
vector unsigned char v_vB = vec_ld(j + 16, vsrc);
|
||||
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
|
||||
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
|
||||
vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
|
||||
vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
|
||||
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
|
||||
vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
|
||||
vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
|
||||
vec_st(v_yuy2_0, (i << 1), dst);
|
||||
vec_st(v_yuy2_1, (i << 1) + 16, dst);
|
||||
vec_st(v_yuy2_2, (i << 1) + 32, dst);
|
||||
vec_st(v_yuy2_3, (i << 1) + 48, dst);
|
||||
}
|
||||
if (i < width) {
|
||||
const unsigned int j = i >> 1;
|
||||
vector unsigned char v_y1 = vec_ld(i, ysrc);
|
||||
vector unsigned char v_u = vec_ld(j, usrc);
|
||||
vector unsigned char v_v = vec_ld(j, vsrc);
|
||||
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
|
||||
vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
|
||||
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
|
||||
vec_st(v_yuy2_0, (i << 1), dst);
|
||||
vec_st(v_yuy2_1, (i << 1) + 16, dst);
|
||||
}
|
||||
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
|
||||
usrc += chromStride;
|
||||
vsrc += chromStride;
|
||||
}
|
||||
ysrc += lumStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
|
||||
return srcSliceH;
|
||||
}
|
||||
|
||||
static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
|
||||
int srcStride[], int srcSliceY,
|
||||
int srcSliceH, uint8_t *dstParam[],
|
||||
int dstStride_a[])
|
||||
{
|
||||
uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
|
||||
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
|
||||
// srcStride[0], srcStride[1], dstStride[0]);
|
||||
const uint8_t *ysrc = src[0];
|
||||
const uint8_t *usrc = src[1];
|
||||
const uint8_t *vsrc = src[2];
|
||||
const int width = c->srcW;
|
||||
const int height = srcSliceH;
|
||||
const int lumStride = srcStride[0];
|
||||
const int chromStride = srcStride[1];
|
||||
const int dstStride = dstStride_a[0];
|
||||
const int vertLumPerChroma = 2;
|
||||
const vector unsigned char yperm = vec_lvsl(0, ysrc);
|
||||
register unsigned int y;
|
||||
|
||||
/* This code assumes:
|
||||
*
|
||||
* 1) dst is 16 bytes-aligned
|
||||
* 2) dstStride is a multiple of 16
|
||||
* 3) width is a multiple of 16
|
||||
* 4) lum & chrom stride are multiples of 8
|
||||
*/
|
||||
|
||||
for (y = 0; y < height; y++) {
|
||||
int i;
|
||||
for (i = 0; i < width - 31; i += 32) {
|
||||
const unsigned int j = i >> 1;
|
||||
vector unsigned char v_yA = vec_ld(i, ysrc);
|
||||
vector unsigned char v_yB = vec_ld(i + 16, ysrc);
|
||||
vector unsigned char v_yC = vec_ld(i + 32, ysrc);
|
||||
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
|
||||
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
|
||||
vector unsigned char v_uA = vec_ld(j, usrc);
|
||||
vector unsigned char v_uB = vec_ld(j + 16, usrc);
|
||||
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
|
||||
vector unsigned char v_vA = vec_ld(j, vsrc);
|
||||
vector unsigned char v_vB = vec_ld(j + 16, vsrc);
|
||||
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
|
||||
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
|
||||
vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
|
||||
vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
|
||||
vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
|
||||
vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
|
||||
vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
|
||||
vec_st(v_uyvy_0, (i << 1), dst);
|
||||
vec_st(v_uyvy_1, (i << 1) + 16, dst);
|
||||
vec_st(v_uyvy_2, (i << 1) + 32, dst);
|
||||
vec_st(v_uyvy_3, (i << 1) + 48, dst);
|
||||
}
|
||||
if (i < width) {
|
||||
const unsigned int j = i >> 1;
|
||||
vector unsigned char v_y1 = vec_ld(i, ysrc);
|
||||
vector unsigned char v_u = vec_ld(j, usrc);
|
||||
vector unsigned char v_v = vec_ld(j, vsrc);
|
||||
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
|
||||
vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
|
||||
vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
|
||||
vec_st(v_uyvy_0, (i << 1), dst);
|
||||
vec_st(v_uyvy_1, (i << 1) + 16, dst);
|
||||
}
|
||||
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
|
||||
usrc += chromStride;
|
||||
vsrc += chromStride;
|
||||
}
|
||||
ysrc += lumStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
return srcSliceH;
|
||||
}
|
||||
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
|
||||
av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c)
|
||||
{
|
||||
#if HAVE_ALTIVEC
|
||||
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
|
||||
return;
|
||||
|
||||
if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) &&
|
||||
c->srcFormat == AV_PIX_FMT_YUV420P) {
|
||||
enum AVPixelFormat dstFormat = c->dstFormat;
|
||||
|
||||
// unscaled YV12 -> packed YUV, we want speed
|
||||
if (dstFormat == AV_PIX_FMT_YUYV422)
|
||||
c->swscale = yv12toyuy2_unscaled_altivec;
|
||||
else if (dstFormat == AV_PIX_FMT_UYVY422)
|
||||
c->swscale = yv12touyvy_unscaled_altivec;
|
||||
}
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
}
|
||||
418
externals/ffmpeg/libswscale/rgb2rgb.c
vendored
Executable file
418
externals/ffmpeg/libswscale/rgb2rgb.c
vendored
Executable file
@@ -0,0 +1,418 @@
|
||||
/*
|
||||
* software RGB to RGB converter
|
||||
* pluralize by software PAL8 to RGB converter
|
||||
* software YUV to YUV converter
|
||||
* software YUV to RGB converter
|
||||
* Written by Nick Kurshev.
|
||||
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/bswap.h"
|
||||
#include "config.h"
|
||||
#include "rgb2rgb.h"
|
||||
#include "swscale.h"
|
||||
#include "swscale_internal.h"
|
||||
|
||||
void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
void (*shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*shuffle_bytes_1230)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*shuffle_bytes_3012)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void (*shuffle_bytes_3210)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
|
||||
void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
|
||||
const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
|
||||
const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
|
||||
const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc,
|
||||
const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst,
|
||||
uint8_t *udst, uint8_t *vdst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
|
||||
uint8_t *udst, uint8_t *vdst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride,
|
||||
int32_t *rgb2yuv);
|
||||
void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
|
||||
int srcStride, int dstStride);
|
||||
void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
|
||||
int width, int height, int src1Stride,
|
||||
int src2Stride, int dstStride);
|
||||
void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
|
||||
int width, int height, int srcStride,
|
||||
int dst1Stride, int dst2Stride);
|
||||
void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
uint8_t *dst1, uint8_t *dst2,
|
||||
int width, int height,
|
||||
int srcStride1, int srcStride2,
|
||||
int dstStride1, int dstStride2);
|
||||
void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2,
|
||||
const uint8_t *src3, uint8_t *dst,
|
||||
int width, int height,
|
||||
int srcStride1, int srcStride2,
|
||||
int srcStride3, int dstStride);
|
||||
void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
const uint8_t *src, int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
const uint8_t *src, int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
const uint8_t *src, int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
const uint8_t *src, int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
|
||||
#define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define GY ((int)( 0.504 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define GV ((int)(-0.368 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define GU ((int)(-0.291 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define RY ((int)( 0.257 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define RV ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
#define RU ((int)(-0.148 * (1 << RGB2YUV_SHIFT) + 0.5))
|
||||
|
||||
//plain C versions
|
||||
#include "rgb2rgb_template.c"
|
||||
|
||||
/*
|
||||
* RGB15->RGB16 original by Strepto/Astral
|
||||
* ported to gcc & bugfixed : A'rpi
|
||||
* MMXEXT, 3DNOW optimization by Nick Kurshev
|
||||
* 32-bit C version, and and&add trick by Michael Niedermayer
|
||||
*/
|
||||
|
||||
av_cold void ff_sws_rgb2rgb_init(void)
|
||||
{
|
||||
rgb2rgb_init_c();
|
||||
if (ARCH_AARCH64)
|
||||
rgb2rgb_init_aarch64();
|
||||
if (ARCH_X86)
|
||||
rgb2rgb_init_x86();
|
||||
}
|
||||
|
||||
void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
int i, num_pixels = src_size >> 2;
|
||||
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
#if HAVE_BIGENDIAN
|
||||
/* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
|
||||
dst[3 * i + 0] = src[4 * i + 1];
|
||||
dst[3 * i + 1] = src[4 * i + 2];
|
||||
dst[3 * i + 2] = src[4 * i + 3];
|
||||
#else
|
||||
dst[3 * i + 0] = src[4 * i + 2];
|
||||
dst[3 * i + 1] = src[4 * i + 1];
|
||||
dst[3 * i + 2] = src[4 * i + 0];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; 3 * i < src_size; i++) {
|
||||
#if HAVE_BIGENDIAN
|
||||
/* RGB24 (= R, G, B) -> BGR32 (= A, R, G, B) */
|
||||
dst[4 * i + 0] = 255;
|
||||
dst[4 * i + 1] = src[3 * i + 0];
|
||||
dst[4 * i + 2] = src[3 * i + 1];
|
||||
dst[4 * i + 3] = src[3 * i + 2];
|
||||
#else
|
||||
dst[4 * i + 0] = src[3 * i + 2];
|
||||
dst[4 * i + 1] = src[3 * i + 1];
|
||||
dst[4 * i + 2] = src[3 * i + 0];
|
||||
dst[4 * i + 3] = 255;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
uint8_t *d = dst;
|
||||
const uint16_t *s = (const uint16_t *)src;
|
||||
const uint16_t *end = s + src_size / 2;
|
||||
|
||||
while (s < end) {
|
||||
register uint16_t bgr = *s++;
|
||||
#if HAVE_BIGENDIAN
|
||||
*d++ = 255;
|
||||
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
|
||||
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
|
||||
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
|
||||
#else
|
||||
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
|
||||
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
|
||||
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
|
||||
*d++ = 255;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
uint16_t rgb, r, g, b;
|
||||
uint16_t *d = (uint16_t *)dst;
|
||||
const uint16_t *s = (const uint16_t *)src;
|
||||
const uint16_t *end = s + src_size / 2;
|
||||
|
||||
while (s < end) {
|
||||
rgb = *s++;
|
||||
r = rgb & 0xF00;
|
||||
g = rgb & 0x0F0;
|
||||
b = rgb & 0x00F;
|
||||
r = (r << 3) | ((r & 0x800) >> 1);
|
||||
g = (g << 2) | ((g & 0x080) >> 2);
|
||||
b = (b << 1) | ( b >> 3);
|
||||
*d++ = r | g | b;
|
||||
}
|
||||
}
|
||||
|
||||
void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
uint8_t *d = dst;
|
||||
const uint16_t *s = (const uint16_t *)src;
|
||||
const uint16_t *end = s + src_size / 2;
|
||||
|
||||
while (s < end) {
|
||||
register uint16_t bgr = *s++;
|
||||
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
|
||||
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
|
||||
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
|
||||
}
|
||||
}
|
||||
|
||||
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
int i, num_pixels = src_size >> 1;
|
||||
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
unsigned rgb = ((const uint16_t *)src)[i];
|
||||
((uint16_t *)dst)[i] = (rgb >> 11) | (rgb & 0x7E0) | (rgb << 11);
|
||||
}
|
||||
}
|
||||
|
||||
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
int i, num_pixels = src_size >> 1;
|
||||
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
unsigned rgb = ((const uint16_t *)src)[i];
|
||||
((uint16_t *)dst)[i] = (rgb >> 11) | ((rgb & 0x7C0) >> 1) | ((rgb & 0x1F) << 10);
|
||||
}
|
||||
}
|
||||
|
||||
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
uint8_t *d = dst;
|
||||
const uint16_t *s = (const uint16_t *)src;
|
||||
const uint16_t *end = s + src_size / 2;
|
||||
|
||||
while (s < end) {
|
||||
register uint16_t bgr = *s++;
|
||||
#if HAVE_BIGENDIAN
|
||||
*d++ = 255;
|
||||
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
|
||||
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
|
||||
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
|
||||
#else
|
||||
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
|
||||
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
|
||||
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
|
||||
*d++ = 255;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
uint8_t *d = dst;
|
||||
const uint16_t *s = (const uint16_t *)src;
|
||||
const uint16_t *end = s + src_size / 2;
|
||||
|
||||
while (s < end) {
|
||||
register uint16_t bgr = *s++;
|
||||
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
|
||||
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
|
||||
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
|
||||
}
|
||||
}
|
||||
|
||||
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
int i, num_pixels = src_size >> 1;
|
||||
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
unsigned rgb = ((const uint16_t *)src)[i];
|
||||
((uint16_t *)dst)[i] = ((rgb & 0x7C00) >> 10) | ((rgb & 0x3E0) << 1) | (rgb << 11);
|
||||
}
|
||||
}
|
||||
|
||||
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
int i, num_pixels = src_size >> 1;
|
||||
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
unsigned rgb = ((const uint16_t *)src)[i];
|
||||
unsigned br = rgb & 0x7C1F;
|
||||
((uint16_t *)dst)[i] = (br >> 10) | (rgb & 0x3E0) | (br << 10);
|
||||
}
|
||||
}
|
||||
|
||||
void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size)
|
||||
{
|
||||
uint16_t *d = (uint16_t *)dst;
|
||||
uint16_t *s = (uint16_t *)src;
|
||||
int i, num_pixels = src_size >> 1;
|
||||
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
unsigned rgb = s[i];
|
||||
d[i] = (rgb << 8 | rgb & 0xF0 | rgb >> 8) & 0xFFF;
|
||||
}
|
||||
}
|
||||
|
||||
#define DEFINE_RGB48TOBGR48(need_bswap, swap) \
|
||||
void rgb48tobgr48_ ## need_bswap(const uint8_t *src, \
|
||||
uint8_t *dst, int src_size) \
|
||||
{ \
|
||||
uint16_t *d = (uint16_t *)dst; \
|
||||
uint16_t *s = (uint16_t *)src; \
|
||||
int i, num_pixels = src_size >> 1; \
|
||||
\
|
||||
for (i = 0; i < num_pixels; i += 3) { \
|
||||
d[i ] = swap ? av_bswap16(s[i + 2]) : s[i + 2]; \
|
||||
d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1]; \
|
||||
d[i + 2] = swap ? av_bswap16(s[i ]) : s[i ]; \
|
||||
} \
|
||||
}
|
||||
|
||||
DEFINE_RGB48TOBGR48(nobswap, 0)
|
||||
DEFINE_RGB48TOBGR48(bswap, 1)
|
||||
|
||||
#define DEFINE_RGB64TOBGR48(need_bswap, swap) \
|
||||
void rgb64tobgr48_ ## need_bswap(const uint8_t *src, \
|
||||
uint8_t *dst, int src_size) \
|
||||
{ \
|
||||
uint16_t *d = (uint16_t *)dst; \
|
||||
uint16_t *s = (uint16_t *)src; \
|
||||
int i, num_pixels = src_size >> 3; \
|
||||
\
|
||||
for (i = 0; i < num_pixels; i++) { \
|
||||
d[3 * i ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \
|
||||
d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \
|
||||
d[3 * i + 2] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \
|
||||
} \
|
||||
}
|
||||
|
||||
DEFINE_RGB64TOBGR48(nobswap, 0)
|
||||
DEFINE_RGB64TOBGR48(bswap, 1)
|
||||
|
||||
#define DEFINE_RGB64TO48(need_bswap, swap) \
|
||||
void rgb64to48_ ## need_bswap(const uint8_t *src, \
|
||||
uint8_t *dst, int src_size) \
|
||||
{ \
|
||||
uint16_t *d = (uint16_t *)dst; \
|
||||
uint16_t *s = (uint16_t *)src; \
|
||||
int i, num_pixels = src_size >> 3; \
|
||||
\
|
||||
for (i = 0; i < num_pixels; i++) { \
|
||||
d[3 * i ] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \
|
||||
d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \
|
||||
d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \
|
||||
} \
|
||||
}
|
||||
|
||||
DEFINE_RGB64TO48(nobswap, 0)
|
||||
DEFINE_RGB64TO48(bswap, 1)
|
||||
|
||||
#define DEFINE_RGB48TOBGR64(need_bswap, swap) \
|
||||
void rgb48tobgr64_ ## need_bswap(const uint8_t *src, \
|
||||
uint8_t *dst, int src_size) \
|
||||
{ \
|
||||
uint16_t *d = (uint16_t *)dst; \
|
||||
uint16_t *s = (uint16_t *)src; \
|
||||
int i, num_pixels = src_size / 6; \
|
||||
\
|
||||
for (i = 0; i < num_pixels; i++) { \
|
||||
d[4 * i ] = swap ? av_bswap16(s[3 * i + 2]) : s[3 * i + 2]; \
|
||||
d[4 * i + 1] = swap ? av_bswap16(s[3 * i + 1]) : s[3 * i + 1]; \
|
||||
d[4 * i + 2] = swap ? av_bswap16(s[3 * i ]) : s[3 * i ]; \
|
||||
d[4 * i + 3] = 0xFFFF; \
|
||||
} \
|
||||
}
|
||||
|
||||
DEFINE_RGB48TOBGR64(nobswap, 0)
|
||||
DEFINE_RGB48TOBGR64(bswap, 1)
|
||||
|
||||
#define DEFINE_RGB48TO64(need_bswap, swap) \
|
||||
void rgb48to64_ ## need_bswap(const uint8_t *src, \
|
||||
uint8_t *dst, int src_size) \
|
||||
{ \
|
||||
uint16_t *d = (uint16_t *)dst; \
|
||||
uint16_t *s = (uint16_t *)src; \
|
||||
int i, num_pixels = src_size / 6; \
|
||||
\
|
||||
for (i = 0; i < num_pixels; i++) { \
|
||||
d[4 * i ] = swap ? av_bswap16(s[3 * i ]) : s[3 * i ]; \
|
||||
d[4 * i + 1] = swap ? av_bswap16(s[3 * i + 1]) : s[3 * i + 1]; \
|
||||
d[4 * i + 2] = swap ? av_bswap16(s[3 * i + 2]) : s[3 * i + 2]; \
|
||||
d[4 * i + 3] = 0xFFFF; \
|
||||
} \
|
||||
}
|
||||
|
||||
DEFINE_RGB48TO64(nobswap, 0)
|
||||
DEFINE_RGB48TO64(bswap, 1)
|
||||
175
externals/ffmpeg/libswscale/rgb2rgb.h
vendored
Executable file
175
externals/ffmpeg/libswscale/rgb2rgb.h
vendored
Executable file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
* software RGB to RGB converter
|
||||
* pluralize by Software PAL8 to RGB converter
|
||||
* Software YUV to YUV converter
|
||||
* Software YUV to RGB converter
|
||||
* Written by Nick Kurshev.
|
||||
* YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef SWSCALE_RGB2RGB_H
|
||||
#define SWSCALE_RGB2RGB_H
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "libavutil/avutil.h"
|
||||
#include "swscale.h"
|
||||
|
||||
/* A full collection of RGB to RGB(BGR) converters */
|
||||
extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
extern void (*shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*shuffle_bytes_1230)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*shuffle_bytes_3012)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
extern void (*shuffle_bytes_3210)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb48tobgr64_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb48tobgr64_bswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb48to64_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb48to64_bswap(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
|
||||
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
|
||||
uint8_t *vdst, int width, int height, int lumStride,
|
||||
int chromStride, int srcStride, int32_t *rgb2yuv);
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
*/
|
||||
extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
|
||||
/**
|
||||
* Width should be a multiple of 16.
|
||||
*/
|
||||
extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
*/
|
||||
extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
*/
|
||||
extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
|
||||
/**
|
||||
* Width should be a multiple of 16.
|
||||
*/
|
||||
extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int dstStride);
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 2.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
* Chrominance data is only taken from every second line, others are ignored.
|
||||
* FIXME: Write high quality version.
|
||||
*/
|
||||
extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride,
|
||||
int32_t *rgb2yuv);
|
||||
extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
|
||||
int srcStride, int dstStride);
|
||||
|
||||
extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
|
||||
int width, int height, int src1Stride,
|
||||
int src2Stride, int dstStride);
|
||||
|
||||
extern void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
|
||||
int width, int height, int srcStride,
|
||||
int dst1Stride, int dst2Stride);
|
||||
|
||||
extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
|
||||
uint8_t *dst1, uint8_t *dst2,
|
||||
int width, int height,
|
||||
int srcStride1, int srcStride2,
|
||||
int dstStride1, int dstStride2);
|
||||
|
||||
extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
|
||||
uint8_t *dst,
|
||||
int width, int height,
|
||||
int srcStride1, int srcStride2,
|
||||
int srcStride3, int dstStride);
|
||||
|
||||
extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
|
||||
int width, int height,
|
||||
int lumStride, int chromStride, int srcStride);
|
||||
|
||||
void ff_sws_rgb2rgb_init(void);
|
||||
|
||||
void rgb2rgb_init_aarch64(void);
|
||||
void rgb2rgb_init_x86(void);
|
||||
|
||||
#endif /* SWSCALE_RGB2RGB_H */
|
||||
994
externals/ffmpeg/libswscale/rgb2rgb_template.c
vendored
Executable file
994
externals/ffmpeg/libswscale/rgb2rgb_template.c
vendored
Executable file
@@ -0,0 +1,994 @@
|
||||
/*
|
||||
* software RGB to RGB converter
|
||||
* pluralize by software PAL8 to RGB converter
|
||||
* software YUV to YUV converter
|
||||
* software YUV to RGB converter
|
||||
* Written by Nick Kurshev.
|
||||
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||
* lot of big-endian byte order fixes by Alex Beregszaszi
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
/**
 * Expand packed 24-bit pixels to 32-bit by appending an opaque (255)
 * alpha byte; on big-endian targets the three colour bytes are also
 * reversed so the 32-bit layout is A, B, G, R.
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    int in = 0, out = 0;

    while (in < src_size) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
        dst[out++] = 255;
        dst[out++] = src[in + 2];
        dst[out++] = src[in + 1];
        dst[out++] = src[in + 0];
        in += 3;
#else
        dst[out++] = src[in++];
        dst[out++] = src[in++];
        dst[out++] = src[in++];
        dst[out++] = 255;
#endif
    }
}
|
||||
|
||||
/**
 * Drop the alpha byte of packed 32-bit pixels, producing 24-bit output;
 * on big-endian targets the colour bytes are reversed as well.
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    int i, o = 0;

    for (i = 0; i < src_size; i += 4, o += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
        dst[o + 2] = src[i + 1];
        dst[o + 1] = src[i + 2];
        dst[o + 0] = src[i + 3];
#else
        dst[o + 0] = src[i + 0];
        dst[o + 1] = src[i + 1];
        dst[o + 2] = src[i + 2];
#endif
    }
}
|
||||
|
||||
/*
|
||||
* original by Strepto/Astral
|
||||
* ported to gcc & bugfixed: A'rpi
|
||||
* MMXEXT, 3DNOW optimization by Nick Kurshev
|
||||
* 32-bit C version, and and&add trick by Michael Niedermayer
|
||||
*/
|
||||
/**
 * Widen 15-bit (5-5-5) pixels to 16-bit (5-6-5) with the and&add trick:
 * adding the masked upper fields to themselves shifts them up one bit
 * while the low 5-bit field stays in place.  Two pixels are processed
 * per 32-bit word, with a 16-bit tail for one trailing pixel.
 * NOTE(review): assumes src_size is a multiple of 2 — confirm callers.
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    register uint8_t *d = dst;
    register const uint8_t *s = src;
    register const uint8_t *end = s + src_size;
    const uint8_t *mm_end = end - 3;

    while (s < mm_end) {
        /* two pixels at once */
        register unsigned x = *((const uint32_t *)s);
        *((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
        d += 4;
        s += 4;
    }
    if (s < end) {
        /* trailing single pixel */
        register unsigned short x = *((const uint16_t *)s);
        *((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0);
    }
}
|
||||
|
||||
/**
 * Narrow 16-bit (5-6-5) pixels to 15-bit (5-5-5): the upper fields are
 * shifted down one bit, the low 5-bit field is kept.  Two pixels per
 * 32-bit word, with a 16-bit tail for one trailing pixel.
 * NOTE(review): assumes src_size is a multiple of 2 — confirm callers.
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    register uint8_t *d = dst;
    register const uint8_t *s = src;
    register const uint8_t *end = s + src_size;
    const uint8_t *mm_end = end - 3;

    while (s < mm_end) {
        /* two pixels at once */
        register uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
        s += 4;
        d += 4;
    }
    if (s < end) {
        /* trailing single pixel */
        register uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
    }
}
|
||||
|
||||
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 words, keeping the top bits of
 * each 8-bit channel.
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint16_t *out = (uint16_t *)dst;
    const uint8_t *p = src;
    const uint8_t *limit = src + src_size;

    for (; p < limit; p += 4) {
        register int px = *(const uint32_t *)p;
        *out++ = ((px & 0xFF) >> 3) +
                 ((px & 0xFC00) >> 5) +
                 ((px & 0xF80000) >> 8);
    }
}
|
||||
|
||||
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 words with the low and high
 * channels exchanged relative to rgb32to16_c (R<->B swap).
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    uint16_t *d = (uint16_t *)dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;

    while (s < end) {
        register int rgb = *(const uint32_t *)s;
        s += 4;
        /* low byte -> high 5-bit field, high byte -> low 5-bit field */
        *d++ = ((rgb & 0xF8) << 8) +
               ((rgb & 0xFC00) >> 5) +
               ((rgb & 0xF80000) >> 19);
    }
}
|
||||
|
||||
/**
 * Pack 32-bit pixels into 15-bit 5-5-5 words, keeping the top 5 bits of
 * each 8-bit channel.
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint16_t *d = (uint16_t *)dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;

    while (s < end) {
        register int rgb = *(const uint32_t *)s;
        s += 4;
        *d++ = ((rgb & 0xFF) >> 3) +
               ((rgb & 0xF800) >> 6) +
               ((rgb & 0xF80000) >> 9);
    }
}
|
||||
|
||||
/**
 * Pack 32-bit pixels into 15-bit 5-5-5 words with the low and high
 * channels exchanged relative to rgb32to15_c (R<->B swap).
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    uint16_t *d = (uint16_t *)dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;

    while (s < end) {
        register int rgb = *(const uint32_t *)s;
        s += 4;
        /* low byte -> high 5-bit field, high byte -> low 5-bit field */
        *d++ = ((rgb & 0xF8) << 7) +
               ((rgb & 0xF800) >> 6) +
               ((rgb & 0xF80000) >> 19);
    }
}
|
||||
|
||||
/**
 * Pack byte triplets into 16-bit 5-6-5 words: first byte -> low field,
 * third byte -> high field.
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    uint16_t *out = (uint16_t *)dst;
    const uint8_t *p = src;
    const uint8_t *limit = src + src_size;

    for (; p < limit; p += 3)
        *out++ = (p[0] >> 3)          |
                 ((p[1] & 0xFC) << 3) |
                 ((p[2] & 0xF8) << 8);
}
|
||||
|
||||
/**
 * Pack byte triplets into 16-bit 5-6-5 words: first byte -> high field,
 * third byte -> low field (channel order reversed vs rgb24tobgr16_c).
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint16_t *d = (uint16_t *)dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;

    while (s < end) {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
|
||||
|
||||
/**
 * Pack byte triplets into 15-bit 5-5-5 words: first byte -> low field,
 * third byte -> high field.
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    uint16_t *d = (uint16_t *)dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;

    while (s < end) {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
|
||||
|
||||
/**
 * Pack byte triplets into 15-bit 5-5-5 words: first byte -> high field,
 * third byte -> low field (channel order reversed vs rgb24tobgr15_c).
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint16_t *d = (uint16_t *)dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;

    while (s < end) {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
|
||||
|
||||
/**
 * Expand 15-bit 5-5-5 pixels to byte triplets; each 5-bit field is
 * widened to 8 bits by replicating its top bits into the low bits.
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src;
    const uint16_t *end = s + src_size / 2;

    while (s < end) {
        register uint16_t bgr = *s++;
        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
    }
}
|
||||
|
||||
/**
 * Expand 16-bit 5-6-5 pixels to byte triplets; each field is widened to
 * 8 bits by replicating its top bits into the low bits.
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    uint8_t *d = (uint8_t *)dst;
    const uint16_t *s = (const uint16_t *)src;
    const uint16_t *end = s + src_size / 2;

    while (s < end) {
        register uint16_t bgr = *s++;
        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
    }
}
|
||||
|
||||
/**
 * Expand 15-bit 5-5-5 pixels to 32-bit with an opaque (255) alpha byte;
 * colour fields are widened to 8 bits by bit replication.  Byte order of
 * the 32-bit pixel depends on target endianness.
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src;
    const uint16_t *end = s + src_size / 2;

    while (s < end) {
        register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
        *d++ = 255;
        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
#else
        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
        *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
        *d++ = 255;
#endif
    }
}
|
||||
|
||||
/**
 * Expand 16-bit 5-6-5 pixels to 32-bit with an opaque (255) alpha byte;
 * colour fields are widened to 8 bits by bit replication.  Byte order of
 * the 32-bit pixel depends on target endianness.
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src;
    const uint16_t *end = s + src_size / 2;

    while (s < end) {
        register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
        *d++ = 255;
        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
#else
        *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
        *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
        *d++ = 255;
#endif
    }
}
|
||||
|
||||
/**
 * Swap bytes 0 and 2 of every 32-bit word (a 2-1-0-3 byte shuffle),
 * leaving bytes 1 and 3 in place.  The index runs from 15 - src_size up
 * to 15 in steps of 4 so the loop covers src_size bytes in word groups.
 * NOTE(review): appears to assume src_size is a multiple of 4 — confirm.
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    int idx = 15 - src_size;
    const uint8_t *s = src - idx;
    uint8_t *d = dst - idx;

    for (; idx < 15; idx += 4) {
        register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
        v &= 0xff00ff;
        /* (v >> 16) and (v << 16) exchange the two unmasked bytes */
        *(uint32_t *)&d[idx] = (v >> 16) + g + (v << 16);
    }
}
|
||||
|
||||
/**
 * Swap bytes 1 and 3 of every 32-bit word (a 0-3-2-1 byte shuffle),
 * leaving bytes 0 and 2 in place.  Same negative-to-positive indexing
 * scheme as shuffle_bytes_2103_c.
 * NOTE(review): appears to assume src_size is a multiple of 4 — confirm.
 */
static inline void shuffle_bytes_0321_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    int idx = 15 - src_size;
    const uint8_t *s = src - idx;
    uint8_t *d = dst - idx;

    for (; idx < 15; idx += 4) {
        register unsigned v = *(const uint32_t *)&s[idx], g = v & 0x00ff00ff;
        v &= 0xff00ff00;
        /* (v >> 16) and (v << 16) exchange the two unmasked bytes */
        *(uint32_t *)&d[idx] = (v >> 16) + g + (v << 16);
    }
}
|
||||
|
||||
#if !HAVE_BIGENDIAN
/* Generate a routine that permutes every 4-byte group of src into dst
 * using the compile-time byte order (a, b, c, d). */
#define DEFINE_SHUFFLE_BYTES(name, a, b, c, d) \
static void shuffle_bytes_##name (const uint8_t *src, \
                                  uint8_t *dst, int src_size) \
{ \
    int i; \
\
    for (i = 0; i < src_size; i += 4) { \
        dst[i + 0] = src[i + a]; \
        dst[i + 1] = src[i + b]; \
        dst[i + 2] = src[i + c]; \
        dst[i + 3] = src[i + d]; \
    } \
}

/* Little-endian-only variants (rgb2rgb_init_c only installs these when
 * HAVE_BIGENDIAN is 0). */
DEFINE_SHUFFLE_BYTES(1230_c, 1, 2, 3, 0)
DEFINE_SHUFFLE_BYTES(3012_c, 3, 0, 1, 2)
DEFINE_SHUFFLE_BYTES(3210_c, 3, 2, 1, 0)
#endif
|
||||
|
||||
/**
 * Swap the first and third byte of every 3-byte group (R<->B exchange).
 * All three source bytes are read before any write, so src == dst
 * (in-place conversion) is safe.
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    unsigned pos;

    for (pos = 0; pos < src_size; pos += 3) {
        const uint8_t b0 = src[pos + 0];
        const uint8_t b1 = src[pos + 1];
        const uint8_t b2 = src[pos + 2];

        dst[pos + 0] = b2;
        dst[pos + 1] = b1;
        dst[pos + 2] = b0;
    }
}
|
||||
|
||||
/**
 * Interleave planar Y/U/V into packed YUY2 (byte order Y0 U Y1 V).
 *
 * @param vertLumPerChroma number of luma lines sharing one chroma line
 *                         (power of two; 2 for 4:2:0, 1 for 4:2:2)
 *
 * Fix vs original: the 32-bit path shifted plain int chroma/luma bytes
 * by 24, which overflows the sign bit (undefined behaviour) for values
 * >= 128; the packing is now done in unsigned arithmetic, matching the
 * casts already present in the 64-bit path.
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        uint64_t *ldst = (uint64_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = yc[0] + (uc[0] << 8) +
                         (yc[1] << 16) + ((unsigned) vc[0] << 24);
            uint64_t l = yc[2] + (uc[1] << 8) +
                         (yc[3] << 16) + ((unsigned) vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }

#else
        uint32_t *idst = (uint32_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((unsigned) yc[0] << 24) + (uc[0] << 16) +
                      (yc[1] << 8) + (vc[0] << 0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((unsigned) vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* advance chroma only every vertLumPerChroma luma lines */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
}
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
*/
|
||||
/**
 * Interleave 4:2:0 planes into packed YUY2.
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    //FIXME interpolate chroma
    /* vertLumPerChroma = 2: one chroma line feeds two luma lines */
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 2);
}
|
||||
|
||||
/**
 * Interleave planar Y/U/V into packed UYVY (byte order U Y0 V Y1).
 *
 * @param vertLumPerChroma number of luma lines sharing one chroma line
 *                         (power of two; 2 for 4:2:0, 1 for 4:2:2)
 *
 * Fix vs original: the 32-bit path shifted plain int bytes by 24, which
 * overflows the sign bit (undefined behaviour) for values >= 128; the
 * packing is now done in unsigned arithmetic, matching the casts
 * already present in the 64-bit path.
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        uint64_t *ldst = (uint64_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = uc[0] + (yc[0] << 8) +
                         (vc[0] << 16) + ((unsigned) yc[1] << 24);
            uint64_t l = uc[1] + (yc[2] << 8) +
                         (vc[1] << 16) + ((unsigned) yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }

#else
        uint32_t *idst = (uint32_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((unsigned) uc[0] << 24) + (yc[0] << 16) +
                      (vc[0] << 8) + (yc[1] << 0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((unsigned) yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* advance chroma only every vertLumPerChroma luma lines */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
}
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
*/
|
||||
/**
 * Interleave 4:2:0 planes into packed UYVY.
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    //FIXME interpolate chroma
    /* vertLumPerChroma = 2: one chroma line feeds two luma lines */
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 2);
}
|
||||
|
||||
/**
|
||||
* Width should be a multiple of 16.
|
||||
*/
|
||||
/**
 * Interleave 4:2:2 planes into packed UYVY.
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    /* vertLumPerChroma = 1: every luma line has its own chroma line */
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 1);
}
|
||||
|
||||
/**
|
||||
* Width should be a multiple of 16.
|
||||
*/
|
||||
/**
 * Interleave 4:2:2 planes into packed YUY2.
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    /* vertLumPerChroma = 1: every luma line has its own chroma line */
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 1);
}
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
*/
|
||||
/**
 * De-interleave packed YUY2 (Y0 U Y1 V) into planar Y, U, V.  Chroma is
 * taken from even lines only; the chroma of odd lines is discarded.
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    int row, x;
    const int chromWidth = width >> 1;

    for (row = 0; row < height; row += 2) {
        /* even line: split out luma and both chroma samples */
        for (x = 0; x < chromWidth; x++) {
            ydst[2 * x]     = src[4 * x];
            udst[x]         = src[4 * x + 1];
            ydst[2 * x + 1] = src[4 * x + 2];
            vdst[x]         = src[4 * x + 3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* odd line: luma only, its chroma is dropped */
        for (x = 0; x < chromWidth; x++) {
            ydst[2 * x]     = src[4 * x];
            ydst[2 * x + 1] = src[4 * x + 2];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
|
||||
|
||||
/**
 * Upscale one plane by 2x in both directions using bilinear-style 3:1
 * weighting between neighbouring samples.  The first and last output
 * lines/columns reuse the edge samples directly.
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                              int srcHeight, int srcStride, int dstStride)
{
    int x, y;

    dst[0] = src[0];

    // first line
    for (x = 0; x < srcWidth - 1; x++) {
        /* two output samples between each input pair, weighted 3:1 / 1:3 */
        dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[srcWidth - 1];

    dst += dstStride;

    for (y = 1; y < srcHeight; y++) {
        /* leftover name from the removed MMX fast path; the C loop
         * starts at x = mmxSize - 1 = 0 */
        const int mmxSize = 1;

        /* left edge: vertical-only interpolation */
        dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
        dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;

        for (x = mmxSize - 1; x < srcWidth - 1; x++) {
            /* diagonal 3:1 blends between the current and next line */
            dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
            dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
            dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2;
            dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2;
        }
        /* right edge: vertical-only interpolation */
        dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
        dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    // last line
    dst[0] = src[0];

    for (x = 0; x < srcWidth - 1; x++) {
        dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[srcWidth - 1];
}
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 16.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
* Chrominance data is only taken from every second line, others are ignored.
|
||||
* FIXME: Write HQ version.
|
||||
*/
|
||||
/**
 * De-interleave packed UYVY (U Y0 V Y1) into planar Y, U, V.
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * Chrominance data is only taken from every second line, others are
 * ignored.  FIXME: Write HQ version.
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    int y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y += 2) {
        int i;
        /* even line: split out both chroma samples and the luma pair */
        for (i = 0; i < chromWidth; i++) {
            udst[i]         = src[4 * i + 0];
            ydst[2 * i + 0] = src[4 * i + 1];
            vdst[i]         = src[4 * i + 2];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        ydst += lumStride;
        src += srcStride;

        /* odd line: luma only, its chroma is dropped */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 1];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src += srcStride;
    }
}
|
||||
|
||||
/**
|
||||
* Height should be a multiple of 2 and width should be a multiple of 2.
|
||||
* (If this is a problem for anyone then tell me, and I will fix it.)
|
||||
* Chrominance data is only taken from every second line,
|
||||
* others are ignored in the C version.
|
||||
* FIXME: Write HQ version.
|
||||
*/
|
||||
/**
 * Convert packed 24-bit pixels to planar 4:2:0 YUV using the
 * coefficient table in rgb2yuv (indexed by the RY_IDX... constants).
 * Height should be a multiple of 2 and width should be a multiple of 2.
 * Chrominance is computed from the first pixel of each even-line pair
 * only (no averaging); odd lines contribute luma only.
 * FIXME: Write HQ version.
 */
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                      uint8_t *vdst, int width, int height, int lumStride,
                      int chromStride, int srcStride, int32_t *rgb2yuv)
{
    /* fixed-point conversion coefficients (scaled by 1 << RGB2YUV_SHIFT) */
    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
    int y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y += 2) {
        int i;
        /* even line: luma for both pixels, chroma from the first */
        for (i = 0; i < chromWidth; i++) {
            unsigned int b = src[6 * i + 0];
            unsigned int g = src[6 * i + 1];
            unsigned int r = src[6 * i + 2];

            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;

            udst[i] = U;
            vdst[i] = V;
            ydst[2 * i] = Y;

            b = src[6 * i + 3];
            g = src[6 * i + 4];
            r = src[6 * i + 5];

            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
            ydst[2 * i + 1] = Y;
        }
        ydst += lumStride;
        src += srcStride;

        /* odd height: no second line for the last pair */
        if (y+1 == height)
            break;

        /* odd line: luma only */
        for (i = 0; i < chromWidth; i++) {
            unsigned int b = src[6 * i + 0];
            unsigned int g = src[6 * i + 1];
            unsigned int r = src[6 * i + 2];

            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;

            ydst[2 * i] = Y;

            b = src[6 * i + 3];
            g = src[6 * i + 4];
            r = src[6 * i + 5];

            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
            ydst[2 * i + 1] = Y;
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src += srcStride;
    }
}
|
||||
|
||||
/**
 * Interleave two byte planes: dest gets src1[i], src2[i] alternating,
 * row by row with independent strides.
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width, int height,
                              int src1Stride, int src2Stride, int dstStride)
{
    int row;

    for (row = 0; row < height; row++,
         dest += dstStride, src1 += src1Stride, src2 += src2Stride) {
        int col;
        for (col = 0; col < width; col++) {
            dest[2 * col]     = src1[col];
            dest[2 * col + 1] = src2[col];
        }
    }
}
|
||||
|
||||
/**
 * De-interleave a byte stream into two planes: even source bytes go to
 * dst1, odd ones to dst2, row by row with independent strides.
 */
static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
                                int width, int height, int srcStride,
                                int dst1Stride, int dst2Stride)
{
    int row;

    for (row = 0; row < height; row++,
         src += srcStride, dst1 += dst1Stride, dst2 += dst2Stride) {
        int col;
        for (col = 0; col < width; col++) {
            dst1[col] = src[2 * col];
            dst2[col] = src[2 * col + 1];
        }
    }
}
|
||||
|
||||
/**
 * Upsample two chroma planes by 2x in each direction via simple sample
 * duplication: each source sample is written twice horizontally, and
 * each source line feeds two output lines (y >> 1 indexing).
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 int width, int height,
                                 int srcStride1, int srcStride2,
                                 int dstStride1, int dstStride2)
{
    int x, y;
    int w = width / 2;
    int h = height / 2;

    for (y = 0; y < h; y++) {
        const uint8_t *s1 = src1 + srcStride1 * (y >> 1);
        uint8_t *d = dst1 + dstStride1 * y;
        for (x = 0; x < w; x++)
            d[2 * x] = d[2 * x + 1] = s1[x];
    }
    for (y = 0; y < h; y++) {
        const uint8_t *s2 = src2 + srcStride2 * (y >> 1);
        uint8_t *d = dst2 + dstStride2 * y;
        for (x = 0; x < w; x++)
            d[2 * x] = d[2 * x + 1] = s2[x];
    }
}
|
||||
|
||||
/**
 * Interleave a luma plane (src1) with heavily subsampled chroma planes
 * (src2 = U, src3 = V, both at 1/4 resolution: y >> 2 line indexing)
 * into packed YUY2; each chroma sample is reused for four luma samples
 * per line.
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  int width, int height,
                                  int srcStride1, int srcStride2,
                                  int srcStride3, int dstStride)
{
    int x, y;
    int w = width / 2;
    int h = height;

    for (y = 0; y < h; y++) {
        const uint8_t *yp = src1 + srcStride1 * y;
        const uint8_t *up = src2 + srcStride2 * (y >> 2);
        const uint8_t *vp = src3 + srcStride3 * (y >> 2);
        uint8_t *d = dst + dstStride * y;
        for (x = 0; x < w; x++) {
            const int x2 = x << 2;
            /* two YUY2 pairs share one U and one V sample */
            d[8 * x + 0] = yp[x2];
            d[8 * x + 1] = up[x];
            d[8 * x + 2] = yp[x2 + 1];
            d[8 * x + 3] = vp[x];
            d[8 * x + 4] = yp[x2 + 2];
            d[8 * x + 5] = up[x];
            d[8 * x + 6] = yp[x2 + 3];
            d[8 * x + 7] = vp[x];
        }
    }
}
|
||||
|
||||
/**
 * Copy every second byte of src (offsets 0, 2, 4, ...) into dst;
 * count is the number of output bytes.
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}
|
||||
|
||||
/**
 * From groups of 4 bytes, copy byte 0 to dst0 and byte 2 to dst1;
 * count is the number of groups.  The negative-index loop mirrors the
 * SIMD versions' countdown style.
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    dst0 += count;
    dst1 += count;
    src += count * 4;
    count = -count;
    while (count < 0) {
        dst0[count] = src[4 * count + 0];
        dst1[count] = src[4 * count + 2];
        count++;
    }
}
|
||||
|
||||
/**
 * Like extract_even2_c but averages the corresponding bytes of two
 * source lines (src0, src1) before storing; count is the number of
 * 4-byte groups.
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    dst0 += count;
    dst1 += count;
    src0 += count * 4;
    src1 += count * 4;
    count = -count;
    while (count < 0) {
        dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
        dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
        count++;
    }
}
|
||||
|
||||
/**
 * From groups of 4 bytes, copy byte 1 to dst0 and byte 3 to dst1
 * (the src++ below offsets the whole stream by one byte); count is the
 * number of groups.
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    dst0 += count;
    dst1 += count;
    src += count * 4;
    count = -count;
    src++;
    while (count < 0) {
        dst0[count] = src[4 * count + 0];
        dst1[count] = src[4 * count + 2];
        count++;
    }
}
|
||||
|
||||
/**
 * Like extract_odd2_c but averages the corresponding bytes of two
 * source lines (src0, src1) before storing; count is the number of
 * 4-byte groups.
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    dst0 += count;
    dst1 += count;
    src0 += count * 4;
    src1 += count * 4;
    count = -count;
    /* shift both streams to the odd bytes */
    src0++;
    src1++;
    while (count < 0) {
        dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
        dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
        count++;
    }
}
|
||||
|
||||
/**
 * Convert packed YUYV to planar 4:2:0: luma is extracted on every line;
 * chroma is averaged from each pair of lines and written once per pair.
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        extract_even_c(src, ydst, width);
        if (y & 1) {
            /* average this line's chroma with the previous line's */
            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        src += srcStride;
        ydst += lumStride;
    }
}
|
||||
|
||||
/**
 * Convert packed YUYV to planar 4:2:2: luma and chroma are extracted on
 * every line (no vertical chroma subsampling).
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        src += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
|
||||
|
||||
/**
 * Convert packed UYVY to planar 4:2:0: luma starts at byte offset 1;
 * chroma is averaged from each pair of lines and written once per pair.
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        extract_even_c(src + 1, ydst, width);
        if (y & 1) {
            /* average this line's chroma with the previous line's */
            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        src += srcStride;
        ydst += lumStride;
    }
}
|
||||
|
||||
/**
 * Convert packed UYVY to planar 4:2:2: luma starts at byte offset 1;
 * luma and chroma are extracted on every line.
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        src += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
|
||||
|
||||
/**
 * Install the portable C implementations into the global rgb2rgb
 * function pointers (declared in rgb2rgb.h); arch-specific init
 * functions may later override individual entries.
 */
static av_cold void rgb2rgb_init_c(void)
{
    rgb15to16          = rgb15to16_c;
    rgb15tobgr24       = rgb15tobgr24_c;
    rgb15to32          = rgb15to32_c;
    rgb16tobgr24       = rgb16tobgr24_c;
    rgb16to32          = rgb16to32_c;
    rgb16to15          = rgb16to15_c;
    rgb24tobgr16       = rgb24tobgr16_c;
    rgb24tobgr15       = rgb24tobgr15_c;
    rgb24tobgr32       = rgb24tobgr32_c;
    rgb32to16          = rgb32to16_c;
    rgb32to15          = rgb32to15_c;
    rgb32tobgr24       = rgb32tobgr24_c;
    rgb24to15          = rgb24to15_c;
    rgb24to16          = rgb24to16_c;
    rgb24tobgr24       = rgb24tobgr24_c;
#if HAVE_BIGENDIAN
    /* on big-endian targets the 0321 and 2103 shuffles swap roles */
    shuffle_bytes_0321 = shuffle_bytes_2103_c;
    shuffle_bytes_2103 = shuffle_bytes_0321_c;
#else
    shuffle_bytes_0321 = shuffle_bytes_0321_c;
    shuffle_bytes_2103 = shuffle_bytes_2103_c;
    shuffle_bytes_1230 = shuffle_bytes_1230_c;
    shuffle_bytes_3012 = shuffle_bytes_3012_c;
    shuffle_bytes_3210 = shuffle_bytes_3210_c;
#endif
    rgb32tobgr16       = rgb32tobgr16_c;
    rgb32tobgr15       = rgb32tobgr15_c;
    yv12toyuy2         = yv12toyuy2_c;
    yv12touyvy         = yv12touyvy_c;
    yuv422ptoyuy2      = yuv422ptoyuy2_c;
    yuv422ptouyvy      = yuv422ptouyvy_c;
    yuy2toyv12         = yuy2toyv12_c;
    planar2x           = planar2x_c;
    ff_rgb24toyv12     = ff_rgb24toyv12_c;
    interleaveBytes    = interleaveBytes_c;
    deinterleaveBytes  = deinterleaveBytes_c;
    vu9_to_vu12        = vu9_to_vu12_c;
    yvu9_to_yuy2       = yvu9_to_yuy2_c;

    uyvytoyuv420       = uyvytoyuv420_c;
    uyvytoyuv422       = uyvytoyuv422_c;
    yuyvtoyuv420       = yuyvtoyuv420_c;
    yuyvtoyuv422       = yuyvtoyuv422_c;
}
|
||||
398
externals/ffmpeg/libswscale/slice.c
vendored
Executable file
398
externals/ffmpeg/libswscale/slice.c
vendored
Executable file
@@ -0,0 +1,398 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "swscale_internal.h"
|
||||
|
||||
/*
 * Release the line buffers owned by a slice.
 *
 * Only planes 0 and 1 are freed: alloc_lines() allocates each luma/alpha
 * and U/V pair as a single buffer and points planes 3 and 2 into the
 * second half of it, so freeing planes 0/1 releases all memory.
 * For ring slices the duplicated pointer at [j+n] is cleared so it does
 * not dangle after the base pointer is freed.
 */
static void free_lines(SwsSlice *s)
{
    int i;
    for (i = 0; i < 2; ++i) {
        int n = s->plane[i].available_lines;
        int j;
        for (j = 0; j < n; ++j) {
            av_freep(&s->plane[i].line[j]);     /* frees and NULLs the base pointer */
            if (s->is_ring)
                s->plane[i].line[j+n] = NULL;   /* second copy of the same pointer */
        }
    }

    /* Wipe every remaining pointer — planes 2/3 alias the freed buffers.
     * Ring slices hold 3x available_lines pointers per plane. */
    for (i = 0; i < 4; ++i)
        memset(s->plane[i].line, 0, sizeof(uint8_t*) * s->plane[i].available_lines * (s->is_ring ? 3 : 1));
    s->should_free_lines = 0;
}
|
||||
|
||||
/*
|
||||
slice lines contains extra bytes for vectorial code thus @size
|
||||
is the allocated memory size and @width is the number of pixels
|
||||
*/
|
||||
/*
 * Allocate the per-line buffers for all four planes of @s.
 *
 * @size  bytes to allocate per line (includes slack for vector code)
 * @width number of pixels per line (stored on the slice)
 *
 * Plane pairs (0,3) and (1,2) share one allocation each: the companion
 * plane's line starts size+16 bytes into the same buffer (the vertical
 * MMX scaler expects the chroma pair contiguous in memory).  For ring
 * slices, pointer entries [n..2n) duplicate [0..n).
 *
 * Returns 0 on success or AVERROR(ENOMEM); on failure everything
 * allocated so far is released via free_lines().
 */
static int alloc_lines(SwsSlice *s, int size, int width)
{
    int i;
    int idx[2] = {3, 2};  /* companion plane for planes 0 and 1 */

    s->should_free_lines = 1;
    s->width = width;

    for (i = 0; i < 2; ++i) {
        int n = s->plane[i].available_lines;
        int j;
        int ii = idx[i];

        /* Paired planes must have been sized identically by alloc_slice(). */
        av_assert0(n == s->plane[ii].available_lines);
        for (j = 0; j < n; ++j) {
            // chroma plane line U and V are expected to be contiguous in memory
            // by mmx vertical scaler code
            s->plane[i].line[j] = av_malloc(size * 2 + 32);
            if (!s->plane[i].line[j]) {
                free_lines(s);
                return AVERROR(ENOMEM);
            }
            /* Companion plane points into the second half of the buffer. */
            s->plane[ii].line[j] = s->plane[i].line[j] + size + 16;
            if (s->is_ring) {
                /* Ring slices mirror the pointer table one period later. */
                s->plane[i].line[j+n] = s->plane[i].line[j];
                s->plane[ii].line[j+n] = s->plane[ii].line[j];
            }
        }
    }

    return 0;
}
|
||||
|
||||
static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring)
{
    /* Initialize slice metadata and allocate the zeroed line-pointer
     * arrays.  The line buffers themselves are attached later (either
     * allocated by alloc_lines() or pointed at caller memory).  Ring
     * slices carry three periods' worth of pointers per plane. */
    const int size[4] = { lumLines, chrLines, chrLines, lumLines };
    int p;

    s->h_chr_sub_sample  = h_sub_sample;
    s->v_chr_sub_sample  = v_sub_sample;
    s->fmt               = fmt;
    s->is_ring           = ring;
    s->should_free_lines = 0;

    for (p = 0; p < 4; ++p) {
        const int count = size[p] * (ring == 0 ? 1 : 3);

        s->plane[p].line = av_mallocz_array(sizeof(uint8_t*), count);
        if (!s->plane[p].line)
            return AVERROR(ENOMEM);

        /* tmp points at the third period of the ring pointer table. */
        s->plane[p].tmp             = ring ? s->plane[p].line + size[p] * 2 : NULL;
        s->plane[p].available_lines = size[p];
        s->plane[p].sliceY          = 0;
        s->plane[p].sliceH          = 0;
    }
    return 0;
}
|
||||
|
||||
static void free_slice(SwsSlice *s)
{
    /* Free a slice's line buffers (if owned) and its pointer arrays.
     * NULL-safe. */
    int p;

    if (!s)
        return;

    if (s->should_free_lines)
        free_lines(s);

    for (p = 0; p < 4; ++p) {
        av_freep(&s->plane[p].line);
        s->plane[p].tmp = NULL;  /* aliased into line[]; just cleared */
    }
}
|
||||
|
||||
int ff_rotate_slice(SwsSlice *s, int lum, int chr)
{
    /* Advance the ring-buffer window of a slice: once the requested line
     * (lum for planes 0/3, chr for planes 1/2) is two full periods past
     * the window start, slide the window forward by one period. */
    int p;

    if (lum) {
        for (p = 0; p < 4; p += 3) {        /* planes 0 (luma) and 3 (alpha) */
            int avail = s->plane[p].available_lines;

            if (lum - s->plane[p].sliceY >= 2 * avail) {
                s->plane[p].sliceY += avail;
                s->plane[p].sliceH -= avail;
            }
        }
    }
    if (chr) {
        for (p = 1; p < 3; ++p) {           /* planes 1 and 2 (chroma) */
            int avail = s->plane[p].available_lines;

            if (chr - s->plane[p].sliceY >= 2 * avail) {
                s->plane[p].sliceY += avail;
                s->plane[p].sliceH -= avail;
            }
        }
    }
    return 0;
}
|
||||
|
||||
/*
 * Point a slice's line tables at caller-provided picture memory.
 *
 * @src/@stride  per-plane base pointers and strides
 * @srcW         line width in pixels
 * @lumY/@lumH   first luma line and luma line count
 * @chrY/@chrH   first chroma line and chroma line count
 * @relative     if nonzero, src[] already points at the first requested
 *               line; otherwise the start offset is applied here
 *
 * If the new region extends the one already described by the slice, the
 * existing entries are kept and only the tail is (re)written; otherwise
 * the slice is rebased at start[i], clamped to available_lines.
 * Always returns 0.
 */
int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative)
{
    int i = 0;

    /* Per-plane first line: planes 0/3 are luma-sized, 1/2 chroma-sized. */
    const int start[4] = {lumY,
                          chrY,
                          chrY,
                          lumY};

    /* Per-plane one-past-last line. */
    const int end[4] = {lumY +lumH,
                        chrY + chrH,
                        chrY + chrH,
                        lumY + lumH};

    /* Base pointer of the first requested line of each plane. */
    uint8_t *const src_[4] = {src[0] + (relative ? 0 : start[0]) * stride[0],
                              src[1] + (relative ? 0 : start[1]) * stride[1],
                              src[2] + (relative ? 0 : start[2]) * stride[2],
                              src[3] + (relative ? 0 : start[3]) * stride[3]};

    s->width = srcW;

    for (i = 0; i < 4; ++i) {
        int j;
        int first = s->plane[i].sliceY;                /* current window start */
        int n = s->plane[i].available_lines;           /* table capacity       */
        int lines = end[i] - start[i];                 /* lines to register    */
        int tot_lines = end[i] - first;                /* window size if kept  */

        if (start[i] >= first && n >= tot_lines) {
            /* Region continues the existing window: extend in place. */
            s->plane[i].sliceH = FFMAX(tot_lines, s->plane[i].sliceH);
            for (j = 0; j < lines; j+= 1)
                s->plane[i].line[start[i] - first + j] = src_[i] +  j * stride[i];
        } else {
            /* Disjoint or too large: rebase the window at start[i]. */
            s->plane[i].sliceY = start[i];
            lines = lines > n ? n : lines;             /* clamp to capacity */
            s->plane[i].sliceH = lines;
            for (j = 0; j < lines; j+= 1)
                s->plane[i].line[j] = src_[i] +  j * stride[i];
        }

    }

    return 0;
}
|
||||
|
||||
static void fill_ones(SwsSlice *s, int n, int bpc)
{
    /* Pre-fill every line of the slice with the fixed-point value 1.0 at
     * the precision matching the destination bit depth (used so planes
     * that are never written still normalize correctly).  @n is the line
     * payload in 16-bit units; the element count is derived per depth. */
    int p, l, k;

    for (p = 0; p < 4; ++p) {
        const int num_lines = s->plane[p].available_lines;

        for (l = 0; l < num_lines; ++l) {
            uint8_t *line = s->plane[p].line[l];

            switch (bpc) {
            case 16:
                for (k = 0; k < (n >> 1) + 1; ++k)
                    ((int32_t *)line)[k] = 1 << 18;
                break;
            case 32:
                for (k = 0; k < (n >> 2) + 1; ++k)
                    ((int64_t *)line)[k] = 1LL << 34;
                break;
            default:  /* 8..15 bpc path */
                for (k = 0; k < n + 1; ++k)
                    ((int16_t *)line)[k] = 1 << 14;
                break;
            }
        }
    }
}
|
||||
|
||||
/*
|
||||
Calculates the minimum ring buffer size, it should be able to store vFilterSize
|
||||
more n lines where n is the max difference between each adjacent slice which
|
||||
outputs a line.
|
||||
The n lines are needed only when there is not enough src lines to output a single
|
||||
dst line, then we should buffer these lines to process them on the next call to scale.
|
||||
*/
|
||||
static void get_min_buffer_size(SwsContext *c, int *out_lum_size, int *out_chr_size)
{
    /* Compute the minimum luma/chroma ring-buffer sizes (in lines) needed
     * so every destination line's vertical-filter footprint fits.  See the
     * comment above this function: extra lines are required when adjacent
     * output lines need source lines from different slices. */
    int lumY;
    int dstH = c->dstH;
    int chrDstH = c->chrDstH;
    int *lumFilterPos = c->vLumFilterPos;
    int *chrFilterPos = c->vChrFilterPos;
    int lumFilterSize = c->vLumFilterSize;
    int chrFilterSize = c->vChrFilterSize;
    int chrSubSample = c->chrSrcVSubSample;

    /* At least one full filter footprint. */
    *out_lum_size = lumFilterSize;
    *out_chr_size = chrFilterSize;

    for (lumY = 0; lumY < dstH; lumY++) {
        int chrY = (int64_t)lumY * chrDstH / dstH;  /* matching chroma output line */
        /* Last source line touched by either filter for this output line. */
        int nextSlice = FFMAX(lumFilterPos[lumY] + lumFilterSize - 1,
                              ((chrFilterPos[chrY] + chrFilterSize - 1)
                               << chrSubSample));

        /* Round down to a chroma-subsample boundary. */
        nextSlice >>= chrSubSample;
        nextSlice <<= chrSubSample;
        (*out_lum_size) = FFMAX((*out_lum_size), nextSlice - lumFilterPos[lumY]);
        (*out_chr_size) = FFMAX((*out_chr_size), (nextSlice >> chrSubSample) - chrFilterPos[chrY]);
    }
}
|
||||
|
||||
|
||||
|
||||
/*
 * Build the slice/descriptor processing chain for a scaling context:
 * input slice -> (optional gamma / format conversion) -> horizontal
 * scaler -> vertical scaler -> output slice.
 *
 * Returns 0 on success or a negative AVERROR; on failure everything
 * allocated so far is released via ff_free_filters().
 *
 * Fix: the result of av_mallocz_array() for c->slice was never checked
 * (unlike c->desc), so an allocation failure led to a NULL dereference
 * in the first alloc_slice() call below.  A check is added.
 */
int ff_init_filters(SwsContext * c)
{
    int i;
    int index;
    int num_ydesc;
    int num_cdesc;
    /* planar YUV output needs separate luma+chroma vertical scalers */
    int num_vdesc = isPlanarYUV(c->dstFormat) && !isGray(c->dstFormat) ? 2 : 1;
    int need_lum_conv = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar;
    int need_chr_conv = c->chrToYV12 || c->readChrPlanar;
    int need_gamma = c->is_internal_gamma;
    int srcIdx, dstIdx;
    /* horizontal scaler output stride: dstW int16 samples + slack, aligned */
    int dst_stride = FFALIGN(c->dstW * sizeof(int16_t) + 66, 16);

    uint32_t * pal = usePal(c->srcFormat) ? c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table;
    int res = 0;

    int lumBufSize;
    int chrBufSize;

    get_min_buffer_size(c, &lumBufSize, &chrBufSize);
    lumBufSize = FFMAX(lumBufSize, c->vLumFilterSize + MAX_LINES_AHEAD);
    chrBufSize = FFMAX(chrBufSize, c->vChrFilterSize + MAX_LINES_AHEAD);

    /* wider intermediate samples need proportionally wider lines */
    if (c->dstBpc == 16)
        dst_stride <<= 1;

    if (c->dstBpc == 32)
        dst_stride <<= 2;

    /* a format conversion step adds one descriptor (and one slice) */
    num_ydesc = need_lum_conv ? 2 : 1;
    num_cdesc = need_chr_conv ? 2 : 1;

    c->numSlice = FFMAX(num_ydesc, num_cdesc) + 2;
    c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0);
    /* descIndex[0]/[1] split the chain into luma / chroma / vertical parts */
    c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0);
    c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0);

    c->desc = av_mallocz_array(sizeof(SwsFilterDescriptor), c->numDesc);
    if (!c->desc)
        return AVERROR(ENOMEM);
    c->slice = av_mallocz_array(sizeof(SwsSlice), c->numSlice);
    if (!c->slice) {
        /* was missing: avoid NULL dereference in alloc_slice() below */
        res = AVERROR(ENOMEM);
        goto cleanup;
    }

    /* slice 0: wraps the caller's source picture */
    res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
    if (res < 0) goto cleanup;
    /* intermediate conversion slices */
    for (i = 1; i < c->numSlice-2; ++i) {
        res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
        if (res < 0) goto cleanup;
        res = alloc_lines(&c->slice[i], FFALIGN(c->srcW*2+78, 16), c->srcW);
        if (res < 0) goto cleanup;
    }
    // horizontal scaler output (ring buffer)
    res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrDstHSubSample, c->chrDstVSubSample, 1);
    if (res < 0) goto cleanup;
    res = alloc_lines(&c->slice[i], dst_stride, c->dstW);
    if (res < 0) goto cleanup;

    /* prefill with fixed-point 1.0 at the destination depth */
    fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc);

    // vertical scaler output (wraps the caller's destination picture)
    ++i;
    res = alloc_slice(&c->slice[i], c->dstFormat, c->dstH, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0);
    if (res < 0) goto cleanup;

    index = 0;
    srcIdx = 0;
    dstIdx = 1;

    if (need_gamma) {
        res = ff_init_gamma_convert(c->desc + index, c->slice + srcIdx, c->inv_gamma);
        if (res < 0) goto cleanup;
        ++index;
    }

    if (need_lum_conv) {
        res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
        if (res < 0) goto cleanup;
        c->desc[index].alpha = c->needAlpha;
        ++index;
        srcIdx = dstIdx;
    }

    /* luma horizontal scaler into the ring slice */
    dstIdx = FFMAX(num_ydesc, num_cdesc);
    res = ff_init_desc_hscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc);
    if (res < 0) goto cleanup;
    c->desc[index].alpha = c->needAlpha;

    ++index;
    {
        srcIdx = 0;
        dstIdx = 1;
        if (need_chr_conv) {
            res = ff_init_desc_cfmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
            if (res < 0) goto cleanup;
            ++index;
            srcIdx = dstIdx;
        }

        /* chroma horizontal scaler (or pass-through) into the ring slice */
        dstIdx = FFMAX(num_ydesc, num_cdesc);
        if (c->needs_hcscale)
            res = ff_init_desc_chscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hChrFilter, c->hChrFilterPos, c->hChrFilterSize, c->chrXInc);
        else
            res = ff_init_desc_no_chr(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx]);
        if (res < 0) goto cleanup;
    }

    ++index;
    {
        /* vertical scaler: ring slice -> destination slice */
        srcIdx = c->numSlice - 2;
        dstIdx = c->numSlice - 1;
        res = ff_init_vscale(c, c->desc + index, c->slice + srcIdx, c->slice + dstIdx);
        if (res < 0) goto cleanup;
    }

    ++index;
    if (need_gamma) {
        res = ff_init_gamma_convert(c->desc + index, c->slice + dstIdx, c->gamma);
        if (res < 0) goto cleanup;
    }

    return 0;

cleanup:
    ff_free_filters(c);
    return res;
}
|
||||
|
||||
int ff_free_filters(SwsContext *c)
{
    /* Tear down everything ff_init_filters() created.  Safe to call on a
     * partially initialized context (both arrays are NULL-checked and the
     * entries were zero-allocated). */
    int n;

    if (c->desc) {
        /* each descriptor owns an opaque instance blob */
        for (n = c->numDesc - 1; n >= 0; --n)
            av_freep(&c->desc[n].instance);
        av_freep(&c->desc);
    }

    if (c->slice) {
        for (n = c->numSlice - 1; n >= 0; --n)
            free_slice(&c->slice[n]);
        av_freep(&c->slice);
    }
    return 0;
}
|
||||
984
externals/ffmpeg/libswscale/swscale.c
vendored
Executable file
984
externals/ffmpeg/libswscale/swscale.c
vendored
Executable file
@@ -0,0 +1,984 @@
|
||||
/*
|
||||
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/avutil.h"
|
||||
#include "libavutil/bswap.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mathematics.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "config.h"
|
||||
#include "rgb2rgb.h"
|
||||
#include "swscale_internal.h"
|
||||
#include "swscale.h"
|
||||
|
||||
/* 8x8 ordered-dither matrix (values in 0..126), indexed with [y & 7] by
 * swscale() below.  NOTE(review): row 8 duplicates row 0 — presumably so
 * readers that access one row past the index stay in bounds; confirm. */
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = {
    {  36, 68, 60, 92, 34, 66, 58, 90, },
    { 100,  4,124, 28, 98,  2,122, 26, },
    {  52, 84, 44, 76, 50, 82, 42, 74, },
    { 116, 20,108, 12,114, 18,106, 10, },
    {  32, 64, 56, 88, 38, 70, 62, 94, },
    {  96,  0,120, 24,102,  6,126, 30, },
    {  48, 80, 40, 72, 54, 86, 46, 78, },
    { 112, 16,104,  8,118, 22,110, 14, },
    {  36, 68, 60, 92, 34, 66, 58, 90, },
};
|
||||
|
||||
/* Flat "dither" row (all 64): installed as lumDither8/chrDither8 by
 * swscale() when no real dithering is wanted. */
DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = {
    64, 64, 64, 64, 64, 64, 64, 64
};
|
||||
|
||||
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
                                       int height, int y, uint8_t val)
{
    /* Fill a width x height sub-rectangle of the plane, starting at row y,
     * with the constant byte val. */
    uint8_t *row = plane + stride * y;
    int n;

    for (n = 0; n < height; n++) {
        memset(row, val, width);
        row += stride;
    }
}
|
||||
|
||||
/*
 * Horizontal scaling of >8-bit input to 19-bit intermediate samples.
 * For each of dstW outputs, convolves filterSize 16-bit source samples
 * (starting at filterPos[i]) with 14-bit filter coefficients, then
 * shifts by sh and clips to 19 bits.
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
    int i;
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = desc->comp[0].depth - 1;
    int sh = bits - 4;  /* default: keep 19 significant bits of the product */

    if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
        sh = 9;  /* sub-16-bit RGB/PAL8 input uses a fixed shift */
    } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
        sh = 16 - 1 - 4;
    }

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input up to 16 bit => product up to 30 bit; >> sh leaves 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}
|
||||
|
||||
/*
 * Horizontal scaling of >8-bit input to 15-bit intermediate samples.
 * Same structure as hScale16To19_c but producing int16 output: convolve,
 * shift by sh, clip to 15 bits.
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    int sh = desc->comp[0].depth - 1;

    if (sh<15) {
        /* shallow input: RGB/PAL8 uses a fixed shift of 13 */
        sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
    } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
        sh = 16 - 1;
    }

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input up to 16 bit => product up to 30 bit; >> sh leaves 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}
|
||||
|
||||
// bilinear / bicubic scaling
|
||||
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    /* 8-bit input -> 15-bit intermediate: convolve filterSize samples per
     * output, >>7, clip (the cubic filter can overflow 15 bits). */
    int i, j;

    for (i = 0; i < dstW; i++) {
        const uint8_t *sp = src + filterPos[i];
        const int16_t *fp = filter + filterSize * i;
        int acc = 0;

        for (j = 0; j < filterSize; j++)
            acc += sp[j] * fp[j];

        dst[i] = FFMIN(acc >> 7, (1 << 15) - 1);
    }
}
|
||||
|
||||
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    /* 8-bit input -> 19-bit intermediate: convolve filterSize samples per
     * output, >>3, clip (the cubic filter can overflow). */
    int32_t *dst = (int32_t *) _dst;
    int i, j;

    for (i = 0; i < dstW; i++) {
        const uint8_t *sp = src + filterPos[i];
        const int16_t *fp = filter + filterSize * i;
        int acc = 0;

        for (j = 0; j < filterSize; j++)
            acc += sp[j] * fp[j];

        dst[i] = FFMIN(acc >> 3, (1 << 19) - 1);
    }
}
|
||||
|
||||
// FIXME all pal and rgb srcFormats could do this conversion as well
|
||||
// FIXME all scalers more complex than bilinear could do half of this transform
|
||||
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    /* Limited-range -> full-range (JPEG) chroma, fixed-point, in place.
     * Inputs are clamped to 30775 before the multiply. */
    int n;

    for (n = 0; n < width; n++) {
        int u = FFMIN(dstU[n], 30775);
        int v = FFMIN(dstV[n], 30775);

        dstU[n] = (u * 4663 - 9289992) >> 12; // -264
        dstV[n] = (v * 4663 - 9289992) >> 12; // -264
    }
}
|
||||
|
||||
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    /* Full-range (JPEG) -> limited-range chroma, fixed-point, in place. */
    int n = 0;

    while (n < width) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11; // 1469
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11; // 1469
        n++;
    }
}
|
||||
|
||||
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    /* Limited-range -> full-range (JPEG) luma, fixed-point, in place. */
    int n;

    for (n = 0; n < width; n++) {
        int clipped = FFMIN(dst[n], 30189);
        dst[n] = (clipped * 19077 - 39057361) >> 14;
    }
}
|
||||
|
||||
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    /* Full-range (JPEG) -> limited-range luma, fixed-point, in place. */
    int n = 0;

    while (n < width) {
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
        ++n;
    }
}
|
||||
|
||||
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    /* 16-bit variant of chrRangeToJpeg_c: the buffers actually hold
     * int32 samples scaled by 16, hence the shifted constants. */
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        int32_t uc = FFMIN(u[n], 30775 << 4);
        int32_t vc = FFMIN(v[n], 30775 << 4);

        u[n] = (uc * 4663 - (9289992 << 4)) >> 12; // -264
        v[n] = (vc * 4663 - (9289992 << 4)) >> 12; // -264
    }
}
|
||||
|
||||
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    /* 16-bit variant of chrRangeFromJpeg_c: the buffers actually hold
     * int32 samples scaled by 16, hence the shifted constant. */
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int n = 0;

    while (n < width) {
        u[n] = (u[n] * 1799 + (4081085 << 4)) >> 11; // 1469
        v[n] = (v[n] * 1799 + (4081085 << 4)) >> 11; // 1469
        n++;
    }
}
|
||||
|
||||
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    /* 16-bit variant of lumRangeToJpeg_c; buffers hold int32 samples.
     * The multiply is done unsigned (4769U) to avoid signed overflow,
     * then the result is reinterpreted as int before the shift. */
    int32_t *dst = (int32_t *) _dst;
    int n;

    for (n = 0; n < width; n++) {
        int32_t clipped = FFMIN(dst[n], 30189 << 4);
        dst[n] = ((int)(clipped * 4769U - (39057361 << 2))) >> 12;
    }
}
|
||||
|
||||
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    /* 16-bit variant of lumRangeFromJpeg_c; buffers hold int32 samples.
     * Constants are divided by 4 so the shift can stay at 12 without
     * overflowing the intermediate. */
    int32_t *dst = (int32_t *) _dst;
    int n = 0;

    while (n < width) {
        dst[n] = (dst[n] * (14071 / 4) + (33561947 << 4) / 4) >> 12;
        n++;
    }
}
|
||||
|
||||
|
||||
/* Set DEBUG_SWSCALE_BUFFERS to 1 to trace slice/buffer management inside
 * swscale() via av_log (uses the local variable `c` at the call sites). */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...)                      \
    if (DEBUG_SWSCALE_BUFFERS)                  \
        av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
|
||||
|
||||
/*
 * Core scaling loop: convert/scale the source slice
 * [srcSliceY, srcSliceY + srcSliceH) into dst by running the descriptor
 * chain built by ff_init_filters() (input conversion -> horizontal scale
 * -> vertical scale/output) one destination line at a time.
 *
 * Returns the number of destination lines written.  State that must
 * survive across per-slice calls (dstY, lastInLumBuf, lastInChrBuf) is
 * stored back into the context before returning.
 */
static int swscale(SwsContext *c, const uint8_t *src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t *dst[], int dstStride[])
{
    /* load a few things into local vars to make the code more readable?
     * and faster */
    const int dstW = c->dstW;
    const int dstH = c->dstH;

    const enum AVPixelFormat dstFormat = c->dstFormat;
    const int flags = c->flags;
    int32_t *vLumFilterPos = c->vLumFilterPos;
    int32_t *vChrFilterPos = c->vChrFilterPos;

    const int vLumFilterSize = c->vLumFilterSize;
    const int vChrFilterSize = c->vChrFilterSize;

    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
    yuv2anyX_fn yuv2anyX = c->yuv2anyX;
    const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH = AV_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample);
    int should_dither = isNBPS(c->srcFormat) ||
                        is16BPS(c->srcFormat);
    int lastDstY;

    /* vars which will change and which we need to store back in the context */
    int dstY = c->dstY;
    int lastInLumBuf = c->lastInLumBuf;
    int lastInChrBuf = c->lastInChrBuf;

    /* descriptor-chain partition: [lumStart,lumEnd) luma conversion+hscale,
     * [chrStart,chrEnd) chroma, [vStart,vEnd) vertical scale/output */
    int lumStart = 0;
    int lumEnd = c->descIndex[0];
    int chrStart = lumEnd;
    int chrEnd = c->descIndex[1];
    int vStart = chrEnd;
    int vEnd = c->numDesc;
    SwsSlice *src_slice = &c->slice[lumStart];
    SwsSlice *hout_slice = &c->slice[c->numSlice-2];  /* hscale ring output */
    SwsSlice *vout_slice = &c->slice[c->numSlice-1];  /* wraps dst picture  */
    SwsFilterDescriptor *desc = c->desc;

    int needAlpha = c->needAlpha;

    int hasLumHoles = 1;
    int hasChrHoles = 1;

    /* packed input: all planes read from the same base/stride */
    if (isPacked(c->srcFormat)) {
        src[1] =
        src[2] =
        src[3] = src[0];
        srcStride[1] =
        srcStride[2] =
        srcStride[3] = srcStride[0];
    }
    srcStride[1] *= 1 << c->vChrDrop;
    srcStride[2] *= 1 << c->vChrDrop;

    DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1],
                  src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1],
                  dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vChrFilterSize: %d\n",
                  vLumFilterSize, vChrFilterSize);

    if (dstStride[0]&15 || dstStride[1]&15 ||
        dstStride[2]&15 || dstStride[3]&15) {
        static int warnedAlready = 0; // FIXME maybe move this into the context
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING,
                   "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
            warnedAlready = 1;
        }
    }

    if (   (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
        || (uintptr_t)src[0]&15 || (uintptr_t)src[1]&15 || (uintptr_t)src[2]&15
        || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
        || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
    ) {
        static int warnedAlready=0;
        int cpu_flags = av_get_cpu_flags();
        if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speed loss\n");
            warnedAlready=1;
        }
    }

    /* Note the user might start scaling the picture in the middle so this
     * will not get executed. This is not really intended but works
     * currently, so people might do it. */
    if (srcSliceY == 0) {
        dstY = 0;
        lastInLumBuf = -1;
        lastInChrBuf = -1;
    }

    if (!should_dither) {
        /* flat table: effectively disables dithering */
        c->chrDither8 = c->lumDither8 = sws_pb_64;
    }
    lastDstY = dstY;

    ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
                   yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, c->use_mmx_vfilter);

    /* register the incoming source lines and the output picture lines */
    ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW,
            srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);

    ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
            dstY, dstH, dstY >> c->chrDstVSubSample,
            AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
    if (srcSliceY == 0) {
        /* fresh frame: reset the hscale ring-buffer window */
        hout_slice->plane[0].sliceY = lastInLumBuf + 1;
        hout_slice->plane[1].sliceY = lastInChrBuf + 1;
        hout_slice->plane[2].sliceY = lastInChrBuf + 1;
        hout_slice->plane[3].sliceY = lastInLumBuf + 1;

        hout_slice->plane[0].sliceH =
        hout_slice->plane[1].sliceH =
        hout_slice->plane[2].sliceH =
        hout_slice->plane[3].sliceH = 0;
        hout_slice->width = dstW;
    }

    for (; dstY < dstH; dstY++) {
        const int chrDstY = dstY >> c->chrDstVSubSample;
        int use_mmx_vfilter= c->use_mmx_vfilter;

        // First line needed as input
        const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
        // First line needed as input
        const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);

        // Last line needed as input
        int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
        int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
        int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
        int enough_lines;

        int i;
        int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY;

        // handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) {

            /* a gap between buffered and needed lines forces a window reset */
            hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
            if (hasLumHoles) {
                hout_slice->plane[0].sliceY = firstLumSrcY;
                hout_slice->plane[3].sliceY = firstLumSrcY;
                hout_slice->plane[0].sliceH =
                hout_slice->plane[3].sliceH = 0;
            }

            lastInLumBuf = firstLumSrcY - 1;
        }
        if (firstChrSrcY > lastInChrBuf) {

            hasChrHoles = lastInChrBuf != firstChrSrcY - 1;
            if (hasChrHoles) {
                hout_slice->plane[1].sliceY = firstChrSrcY;
                hout_slice->plane[2].sliceY = firstChrSrcY;
                hout_slice->plane[1].sliceH =
                hout_slice->plane[2].sliceH = 0;
            }

            lastInChrBuf = firstChrSrcY - 1;
        }

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
                       lastChrSrcY < AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample);

        if (!enough_lines) {
            /* buffer whatever this slice provides and stop after hscale */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        }

        av_assert0((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->plane[0].available_lines);
        av_assert0((lastChrSrcY - firstChrSrcY + 1) <= hout_slice->plane[1].available_lines);


        /* pick the range of lines the hscale stage must (re)produce */
        posY = hout_slice->plane[0].sliceY + hout_slice->plane[0].sliceH;
        if (posY <= lastLumSrcY && !hasLumHoles) {
            firstPosY = FFMAX(firstLumSrcY, posY);
            lastPosY = FFMIN(firstLumSrcY + hout_slice->plane[0].available_lines - 1, srcSliceY + srcSliceH - 1);
        } else {
            firstPosY = posY;
            lastPosY = lastLumSrcY;
        }

        cPosY = hout_slice->plane[1].sliceY + hout_slice->plane[1].sliceH;
        if (cPosY <= lastChrSrcY && !hasChrHoles) {
            firstCPosY = FFMAX(firstChrSrcY, cPosY);
            lastCPosY = FFMIN(firstChrSrcY + hout_slice->plane[1].available_lines - 1, AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample) - 1);
        } else {
            firstCPosY = cPosY;
            lastCPosY = lastChrSrcY;
        }

        /* slide the ring window forward if we have moved past it */
        ff_rotate_slice(hout_slice, lastPosY, lastCPosY);

        if (posY < lastLumSrcY + 1) {
            for (i = lumStart; i < lumEnd; ++i)
                desc[i].process(c, &desc[i], firstPosY, lastPosY - firstPosY + 1);
        }

        lastInLumBuf = lastLumSrcY;

        if (cPosY < lastChrSrcY + 1) {
            for (i = chrStart; i < chrEnd; ++i)
                desc[i].process(c, &desc[i], firstCPosY, lastCPosY - firstCPosY + 1);
        }

        lastInChrBuf = lastChrSrcY;

        if (!enough_lines)
            break; // we can't output a dstY line so let's try with the next slice

#if HAVE_MMX_INLINE
        ff_updateMMXDitherTables(c, dstY);
#endif
        if (should_dither) {
            c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
            c->lumDither8 = ff_dither_8x8_128[dstY & 7];
        }
        if (dstY >= dstH - 2) {
            /* hmm looks like we can't use MMX here without overwriting
             * this array's tail */
            ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                     &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
            use_mmx_vfilter= 0;
            ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
                           yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter);
        }

        {
            /* vertical scale + output of exactly one destination line */
            for (i = vStart; i < vEnd; ++i)
                desc[i].process(c, &desc[i], dstY, 1);
        }
    }
    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !needAlpha) {
        /* destination has an alpha plane but no alpha was produced:
         * fill it with full opacity */
        int length = dstW;
        int height = dstY - lastDstY;

        if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
            fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
                    1, desc->comp[3].depth,
                    isBE(dstFormat));
        } else if (is32BPS(dstFormat)) {
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
            fillPlane32(dst[3], dstStride[3], length, height, lastDstY,
                    1, desc->comp[3].depth,
                    isBE(dstFormat), desc->flags & AV_PIX_FMT_FLAG_FLOAT);
        } else
            fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
    }

#if HAVE_MMXEXT_INLINE
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
        __asm__ volatile ("sfence" ::: "memory");
#endif
    emms_c();

    /* store changed local vars back in the context */
    c->dstY = dstY;
    c->lastInLumBuf = lastInLumBuf;
    c->lastInChrBuf = lastInChrBuf;

    return dstY - lastDstY;
}
|
||||
|
||||
/**
 * Install the luma/chroma range-conversion functions on the context.
 *
 * Conversion is only needed when the source and destination ranges
 * differ and the destination is not an RGB-like format; otherwise the
 * pointers stay NULL. The 8..14 bpc and >14 bpc paths use separate
 * implementations.
 */
av_cold void ff_sws_init_range_convert(SwsContext *c)
{
    /* default: no range conversion */
    c->lumConvertRange = NULL;
    c->chrConvertRange = NULL;

    if (c->srcRange == c->dstRange || isAnyRGB(c->dstFormat))
        return;

    if (c->dstBpc <= 14) {
        if (c->srcRange) {
            c->lumConvertRange = lumRangeFromJpeg_c;   /* full -> limited */
            c->chrConvertRange = chrRangeFromJpeg_c;
        } else {
            c->lumConvertRange = lumRangeToJpeg_c;     /* limited -> full */
            c->chrConvertRange = chrRangeToJpeg_c;
        }
    } else {
        if (c->srcRange) {
            c->lumConvertRange = lumRangeFromJpeg16_c;
            c->chrConvertRange = chrRangeFromJpeg16_c;
        } else {
            c->lumConvertRange = lumRangeToJpeg16_c;
            c->chrConvertRange = chrRangeToJpeg16_c;
        }
    }
}
|
||||
|
||||
/**
 * Wire up the generic C function pointers of the scaler: output stage,
 * input unpackers, horizontal scalers, and range conversion.
 */
static av_cold void sws_init_swscale(SwsContext *c)
{
    const enum AVPixelFormat src_fmt = c->srcFormat;

    /* vertical scaling / output writers */
    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);

    /* input readers / unpackers */
    ff_sws_init_input_funcs(c);

    /* horizontal scalers, selected by source/destination bit depth */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 14) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                /* fast approximate bilinear path for 8-bit input */
                c->hyscale_fast = ff_hyscale_fast_c;
                c->hcscale_fast = ff_hcscale_fast_c;
            }
        } else {
            c->hyScale = c->hcScale = hScale8To19_c;
        }
    } else {
        c->hyScale = c->hcScale = (c->dstBpc > 14) ? hScale16To19_c
                                                   : hScale16To15_c;
    }

    ff_sws_init_range_convert(c);

    /* chroma horizontal scaling can be skipped only for gray / 1-bit
     * sources and gray destinations */
    if (!isGray(src_fmt) && !isGray(c->dstFormat) &&
        src_fmt != AV_PIX_FMT_MONOBLACK && src_fmt != AV_PIX_FMT_MONOWHITE)
        c->needs_hcscale = 1;
}
|
||||
|
||||
/**
 * Select and return the scaling function for the given context.
 *
 * Installs the generic C implementations first, then lets each enabled
 * architecture override individual function pointers with optimized
 * versions. The ARCH_* macros are compile-time constants, so the
 * untaken branches are eliminated by the compiler; the ordering matters
 * (arch-specific init must run after the generic init it overrides).
 *
 * @return the main scaling entry point (the static swscale() driver)
 */
SwsFunc ff_getSwsFunc(SwsContext *c)
{
    sws_init_swscale(c);

    if (ARCH_PPC)
        ff_sws_init_swscale_ppc(c);
    if (ARCH_X86)
        ff_sws_init_swscale_x86(c);
    if (ARCH_AARCH64)
        ff_sws_init_swscale_aarch64(c);
    if (ARCH_ARM)
        ff_sws_init_swscale_arm(c);

    return swscale;
}
|
||||
|
||||
static void reset_ptr(const uint8_t *src[], enum AVPixelFormat format)
|
||||
{
|
||||
if (!isALPHA(format))
|
||||
src[3] = NULL;
|
||||
if (!isPlanar(format)) {
|
||||
src[3] = src[2] = NULL;
|
||||
|
||||
if (!usePal(format))
|
||||
src[1] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int check_image_pointers(const uint8_t * const data[4], enum AVPixelFormat pix_fmt,
|
||||
const int linesizes[4])
|
||||
{
|
||||
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
|
||||
int i;
|
||||
|
||||
av_assert2(desc);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
int plane = desc->comp[i].plane;
|
||||
if (!data[plane] || !linesizes[plane])
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst,
|
||||
const uint16_t *src, int stride, int h)
|
||||
{
|
||||
int xp,yp;
|
||||
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
|
||||
|
||||
for (yp=0; yp<h; yp++) {
|
||||
for (xp=0; xp+2<stride; xp+=3) {
|
||||
int x, y, z, r, g, b;
|
||||
|
||||
if (desc->flags & AV_PIX_FMT_FLAG_BE) {
|
||||
x = AV_RB16(src + xp + 0);
|
||||
y = AV_RB16(src + xp + 1);
|
||||
z = AV_RB16(src + xp + 2);
|
||||
} else {
|
||||
x = AV_RL16(src + xp + 0);
|
||||
y = AV_RL16(src + xp + 1);
|
||||
z = AV_RL16(src + xp + 2);
|
||||
}
|
||||
|
||||
x = c->xyzgamma[x>>4];
|
||||
y = c->xyzgamma[y>>4];
|
||||
z = c->xyzgamma[z>>4];
|
||||
|
||||
// convert from XYZlinear to sRGBlinear
|
||||
r = c->xyz2rgb_matrix[0][0] * x +
|
||||
c->xyz2rgb_matrix[0][1] * y +
|
||||
c->xyz2rgb_matrix[0][2] * z >> 12;
|
||||
g = c->xyz2rgb_matrix[1][0] * x +
|
||||
c->xyz2rgb_matrix[1][1] * y +
|
||||
c->xyz2rgb_matrix[1][2] * z >> 12;
|
||||
b = c->xyz2rgb_matrix[2][0] * x +
|
||||
c->xyz2rgb_matrix[2][1] * y +
|
||||
c->xyz2rgb_matrix[2][2] * z >> 12;
|
||||
|
||||
// limit values to 12-bit depth
|
||||
r = av_clip_uintp2(r, 12);
|
||||
g = av_clip_uintp2(g, 12);
|
||||
b = av_clip_uintp2(b, 12);
|
||||
|
||||
// convert from sRGBlinear to RGB and scale from 12bit to 16bit
|
||||
if (desc->flags & AV_PIX_FMT_FLAG_BE) {
|
||||
AV_WB16(dst + xp + 0, c->rgbgamma[r] << 4);
|
||||
AV_WB16(dst + xp + 1, c->rgbgamma[g] << 4);
|
||||
AV_WB16(dst + xp + 2, c->rgbgamma[b] << 4);
|
||||
} else {
|
||||
AV_WL16(dst + xp + 0, c->rgbgamma[r] << 4);
|
||||
AV_WL16(dst + xp + 1, c->rgbgamma[g] << 4);
|
||||
AV_WL16(dst + xp + 2, c->rgbgamma[b] << 4);
|
||||
}
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst,
|
||||
const uint16_t *src, int stride, int h)
|
||||
{
|
||||
int xp,yp;
|
||||
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
|
||||
|
||||
for (yp=0; yp<h; yp++) {
|
||||
for (xp=0; xp+2<stride; xp+=3) {
|
||||
int x, y, z, r, g, b;
|
||||
|
||||
if (desc->flags & AV_PIX_FMT_FLAG_BE) {
|
||||
r = AV_RB16(src + xp + 0);
|
||||
g = AV_RB16(src + xp + 1);
|
||||
b = AV_RB16(src + xp + 2);
|
||||
} else {
|
||||
r = AV_RL16(src + xp + 0);
|
||||
g = AV_RL16(src + xp + 1);
|
||||
b = AV_RL16(src + xp + 2);
|
||||
}
|
||||
|
||||
r = c->rgbgammainv[r>>4];
|
||||
g = c->rgbgammainv[g>>4];
|
||||
b = c->rgbgammainv[b>>4];
|
||||
|
||||
// convert from sRGBlinear to XYZlinear
|
||||
x = c->rgb2xyz_matrix[0][0] * r +
|
||||
c->rgb2xyz_matrix[0][1] * g +
|
||||
c->rgb2xyz_matrix[0][2] * b >> 12;
|
||||
y = c->rgb2xyz_matrix[1][0] * r +
|
||||
c->rgb2xyz_matrix[1][1] * g +
|
||||
c->rgb2xyz_matrix[1][2] * b >> 12;
|
||||
z = c->rgb2xyz_matrix[2][0] * r +
|
||||
c->rgb2xyz_matrix[2][1] * g +
|
||||
c->rgb2xyz_matrix[2][2] * b >> 12;
|
||||
|
||||
// limit values to 12-bit depth
|
||||
x = av_clip_uintp2(x, 12);
|
||||
y = av_clip_uintp2(y, 12);
|
||||
z = av_clip_uintp2(z, 12);
|
||||
|
||||
// convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
|
||||
if (desc->flags & AV_PIX_FMT_FLAG_BE) {
|
||||
AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4);
|
||||
AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4);
|
||||
AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4);
|
||||
} else {
|
||||
AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4);
|
||||
AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4);
|
||||
AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4);
|
||||
}
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * swscale wrapper, so we don't need to export the SwsContext.
 * Assumes planar YUV to be in YUV order instead of YVU.
 *
 * Handles slice validation, cascaded (multi-pass) scaling, palette
 * expansion, forced-alpha and XYZ pre/post conversion, and internal
 * vertical flipping for bottom-up slices, then dispatches to the
 * context's selected scaling function.
 *
 * @return the number of output lines written, 0 or a negative
 *         AVERROR code on failure
 */
int attribute_align_arg sws_scale(struct SwsContext *c,
                                  const uint8_t * const srcSlice[],
                                  const int srcStride[], int srcSliceY,
                                  int srcSliceH, uint8_t *const dst[],
                                  const int dstStride[])
{
    int i, ret;
    const uint8_t *src2[4];        /* working copies of the plane pointers */
    uint8_t *dst2[4];
    uint8_t *rgb0_tmp = NULL;      /* temp buffer for alpha/XYZ rewriting */
    /* slices must be aligned to the chroma macro-row (2 for Bayer) */
    int macro_height = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
    // copy strides, so they can safely be modified
    int srcStride2[4];
    int dstStride2[4];
    int srcSliceY_internal = srcSliceY;

    if (!srcStride || !dstStride || !dst || !srcSlice) {
        av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
        return 0;
    }

    for (i=0; i<4; i++) {
        srcStride2[i] = srcStride[i];
        dstStride2[i] = dstStride[i];
    }

    /* the slice start must be macro-row aligned; the height too, unless
     * this is the final slice of the frame */
    if ((srcSliceY & (macro_height-1)) ||
        ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH != c->srcH) ||
        srcSliceY + srcSliceH > c->srcH) {
        av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", srcSliceY, srcSliceH);
        return AVERROR(EINVAL);
    }

    /* gamma-correct scaling runs as a cascade of 2 or 3 sub-contexts */
    if (c->gamma_flag && c->cascaded_context[0]) {
        ret = sws_scale(c->cascaded_context[0],
                        srcSlice, srcStride, srcSliceY, srcSliceH,
                        c->cascaded_tmp, c->cascaded_tmpStride);

        if (ret < 0)
            return ret;

        /* second pass writes either to the intermediate buffer (3-stage)
         * or directly to the destination (2-stage) */
        if (c->cascaded_context[2])
            ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp, c->cascaded_tmpStride, srcSliceY, srcSliceH, c->cascaded1_tmp, c->cascaded1_tmpStride);
        else
            ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp, c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride);

        if (ret < 0)
            return ret;

        if (c->cascaded_context[2]) {
            /* third pass consumes the lines produced by pass two */
            ret = sws_scale(c->cascaded_context[2],
                            (const uint8_t * const *)c->cascaded1_tmp, c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret, c->cascaded_context[1]->dstY,
                            dst, dstStride);
        }
        return ret;
    }

    /* generic two-stage cascade: only usable for whole-frame calls */
    if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH) {
        ret = sws_scale(c->cascaded_context[0],
                        srcSlice, srcStride, srcSliceY, srcSliceH,
                        c->cascaded_tmp, c->cascaded_tmpStride);
        if (ret < 0)
            return ret;
        ret = sws_scale(c->cascaded_context[1],
                        (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride, 0, c->cascaded_context[0]->dstH,
                        dst, dstStride);
        return ret;
    }

    memcpy(src2, srcSlice, sizeof(src2));
    memcpy(dst2, dst, sizeof(dst2));

    // do not mess up sliceDir if we have a "trailing" 0-size slice
    if (srcSliceH == 0)
        return 0;

    if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
        av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
        return 0;
    }
    if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
        av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
        return 0;
    }

    /* the first slice of a frame must touch either the top or the
     * bottom edge; that also fixes the slice direction for the frame */
    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
        return 0;
    }
    if (c->sliceDir == 0) {
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
    }

    /* build the palette -> YUV and palette -> RGB lookup tables */
    if (usePal(c->srcFormat)) {
        for (i = 0; i < 256; i++) {
            int r, g, b, y, u, v, a = 0xff;
            if (c->srcFormat == AV_PIX_FMT_PAL8) {
                uint32_t p = ((const uint32_t *)(srcSlice[1]))[i];
                a = (p >> 24) & 0xFF;
                r = (p >> 16) & 0xFF;
                g = (p >> 8) & 0xFF;
                b = p & 0xFF;
            } else if (c->srcFormat == AV_PIX_FMT_RGB8) {
                r = ( i >> 5 ) * 36;
                g = ((i >> 2) & 7) * 36;
                b = ( i & 3) * 85;
            } else if (c->srcFormat == AV_PIX_FMT_BGR8) {
                b = ( i >> 6 ) * 85;
                g = ((i >> 3) & 7) * 36;
                r = ( i & 7) * 36;
            } else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) {
                r = ( i >> 3 ) * 255;
                g = ((i >> 1) & 3) * 85;
                b = ( i & 1) * 255;
            } else if (c->srcFormat == AV_PIX_FMT_GRAY8 || c->srcFormat == AV_PIX_FMT_GRAY8A) {
                r = g = b = i;
            } else {
                av_assert1(c->srcFormat == AV_PIX_FMT_BGR4_BYTE);
                b = ( i >> 3 ) * 255;
                g = ((i >> 1) & 3) * 85;
                r = ( i & 1) * 255;
            }
/* BT.601 RGB->YUV coefficients in 15-bit fixed point (limited range) */
#define RGB2YUV_SHIFT 15
#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))

            y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
            u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
            v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
            c->pal_yuv[i]= y + (u<<8) + (v<<16) + ((unsigned)a<<24);

            /* byte order of the RGB LUT entries depends on the
             * destination format and host endianness */
            switch (c->dstFormat) {
            case AV_PIX_FMT_BGR32:
#if !HAVE_BIGENDIAN
            case AV_PIX_FMT_RGB24:
#endif
                c->pal_rgb[i]= r + (g<<8) + (b<<16) + ((unsigned)a<<24);
                break;
            case AV_PIX_FMT_BGR32_1:
#if HAVE_BIGENDIAN
            case AV_PIX_FMT_BGR24:
#endif
                c->pal_rgb[i]= a + (r<<8) + (g<<16) + ((unsigned)b<<24);
                break;
            case AV_PIX_FMT_RGB32_1:
#if HAVE_BIGENDIAN
            case AV_PIX_FMT_RGB24:
#endif
                c->pal_rgb[i]= a + (b<<8) + (g<<16) + ((unsigned)r<<24);
                break;
            case AV_PIX_FMT_RGB32:
#if !HAVE_BIGENDIAN
            case AV_PIX_FMT_BGR24:
#endif
            default:
                c->pal_rgb[i]= b + (g<<8) + (r<<16) + ((unsigned)a<<24);
            }
        }
    }

    /* source claims an all-zero alpha channel but the destination wants
     * alpha: copy the input and force every alpha byte to 0xFF */
    if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
        uint8_t *base;
        int x,y;
        rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
        if (!rgb0_tmp)
            return AVERROR(ENOMEM);

        /* for a negative stride, point base at the last row so that
         * base + stride*y stays inside the allocation */
        base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
        for (y=0; y<srcSliceH; y++){
            memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW);
            for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) {
                base[ srcStride[0]*y + x] = 0xFF;
            }
        }
        src2[0] = base;
    }

    /* XYZ input (and not a same-size XYZ->XYZ copy): pre-convert the
     * slice to RGB48 in a temporary buffer */
    if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
        uint8_t *base;
        /* NOTE(review): if this path and the src0Alpha path above could
         * both run, the first rgb0_tmp allocation would leak; they look
         * mutually exclusive (XYZ formats carry no alpha) — confirm. */
        rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
        if (!rgb0_tmp)
            return AVERROR(ENOMEM);

        base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;

        xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH);
        src2[0] = base;
    }

    /* bit-exact error-diffusion dithering starts each frame from a
     * clean error buffer */
    if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
        for (i = 0; i < 4; i++)
            memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2));

    if (c->sliceDir != 1) {
        // slices go from bottom to top => we flip the image internally
        for (i=0; i<4; i++) {
            srcStride2[i] *= -1;
            dstStride2[i] *= -1;
        }

        /* move each plane pointer to its last row so that the negated
         * strides walk the image upwards */
        src2[0] += (srcSliceH - 1) * srcStride[0];
        if (!usePal(c->srcFormat))
            src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1];
        src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2];
        src2[3] += (srcSliceH - 1) * srcStride[3];
        dst2[0] += ( c->dstH - 1) * dstStride[0];
        dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1];
        dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2];
        dst2[3] += ( c->dstH - 1) * dstStride[3];

        srcSliceY_internal = c->srcH-srcSliceY-srcSliceH;
    }
    reset_ptr(src2, c->srcFormat);
    reset_ptr((void*)dst2, c->dstFormat);

    /* reset slice direction at end of frame */
    if (srcSliceY_internal + srcSliceH == c->srcH)
        c->sliceDir = 0;
    ret = c->swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH, dst2, dstStride2);

    /* XYZ output: convert the freshly written RGB48 lines back to XYZ
     * in place */
    if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
        int dstY = c->dstY ? c->dstY : srcSliceY + srcSliceH;
        uint16_t *dst16 = (uint16_t*)(dst2[0] + (dstY - ret) * dstStride2[0]);
        av_assert0(dstY >= ret);
        av_assert0(ret >= 0);
        av_assert0(c->dstH >= dstY);

        /* replace on the same data */
        rgb48Toxyz12(c, dst16, dst16, dstStride2[0]/2, ret);
    }

    av_free(rgb0_tmp);
    return ret;
}
|
||||
336
externals/ffmpeg/libswscale/swscale.h
vendored
Executable file
336
externals/ffmpeg/libswscale/swscale.h
vendored
Executable file
@@ -0,0 +1,336 @@
|
||||
/*
|
||||
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef SWSCALE_SWSCALE_H
|
||||
#define SWSCALE_SWSCALE_H
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @ingroup libsws
|
||||
* external API header
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/avutil.h"
|
||||
#include "libavutil/log.h"
|
||||
#include "libavutil/pixfmt.h"
|
||||
#include "version.h"
|
||||
|
||||
/**
|
||||
* @defgroup libsws libswscale
|
||||
* Color conversion and scaling library.
|
||||
*
|
||||
* @{
|
||||
*
|
||||
* Return the LIBSWSCALE_VERSION_INT constant.
|
||||
*/
|
||||
unsigned swscale_version(void);
|
||||
|
||||
/**
|
||||
* Return the libswscale build-time configuration.
|
||||
*/
|
||||
const char *swscale_configuration(void);
|
||||
|
||||
/**
|
||||
* Return the libswscale license.
|
||||
*/
|
||||
const char *swscale_license(void);
|
||||
|
||||
/* values for the flags, the stuff on the command line is different */
|
||||
#define SWS_FAST_BILINEAR 1
|
||||
#define SWS_BILINEAR 2
|
||||
#define SWS_BICUBIC 4
|
||||
#define SWS_X 8
|
||||
#define SWS_POINT 0x10
|
||||
#define SWS_AREA 0x20
|
||||
#define SWS_BICUBLIN 0x40
|
||||
#define SWS_GAUSS 0x80
|
||||
#define SWS_SINC 0x100
|
||||
#define SWS_LANCZOS 0x200
|
||||
#define SWS_SPLINE 0x400
|
||||
|
||||
#define SWS_SRC_V_CHR_DROP_MASK 0x30000
|
||||
#define SWS_SRC_V_CHR_DROP_SHIFT 16
|
||||
|
||||
#define SWS_PARAM_DEFAULT 123456
|
||||
|
||||
#define SWS_PRINT_INFO 0x1000
|
||||
|
||||
//the following 3 flags are not completely implemented
|
||||
//internal chrominance subsampling info
|
||||
#define SWS_FULL_CHR_H_INT 0x2000
|
||||
//input subsampling info
|
||||
#define SWS_FULL_CHR_H_INP 0x4000
|
||||
#define SWS_DIRECT_BGR 0x8000
|
||||
#define SWS_ACCURATE_RND 0x40000
|
||||
#define SWS_BITEXACT 0x80000
|
||||
#define SWS_ERROR_DIFFUSION 0x800000
|
||||
|
||||
#define SWS_MAX_REDUCE_CUTOFF 0.002
|
||||
|
||||
#define SWS_CS_ITU709 1
|
||||
#define SWS_CS_FCC 4
|
||||
#define SWS_CS_ITU601 5
|
||||
#define SWS_CS_ITU624 5
|
||||
#define SWS_CS_SMPTE170M 5
|
||||
#define SWS_CS_SMPTE240M 7
|
||||
#define SWS_CS_DEFAULT 5
|
||||
#define SWS_CS_BT2020 9
|
||||
|
||||
/**
|
||||
* Return a pointer to yuv<->rgb coefficients for the given colorspace
|
||||
* suitable for sws_setColorspaceDetails().
|
||||
*
|
||||
* @param colorspace One of the SWS_CS_* macros. If invalid,
|
||||
* SWS_CS_DEFAULT is used.
|
||||
*/
|
||||
const int *sws_getCoefficients(int colorspace);
|
||||
|
||||
// when used for filters they must have an odd number of elements
|
||||
// coeffs cannot be shared between vectors
|
||||
typedef struct SwsVector {
|
||||
double *coeff; ///< pointer to the list of coefficients
|
||||
int length; ///< number of coefficients in the vector
|
||||
} SwsVector;
|
||||
|
||||
// vectors can be shared
|
||||
typedef struct SwsFilter {
|
||||
SwsVector *lumH;
|
||||
SwsVector *lumV;
|
||||
SwsVector *chrH;
|
||||
SwsVector *chrV;
|
||||
} SwsFilter;
|
||||
|
||||
struct SwsContext;
|
||||
|
||||
/**
|
||||
* Return a positive value if pix_fmt is a supported input format, 0
|
||||
* otherwise.
|
||||
*/
|
||||
int sws_isSupportedInput(enum AVPixelFormat pix_fmt);
|
||||
|
||||
/**
|
||||
* Return a positive value if pix_fmt is a supported output format, 0
|
||||
* otherwise.
|
||||
*/
|
||||
int sws_isSupportedOutput(enum AVPixelFormat pix_fmt);
|
||||
|
||||
/**
|
||||
* @param[in] pix_fmt the pixel format
|
||||
* @return a positive value if an endianness conversion for pix_fmt is
|
||||
* supported, 0 otherwise.
|
||||
*/
|
||||
int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt);
|
||||
|
||||
/**
|
||||
* Allocate an empty SwsContext. This must be filled and passed to
|
||||
* sws_init_context(). For filling see AVOptions, options.c and
|
||||
* sws_setColorspaceDetails().
|
||||
*/
|
||||
struct SwsContext *sws_alloc_context(void);
|
||||
|
||||
/**
|
||||
* Initialize the swscaler context sws_context.
|
||||
*
|
||||
* @return zero or positive value on success, a negative value on
|
||||
* error
|
||||
*/
|
||||
av_warn_unused_result
|
||||
int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter);
|
||||
|
||||
/**
|
||||
* Free the swscaler context swsContext.
|
||||
* If swsContext is NULL, then does nothing.
|
||||
*/
|
||||
void sws_freeContext(struct SwsContext *swsContext);
|
||||
|
||||
/**
|
||||
* Allocate and return an SwsContext. You need it to perform
|
||||
* scaling/conversion operations using sws_scale().
|
||||
*
|
||||
* @param srcW the width of the source image
|
||||
* @param srcH the height of the source image
|
||||
* @param srcFormat the source image format
|
||||
* @param dstW the width of the destination image
|
||||
* @param dstH the height of the destination image
|
||||
* @param dstFormat the destination image format
|
||||
* @param flags specify which algorithm and options to use for rescaling
|
||||
* @param param extra parameters to tune the used scaler
|
||||
* For SWS_BICUBIC param[0] and [1] tune the shape of the basis
|
||||
 * function, param[0] tunes f(1) and param[1] f'(1)
|
||||
* For SWS_GAUSS param[0] tunes the exponent and thus cutoff
|
||||
* frequency
|
||||
* For SWS_LANCZOS param[0] tunes the width of the window function
|
||||
* @return a pointer to an allocated context, or NULL in case of error
|
||||
* @note this function is to be removed after a saner alternative is
|
||||
* written
|
||||
*/
|
||||
struct SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
|
||||
int dstW, int dstH, enum AVPixelFormat dstFormat,
|
||||
int flags, SwsFilter *srcFilter,
|
||||
SwsFilter *dstFilter, const double *param);
|
||||
|
||||
/**
|
||||
* Scale the image slice in srcSlice and put the resulting scaled
|
||||
* slice in the image in dst. A slice is a sequence of consecutive
|
||||
* rows in an image.
|
||||
*
|
||||
* Slices have to be provided in sequential order, either in
|
||||
* top-bottom or bottom-top order. If slices are provided in
|
||||
* non-sequential order the behavior of the function is undefined.
|
||||
*
|
||||
* @param c the scaling context previously created with
|
||||
* sws_getContext()
|
||||
* @param srcSlice the array containing the pointers to the planes of
|
||||
* the source slice
|
||||
* @param srcStride the array containing the strides for each plane of
|
||||
* the source image
|
||||
* @param srcSliceY the position in the source image of the slice to
|
||||
* process, that is the number (counted starting from
|
||||
* zero) in the image of the first row of the slice
|
||||
* @param srcSliceH the height of the source slice, that is the number
|
||||
* of rows in the slice
|
||||
* @param dst the array containing the pointers to the planes of
|
||||
* the destination image
|
||||
* @param dstStride the array containing the strides for each plane of
|
||||
* the destination image
|
||||
* @return the height of the output slice
|
||||
*/
|
||||
int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
|
||||
const int srcStride[], int srcSliceY, int srcSliceH,
|
||||
uint8_t *const dst[], const int dstStride[]);
|
||||
|
||||
/**
|
||||
 * @param dstRange flag indicating the white-black range of the output (1=jpeg / 0=mpeg)
|
||||
 * @param srcRange flag indicating the white-black range of the input (1=jpeg / 0=mpeg)
|
||||
* @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x]
|
||||
* @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x]
|
||||
* @param brightness 16.16 fixed point brightness correction
|
||||
* @param contrast 16.16 fixed point contrast correction
|
||||
* @param saturation 16.16 fixed point saturation correction
|
||||
* @return -1 if not supported
|
||||
*/
|
||||
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
|
||||
int srcRange, const int table[4], int dstRange,
|
||||
int brightness, int contrast, int saturation);
|
||||
|
||||
/**
|
||||
* @return -1 if not supported
|
||||
*/
|
||||
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
|
||||
int *srcRange, int **table, int *dstRange,
|
||||
int *brightness, int *contrast, int *saturation);
|
||||
|
||||
/**
|
||||
* Allocate and return an uninitialized vector with length coefficients.
|
||||
*/
|
||||
SwsVector *sws_allocVec(int length);
|
||||
|
||||
/**
|
||||
* Return a normalized Gaussian curve used to filter stuff
|
||||
* quality = 3 is high quality, lower is lower quality.
|
||||
*/
|
||||
SwsVector *sws_getGaussianVec(double variance, double quality);
|
||||
|
||||
/**
|
||||
* Scale all the coefficients of a by the scalar value.
|
||||
*/
|
||||
void sws_scaleVec(SwsVector *a, double scalar);
|
||||
|
||||
/**
|
||||
* Scale all the coefficients of a so that their sum equals height.
|
||||
*/
|
||||
void sws_normalizeVec(SwsVector *a, double height);
|
||||
|
||||
#if FF_API_SWS_VECTOR
|
||||
attribute_deprecated SwsVector *sws_getConstVec(double c, int length);
|
||||
attribute_deprecated SwsVector *sws_getIdentityVec(void);
|
||||
attribute_deprecated void sws_convVec(SwsVector *a, SwsVector *b);
|
||||
attribute_deprecated void sws_addVec(SwsVector *a, SwsVector *b);
|
||||
attribute_deprecated void sws_subVec(SwsVector *a, SwsVector *b);
|
||||
attribute_deprecated void sws_shiftVec(SwsVector *a, int shift);
|
||||
attribute_deprecated SwsVector *sws_cloneVec(SwsVector *a);
|
||||
attribute_deprecated void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
|
||||
#endif
|
||||
|
||||
void sws_freeVec(SwsVector *a);
|
||||
|
||||
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
|
||||
float lumaSharpen, float chromaSharpen,
|
||||
float chromaHShift, float chromaVShift,
|
||||
int verbose);
|
||||
void sws_freeFilter(SwsFilter *filter);
|
||||
|
||||
/**
|
||||
* Check if context can be reused, otherwise reallocate a new one.
|
||||
*
|
||||
* If context is NULL, just calls sws_getContext() to get a new
|
||||
* context. Otherwise, checks if the parameters are the ones already
|
||||
* saved in context. If that is the case, returns the current
|
||||
* context. Otherwise, frees context and gets a new context with
|
||||
* the new parameters.
|
||||
*
|
||||
* Be warned that srcFilter and dstFilter are not checked, they
|
||||
* are assumed to remain the same.
|
||||
*/
|
||||
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
|
||||
int srcW, int srcH, enum AVPixelFormat srcFormat,
|
||||
int dstW, int dstH, enum AVPixelFormat dstFormat,
|
||||
int flags, SwsFilter *srcFilter,
|
||||
SwsFilter *dstFilter, const double *param);
|
||||
|
||||
/**
|
||||
* Convert an 8-bit paletted frame into a frame with a color depth of 32 bits.
|
||||
*
|
||||
* The output frame will have the same packed format as the palette.
|
||||
*
|
||||
* @param src source frame buffer
|
||||
* @param dst destination frame buffer
|
||||
* @param num_pixels number of pixels to convert
|
||||
* @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src
|
||||
*/
|
||||
void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
|
||||
|
||||
/**
|
||||
* Convert an 8-bit paletted frame into a frame with a color depth of 24 bits.
|
||||
*
|
||||
* With the palette format "ABCD", the destination frame ends up with the format "ABC".
|
||||
*
|
||||
* @param src source frame buffer
|
||||
* @param dst destination frame buffer
|
||||
* @param num_pixels number of pixels to convert
|
||||
* @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src
|
||||
*/
|
||||
void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
|
||||
|
||||
/**
|
||||
* Get the AVClass for swsContext. It can be used in combination with
|
||||
* AV_OPT_SEARCH_FAKE_OBJ for examining options.
|
||||
*
|
||||
* @see av_opt_find().
|
||||
*/
|
||||
const AVClass *sws_get_class(void);
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
#endif /* SWSCALE_SWSCALE_H */
|
||||
1049
externals/ffmpeg/libswscale/swscale_internal.h
vendored
Executable file
1049
externals/ffmpeg/libswscale/swscale_internal.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
2204
externals/ffmpeg/libswscale/swscale_unscaled.c
vendored
Executable file
2204
externals/ffmpeg/libswscale/swscale_unscaled.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
55
externals/ffmpeg/libswscale/swscaleres.rc
vendored
Executable file
55
externals/ffmpeg/libswscale/swscaleres.rc
vendored
Executable file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Windows resource file for libswscale
|
||||
*
|
||||
* Copyright (C) 2012 James Almer
|
||||
* Copyright (C) 2013 Tiancheng "Timothy" Gu
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <windows.h>
|
||||
#include "libswscale/version.h"
|
||||
#include "libavutil/ffversion.h"
|
||||
#include "config.h"
|
||||
|
||||
1 VERSIONINFO
|
||||
FILEVERSION LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO, 0
|
||||
PRODUCTVERSION LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO, 0
|
||||
FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
|
||||
FILEOS VOS_NT_WINDOWS32
|
||||
FILETYPE VFT_DLL
|
||||
{
|
||||
BLOCK "StringFileInfo"
|
||||
{
|
||||
BLOCK "040904B0"
|
||||
{
|
||||
VALUE "CompanyName", "FFmpeg Project"
|
||||
VALUE "FileDescription", "FFmpeg image rescaling library"
|
||||
VALUE "FileVersion", AV_STRINGIFY(LIBSWSCALE_VERSION)
|
||||
VALUE "InternalName", "libswscale"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2000-" AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project"
|
||||
VALUE "OriginalFilename", "swscale" BUILDSUF "-" AV_STRINGIFY(LIBSWSCALE_VERSION_MAJOR) SLIBSUF
|
||||
VALUE "ProductName", "FFmpeg"
|
||||
VALUE "ProductVersion", FFMPEG_VERSION
|
||||
}
|
||||
}
|
||||
|
||||
BLOCK "VarFileInfo"
|
||||
{
|
||||
VALUE "Translation", 0x0409, 0x04B0
|
||||
}
|
||||
}
|
||||
3
externals/ffmpeg/libswscale/tests/.gitignore
vendored
Executable file
3
externals/ffmpeg/libswscale/tests/.gitignore
vendored
Executable file
@@ -0,0 +1,3 @@
|
||||
/colorspace
|
||||
/pixdesc_query
|
||||
/swscale
|
||||
171
externals/ffmpeg/libswscale/tests/colorspace.c
vendored
Executable file
171
externals/ffmpeg/libswscale/tests/colorspace.c
vendored
Executable file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h> /* for memset() */
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/rgb2rgb.h"
|
||||
|
||||
#define SIZE 1000
|
||||
#define srcByte 0x55
|
||||
#define dstByte 0xBB
|
||||
|
||||
#define FUNC(s, d, n) { s, d, #n, n }
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i, funcNum;
|
||||
uint8_t *srcBuffer = av_malloc(SIZE);
|
||||
uint8_t *dstBuffer = av_malloc(SIZE);
|
||||
int failedNum = 0;
|
||||
int passedNum = 0;
|
||||
|
||||
if (!srcBuffer || !dstBuffer)
|
||||
return -1;
|
||||
|
||||
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
|
||||
ff_sws_rgb2rgb_init();
|
||||
|
||||
for (funcNum = 0; ; funcNum++) {
|
||||
struct func_info_s {
|
||||
int src_bpp;
|
||||
int dst_bpp;
|
||||
const char *name;
|
||||
void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
|
||||
} func_info[] = {
|
||||
FUNC(2, 2, rgb12to15),
|
||||
FUNC(2, 2, rgb15to16),
|
||||
FUNC(2, 3, rgb15to24),
|
||||
FUNC(2, 4, rgb15to32),
|
||||
FUNC(2, 3, rgb16to24),
|
||||
FUNC(2, 4, rgb16to32),
|
||||
FUNC(3, 2, rgb24to15),
|
||||
FUNC(3, 2, rgb24to16),
|
||||
FUNC(3, 4, rgb24to32),
|
||||
FUNC(4, 2, rgb32to15),
|
||||
FUNC(4, 2, rgb32to16),
|
||||
FUNC(4, 3, rgb32to24),
|
||||
FUNC(2, 2, rgb16to15),
|
||||
FUNC(2, 2, rgb12tobgr12),
|
||||
FUNC(2, 2, rgb15tobgr15),
|
||||
FUNC(2, 2, rgb15tobgr16),
|
||||
FUNC(2, 3, rgb15tobgr24),
|
||||
FUNC(2, 4, rgb15tobgr32),
|
||||
FUNC(2, 2, rgb16tobgr15),
|
||||
FUNC(2, 2, rgb16tobgr16),
|
||||
FUNC(2, 3, rgb16tobgr24),
|
||||
FUNC(2, 4, rgb16tobgr32),
|
||||
FUNC(3, 2, rgb24tobgr15),
|
||||
FUNC(3, 2, rgb24tobgr16),
|
||||
FUNC(3, 3, rgb24tobgr24),
|
||||
FUNC(3, 4, rgb24tobgr32),
|
||||
FUNC(4, 2, rgb32tobgr15),
|
||||
FUNC(4, 2, rgb32tobgr16),
|
||||
FUNC(4, 3, rgb32tobgr24),
|
||||
FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */
|
||||
FUNC(6, 6, rgb48tobgr48_nobswap),
|
||||
FUNC(6, 6, rgb48tobgr48_bswap),
|
||||
FUNC(8, 6, rgb64to48_nobswap),
|
||||
FUNC(8, 6, rgb64to48_bswap),
|
||||
FUNC(8, 6, rgb64tobgr48_nobswap),
|
||||
FUNC(8, 6, rgb64tobgr48_bswap),
|
||||
FUNC(0, 0, NULL)
|
||||
};
|
||||
int width;
|
||||
int failed = 0;
|
||||
int srcBpp = 0;
|
||||
int dstBpp = 0;
|
||||
|
||||
if (!func_info[funcNum].func)
|
||||
break;
|
||||
|
||||
av_log(NULL, AV_LOG_INFO, ".");
|
||||
memset(srcBuffer, srcByte, SIZE);
|
||||
|
||||
for (width = 63; width > 0; width--) {
|
||||
int dstOffset;
|
||||
for (dstOffset = 128; dstOffset < 196; dstOffset += 4) {
|
||||
int srcOffset;
|
||||
memset(dstBuffer, dstByte, SIZE);
|
||||
|
||||
for (srcOffset = 128; srcOffset < 196; srcOffset += 4) {
|
||||
uint8_t *src = srcBuffer + srcOffset;
|
||||
uint8_t *dst = dstBuffer + dstOffset;
|
||||
const char *name = NULL;
|
||||
|
||||
// don't fill the screen with shit ...
|
||||
if (failed)
|
||||
break;
|
||||
|
||||
srcBpp = func_info[funcNum].src_bpp;
|
||||
dstBpp = func_info[funcNum].dst_bpp;
|
||||
name = func_info[funcNum].name;
|
||||
|
||||
func_info[funcNum].func(src, dst, width * srcBpp);
|
||||
|
||||
if (!srcBpp)
|
||||
break;
|
||||
|
||||
for (i = 0; i < SIZE; i++) {
|
||||
if (srcBuffer[i] != srcByte) {
|
||||
av_log(NULL, AV_LOG_INFO,
|
||||
"src damaged at %d w:%d src:%d dst:%d %s\n",
|
||||
i, width, srcOffset, dstOffset, name);
|
||||
failed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < dstOffset; i++) {
|
||||
if (dstBuffer[i] != dstByte) {
|
||||
av_log(NULL, AV_LOG_INFO,
|
||||
"dst damaged at %d w:%d src:%d dst:%d %s\n",
|
||||
i, width, srcOffset, dstOffset, name);
|
||||
failed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (i = dstOffset + width * dstBpp; i < SIZE; i++) {
|
||||
if (dstBuffer[i] != dstByte) {
|
||||
av_log(NULL, AV_LOG_INFO,
|
||||
"dst damaged at %d w:%d src:%d dst:%d %s\n",
|
||||
i, width, srcOffset, dstOffset, name);
|
||||
failed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (failed)
|
||||
failedNum++;
|
||||
else if (srcBpp)
|
||||
passedNum++;
|
||||
}
|
||||
|
||||
av_log(NULL, AV_LOG_INFO,
|
||||
"\n%d converters passed, %d converters randomly overwrote memory\n",
|
||||
passedNum, failedNum);
|
||||
return failedNum;
|
||||
}
|
||||
91
externals/ffmpeg/libswscale/tests/pixdesc_query.c
vendored
Executable file
91
externals/ffmpeg/libswscale/tests/pixdesc_query.c
vendored
Executable file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Clément Bœsch <u pkh me>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libavutil/mem.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
static const struct {
|
||||
const char *class;
|
||||
int (*cond)(enum AVPixelFormat pix_fmt);
|
||||
} query_tab[] = {
|
||||
{"is16BPS", is16BPS},
|
||||
{"isNBPS", isNBPS},
|
||||
{"isBE", isBE},
|
||||
{"isYUV", isYUV},
|
||||
{"isPlanarYUV", isPlanarYUV},
|
||||
{"isSemiPlanarYUV", isSemiPlanarYUV},
|
||||
{"isRGB", isRGB},
|
||||
{"Gray", isGray},
|
||||
{"RGBinInt", isRGBinInt},
|
||||
{"BGRinInt", isBGRinInt},
|
||||
{"Bayer", isBayer},
|
||||
{"AnyRGB", isAnyRGB},
|
||||
{"ALPHA", isALPHA},
|
||||
{"Packed", isPacked},
|
||||
{"Planar", isPlanar},
|
||||
{"PackedRGB", isPackedRGB},
|
||||
{"PlanarRGB", isPlanarRGB},
|
||||
{"usePal", usePal},
|
||||
};
|
||||
|
||||
static int cmp_str(const void *a, const void *b)
|
||||
{
|
||||
const char *s1 = *(const char **)a;
|
||||
const char *s2 = *(const char **)b;
|
||||
return strcmp(s1, s2);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < FF_ARRAY_ELEMS(query_tab); i++) {
|
||||
const char **pix_fmts = NULL;
|
||||
int nb_pix_fmts = 0;
|
||||
const AVPixFmtDescriptor *pix_desc = NULL;
|
||||
|
||||
while ((pix_desc = av_pix_fmt_desc_next(pix_desc))) {
|
||||
enum AVPixelFormat pix_fmt = av_pix_fmt_desc_get_id(pix_desc);
|
||||
if (query_tab[i].cond(pix_fmt)) {
|
||||
const char *pix_name = pix_desc->name;
|
||||
if (pix_fmt == AV_PIX_FMT_RGB32) pix_name = "rgb32";
|
||||
else if (pix_fmt == AV_PIX_FMT_RGB32_1) pix_name = "rgb32_1";
|
||||
else if (pix_fmt == AV_PIX_FMT_BGR32) pix_name = "bgr32";
|
||||
else if (pix_fmt == AV_PIX_FMT_BGR32_1) pix_name = "bgr32_1";
|
||||
|
||||
av_dynarray_add(&pix_fmts, &nb_pix_fmts, (void *)pix_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (pix_fmts) {
|
||||
qsort(pix_fmts, nb_pix_fmts, sizeof(*pix_fmts), cmp_str);
|
||||
|
||||
printf("%s:\n", query_tab[i].class);
|
||||
for (j = 0; j < nb_pix_fmts; j++)
|
||||
printf(" %s\n", pix_fmts[j]);
|
||||
printf("\n");
|
||||
|
||||
av_free(pix_fmts);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
440
externals/ffmpeg/libswscale/tests/swscale.c
vendored
Executable file
440
externals/ffmpeg/libswscale/tests/swscale.c
vendored
Executable file
@@ -0,0 +1,440 @@
|
||||
/*
|
||||
* Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#undef HAVE_AV_CONFIG_H
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/avutil.h"
|
||||
#include "libavutil/crc.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "libavutil/lfg.h"
|
||||
|
||||
#include "libswscale/swscale.h"
|
||||
|
||||
/* HACK Duplicated from swscale_internal.h.
|
||||
* Should be removed when a cleaner pixel format system exists. */
|
||||
#define isGray(x) \
|
||||
((x) == AV_PIX_FMT_GRAY8 || \
|
||||
(x) == AV_PIX_FMT_YA8 || \
|
||||
(x) == AV_PIX_FMT_GRAY16BE || \
|
||||
(x) == AV_PIX_FMT_GRAY16LE || \
|
||||
(x) == AV_PIX_FMT_YA16BE || \
|
||||
(x) == AV_PIX_FMT_YA16LE)
|
||||
#define hasChroma(x) \
|
||||
(!(isGray(x) || \
|
||||
(x) == AV_PIX_FMT_MONOBLACK || \
|
||||
(x) == AV_PIX_FMT_MONOWHITE))
|
||||
#define isALPHA(x) \
|
||||
((x) == AV_PIX_FMT_BGR32 || \
|
||||
(x) == AV_PIX_FMT_BGR32_1 || \
|
||||
(x) == AV_PIX_FMT_RGB32 || \
|
||||
(x) == AV_PIX_FMT_RGB32_1 || \
|
||||
(x) == AV_PIX_FMT_YUVA420P)
|
||||
|
||||
static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2,
|
||||
int stride1, int stride2, int w, int h)
|
||||
{
|
||||
int x, y;
|
||||
uint64_t ssd = 0;
|
||||
|
||||
for (y = 0; y < h; y++) {
|
||||
for (x = 0; x < w; x++) {
|
||||
int d = src1[x + y * stride1] - src2[x + y * stride2];
|
||||
ssd += d * d;
|
||||
}
|
||||
}
|
||||
return ssd;
|
||||
}
|
||||
|
||||
struct Results {
|
||||
uint64_t ssdY;
|
||||
uint64_t ssdU;
|
||||
uint64_t ssdV;
|
||||
uint64_t ssdA;
|
||||
uint32_t crc;
|
||||
};
|
||||
|
||||
// test by ref -> src -> dst -> out & compare out against ref
|
||||
// ref & out are YV12
|
||||
static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
|
||||
enum AVPixelFormat srcFormat, enum AVPixelFormat dstFormat,
|
||||
int srcW, int srcH, int dstW, int dstH, int flags,
|
||||
struct Results *r)
|
||||
{
|
||||
const AVPixFmtDescriptor *desc_yuva420p = av_pix_fmt_desc_get(AV_PIX_FMT_YUVA420P);
|
||||
const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat);
|
||||
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat);
|
||||
static enum AVPixelFormat cur_srcFormat;
|
||||
static int cur_srcW, cur_srcH;
|
||||
static const uint8_t *src[4];
|
||||
static int srcStride[4];
|
||||
uint8_t *dst[4] = { 0 };
|
||||
uint8_t *out[4] = { 0 };
|
||||
int dstStride[4] = {0};
|
||||
int i;
|
||||
uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0;
|
||||
struct SwsContext *dstContext = NULL, *outContext = NULL;
|
||||
uint32_t crc = 0;
|
||||
int res = 0;
|
||||
|
||||
if (cur_srcFormat != srcFormat || cur_srcW != srcW || cur_srcH != srcH) {
|
||||
struct SwsContext *srcContext = NULL;
|
||||
int p;
|
||||
|
||||
for (p = 0; p < 4; p++)
|
||||
av_freep(&src[p]);
|
||||
|
||||
res = av_image_fill_linesizes(srcStride, srcFormat, srcW);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "av_image_fill_linesizes failed\n");
|
||||
goto end;
|
||||
}
|
||||
for (p = 0; p < 4; p++) {
|
||||
srcStride[p] = FFALIGN(srcStride[p], 16);
|
||||
if (srcStride[p])
|
||||
src[p] = av_mallocz(srcStride[p] * srcH + 16);
|
||||
if (srcStride[p] && !src[p]) {
|
||||
perror("Malloc");
|
||||
res = -1;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
srcContext = sws_getContext(w, h, AV_PIX_FMT_YUVA420P, srcW, srcH,
|
||||
srcFormat, SWS_BILINEAR, NULL, NULL, NULL);
|
||||
if (!srcContext) {
|
||||
fprintf(stderr, "Failed to get %s ---> %s\n",
|
||||
desc_yuva420p->name,
|
||||
desc_src->name);
|
||||
res = -1;
|
||||
goto end;
|
||||
}
|
||||
sws_scale(srcContext, ref, refStride, 0, h,
|
||||
(uint8_t * const *) src, srcStride);
|
||||
sws_freeContext(srcContext);
|
||||
|
||||
cur_srcFormat = srcFormat;
|
||||
cur_srcW = srcW;
|
||||
cur_srcH = srcH;
|
||||
}
|
||||
|
||||
res = av_image_fill_linesizes(dstStride, dstFormat, dstW);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "av_image_fill_linesizes failed\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
/* Image buffers passed into libswscale can be allocated any way you
|
||||
* prefer, as long as they're aligned enough for the architecture, and
|
||||
* they're freed appropriately (such as using av_free for buffers
|
||||
* allocated with av_malloc). */
|
||||
/* An extra 16 bytes is being allocated because some scalers may write
|
||||
* out of bounds. */
|
||||
dstStride[i] = FFALIGN(dstStride[i], 16);
|
||||
if (dstStride[i])
|
||||
dst[i] = av_mallocz(dstStride[i] * dstH + 16);
|
||||
if (dstStride[i] && !dst[i]) {
|
||||
perror("Malloc");
|
||||
res = -1;
|
||||
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
dstContext = sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat,
|
||||
flags, NULL, NULL, NULL);
|
||||
if (!dstContext) {
|
||||
fprintf(stderr, "Failed to get %s ---> %s\n",
|
||||
desc_src->name, desc_dst->name);
|
||||
res = -1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
|
||||
desc_src->name, srcW, srcH,
|
||||
desc_dst->name, dstW, dstH,
|
||||
flags);
|
||||
fflush(stdout);
|
||||
|
||||
sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
|
||||
|
||||
for (i = 0; i < 4 && dstStride[i]; i++)
|
||||
crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
|
||||
dstStride[i] * dstH);
|
||||
|
||||
if (r && crc == r->crc) {
|
||||
ssdY = r->ssdY;
|
||||
ssdU = r->ssdU;
|
||||
ssdV = r->ssdV;
|
||||
ssdA = r->ssdA;
|
||||
} else {
|
||||
for (i = 0; i < 4; i++) {
|
||||
refStride[i] = FFALIGN(refStride[i], 16);
|
||||
if (refStride[i])
|
||||
out[i] = av_mallocz(refStride[i] * h);
|
||||
if (refStride[i] && !out[i]) {
|
||||
perror("Malloc");
|
||||
res = -1;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
outContext = sws_getContext(dstW, dstH, dstFormat, w, h,
|
||||
AV_PIX_FMT_YUVA420P, SWS_BILINEAR,
|
||||
NULL, NULL, NULL);
|
||||
if (!outContext) {
|
||||
fprintf(stderr, "Failed to get %s ---> %s\n",
|
||||
desc_dst->name,
|
||||
desc_yuva420p->name);
|
||||
res = -1;
|
||||
goto end;
|
||||
}
|
||||
sws_scale(outContext, (const uint8_t * const *) dst, dstStride, 0, dstH,
|
||||
out, refStride);
|
||||
|
||||
ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
|
||||
if (hasChroma(srcFormat) && hasChroma(dstFormat)) {
|
||||
//FIXME check that output is really gray
|
||||
ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1],
|
||||
(w + 1) >> 1, (h + 1) >> 1);
|
||||
ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2],
|
||||
(w + 1) >> 1, (h + 1) >> 1);
|
||||
}
|
||||
if (isALPHA(srcFormat) && isALPHA(dstFormat))
|
||||
ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h);
|
||||
|
||||
ssdY /= w * h;
|
||||
ssdU /= w * h / 4;
|
||||
ssdV /= w * h / 4;
|
||||
ssdA /= w * h;
|
||||
|
||||
sws_freeContext(outContext);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (refStride[i])
|
||||
av_free(out[i]);
|
||||
}
|
||||
|
||||
printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n",
|
||||
crc, ssdY, ssdU, ssdV, ssdA);
|
||||
|
||||
end:
|
||||
sws_freeContext(dstContext);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (dstStride[i])
|
||||
av_free(dst[i]);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void selfTest(const uint8_t * const ref[4], int refStride[4],
|
||||
int w, int h,
|
||||
enum AVPixelFormat srcFormat_in,
|
||||
enum AVPixelFormat dstFormat_in)
|
||||
{
|
||||
const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC,
|
||||
SWS_X, SWS_POINT, SWS_AREA, 0 };
|
||||
const int srcW = w;
|
||||
const int srcH = h;
|
||||
const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 };
|
||||
const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 };
|
||||
enum AVPixelFormat srcFormat, dstFormat;
|
||||
const AVPixFmtDescriptor *desc_src, *desc_dst;
|
||||
|
||||
for (srcFormat = srcFormat_in != AV_PIX_FMT_NONE ? srcFormat_in : 0;
|
||||
srcFormat < AV_PIX_FMT_NB; srcFormat++) {
|
||||
if (!sws_isSupportedInput(srcFormat) ||
|
||||
!sws_isSupportedOutput(srcFormat))
|
||||
continue;
|
||||
|
||||
desc_src = av_pix_fmt_desc_get(srcFormat);
|
||||
|
||||
for (dstFormat = dstFormat_in != AV_PIX_FMT_NONE ? dstFormat_in : 0;
|
||||
dstFormat < AV_PIX_FMT_NB; dstFormat++) {
|
||||
int i, j, k;
|
||||
int res = 0;
|
||||
|
||||
if (!sws_isSupportedInput(dstFormat) ||
|
||||
!sws_isSupportedOutput(dstFormat))
|
||||
continue;
|
||||
|
||||
desc_dst = av_pix_fmt_desc_get(dstFormat);
|
||||
|
||||
printf("%s -> %s\n", desc_src->name, desc_dst->name);
|
||||
fflush(stdout);
|
||||
|
||||
for (k = 0; flags[k] && !res; k++)
|
||||
for (i = 0; dstW[i] && !res; i++)
|
||||
for (j = 0; dstH[j] && !res; j++)
|
||||
res = doTest(ref, refStride, w, h,
|
||||
srcFormat, dstFormat,
|
||||
srcW, srcH, dstW[i], dstH[j], flags[k],
|
||||
NULL);
|
||||
if (dstFormat_in != AV_PIX_FMT_NONE)
|
||||
break;
|
||||
}
|
||||
if (srcFormat_in != AV_PIX_FMT_NONE)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int fileTest(const uint8_t * const ref[4], int refStride[4],
|
||||
int w, int h, FILE *fp,
|
||||
enum AVPixelFormat srcFormat_in,
|
||||
enum AVPixelFormat dstFormat_in)
|
||||
{
|
||||
char buf[256];
|
||||
|
||||
while (fgets(buf, sizeof(buf), fp)) {
|
||||
struct Results r;
|
||||
enum AVPixelFormat srcFormat;
|
||||
char srcStr[21];
|
||||
int srcW = 0, srcH = 0;
|
||||
enum AVPixelFormat dstFormat;
|
||||
char dstStr[21];
|
||||
int dstW = 0, dstH = 0;
|
||||
int flags;
|
||||
int ret;
|
||||
|
||||
ret = sscanf(buf,
|
||||
" %20s %dx%d -> %20s %dx%d flags=%d CRC=%x"
|
||||
" SSD=%"SCNu64 ", %"SCNu64 ", %"SCNu64 ", %"SCNu64 "\n",
|
||||
srcStr, &srcW, &srcH, dstStr, &dstW, &dstH,
|
||||
&flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA);
|
||||
if (ret != 12) {
|
||||
srcStr[0] = dstStr[0] = 0;
|
||||
ret = sscanf(buf, "%20s -> %20s\n", srcStr, dstStr);
|
||||
}
|
||||
|
||||
srcFormat = av_get_pix_fmt(srcStr);
|
||||
dstFormat = av_get_pix_fmt(dstStr);
|
||||
|
||||
if (srcFormat == AV_PIX_FMT_NONE || dstFormat == AV_PIX_FMT_NONE ||
|
||||
srcW > 8192U || srcH > 8192U || dstW > 8192U || dstH > 8192U) {
|
||||
fprintf(stderr, "malformed input file\n");
|
||||
return -1;
|
||||
}
|
||||
if ((srcFormat_in != AV_PIX_FMT_NONE && srcFormat_in != srcFormat) ||
|
||||
(dstFormat_in != AV_PIX_FMT_NONE && dstFormat_in != dstFormat))
|
||||
continue;
|
||||
if (ret != 12) {
|
||||
printf("%s", buf);
|
||||
continue;
|
||||
}
|
||||
|
||||
doTest(ref, refStride, w, h,
|
||||
srcFormat, dstFormat,
|
||||
srcW, srcH, dstW, dstH, flags,
|
||||
&r);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define W 96
|
||||
#define H 96
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
|
||||
enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
|
||||
uint8_t *rgb_data = av_malloc(W * H * 4);
|
||||
const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
|
||||
int rgb_stride[4] = { 4 * W, 0, 0, 0 };
|
||||
uint8_t *data = av_malloc(4 * W * H);
|
||||
const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
|
||||
int stride[4] = { W, W, W, W };
|
||||
int x, y;
|
||||
struct SwsContext *sws;
|
||||
AVLFG rand;
|
||||
int res = -1;
|
||||
int i;
|
||||
FILE *fp = NULL;
|
||||
|
||||
if (!rgb_data || !data)
|
||||
return -1;
|
||||
|
||||
for (i = 1; i < argc; i += 2) {
|
||||
if (argv[i][0] != '-' || i + 1 == argc)
|
||||
goto bad_option;
|
||||
if (!strcmp(argv[i], "-ref")) {
|
||||
fp = fopen(argv[i + 1], "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
|
||||
goto error;
|
||||
}
|
||||
} else if (!strcmp(argv[i], "-cpuflags")) {
|
||||
unsigned flags = av_get_cpu_flags();
|
||||
int ret = av_parse_cpu_caps(&flags, argv[i + 1]);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]);
|
||||
return ret;
|
||||
}
|
||||
av_force_cpu_flags(flags);
|
||||
} else if (!strcmp(argv[i], "-src")) {
|
||||
srcFormat = av_get_pix_fmt(argv[i + 1]);
|
||||
if (srcFormat == AV_PIX_FMT_NONE) {
|
||||
fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
|
||||
return -1;
|
||||
}
|
||||
} else if (!strcmp(argv[i], "-dst")) {
|
||||
dstFormat = av_get_pix_fmt(argv[i + 1]);
|
||||
if (dstFormat == AV_PIX_FMT_NONE) {
|
||||
fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
bad_option:
|
||||
fprintf(stderr, "bad option or argument missing (%s)\n", argv[i]);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
|
||||
AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
|
||||
|
||||
av_lfg_init(&rand, 1);
|
||||
|
||||
for (y = 0; y < H; y++)
|
||||
for (x = 0; x < W * 4; x++)
|
||||
rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
|
||||
sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
|
||||
sws_freeContext(sws);
|
||||
av_free(rgb_data);
|
||||
|
||||
if(fp) {
|
||||
res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
|
||||
fclose(fp);
|
||||
} else {
|
||||
selfTest(src, stride, W, H, srcFormat, dstFormat);
|
||||
res = 0;
|
||||
}
|
||||
error:
|
||||
av_free(data);
|
||||
|
||||
return res;
|
||||
}
|
||||
2438
externals/ffmpeg/libswscale/utils.c
vendored
Executable file
2438
externals/ffmpeg/libswscale/utils.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
53
externals/ffmpeg/libswscale/version.h
vendored
Executable file
53
externals/ffmpeg/libswscale/version.h
vendored
Executable file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef SWSCALE_VERSION_H
|
||||
#define SWSCALE_VERSION_H
|
||||
|
||||
/**
|
||||
* @file
|
||||
* swscale version macros
|
||||
*/
|
||||
|
||||
#include "libavutil/version.h"
|
||||
|
||||
#define LIBSWSCALE_VERSION_MAJOR 5
|
||||
#define LIBSWSCALE_VERSION_MINOR 7
|
||||
#define LIBSWSCALE_VERSION_MICRO 100
|
||||
|
||||
#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
|
||||
LIBSWSCALE_VERSION_MINOR, \
|
||||
LIBSWSCALE_VERSION_MICRO)
|
||||
#define LIBSWSCALE_VERSION AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \
|
||||
LIBSWSCALE_VERSION_MINOR, \
|
||||
LIBSWSCALE_VERSION_MICRO)
|
||||
#define LIBSWSCALE_BUILD LIBSWSCALE_VERSION_INT
|
||||
|
||||
#define LIBSWSCALE_IDENT "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION)
|
||||
|
||||
/**
|
||||
* FF_API_* defines may be placed below to indicate public API that will be
|
||||
* dropped at a future version bump. The defines themselves are not part of
|
||||
* the public API and may change, break or disappear at any time.
|
||||
*/
|
||||
|
||||
#ifndef FF_API_SWS_VECTOR
|
||||
#define FF_API_SWS_VECTOR (LIBSWSCALE_VERSION_MAJOR < 6)
|
||||
#endif
|
||||
|
||||
#endif /* SWSCALE_VERSION_H */
|
||||
322
externals/ffmpeg/libswscale/vscale.c
vendored
Executable file
322
externals/ffmpeg/libswscale/vscale.c
vendored
Executable file
@@ -0,0 +1,322 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "swscale_internal.h"
|
||||
|
||||
typedef struct VScalerContext
|
||||
{
|
||||
uint16_t *filter[2];
|
||||
int32_t *filter_pos;
|
||||
int filter_size;
|
||||
int isMMX;
|
||||
union {
|
||||
yuv2planar1_fn yuv2planar1;
|
||||
yuv2planarX_fn yuv2planarX;
|
||||
yuv2interleavedX_fn yuv2interleavedX;
|
||||
yuv2packed1_fn yuv2packed1;
|
||||
yuv2packed2_fn yuv2packed2;
|
||||
yuv2anyX_fn yuv2anyX;
|
||||
} pfn;
|
||||
yuv2packedX_fn yuv2packedX;
|
||||
} VScalerContext;
|
||||
|
||||
|
||||
static int lum_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
|
||||
{
|
||||
VScalerContext *inst = desc->instance;
|
||||
int dstW = desc->dst->width;
|
||||
|
||||
int first = FFMAX(1-inst->filter_size, inst->filter_pos[sliceY]);
|
||||
int sp = first - desc->src->plane[0].sliceY;
|
||||
int dp = sliceY - desc->dst->plane[0].sliceY;
|
||||
uint8_t **src = desc->src->plane[0].line + sp;
|
||||
uint8_t **dst = desc->dst->plane[0].line + dp;
|
||||
uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : sliceY * inst->filter_size);
|
||||
|
||||
if (inst->filter_size == 1)
|
||||
inst->pfn.yuv2planar1((const int16_t*)src[0], dst[0], dstW, c->lumDither8, 0);
|
||||
else
|
||||
inst->pfn.yuv2planarX(filter, inst->filter_size, (const int16_t**)src, dst[0], dstW, c->lumDither8, 0);
|
||||
|
||||
if (desc->alpha) {
|
||||
int sp = first - desc->src->plane[3].sliceY;
|
||||
int dp = sliceY - desc->dst->plane[3].sliceY;
|
||||
uint8_t **src = desc->src->plane[3].line + sp;
|
||||
uint8_t **dst = desc->dst->plane[3].line + dp;
|
||||
uint16_t *filter = inst->filter[1] + (inst->isMMX ? 0 : sliceY * inst->filter_size);
|
||||
|
||||
if (inst->filter_size == 1)
|
||||
inst->pfn.yuv2planar1((const int16_t*)src[0], dst[0], dstW, c->lumDither8, 0);
|
||||
else
|
||||
inst->pfn.yuv2planarX(filter, inst->filter_size, (const int16_t**)src, dst[0], dstW, c->lumDither8, 0);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Vertically scale one output line of the two chroma planes (planar YUV path).
 * Returns 0 when sliceY carries no chroma sample (vertical subsampling skips
 * this luma line), 1 after one U and one V output line have been written. */
static int chr_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    const int chrSkipMask = (1 << desc->dst->v_chr_sub_sample) - 1;
    if (sliceY & chrSkipMask)
        return 0; /* no chroma line at this luma row */
    else {
        VScalerContext *inst = desc->instance;
        /* chroma width / line index in the (sub)sampled chroma grid */
        int dstW = AV_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample);
        int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;

        /* first source line contributing to this output line, clamped so the
         * filter window never starts above line (1 - filter_size) */
        int first = FFMAX(1-inst->filter_size, inst->filter_pos[chrSliceY]);
        int sp1 = first - desc->src->plane[1].sliceY;     /* U offset within source slice */
        int sp2 = first - desc->src->plane[2].sliceY;     /* V offset within source slice */
        int dp1 = chrSliceY - desc->dst->plane[1].sliceY; /* U offset within dest slice */
        int dp2 = chrSliceY - desc->dst->plane[2].sliceY; /* V offset within dest slice */
        uint8_t **src1 = desc->src->plane[1].line + sp1;
        uint8_t **src2 = desc->src->plane[2].line + sp2;
        uint8_t **dst1 = desc->dst->plane[1].line + dp1;
        uint8_t **dst2 = desc->dst->plane[2].line + dp2;
        /* the MMX filter table is repacked per line, so it is not advanced here */
        uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : chrSliceY * inst->filter_size);

        if (c->yuv2nv12cX) {
            /* interleaved UV output (e.g. NV12): one call handles both planes */
            inst->pfn.yuv2interleavedX(c, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW);
        } else if (inst->filter_size == 1) {
            /* 1-tap filter: plain copy/convert with dither offsets 0 and 3 */
            inst->pfn.yuv2planar1((const int16_t*)src1[0], dst1[0], dstW, c->chrDither8, 0);
            inst->pfn.yuv2planar1((const int16_t*)src2[0], dst2[0], dstW, c->chrDither8, 3);
        } else {
            inst->pfn.yuv2planarX(filter, inst->filter_size, (const int16_t**)src1, dst1[0], dstW, c->chrDither8, 0);
            inst->pfn.yuv2planarX(filter, inst->filter_size, (const int16_t**)src2, dst2[0], dstW, c->chrDither8, inst->isMMX ? (c->uv_offx2 >> 1) : 3);
        }
    }

    return 1;
}
|
||||
|
||||
/* Vertically scale directly into a packed pixel format (RGB and friends).
 * Dispatches to the fastest applicable output routine: 1-tap, the special
 * 2-tap (bilinear) paths — valid only when the two taps sum to 4096, i.e.
 * 1.0 in 12-bit fixed point — or the generic X-tap fallback.
 * Always writes one output line and returns 1. */
static int packed_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    VScalerContext *inst = desc->instance;
    int dstW = desc->dst->width;
    int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;

    /* luma context is inst[0], chroma context inst[1] (allocated as a pair
     * in ff_init_vscale) */
    int lum_fsize = inst[0].filter_size;
    int chr_fsize = inst[1].filter_size;
    uint16_t *lum_filter = inst[0].filter[0];
    uint16_t *chr_filter = inst[1].filter[0];

    /* first contributing source line per component, clamped to the buffer */
    int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[ sliceY]);
    int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]);

    int sp0 = firstLum - desc->src->plane[0].sliceY;
    int sp1 = firstChr - desc->src->plane[1].sliceY;
    int sp2 = firstChr - desc->src->plane[2].sliceY;
    int sp3 = firstLum - desc->src->plane[3].sliceY;
    int dp = sliceY - desc->dst->plane[0].sliceY;
    uint8_t **src0 = desc->src->plane[0].line + sp0;
    uint8_t **src1 = desc->src->plane[1].line + sp1;
    uint8_t **src2 = desc->src->plane[2].line + sp2;
    uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL;
    uint8_t **dst = desc->dst->plane[0].line + dp;


    if (c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 1) { // unscaled RGB
        inst->pfn.yuv2packed1(c, (const int16_t*)*src0, (const int16_t**)src1, (const int16_t**)src2,
                              (const int16_t*)(desc->alpha ? *src3 : NULL), *dst, dstW, 0, sliceY);
    } else if (c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 2 &&
               chr_filter[2 * chrSliceY + 1] + chr_filter[2 * chrSliceY] == 4096 &&
               chr_filter[2 * chrSliceY + 1] <= 4096U) { // unscaled RGB
        /* luma unscaled, chroma bilinear: pass the second chroma tap weight */
        int chrAlpha = chr_filter[2 * chrSliceY + 1];
        inst->pfn.yuv2packed1(c, (const int16_t*)*src0, (const int16_t**)src1, (const int16_t**)src2,
                              (const int16_t*)(desc->alpha ? *src3 : NULL), *dst, dstW, chrAlpha, sliceY);
    } else if (c->yuv2packed2 && lum_fsize == 2 && chr_fsize == 2 &&
               lum_filter[2 * sliceY + 1] + lum_filter[2 * sliceY] == 4096 &&
               lum_filter[2 * sliceY + 1] <= 4096U &&
               chr_filter[2 * chrSliceY + 1] + chr_filter[2 * chrSliceY] == 4096 &&
               chr_filter[2 * chrSliceY + 1] <= 4096U
    ) { // bilinear upscale RGB
        int lumAlpha = lum_filter[2 * sliceY + 1];
        int chrAlpha = chr_filter[2 * chrSliceY + 1];
        /* duplicate the first tap into both 16-bit halves for the MMX
         * filter layout (0x10001 replicates a 16-bit value into 32 bits) */
        c->lumMmxFilter[2] =
        c->lumMmxFilter[3] = lum_filter[2 * sliceY] * 0x10001;
        c->chrMmxFilter[2] =
        c->chrMmxFilter[3] = chr_filter[2 * chrSliceY] * 0x10001;
        inst->pfn.yuv2packed2(c, (const int16_t**)src0, (const int16_t**)src1, (const int16_t**)src2, (const int16_t**)src3,
                              *dst, dstW, lumAlpha, chrAlpha, sliceY);
    } else { // general RGB
        if ((c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 2) ||
            (c->yuv2packed2 && lum_fsize == 2 && chr_fsize == 2)) {
            /* a 2-tap routine exists but the tap weights disqualify it;
             * warn once and fall through to the generic path */
            if (!c->warned_unuseable_bilinear)
                av_log(c, AV_LOG_INFO, "Optimized 2 tap filter code cannot be used\n");
            c->warned_unuseable_bilinear = 1;
        }

        /* yuv2packedX lives outside the pfn union so it can coexist with the
         * 1/2-tap pointers used by the fast paths above */
        inst->yuv2packedX(c, lum_filter + sliceY * lum_fsize,
                          (const int16_t**)src0, lum_fsize, chr_filter + chrSliceY * chr_fsize,
                          (const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, *dst, dstW, sliceY);
    }
    return 1;
}
|
||||
|
||||
/* Vertical scaling for output formats with no dedicated packed routine:
 * hands all four planes to the generic yuv2anyX writer in one call.
 * Always writes one output line and returns 1. */
static int any_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    VScalerContext *inst = desc->instance;
    int dstW = desc->dst->width;
    int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;

    /* luma context is inst[0], chroma context inst[1] */
    int lum_fsize = inst[0].filter_size;
    int chr_fsize = inst[1].filter_size;
    uint16_t *lum_filter = inst[0].filter[0];
    uint16_t *chr_filter = inst[1].filter[0];

    /* first contributing source line per component, clamped to the buffer */
    int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[ sliceY]);
    int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]);

    int sp0 = firstLum - desc->src->plane[0].sliceY;
    int sp1 = firstChr - desc->src->plane[1].sliceY;
    int sp2 = firstChr - desc->src->plane[2].sliceY;
    int sp3 = firstLum - desc->src->plane[3].sliceY;
    int dp0 = sliceY - desc->dst->plane[0].sliceY;
    int dp1 = chrSliceY - desc->dst->plane[1].sliceY;
    int dp2 = chrSliceY - desc->dst->plane[2].sliceY;
    int dp3 = sliceY - desc->dst->plane[3].sliceY;

    uint8_t **src0 = desc->src->plane[0].line + sp0;
    uint8_t **src1 = desc->src->plane[1].line + sp1;
    uint8_t **src2 = desc->src->plane[2].line + sp2;
    uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL;
    uint8_t *dst[4] = { desc->dst->plane[0].line[dp0],
                        desc->dst->plane[1].line[dp1],
                        desc->dst->plane[2].line[dp2],
                        desc->alpha ? desc->dst->plane[3].line[dp3] : NULL };

    av_assert1(!c->yuv2packed1 && !c->yuv2packed2);
    /* NOTE(review): the chroma filter is indexed with sliceY, not chrSliceY
     * (unlike packed_vscale); presumably safe because formats reaching
     * yuv2anyX have no vertical chroma subsampling — confirm upstream. */
    inst->pfn.yuv2anyX(c, lum_filter + sliceY * lum_fsize,
                       (const int16_t**)src0, lum_fsize, chr_filter + sliceY * chr_fsize,
                       (const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, dst, dstW, sliceY);

    return 1;

}
|
||||
|
||||
/* Set up the vertical-scaling filter descriptor(s) for the destination
 * format: separate luma/chroma descriptors for planar YUV (or gray), or a
 * single packed/any descriptor otherwise. Returns 0 or AVERROR(ENOMEM). */
int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
{
    const int planar_out = isPlanarYUV(c->dstFormat) ||
                           (isGray(c->dstFormat) && !isALPHA(c->dstFormat));
    VScalerContext *ctx;

    if (planar_out) {
        ctx = av_mallocz(sizeof(*ctx));
        if (!ctx)
            return AVERROR(ENOMEM);

        desc[0].process  = lum_planar_vscale;
        desc[0].instance = ctx;
        desc[0].src      = src;
        desc[0].dst      = dst;
        desc[0].alpha    = c->needAlpha;

        if (!isGray(c->dstFormat)) {
            /* chroma gets its own descriptor and context */
            VScalerContext *chr = av_mallocz(sizeof(*chr));
            if (!chr)
                return AVERROR(ENOMEM);
            desc[1].process  = chr_planar_vscale;
            desc[1].instance = chr;
            desc[1].src      = src;
            desc[1].dst      = dst;
        }
    } else {
        /* packed output: one allocation holds the luma context at [0] and
         * the chroma context at [1]; a single descriptor drives both */
        ctx = av_mallocz_array(sizeof(*ctx), 2);
        if (!ctx)
            return AVERROR(ENOMEM);

        desc[0].process  = c->yuv2packedX ? packed_vscale : any_vscale;
        desc[0].instance = ctx;
        desc[0].src      = src;
        desc[0].dst      = dst;
        desc[0].alpha    = c->needAlpha;
    }

    /* bind filter tables and output-function pointers to the contexts */
    ff_init_vscale_pfn(c, c->yuv2plane1, c->yuv2planeX, c->yuv2nv12cX,
                       c->yuv2packed1, c->yuv2packed2, c->yuv2packedX,
                       c->yuv2anyX, c->use_mmx_vfilter);
    return 0;
}
|
||||
|
||||
/* Bind filter tables and output function pointers to the vertical-scale
 * descriptors created by ff_init_vscale(). Also called again by the x86
 * code after SIMD selection: use_mmx switches the planar paths to the
 * repacked lum/chr/alpMmxFilter tables. */
void ff_init_vscale_pfn(SwsContext *c,
    yuv2planar1_fn yuv2plane1,
    yuv2planarX_fn yuv2planeX,
    yuv2interleavedX_fn yuv2nv12cX,
    yuv2packed1_fn yuv2packed1,
    yuv2packed2_fn yuv2packed2,
    yuv2packedX_fn yuv2packedX,
    yuv2anyX_fn yuv2anyX, int use_mmx)
{
    VScalerContext *lumCtx = NULL;
    VScalerContext *chrCtx = NULL;
    /* index of the last vscale descriptor; the gamma pass, when present,
     * appends one more descriptor after it */
    int idx = c->numDesc - (c->is_internal_gamma ? 2 : 1); //FIXME avoid hardcoding indexes

    if (isPlanarYUV(c->dstFormat) || (isGray(c->dstFormat) && !isALPHA(c->dstFormat))) {
        if (!isGray(c->dstFormat)) {
            /* chroma descriptor sits last; luma is the one before it */
            chrCtx = c->desc[idx].instance;

            chrCtx->filter[0] = use_mmx ? (int16_t*)c->chrMmxFilter : c->vChrFilter;
            chrCtx->filter_size = c->vChrFilterSize;
            chrCtx->filter_pos = c->vChrFilterPos;
            chrCtx->isMMX = use_mmx;

            --idx;
            if (yuv2nv12cX) chrCtx->pfn.yuv2interleavedX = yuv2nv12cX;
            else if (c->vChrFilterSize == 1) chrCtx->pfn.yuv2planar1 = yuv2plane1;
            else chrCtx->pfn.yuv2planarX = yuv2planeX;
        }

        lumCtx = c->desc[idx].instance;

        lumCtx->filter[0] = use_mmx ? (int16_t*)c->lumMmxFilter : c->vLumFilter;
        /* filter[1] is the alpha-plane filter used by lum_planar_vscale */
        lumCtx->filter[1] = use_mmx ? (int16_t*)c->alpMmxFilter : c->vLumFilter;
        lumCtx->filter_size = c->vLumFilterSize;
        lumCtx->filter_pos = c->vLumFilterPos;
        lumCtx->isMMX = use_mmx;

        if (c->vLumFilterSize == 1) lumCtx->pfn.yuv2planar1 = yuv2plane1;
        else lumCtx->pfn.yuv2planarX = yuv2planeX;

    } else {
        /* packed output: the two contexts were allocated as a pair */
        lumCtx = c->desc[idx].instance;
        chrCtx = &lumCtx[1];

        lumCtx->filter[0] = c->vLumFilter;
        lumCtx->filter_size = c->vLumFilterSize;
        lumCtx->filter_pos = c->vLumFilterPos;

        chrCtx->filter[0] = c->vChrFilter;
        chrCtx->filter_size = c->vChrFilterSize;
        chrCtx->filter_pos = c->vChrFilterPos;

        lumCtx->isMMX = use_mmx;
        chrCtx->isMMX = use_mmx;

        if (yuv2packedX) {
            /* fast 1/2-tap pointers go in the union; the generic X-tap
             * pointer is stored separately so both remain available */
            if (c->yuv2packed1 && c->vLumFilterSize == 1 && c->vChrFilterSize <= 2)
                lumCtx->pfn.yuv2packed1 = yuv2packed1;
            else if (c->yuv2packed2 && c->vLumFilterSize == 2 && c->vChrFilterSize == 2)
                lumCtx->pfn.yuv2packed2 = yuv2packed2;
            lumCtx->yuv2packedX = yuv2packedX;
        } else
            lumCtx->pfn.yuv2anyX = yuv2anyX;
    }
}
|
||||
|
||||
|
||||
15
externals/ffmpeg/libswscale/x86/Makefile
vendored
Executable file
15
externals/ffmpeg/libswscale/x86/Makefile
vendored
Executable file
@@ -0,0 +1,15 @@
|
||||
# x86-specific libswscale build rules.
# swscale_mmx.o is built with $(NOREDZONE_FLAGS) — presumably because its
# inline asm is unsafe with the stack red zone; confirm against the flag's
# definition in configure.
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)

OBJS += x86/rgb2rgb.o \
        x86/swscale.o \
        x86/yuv2rgb.o \

# objects requiring MMX support
MMX-OBJS += x86/hscale_fast_bilinear_simd.o \

OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o

# standalone assembly, built with the x86asm assembler
X86ASM-OBJS += x86/input.o \
               x86/output.o \
               x86/scale.o \
               x86/rgb_2_rgb.o \
               x86/yuv_2_rgb.o \
|
||||
359
externals/ffmpeg/libswscale/x86/hscale_fast_bilinear_simd.c
vendored
Executable file
359
externals/ffmpeg/libswscale/x86/hscale_fast_bilinear_simd.c
vendored
Executable file
@@ -0,0 +1,359 @@
|
||||
/*
|
||||
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "../swscale_internal.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
|
||||
#define RET 0xC3 // near return opcode for x86
|
||||
#define PREFETCH "prefetchnta"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
/*
 * Generate a runtime-specialized MMXEXT horizontal scaler.
 *
 * Two template code fragments are captured via inline asm (fragmentA: five
 * input pixels per four output pixels; fragmentB: four input pixels), then
 * copied into filterCode once per group of four output pixels with the two
 * pshufw immediate bytes patched to select the correct source pixels.
 *
 * When filterCode is NULL only the required code size is computed.
 * Returns the generated code size in bytes (fragmentPos + 1, accounting for
 * the final RET opcode).
 */
av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
                                   int16_t *filter, int32_t *filterPos,
                                   int numSplits)
{
    uint8_t *fragmentA;           /* start of template fragment A */
    x86_reg imm8OfPShufW1A;       /* offset of 1st pshufw immediate within A */
    x86_reg imm8OfPShufW2A;       /* offset of 2nd pshufw immediate within A */
    x86_reg fragmentLengthA;
    uint8_t *fragmentB;
    x86_reg imm8OfPShufW1B;
    x86_reg imm8OfPShufW2B;
    x86_reg fragmentLengthB;
    int fragmentPos;              /* current write position in filterCode */

    int xpos, i;                  /* xpos: source position, 16.16 fixed point */

    // create an optimized horizontal scaling routine
    /* This scaler is made of runtime-generated MMXEXT code using specially tuned
     * pshufw instructions. For every four output pixels, if four input pixels
     * are enough for the fast bilinear scaling, then a chunk of fragmentB is
     * used. If five input pixels are needed, then a chunk of fragmentA is used.
     */

    // code fragment

    /* Capture fragment A. Labels 0/1/2/9 bracket the template; the trailing
     * lea/dec/sub sequence computes the fragment start address, the offsets
     * of the bytes immediately before labels 1 and 2 (the two pshufw
     * immediates), and the total fragment length. The jmp to 9f ensures the
     * template itself is never executed here. */
    __asm__ volatile (
        "jmp                         9f                \n\t"
        // Begin
        "0:                                            \n\t"
        "movq    (%%"FF_REG_d", %%"FF_REG_a"), %%mm3   \n\t"
        "movd    (%%"FF_REG_c", %%"FF_REG_S"), %%mm0   \n\t"
        "movd   1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1   \n\t"
        "punpcklbw                %%mm7, %%mm1         \n\t"
        "punpcklbw                %%mm7, %%mm0         \n\t"
        "pshufw                   $0xFF, %%mm1, %%mm1  \n\t"
        "1:                                            \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm0  \n\t"
        "2:                                            \n\t"
        "psubw                    %%mm1, %%mm0         \n\t"
        "movl   8(%%"FF_REG_b", %%"FF_REG_a"), %%esi   \n\t"
        "pmullw                   %%mm3, %%mm0         \n\t"
        "psllw                       $7, %%mm1         \n\t"
        "paddw                    %%mm1, %%mm0         \n\t"

        "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a")      \n\t"

        "add                         $8, %%"FF_REG_a"  \n\t"
        // End
        "9:                                            \n\t"
        "lea " LOCAL_MANGLE(0b) ", %0                  \n\t"
        "lea " LOCAL_MANGLE(1b) ", %1                  \n\t"
        "lea " LOCAL_MANGLE(2b) ", %2                  \n\t"
        "dec                         %1                \n\t"
        "dec                         %2                \n\t"
        "sub                         %0, %1            \n\t"
        "sub                         %0, %2            \n\t"
        "lea " LOCAL_MANGLE(9b) ", %3                  \n\t"
        "sub                         %0, %3            \n\t"


        : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
          "=r" (fragmentLengthA)
    );

    /* Capture fragment B: same structure, but only four source pixels are
     * read (no second movd), so the second pixel vector is derived from the
     * first with pshufw. */
    __asm__ volatile (
        "jmp                         9f                \n\t"
        // Begin
        "0:                                            \n\t"
        "movq    (%%"FF_REG_d", %%"FF_REG_a"), %%mm3   \n\t"
        "movd    (%%"FF_REG_c", %%"FF_REG_S"), %%mm0   \n\t"
        "punpcklbw                %%mm7, %%mm0         \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm1  \n\t"
        "1:                                            \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm0  \n\t"
        "2:                                            \n\t"
        "psubw                    %%mm1, %%mm0         \n\t"
        "movl   8(%%"FF_REG_b", %%"FF_REG_a"), %%esi   \n\t"
        "pmullw                   %%mm3, %%mm0         \n\t"
        "psllw                       $7, %%mm1         \n\t"
        "paddw                    %%mm1, %%mm0         \n\t"

        "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a")      \n\t"

        "add                         $8, %%"FF_REG_a"  \n\t"
        // End
        "9:                                            \n\t"
        "lea " LOCAL_MANGLE(0b) ", %0                  \n\t"
        "lea " LOCAL_MANGLE(1b) ", %1                  \n\t"
        "lea " LOCAL_MANGLE(2b) ", %2                  \n\t"
        "dec                         %1                \n\t"
        "dec                         %2                \n\t"
        "sub                         %0, %1            \n\t"
        "sub                         %0, %2            \n\t"
        "lea " LOCAL_MANGLE(9b) ", %3                  \n\t"
        "sub                         %0, %3            \n\t"


        : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
          "=r" (fragmentLengthB)
    );

    xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
    fragmentPos = 0;

    for (i = 0; i < dstW / numSplits; i++) {
        int xx = xpos >> 16;   /* integer source pixel for output pixel i */

        if ((i & 3) == 0) {
            /* a..d: integer source offsets of the next four output pixels
             * relative to xx; decides which template fits */
            int a = 0;
            int b = ((xpos + xInc) >> 16) - xx;
            int c = ((xpos + xInc * 2) >> 16) - xx;
            int d = ((xpos + xInc * 3) >> 16) - xx;
            int inc = (d + 1 < 4);  /* 1 -> four input pixels suffice (fragment B) */
            uint8_t *fragment = inc ? fragmentB : fragmentA;
            x86_reg imm8OfPShufW1 = inc ? imm8OfPShufW1B : imm8OfPShufW1A;
            x86_reg imm8OfPShufW2 = inc ? imm8OfPShufW2B : imm8OfPShufW2A;
            x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA;
            int maxShift = 3 - (d + inc);
            int shift = 0;

            if (filterCode) {
                /* bilinear weights: fractional part of xpos, inverted and
                 * reduced to 7 bits */
                filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
                filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
                filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
                filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
                filterPos[i / 2] = xx;

                memcpy(filterCode + fragmentPos, fragment, fragmentLength);

                /* patch the pshufw immediates: 2-bit source selectors for
                 * each of the four output pixels */
                filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
                                                          ((b + inc) << 2) |
                                                          ((c + inc) << 4) |
                                                          ((d + inc) << 6);
                filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
                                                          (c << 4) |
                                                          (d << 6);

                if (i + 4 - inc >= dstW)
                    shift = maxShift; // avoid overread
                else if ((filterPos[i / 2] & 3) <= maxShift)
                    shift = filterPos[i / 2] & 3; // align

                if (shift && i >= shift) {
                    /* shifting the selectors by one pixel adds 1 to each
                     * 2-bit field, i.e. 0x55 per unit of shift */
                    filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
                    filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
                    filterPos[i / 2] -= shift;
                }
            }

            fragmentPos += fragmentLength;

            if (filterCode)
                filterCode[fragmentPos] = RET;
        }
        xpos += xInc;
    }
    if (filterCode)
        filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part

    return fragmentPos + 1;
}
|
||||
|
||||
/*
 * Run the runtime-generated MMXEXT horizontal scaler over one luma line.
 *
 * Register roles inside the asm: c = src pointer, D = dst pointer,
 * d = filter, b = filterPos, a = loop counter; the generated code is
 * invoked via "call *%4" eight times (the line is split into 8 chunks).
 * ebx is explicitly saved when it is not available as a scratch register;
 * on x86-64 the word at -8(%rsp) is saved/restored around the calls
 * (NOTE(review): presumably because the generated code's call/ret traffic
 * touches the red zone — confirm against the code generator above).
 * The trailing C loop replicates the last source pixel for output pixels
 * whose source position falls past the right edge.
 */
void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
                            int dstWidth, const uint8_t *src,
                            int srcW, int xInc)
{
    int32_t *filterPos = c->hLumFilterPos;
    int16_t *filter = c->hLumFilter;
    void *mmxextFilterCode = c->lumMmxextFilterCode;
    int i;
#if ARCH_X86_64
    uint64_t retsave;
#else
#if !HAVE_EBX_AVAILABLE
    uint64_t ebxsave;
#endif
#endif

    __asm__ volatile(
#if ARCH_X86_64
        "mov               -8(%%rsp), %%"FF_REG_a"  \n\t"
        "mov            %%"FF_REG_a", %5            \n\t"  // retsave
#else
#if !HAVE_EBX_AVAILABLE
        "mov               %%"FF_REG_b", %5         \n\t"  // ebxsave
#endif
#endif
        "pxor                  %%mm7, %%mm7         \n\t"
        "mov                      %0, %%"FF_REG_c"  \n\t"
        "mov                      %1, %%"FF_REG_D"  \n\t"
        "mov                      %2, %%"FF_REG_d"  \n\t"
        "mov                      %3, %%"FF_REG_b"  \n\t"
        "xor           %%"FF_REG_a", %%"FF_REG_a"   \n\t" // i
        PREFETCH"        (%%"FF_REG_c")             \n\t"
        PREFETCH"      32(%%"FF_REG_c")             \n\t"
        PREFETCH"      64(%%"FF_REG_c")             \n\t"

        /* call the generated fragment chain, then advance src by the
         * consumed source pixels and dst by the produced output bytes */
#if ARCH_X86_64
#define CALL_MMXEXT_FILTER_CODE \
        "movl            (%%"FF_REG_b"), %%esi      \n\t"\
        "call                    *%4                \n\t"\
        "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi   \n\t"\
        "add               %%"FF_REG_S", %%"FF_REG_c" \n\t"\
        "add               %%"FF_REG_a", %%"FF_REG_D" \n\t"\
        "xor               %%"FF_REG_a", %%"FF_REG_a" \n\t"\

#else
#define CALL_MMXEXT_FILTER_CODE \
        "movl (%%"FF_REG_b"), %%esi         \n\t"\
        "call         *%4                   \n\t"\
        "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\
        "add   %%"FF_REG_a", %%"FF_REG_D"   \n\t"\
        "xor   %%"FF_REG_a", %%"FF_REG_a"   \n\t"\

#endif /* ARCH_X86_64 */

        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE

#if ARCH_X86_64
        "mov                      %5, %%"FF_REG_a"  \n\t"
        "mov            %%"FF_REG_a", -8(%%rsp)     \n\t"
#else
#if !HAVE_EBX_AVAILABLE
        "mov                      %5, %%"FF_REG_b"  \n\t"
#endif
#endif
        :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
           "m" (mmxextFilterCode)
#if ARCH_X86_64
          ,"m"(retsave)
#else
#if !HAVE_EBX_AVAILABLE
          ,"m" (ebxsave)
#endif
#endif
        : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
#if ARCH_X86_64 || HAVE_EBX_AVAILABLE
         ,"%"FF_REG_b
#endif
    );

    /* right-edge handling: replicate the last source pixel (scaled by 128
     * to match the 7-bit filter normalization) */
    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
        dst[i] = src[srcW-1]*128;
}
|
||||
|
||||
/*
 * Run the runtime-generated MMXEXT horizontal scaler over one pair of
 * chroma lines (U then V). Same register protocol as
 * ff_hyscale_fast_mmxext(), but the generated code chain is invoked four
 * times per plane, resetting the src/dst pointers between the two planes.
 * The trailing C loop replicates the last source pixel of each plane for
 * the right edge.
 */
void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
                            int dstWidth, const uint8_t *src1,
                            const uint8_t *src2, int srcW, int xInc)
{
    int32_t *filterPos = c->hChrFilterPos;
    int16_t *filter = c->hChrFilter;
    void *mmxextFilterCode = c->chrMmxextFilterCode;
    int i;
#if ARCH_X86_64
    DECLARE_ALIGNED(8, uint64_t, retsave);
#else
#if !HAVE_EBX_AVAILABLE
    DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
#endif
    __asm__ volatile(
#if ARCH_X86_64
        "mov          -8(%%rsp), %%"FF_REG_a"       \n\t"
        "mov       %%"FF_REG_a", %7                 \n\t"  // retsave
#else
#if !HAVE_EBX_AVAILABLE
        "mov       %%"FF_REG_b", %7                 \n\t"  // ebxsave
#endif
#endif
        "pxor             %%mm7, %%mm7              \n\t"
        "mov                 %0, %%"FF_REG_c"       \n\t"
        "mov                 %1, %%"FF_REG_D"       \n\t"
        "mov                 %2, %%"FF_REG_d"       \n\t"
        "mov                 %3, %%"FF_REG_b"       \n\t"
        "xor      %%"FF_REG_a", %%"FF_REG_a"        \n\t" // i
        PREFETCH"   (%%"FF_REG_c")                  \n\t"
        PREFETCH" 32(%%"FF_REG_c")                  \n\t"
        PREFETCH" 64(%%"FF_REG_c")                  \n\t"

        /* first plane (U) */
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        /* switch to the second plane (V) and rerun the chain */
        "xor      %%"FF_REG_a", %%"FF_REG_a"        \n\t" // i
        "mov                 %5, %%"FF_REG_c"       \n\t" // src2
        "mov                 %6, %%"FF_REG_D"       \n\t" // dst2
        PREFETCH"   (%%"FF_REG_c")                  \n\t"
        PREFETCH" 32(%%"FF_REG_c")                  \n\t"
        PREFETCH" 64(%%"FF_REG_c")                  \n\t"

        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE
        CALL_MMXEXT_FILTER_CODE

#if ARCH_X86_64
        "mov                 %7, %%"FF_REG_a"       \n\t"
        "mov       %%"FF_REG_a", -8(%%rsp)          \n\t"
#else
#if !HAVE_EBX_AVAILABLE
        "mov                 %7, %%"FF_REG_b"       \n\t"
#endif
#endif
        :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
           "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
#if ARCH_X86_64
          ,"m"(retsave)
#else
#if !HAVE_EBX_AVAILABLE
          ,"m" (ebxsave)
#endif
#endif
        : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
#if ARCH_X86_64 || HAVE_EBX_AVAILABLE
         ,"%"FF_REG_b
#endif
    );

    /* right-edge handling: replicate the last source pixel of each plane */
    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
        dst1[i] = src1[srcW-1]*128;
        dst2[i] = src2[srcW-1]*128;
    }
}
|
||||
#endif //HAVE_INLINE_ASM
|
||||
740
externals/ffmpeg/libswscale/x86/input.asm
vendored
Executable file
740
externals/ffmpeg/libswscale/x86/input.asm
vendored
Executable file
@@ -0,0 +1,740 @@
|
||||
;******************************************************************************
|
||||
;* x86-optimized input routines; does shuffling of packed
|
||||
;* YUV formats into individual planes, and converts RGB
|
||||
;* into YUV planes also.
|
||||
;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_RODATA

; RGB->YUV conversion coefficients as 16-bit fixed-point words (negative
; values stored as two's complement).
%define RY 0x20DE
%define GY 0x4087
%define BY 0x0C88
%define RU 0xECFF
%define GU 0xDAC8
%define BU 0x3838
%define RV 0x3838
%define GV 0xD0E3
%define BV 0xF6E4

rgb_Yrnd:        times 4 dd 0x80100    ;  16.5 << 15
rgb_UVrnd:       times 4 dd 0x400100   ; 128.5 << 15

; Offsets of the per-format coefficient vectors inside the 'table' pointer
; argument (register tableq). The commented-out tables below document the
; contents expected at each offset.
%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq
%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq
%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq
%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq
%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq
%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq
%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq
%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq
%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq
%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq
%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq
%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq

%define rgba_Ycoeff_rb  16*4 + 16*12 + tableq
%define rgba_Ycoeff_br  16*4 + 16*13 + tableq
%define rgba_Ycoeff_ga  16*4 + 16*14 + tableq
%define rgba_Ycoeff_ag  16*4 + 16*15 + tableq
%define rgba_Ucoeff_rb  16*4 + 16*16 + tableq
%define rgba_Ucoeff_br  16*4 + 16*17 + tableq
%define rgba_Ucoeff_ga  16*4 + 16*18 + tableq
%define rgba_Ucoeff_ag  16*4 + 16*19 + tableq
%define rgba_Vcoeff_rb  16*4 + 16*20 + tableq
%define rgba_Vcoeff_br  16*4 + 16*21 + tableq
%define rgba_Vcoeff_ga  16*4 + 16*22 + tableq
%define rgba_Vcoeff_ag  16*4 + 16*23 + tableq

; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV

; rgba_Ycoeff_rb:  times 4 dw RY, BY
; rgba_Ycoeff_br:  times 4 dw BY, RY
; rgba_Ycoeff_ga:  times 4 dw GY, 0
; rgba_Ycoeff_ag:  times 4 dw 0, GY
; rgba_Ucoeff_rb:  times 4 dw RU, BU
; rgba_Ucoeff_br:  times 4 dw BU, RU
; rgba_Ucoeff_ga:  times 4 dw GU, 0
; rgba_Ucoeff_ag:  times 4 dw 0, GU
; rgba_Vcoeff_rb:  times 4 dw RV, BV
; rgba_Vcoeff_br:  times 4 dw BV, RV
; rgba_Vcoeff_ga:  times 4 dw GV, 0
; rgba_Vcoeff_ag:  times 4 dw 0, GV

; pshufb masks: expand packed 24-bit RGB into zero-extended 16-bit words
; (0x80 selects zero); _12x4 picks pixels {0,1} and {2,3} byte-aligned,
; _3x56 picks the overlapping {r0..r1} / {r2..r3} word groups.
shuf_rgb_12x4:   db 0, 0x80, 1, 0x80,  2, 0x80, 3, 0x80, \
                    6, 0x80, 7, 0x80,  8, 0x80, 9, 0x80
shuf_rgb_3x56:   db 2, 0x80, 3, 0x80,  4, 0x80, 5, 0x80, \
                    8, 0x80, 9, 0x80, 10, 0x80, 11, 0x80
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; RGB to Y/UV.
|
||||
;
|
||||
; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w);
|
||||
; and
|
||||
; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
|
||||
; const uint8_t *unused, int w);
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; %1 = nr. of XMM registers
|
||||
; %2 = rgb or bgr
|
||||
; Emit <rgb|bgr>24ToY for the current instruction set: convert packed
; 24-bit RGB/BGR to 16-bit luma, 8 pixels per iteration (mmsize == 16).
; %1 = nr. of XMM registers
; %2 = rgb or bgr
; %3 = (optional) name of a sibling variant whose .body is shared via jmp
;      (the two orderings differ only in the coefficient tables loaded)
%macro RGB24_TO_Y_FN 2-3
cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
%if mmsize == 8
    mova           m5, [%2_Ycoeff_12x4]
    mova           m6, [%2_Ycoeff_3x56]
%define coeff1 m5
%define coeff2 m6
%elif ARCH_X86_64
    mova           m8, [%2_Ycoeff_12x4]
    mova           m9, [%2_Ycoeff_3x56]
%define coeff1 m8
%define coeff2 m9
%else ; x86-32 && mmsize == 16
%define coeff1 [%2_Ycoeff_12x4]
%define coeff2 [%2_Ycoeff_3x56]
%endif ; x86-32/64 && mmsize == 8/16
%if (ARCH_X86_64 || mmsize == 8) && %0 == 3
    jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body
%else ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
.body:
%if cpuflag(ssse3)
    mova           m7, [shuf_rgb_12x4]
%define shuf_rgb1 m7
%if ARCH_X86_64
    mova          m10, [shuf_rgb_3x56]
%define shuf_rgb2 m10
%else ; x86-32
%define shuf_rgb2 [shuf_rgb_3x56]
%endif ; x86-32/64
%endif ; cpuflag(ssse3)
%if ARCH_X86_64
    movsxd         wq, wd
%endif
    ; iterate dst backwards with a negative byte offset (wq counts up to 0)
    add            wq, wq
    add          dstq, wq
    neg            wq
%if notcpuflag(ssse3)
    pxor           m7, m7            ; zero register for punpcklbw widening
%endif ; !cpuflag(ssse3)
    mova           m4, [rgb_Yrnd]
.loop:
%if cpuflag(ssse3)
    movu           m0, [srcq+0]      ; (byte) { Bx, Gx, Rx }[0-3]
    movu           m2, [srcq+12]     ; (byte) { Bx, Gx, Rx }[4-7]
    pshufb         m1, m0, shuf_rgb2 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
    pshufb         m0, shuf_rgb1     ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
    pshufb         m3, m2, shuf_rgb2 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
    pshufb         m2, shuf_rgb1     ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
%else ; !cpuflag(ssse3)
    movd           m0, [srcq+0]      ; (byte) { B0, G0, R0, B1 }
    movd           m1, [srcq+2]      ; (byte) { R0, B1, G1, R1 }
    movd           m2, [srcq+6]      ; (byte) { B2, G2, R2, B3 }
    movd           m3, [srcq+8]      ; (byte) { R2, B3, G3, R3 }
%if mmsize == 16 ; i.e. sse2
    punpckldq      m0, m2            ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 }
    punpckldq      m1, m3            ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 }
    movd           m2, [srcq+12]     ; (byte) { B4, G4, R4, B5 }
    movd           m3, [srcq+14]     ; (byte) { R4, B5, G5, R5 }
    movd           m5, [srcq+18]     ; (byte) { B6, G6, R6, B7 }
    movd           m6, [srcq+20]     ; (byte) { R6, B7, G7, R7 }
    punpckldq      m2, m5            ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 }
    punpckldq      m3, m6            ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 }
%endif ; mmsize == 16
    punpcklbw      m0, m7            ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
    punpcklbw      m1, m7            ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
    punpcklbw      m2, m7            ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
    punpcklbw      m3, m7            ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
%endif ; cpuflag(ssse3)
    add          srcq, 3 * mmsize / 2
    ; dot products against the split coefficient tables, then sum the halves
    pmaddwd        m0, coeff1        ; (dword) { B0*BY + G0*GY, B1*BY, B2*BY + G2*GY, B3*BY }
    pmaddwd        m1, coeff2        ; (dword) { R0*RY, G1+GY + R1*RY, R2*RY, G3+GY + R3*RY }
    pmaddwd        m2, coeff1        ; (dword) { B4*BY + G4*GY, B5*BY, B6*BY + G6*GY, B7*BY }
    pmaddwd        m3, coeff2        ; (dword) { R4*RY, G5+GY + R5*RY, R6*RY, G7+GY + R7*RY }
    paddd          m0, m1            ; (dword) { Bx*BY + Gx*GY + Rx*RY }[0-3]
    paddd          m2, m3            ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7]
    paddd          m0, m4            ; += rgb_Yrnd, i.e. (dword) { Y[0-3] }
    paddd          m2, m4            ; += rgb_Yrnd, i.e. (dword) { Y[4-7] }
    psrad          m0, 9
    psrad          m2, 9
    packssdw       m0, m2            ; (word) { Y[0-7] }
    mova    [dstq+wq], m0
    add            wq, mmsize
    jl .loop
    REP_RET
%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
%endmacro
|
||||
|
||||
; void <rgb|bgr>24ToUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *u1,
;                            const uint8_t *src, const uint8_t *u2, int w,
;                            uint32_t *table);
; Convert packed 24-bit RGB/BGR to planar chroma (U and V), 8 pixels/iter.
; %1 = nr. of XMM registers
; %2 = rgb or bgr
; %3 = if specified, the %2 variant tail-jumps into %3's shared .body
%macro RGB24_TO_UV_FN 2-3
cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%if ARCH_X86_64
    ; on x86-64 keep all four coefficient vectors resident in registers
    mova           m8, [%2_Ucoeff_12x4]
    mova           m9, [%2_Ucoeff_3x56]
    mova          m10, [%2_Vcoeff_12x4]
    mova          m11, [%2_Vcoeff_3x56]
%define coeffU1 m8
%define coeffU2 m9
%define coeffV1 m10
%define coeffV2 m11
%else ; x86-32: not enough registers, use memory operands directly
%define coeffU1 [%2_Ucoeff_12x4]
%define coeffU2 [%2_Ucoeff_3x56]
%define coeffV1 [%2_Vcoeff_12x4]
%define coeffV2 [%2_Vcoeff_3x56]
%endif ; x86-32/64
%if ARCH_X86_64 && %0 == 3
    ; bgr variant reuses the rgb variant's body (only coefficients differ)
    jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToUV %+ SUFFIX).body
%else ; ARCH_X86_64 && %0 == 3
.body:
%if cpuflag(ssse3)
    mova           m7, [shuf_rgb_12x4]
%define shuf_rgb1 m7
%if ARCH_X86_64
    mova          m12, [shuf_rgb_3x56]
%define shuf_rgb2 m12
%else ; x86-32
%define shuf_rgb2 [shuf_rgb_3x56]
%endif ; x86-32/64
%endif ; cpuflag(ssse3)
%if ARCH_X86_64
    movsxd         wq, dword r5m
%else ; x86-32
    mov            wq, r5m
%endif
    add            wq, wq              ; wq = width in output bytes (words)
    add         dstUq, wq
    add         dstVq, wq
    neg            wq                  ; count up from -w to 0
    mova           m6, [rgb_UVrnd]
%if notcpuflag(ssse3)
    pxor           m7, m7              ; zero reg for byte->word unpack
%endif
.loop:
%if cpuflag(ssse3)
    movu           m0, [srcq+0]           ; (byte) { Bx, Gx, Rx }[0-3]
    movu           m4, [srcq+12]          ; (byte) { Bx, Gx, Rx }[4-7]
    pshufb         m1, m0, shuf_rgb2      ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
    pshufb         m0, shuf_rgb1          ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
%else ; !cpuflag(ssse3)
    movd           m0, [srcq+0]           ; (byte) { B0, G0, R0, B1 }
    movd           m1, [srcq+2]           ; (byte) { R0, B1, G1, R1 }
    movd           m4, [srcq+6]           ; (byte) { B2, G2, R2, B3 }
    movd           m5, [srcq+8]           ; (byte) { R2, B3, G3, R3 }
%if mmsize == 16
    punpckldq      m0, m4                 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 }
    punpckldq      m1, m5                 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 }
    movd           m4, [srcq+12]          ; (byte) { B4, G4, R4, B5 }
    movd           m5, [srcq+14]          ; (byte) { R4, B5, G5, R5 }
%endif ; mmsize == 16
    punpcklbw      m0, m7                 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
    punpcklbw      m1, m7                 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
%endif ; cpuflag(ssse3)
    pmaddwd        m2, m0, coeffV1        ; (dword) { B0*BV + G0*GV, B1*BV, B2*BV + G2*GV, B3*BV }
    pmaddwd        m3, m1, coeffV2        ; (dword) { R0*RV, G1*GV + R1*RV, R2*RV, G3*GV + R3*RV }
    pmaddwd        m0, coeffU1            ; (dword) { B0*BU + G0*GU, B1*BU, B2*BU + G2*GU, B3*BU }
    pmaddwd        m1, coeffU2            ; (dword) { R0*RU, G1*GU + R1*RU, R2*RU, G3*GU + R3*RU }
    paddd          m0, m1                 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[0-3]
    paddd          m2, m3                 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[0-3]
%if cpuflag(ssse3)
    pshufb         m5, m4, shuf_rgb2      ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
    pshufb         m4, shuf_rgb1          ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
%else ; !cpuflag(ssse3)
%if mmsize == 16
    movd           m1, [srcq+18]          ; (byte) { B6, G6, R6, B7 }
    movd           m3, [srcq+20]          ; (byte) { R6, B7, G7, R7 }
    punpckldq      m4, m1                 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 }
    punpckldq      m5, m3                 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 }
%endif ; mmsize == 16 && !cpuflag(ssse3)
    punpcklbw      m4, m7                 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
    punpcklbw      m5, m7                 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
%endif ; cpuflag(ssse3)
    add          srcq, 3 * mmsize / 2     ; consumed mmsize/2 RGB24 pixels
    pmaddwd        m1, m4, coeffU1        ; (dword) { B4*BU + G4*GU, B5*BU, B6*BU + G6*GU, B7*BU }
    pmaddwd        m3, m5, coeffU2        ; (dword) { R4*RU, G5*GU + R5*RU, R6*RU, G7*GU + R7*RU }
    pmaddwd        m4, coeffV1            ; (dword) { B4*BV + G4*GV, B5*BV, B6*BV + G6*GV, B7*BV }
    pmaddwd        m5, coeffV2            ; (dword) { R4*RV, G5*GV + R5*RV, R6*RV, G7*GV + R7*RV }
    paddd          m1, m3                 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[4-7]
    paddd          m4, m5                 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[4-7]
    paddd          m0, m6                 ; += rgb_UVrnd, i.e. (dword) { U[0-3] }
    paddd          m2, m6                 ; += rgb_UVrnd, i.e. (dword) { V[0-3] }
    paddd          m1, m6                 ; += rgb_UVrnd, i.e. (dword) { U[4-7] }
    paddd          m4, m6                 ; += rgb_UVrnd, i.e. (dword) { V[4-7] }
    psrad          m0, 9
    psrad          m2, 9
    psrad          m1, 9
    psrad          m4, 9
    packssdw       m0, m1                 ; (word) { U[0-7] }
    packssdw       m2, m4                 ; (word) { V[0-7] }
%if mmsize == 8
    mova    [dstUq+wq], m0
    mova    [dstVq+wq], m2
%else ; mmsize == 16
    mova    [dstUq+wq], m0
    mova    [dstVq+wq], m2
%endif ; mmsize == 8/16
    add            wq, mmsize
    jl .loop
    REP_RET
%endif ; ARCH_X86_64 && %0 == 3
%endmacro
|
||||
|
||||
; Instantiate the rgb24/bgr24 -> Y and -> UV converters for the current ISA
; (bgr variants tail-jump into the rgb bodies; only coefficients differ).
; %1 = nr. of XMM registers for rgb-to-Y func
; %2 = nr. of XMM registers for rgb-to-UV func
%macro RGB24_FUNCS 2
RGB24_TO_Y_FN  %1, rgb
RGB24_TO_Y_FN  %1, bgr, rgb
RGB24_TO_UV_FN %2, rgb
RGB24_TO_UV_FN %2, bgr, rgb
%endmacro
|
||||
|
||||
; Instantiate the RGB24 converters per instruction set.
%if ARCH_X86_32
INIT_MMX mmx
RGB24_FUNCS 0, 0
%endif

INIT_XMM sse2
RGB24_FUNCS 10, 12

INIT_XMM ssse3
RGB24_FUNCS 11, 13

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
RGB24_FUNCS 11, 13
%endif
|
||||
|
||||
; void <rgba|bgra|argb|abgr>ToY_<opt>(uint8_t *dst, const uint8_t *src, ...,
;                                     int w, uint32_t *table);
; Convert packed 32-bit RGB (any channel order) to planar luma.
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
; %6 = if specified, tail-jump into %6's shared .body
%macro RGB32_TO_Y_FN 5-6
cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
    mova           m5, [rgba_Ycoeff_%2%4]
    mova           m6, [rgba_Ycoeff_%3%5]
%if %0 == 6
    jmp mangle(private_prefix %+ _ %+ %6 %+ ToY %+ SUFFIX).body
%else ; %0 == 6
.body:
%if ARCH_X86_64
    movsxd         wq, wd
%endif
    add            wq, wq                 ; wq = width in output bytes (words)
    sub            wq, mmsize - 1         ; main loop handles w rounded down
    lea          srcq, [srcq+wq*2]
    add          dstq, wq
    neg            wq
    mova           m4, [rgb_Yrnd]
    pcmpeqb        m7, m7
    psrlw          m7, 8                  ; (word) { 0x00ff } x4, for DEINTB
.loop:
    ; FIXME check alignment and use mova
    movu           m0, [srcq+wq*2+0]      ; (byte) { Bx, Gx, Rx, xx }[0-3]
    movu           m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
    DEINTB          1,  0,  3,  2, 7      ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
    pmaddwd        m1, m5                 ; (dword) { Bx*BY + Rx*RY }[0-3]
    pmaddwd        m0, m6                 ; (dword) { Gx*GY }[0-3]
    pmaddwd        m3, m5                 ; (dword) { Bx*BY + Rx*RY }[4-7]
    pmaddwd        m2, m6                 ; (dword) { Gx*GY }[4-7]
    paddd          m0, m4                 ; += rgb_Yrnd
    paddd          m2, m4                 ; += rgb_Yrnd
    paddd          m0, m1                 ; (dword) { Y[0-3] }
    paddd          m2, m3                 ; (dword) { Y[4-7] }
    psrad          m0, 9
    psrad          m2, 9
    packssdw       m0, m2                 ; (word) { Y[0-7] }
    mova    [dstq+wq], m0
    add            wq, mmsize
    jl .loop
    ; scalar-ish tail loop for the last (mmsize/2 - 1) pixels
    sub            wq, mmsize - 1
    jz .end
    add          srcq, 2*mmsize - 2
    add          dstq, mmsize - 1
.loop2:
    movd           m0, [srcq+wq*2+0]      ; (byte) { Bx, Gx, Rx, xx }[0-3]
    DEINTB          1,  0,  3,  2, 7      ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
    pmaddwd        m1, m5                 ; (dword) { Bx*BY + Rx*RY }[0-3]
    pmaddwd        m0, m6                 ; (dword) { Gx*GY }[0-3]
    paddd          m0, m4                 ; += rgb_Yrnd
    paddd          m0, m1                 ; (dword) { Y[0-3] }
    psrad          m0, 9
    packssdw       m0, m0                 ; (word) { Y[0-7] }
    movd    [dstq+wq], m0
    add            wq, 2
    jl .loop2
.end:
    REP_RET
%endif ; %0 == 6
%endmacro
|
||||
|
||||
; void <rgba|bgra|argb|abgr>ToUV_<opt>(uint8_t *dstU, uint8_t *dstV, ...,
;                                      const uint8_t *src, ..., int w,
;                                      uint32_t *table);
; Convert packed 32-bit RGB (any channel order) to planar chroma.
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
; %6 = if specified, tail-jump into %6's shared .body (x86-64 only)
%macro RGB32_TO_UV_FN 5-6
cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%if ARCH_X86_64
    mova           m8, [rgba_Ucoeff_%2%4]
    mova           m9, [rgba_Ucoeff_%3%5]
    mova          m10, [rgba_Vcoeff_%2%4]
    mova          m11, [rgba_Vcoeff_%3%5]
%define coeffU1 m8
%define coeffU2 m9
%define coeffV1 m10
%define coeffV2 m11
%else ; x86-32: not enough registers, use memory operands directly
%define coeffU1 [rgba_Ucoeff_%2%4]
%define coeffU2 [rgba_Ucoeff_%3%5]
%define coeffV1 [rgba_Vcoeff_%2%4]
%define coeffV2 [rgba_Vcoeff_%3%5]
%endif ; x86-64/32
%if ARCH_X86_64 && %0 == 6
    jmp mangle(private_prefix %+ _ %+ %6 %+ ToUV %+ SUFFIX).body
%else ; ARCH_X86_64 && %0 == 6
.body:
%if ARCH_X86_64
    movsxd         wq, dword r5m
%else ; x86-32
    mov            wq, r5m
%endif
    add            wq, wq                 ; wq = width in output bytes (words)
    sub            wq, mmsize - 1         ; main loop handles w rounded down
    add         dstUq, wq
    add         dstVq, wq
    lea          srcq, [srcq+wq*2]
    neg            wq
    pcmpeqb        m7, m7
    psrlw          m7, 8                  ; (word) { 0x00ff } x4, for DEINTB
    mova           m6, [rgb_UVrnd]
.loop:
    ; FIXME check alignment and use mova
    movu           m0, [srcq+wq*2+0]      ; (byte) { Bx, Gx, Rx, xx }[0-3]
    movu           m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
    DEINTB          1,  0,  5,  4, 7      ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
    pmaddwd        m3, m1, coeffV1        ; (dword) { Bx*BV + Rx*RV }[0-3]
    pmaddwd        m2, m0, coeffV2        ; (dword) { Gx*GV }[0-3]
    pmaddwd        m1, coeffU1            ; (dword) { Bx*BU + Rx*RU }[0-3]
    pmaddwd        m0, coeffU2            ; (dword) { Gx*GU }[0-3]
    paddd          m3, m6                 ; += rgb_UVrnd
    paddd          m1, m6                 ; += rgb_UVrnd
    paddd          m2, m3                 ; (dword) { V[0-3] }
    paddd          m0, m1                 ; (dword) { U[0-3] }
    pmaddwd        m3, m5, coeffV1        ; (dword) { Bx*BV + Rx*RV }[4-7]
    pmaddwd        m1, m4, coeffV2        ; (dword) { Gx*GV }[4-7]
    pmaddwd        m5, coeffU1            ; (dword) { Bx*BU + Rx*RU }[4-7]
    pmaddwd        m4, coeffU2            ; (dword) { Gx*GU }[4-7]
    paddd          m3, m6                 ; += rgb_UVrnd
    paddd          m5, m6                 ; += rgb_UVrnd
    psrad          m0, 9
    paddd          m1, m3                 ; (dword) { V[4-7] }
    paddd          m4, m5                 ; (dword) { U[4-7] }
    psrad          m2, 9
    psrad          m4, 9
    psrad          m1, 9
    packssdw       m0, m4                 ; (word) { U[0-7] }
    packssdw       m2, m1                 ; (word) { V[0-7] }
%if mmsize == 8
    mova    [dstUq+wq], m0
    mova    [dstVq+wq], m2
%else ; mmsize == 16
    mova    [dstUq+wq], m0
    mova    [dstVq+wq], m2
%endif ; mmsize == 8/16
    add            wq, mmsize
    jl .loop
    ; scalar-ish tail loop for the last (mmsize/2 - 1) pixels
    sub            wq, mmsize - 1
    jz .end
    add          srcq, 2*mmsize - 2
    add         dstUq, mmsize - 1
    add         dstVq, mmsize - 1
.loop2:
    movd           m0, [srcq+wq*2]        ; (byte) { Bx, Gx, Rx, xx }[0-3]
    DEINTB          1,  0,  5,  4, 7      ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
    pmaddwd        m3, m1, coeffV1        ; (dword) { Bx*BV + Rx*RV }[0-3]
    pmaddwd        m2, m0, coeffV2        ; (dword) { Gx*GV }[0-3]
    pmaddwd        m1, coeffU1            ; (dword) { Bx*BU + Rx*RU }[0-3]
    pmaddwd        m0, coeffU2            ; (dword) { Gx*GU }[0-3]
    paddd          m3, m6                 ; += rgb_UVrnd
    paddd          m1, m6                 ; += rgb_UVrnd
    paddd          m2, m3                 ; (dword) { V[0-3] }
    paddd          m0, m1                 ; (dword) { U[0-3] }
    psrad          m0, 9
    psrad          m2, 9
    packssdw       m0, m0                 ; (word) { U[0-7] }
    packssdw       m2, m2                 ; (word) { V[0-7] }
    movd   [dstUq+wq], m0
    movd   [dstVq+wq], m2
    add            wq, 2
    jl .loop2
.end:
    REP_RET
%endif ; ARCH_X86_64 && %0 == 6
%endmacro
|
||||
|
||||
; Instantiate all four 32-bit channel orders (rgba/bgra/argb/abgr) for both
; the Y and the UV converter; non-rgba orders share rgba's body.
; %1 = nr. of XMM registers for rgb-to-Y func
; %2 = nr. of XMM registers for rgb-to-UV func
%macro RGB32_FUNCS 2
RGB32_TO_Y_FN %1, r, g, b, a
RGB32_TO_Y_FN %1, b, g, r, a, rgba
RGB32_TO_Y_FN %1, a, r, g, b, rgba
RGB32_TO_Y_FN %1, a, b, g, r, rgba

RGB32_TO_UV_FN %2, r, g, b, a
RGB32_TO_UV_FN %2, b, g, r, a, rgba
RGB32_TO_UV_FN %2, a, r, g, b, rgba
RGB32_TO_UV_FN %2, a, b, g, r, rgba
%endmacro
|
||||
|
||||
; Instantiate the RGB32 converters per instruction set.
%if ARCH_X86_32
INIT_MMX mmx
RGB32_FUNCS 0, 0
%endif

INIT_XMM sse2
RGB32_FUNCS 8, 12

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
RGB32_FUNCS 8, 12
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
|
||||
;
|
||||
; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w);
|
||||
; and
|
||||
; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
|
||||
; const uint8_t *unused, int w);
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; Inner loop: extract the luma bytes from packed YUYV/UYVY into dstq.
; %1 = a (aligned) or u (unaligned)
; %2 = yuyv or uyvy
%macro LOOP_YUYV_TO_Y 2
.loop_%1:
    mov%1          m0, [srcq+wq*2]        ; (byte) { Y0, U0, Y1, V0, ... }
    mov%1          m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
%ifidn %2, yuyv
    pand           m0, m2                 ; (word) { Y0, Y1, ..., Y7 }
    pand           m1, m2                 ; (word) { Y8, Y9, ..., Y15 }
%else ; uyvy: luma sits in the high bytes
    psrlw          m0, 8                  ; (word) { Y0, Y1, ..., Y7 }
    psrlw          m1, 8                  ; (word) { Y8, Y9, ..., Y15 }
%endif ; yuyv/uyvy
    packuswb       m0, m1                 ; (byte) { Y0, ..., Y15 }
    mova    [dstq+wq], m0
    add            wq, mmsize
    jl .loop_%1
    REP_RET
%endmacro
|
||||
|
||||
; void <yuyv|uyvy>ToY_<opt>(uint8_t *dst, ..., const uint8_t *src, int w);
; Dispatches to an aligned or unaligned inner loop based on src alignment.
; %1 = nr. of XMM registers
; %2 = yuyv or uyvy
; %3 = if specified, it means that unaligned and aligned code in loop
;      will be the same (i.e. YUYV+AVX), and thus we don't need to
;      split the loop in an aligned and unaligned case
%macro YUYV_TO_Y_FN 2-3
cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
%if ARCH_X86_64
    movsxd         wq, wd
%endif
    add          dstq, wq
%if mmsize == 16
    test         srcq, 15                 ; flags consumed by jnz below
%endif
    lea          srcq, [srcq+wq*2]
%ifidn %2, yuyv
    pcmpeqb        m2, m2                 ; (byte) { 0xff } x 16
    psrlw          m2, 8                  ; (word) { 0x00ff } x 8
%endif ; yuyv
%if mmsize == 16
    jnz .loop_u_start
    neg            wq
    LOOP_YUYV_TO_Y a, %2
.loop_u_start:
    neg            wq
    LOOP_YUYV_TO_Y u, %2
%else ; mmsize == 8
    neg            wq
    LOOP_YUYV_TO_Y a, %2
%endif ; mmsize == 8/16
%endmacro
|
||||
|
||||
; Inner loop: deinterleave the chroma bytes of packed YUYV/UYVY into
; separate U and V planes.
; %1 = a (aligned) or u (unaligned)
; %2 = yuyv or uyvy
%macro LOOP_YUYV_TO_UV 2
.loop_%1:
%ifidn %2, yuyv
    mov%1          m0, [srcq+wq*4]        ; (byte) { Y0, U0, Y1, V0, ... }
    mov%1          m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
    psrlw          m0, 8                  ; (word) { U0, V0, ..., U3, V3 }
    psrlw          m1, 8                  ; (word) { U4, V4, ..., U7, V7 }
%else ; uyvy
%if cpuflag(avx)
    ; AVX 3-operand form folds the load into the mask op
    vpand          m0, m2, [srcq+wq*4]        ; (word) { U0, V0, ..., U3, V3 }
    vpand          m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 }
%else
    mov%1          m0, [srcq+wq*4]        ; (byte) { Y0, U0, Y1, V0, ... }
    mov%1          m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
    pand           m0, m2                 ; (word) { U0, V0, ..., U3, V3 }
    pand           m1, m2                 ; (word) { U4, V4, ..., U7, V7 }
%endif
%endif ; yuyv/uyvy
    packuswb       m0, m1                 ; (byte) { U0, V0, ..., U7, V7 }
    pand           m1, m0, m2             ; (word) { U0, U1, ..., U7 }
    psrlw          m0, 8                  ; (word) { V0, V1, ..., V7 }
%if mmsize == 16
    packuswb       m1, m0                 ; (byte) { U0, ..., U7, V0, ..., V7 }
    movh   [dstUq+wq], m1
    movhps [dstVq+wq], m1
%else ; mmsize == 8
    packuswb       m1, m1                 ; (byte) { U0, ..., U3 }
    packuswb       m0, m0                 ; (byte) { V0, ..., V3 }
    movh   [dstUq+wq], m1
    movh   [dstVq+wq], m0
%endif ; mmsize == 8/16
    add            wq, mmsize / 2
    jl .loop_%1
    REP_RET
%endmacro
|
||||
|
||||
; void <yuyv|uyvy>ToUV_<opt>(uint8_t *dstU, uint8_t *dstV, ...,
;                            const uint8_t *src, ..., int w);
; %1 = nr. of XMM registers
; %2 = yuyv or uyvy
; %3 = if specified, it means that unaligned and aligned code in loop
;      will be the same (i.e. UYVY+AVX), and thus we don't need to
;      split the loop in an aligned and unaligned case
%macro YUYV_TO_UV_FN 2-3
cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%if ARCH_X86_64
    movsxd         wq, dword r5m
%else ; x86-32
    mov            wq, r5m
%endif
    add         dstUq, wq
    add         dstVq, wq
%if mmsize == 16 && %0 == 2
    test         srcq, 15                 ; flags consumed by jnz below
%endif
    lea          srcq, [srcq+wq*4]
    pcmpeqb        m2, m2                 ; (byte) { 0xff } x 16
    psrlw          m2, 8                  ; (word) { 0x00ff } x 8
    ; NOTE: if uyvy+avx, u/a are identical
%if mmsize == 16 && %0 == 2
    jnz .loop_u_start
    neg            wq
    LOOP_YUYV_TO_UV a, %2
.loop_u_start:
    neg            wq
    LOOP_YUYV_TO_UV u, %2
%else ; mmsize == 8
    neg            wq
    LOOP_YUYV_TO_UV a, %2
%endif ; mmsize == 8/16
%endmacro
|
||||
|
||||
; Inner loop: split the interleaved NV12/NV21 chroma plane into separate
; U and V planes (NV21 just swaps the destinations).
; %1 = a (aligned) or u (unaligned)
; %2 = nv12 or nv21
%macro LOOP_NVXX_TO_UV 2
.loop_%1:
    mov%1          m0, [srcq+wq*2]        ; (byte) { U0, V0, U1, V1, ... }
    mov%1          m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... }
    pand           m2, m0, m5             ; (word) { U0, U1, ..., U7 }
    pand           m3, m1, m5             ; (word) { U8, U9, ..., U15 }
    psrlw          m0, 8                  ; (word) { V0, V1, ..., V7 }
    psrlw          m1, 8                  ; (word) { V8, V9, ..., V15 }
    packuswb       m2, m3                 ; (byte) { U0, ..., U15 }
    packuswb       m0, m1                 ; (byte) { V0, ..., V15 }
%ifidn %2, nv12
    mova   [dstUq+wq], m2
    mova   [dstVq+wq], m0
%else ; nv21: first interleaved byte is V
    mova   [dstVq+wq], m2
    mova   [dstUq+wq], m0
%endif ; nv12/21
    add            wq, mmsize
    jl .loop_%1
    REP_RET
%endmacro
|
||||
|
||||
; void <nv12|nv21>ToUV_<opt>(uint8_t *dstU, uint8_t *dstV, ...,
;                            const uint8_t *src, ..., int w);
; %1 = nr. of XMM registers
; %2 = nv12 or nv21
%macro NVXX_TO_UV_FN 2
cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%if ARCH_X86_64
    movsxd         wq, dword r5m
%else ; x86-32
    mov            wq, r5m
%endif
    add         dstUq, wq
    add         dstVq, wq
%if mmsize == 16
    test         srcq, 15                 ; flags consumed by jnz below
%endif
    lea          srcq, [srcq+wq*2]
    pcmpeqb        m5, m5                 ; (byte) { 0xff } x 16
    psrlw          m5, 8                  ; (word) { 0x00ff } x 8
%if mmsize == 16
    jnz .loop_u_start
    neg            wq
    LOOP_NVXX_TO_UV a, %2
.loop_u_start:
    neg            wq
    LOOP_NVXX_TO_UV u, %2
%else ; mmsize == 8
    neg            wq
    LOOP_NVXX_TO_UV a, %2
%endif ; mmsize == 8/16
%endmacro
|
||||
|
||||
; Instantiate the packed-pixel shufflers per instruction set.
%if ARCH_X86_32
INIT_MMX mmx
YUYV_TO_Y_FN  0, yuyv
YUYV_TO_Y_FN  0, uyvy
YUYV_TO_UV_FN 0, yuyv
YUYV_TO_UV_FN 0, uyvy
NVXX_TO_UV_FN 0, nv12
NVXX_TO_UV_FN 0, nv21
%endif

INIT_XMM sse2
YUYV_TO_Y_FN  3, yuyv
YUYV_TO_Y_FN  2, uyvy
YUYV_TO_UV_FN 3, yuyv
YUYV_TO_UV_FN 3, uyvy
NVXX_TO_UV_FN 5, nv12
NVXX_TO_UV_FN 5, nv21

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but
; that's not faster in practice
YUYV_TO_UV_FN 3, yuyv
YUYV_TO_UV_FN 3, uyvy, 1
NVXX_TO_UV_FN 5, nv12
NVXX_TO_UV_FN 5, nv21
%endif
|
||||
425
externals/ffmpeg/libswscale/x86/output.asm
vendored
Executable file
425
externals/ffmpeg/libswscale/x86/output.asm
vendored
Executable file
@@ -0,0 +1,425 @@
|
||||
;******************************************************************************
|
||||
;* x86-optimized vertical line scaling functions
|
||||
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
;* Kieran Kunhya <kieran@kunhya.com>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Rounding, bias and clipping constants for the vertical scalers below.
minshort:      times 8 dw 0x8000
yuv2yuvX_16_start:  times 4 dd 0x4000 - 0x40000000
yuv2yuvX_10_start:  times 4 dd 0x10000
yuv2yuvX_9_start:   times 4 dd 0x20000
yuv2yuvX_10_upper:  times 8 dw 0x3ff
yuv2yuvX_9_upper:   times 8 dw 0x1ff
pd_4:          times 4 dd 4
pd_4min0x40000:times 4 dd 4 - (0x40000)
pw_16:         times 8 dw 16
pw_32:         times 8 dw 32
pw_512:        times 8 dw 512
pw_1024:       times 8 dw 1024

SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; vertical line scaling
|
||||
;
|
||||
; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
|
||||
; const uint8_t *dither, int offset)
|
||||
; and
|
||||
; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize,
|
||||
; const int16_t **src, uint8_t *dst, int dstW,
|
||||
; const uint8_t *dither, int offset)
|
||||
;
|
||||
; Scale one or $filterSize lines of source data to generate one line of output
|
||||
; data. The input is 15 bits in int16_t if $output_size is [8,10] and 19 bits in
|
||||
; int32_t if $output_size is 16. $filter is 12 bits. $filterSize is a multiple
|
||||
; of 2. $offset is either 0 or 3. $dither holds 8 values.
|
||||
;-----------------------------------------------------------------------------
|
||||
; Core pixel loop of yuv2planeX: for each group of pixels, accumulate
; fltsize input lines weighted by the 12-bit filter coefficients, then
; round/shift/clip to %1-bit output.
; %1 = output bit depth (8/9/10/16), %2 = store alignment tag (a/u)
%macro yuv2planeX_mainloop 2
.pixelloop_%2:
%assign %%i 0
    ; the rep here is for the 8-bit output MMX case, where dither covers
    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
    ; pixels per iteration. In order to not have to keep track of where
    ; we are w.r.t. dithering, we unroll the MMX/8-bit loop x2.
%if %1 == 8
%assign %%repcnt 16/mmsize
%else
%assign %%repcnt 1
%endif

%rep %%repcnt

%if %1 == 8
    ; start accumulators from the pre-shifted dither values
%if ARCH_X86_32
    mova            m2, [rsp+mmsize*(0+%%i)]
    mova            m1, [rsp+mmsize*(1+%%i)]
%else ; x86-64
    mova            m2, m8
    mova            m1, m_dith
%endif ; x86-32/64
%else ; %1 == 9/10/16: start from the rounding constant
    mova            m1, [yuv2yuvX_%1_start]
    mova            m2, m1
%endif ; %1 == 8/9/10/16
    movsx     cntr_reg, fltsizem
.filterloop_%2_ %+ %%i:
    ; input pixels: two source lines per iteration (filterSize is even)
    mov             r6, [srcq+gprsize*cntr_reg-2*gprsize]
%if %1 == 16
    mova            m3, [r6+r5*4]
    mova            m5, [r6+r5*4+mmsize]
%else ; %1 == 8/9/10
    mova            m3, [r6+r5*2]
%endif ; %1 == 8/9/10/16
    mov             r6, [srcq+gprsize*cntr_reg-gprsize]
%if %1 == 16
    mova            m4, [r6+r5*4]
    mova            m6, [r6+r5*4+mmsize]
%else ; %1 == 8/9/10
    mova            m4, [r6+r5*2]
%endif ; %1 == 8/9/10/16

    ; coefficients
    movd            m0, [filterq+2*cntr_reg-4] ; coeff[0], coeff[1]
%if %1 == 16
    pshuflw         m7, m0, 0              ; coeff[0]
    pshuflw         m0, m0, 0x55           ; coeff[1]
    pmovsxwd        m7, m7                 ; word -> dword
    pmovsxwd        m0, m0                 ; word -> dword

    pmulld          m3, m7
    pmulld          m5, m7
    pmulld          m4, m0
    pmulld          m6, m0

    paddd           m2, m3
    paddd           m1, m5
    paddd           m2, m4
    paddd           m1, m6
%else ; %1 == 10/9/8: interleave the two lines and use pmaddwd
    punpcklwd       m5, m3, m4
    punpckhwd       m3, m4
    SPLATD          m0

    pmaddwd         m5, m0
    pmaddwd         m3, m0

    paddd           m2, m5
    paddd           m1, m3
%endif ; %1 == 8/9/10/16

    sub       cntr_reg, 2
    jg .filterloop_%2_ %+ %%i

%if %1 == 16
    psrad           m2, 31 - %1
    psrad           m1, 31 - %1
%else ; %1 == 10/9/8
    psrad           m2, 27 - %1
    psrad           m1, 27 - %1
%endif ; %1 == 8/9/10/16

%if %1 == 8
    packssdw        m2, m1
    packuswb        m2, m2
    movh  [dstq+r5*1], m2
%else ; %1 == 9/10/16
%if %1 == 16
    packssdw        m2, m1
    paddw           m2, [minshort]         ; signed -> unsigned bias
%else ; %1 == 9/10
%if cpuflag(sse4)
    packusdw        m2, m1
%else ; mmxext/sse2: clamp negatives by hand (m6 is zero here)
    packssdw        m2, m1
    pmaxsw          m2, m6
%endif ; mmxext/sse2/sse4/avx
    pminsw          m2, [yuv2yuvX_%1_upper] ; clip to the bit-depth maximum
%endif ; %1 == 9/10/16
    mov%2 [dstq+r5*2], m2
%endif ; %1 == 8/9/10/16

    add             r5, mmsize/2
    sub             wd, mmsize/2

%assign %%i %%i+2
%endrep
    jg .pixelloop_%2
%endmacro
|
||||
|
||||
; void yuv2planeX_<%1>_<opt>(const int16_t *filter, int filterSize,
;                            const int16_t **src, uint8_t *dst, int dstW,
;                            const uint8_t *dither, int offset)
; Multi-tap vertical scaling; sets up dither (8-bit output only) and
; dispatches to the aligned or unaligned main loop.
; %1 = output bit depth, %2 = nr. of XMM registers, %3 = nr. of GPR args
%macro yuv2planeX_fn 3

%if ARCH_X86_32
%define cntr_reg fltsizeq
%define movsx mov
%else
%define cntr_reg r7
%define movsx movsxd
%endif

cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%if %1 == 8 || %1 == 9 || %1 == 10
    pxor            m6, m6                 ; zero reg (also pmaxsw floor)
%endif ; %1 == 8/9/10

%if %1 == 8
%if ARCH_X86_32
    ; x86-32 spills the widened dither vectors to an aligned stack area
%assign pad 0x2c - (stack_offset & 15)
    SUB            rsp, pad
%define m_dith m7
%else ; x86-64
%define m_dith m9
%endif ; x86-32

    ; create registers holding dither
    movq        m_dith, [ditherq]          ; dither
    test       offsetd, offsetd
    jz              .no_rot
%if mmsize == 16
    punpcklqdq  m_dith, m_dith
%endif ; mmsize == 16
    PALIGNR     m_dith, m_dith, 3, m0      ; rotate dither by the offset
.no_rot:
%if mmsize == 16
    punpcklbw   m_dith, m6
%if ARCH_X86_64
    punpcklwd       m8, m_dith, m6
    pslld           m8, 12
%else ; x86-32
    punpcklwd       m5, m_dith, m6
    pslld           m5, 12
%endif ; x86-32/64
    punpckhwd   m_dith, m6
    pslld       m_dith, 12
%if ARCH_X86_32
    mova      [rsp+ 0], m5
    mova      [rsp+16], m_dith
%endif
%else ; mmsize == 8
    punpcklbw       m5, m_dith, m6
    punpckhbw   m_dith, m6
    punpcklwd       m4, m5, m6
    punpckhwd       m5, m6
    punpcklwd       m3, m_dith, m6
    punpckhwd   m_dith, m6
    pslld           m4, 12
    pslld           m5, 12
    pslld           m3, 12
    pslld       m_dith, 12
    mova      [rsp+ 0], m4
    mova      [rsp+ 8], m5
    mova      [rsp+16], m3
    mova      [rsp+24], m_dith
%endif ; mmsize == 8/16
%endif ; %1 == 8

    xor             r5, r5                 ; r5 = output pixel index

%if mmsize == 8 || %1 == 8
    yuv2planeX_mainloop %1, a
%else ; mmsize == 16
    test          dstq, 15
    jnz .unaligned
    yuv2planeX_mainloop %1, a
    REP_RET
.unaligned:
    yuv2planeX_mainloop %1, u
%endif ; mmsize == 8/16

%if %1 == 8
%if ARCH_X86_32
    ADD            rsp, pad
    RET
%else ; x86-64
    REP_RET
%endif ; x86-32/64
%else ; %1 == 9/10/16
    REP_RET
%endif ; %1 == 8/9/10/16
%endmacro
|
||||
|
||||
; Instantiate yuv2planeX per bit depth and instruction set.
%if ARCH_X86_32
INIT_MMX mmxext
yuv2planeX_fn  8,  0, 7
yuv2planeX_fn  9,  0, 5
yuv2planeX_fn 10,  0, 5
%endif

INIT_XMM sse2
yuv2planeX_fn  8, 10, 7
yuv2planeX_fn  9,  7, 5
yuv2planeX_fn 10,  7, 5

INIT_XMM sse4
yuv2planeX_fn  8, 10, 7
yuv2planeX_fn  9,  7, 5
yuv2planeX_fn 10,  7, 5
yuv2planeX_fn 16,  8, 5

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
yuv2planeX_fn  8, 10, 7
yuv2planeX_fn  9,  7, 5
yuv2planeX_fn 10,  7, 5
%endif
|
||||
|
||||
; Core pixel loop of yuv2plane1: add dither/rounding, shift down to the
; output depth, clip/pack and store.
; %1 = output bit depth (8/9/10/16), %2 = alignment (u/a)
%macro yuv2plane1_mainloop 2
.loop_%2:
%if %1 == 8
    paddsw          m0, m2, [srcq+wq*2+mmsize*0] ; += dither (even half)
    paddsw          m1, m3, [srcq+wq*2+mmsize*1] ; += dither (odd half)
    psraw           m0, 7
    psraw           m1, 7
    packuswb        m0, m1
    mov%2    [dstq+wq], m0
%elif %1 == 16
    paddd           m0, m4, [srcq+wq*4+mmsize*0] ; += rounding constant
    paddd           m1, m4, [srcq+wq*4+mmsize*1]
    paddd           m2, m4, [srcq+wq*4+mmsize*2]
    paddd           m3, m4, [srcq+wq*4+mmsize*3]
    psrad           m0, 3
    psrad           m1, 3
    psrad           m2, 3
    psrad           m3, 3
%if cpuflag(sse4) ; avx/sse4: unsigned saturating pack
    packusdw        m0, m1
    packusdw        m2, m3
%else ; mmx/sse2: signed pack, then undo the minshort bias
    packssdw        m0, m1
    packssdw        m2, m3
    paddw           m0, m5
    paddw           m2, m5
%endif ; mmx/sse2/sse4/avx
    mov%2 [dstq+wq*2+mmsize*0], m0
    mov%2 [dstq+wq*2+mmsize*1], m2
%else ; %1 == 9/10
    paddsw          m0, m2, [srcq+wq*2+mmsize*0] ; += rounding constant
    paddsw          m1, m2, [srcq+wq*2+mmsize*1]
    psraw           m0, 15 - %1
    psraw           m1, 15 - %1
    pmaxsw          m0, m4                 ; clamp negatives to 0
    pmaxsw          m1, m4
    pminsw          m0, m3                 ; clip to the bit-depth maximum
    pminsw          m1, m3
    mov%2 [dstq+wq*2+mmsize*0], m0
    mov%2 [dstq+wq*2+mmsize*1], m1
%endif
    add             wq, mmsize
    jl .loop_%2
%endmacro
|
||||
|
||||
; void yuv2plane1_<%1>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
;                            const uint8_t *dither, int offset)
; Single-line (1-tap) vertical scaling; loads the per-depth constants and
; dispatches to the aligned or unaligned main loop.
; %1 = output bit depth, %2 = nr. of XMM registers, %3 = nr. of GPR args
%macro yuv2plane1_fn 3
cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
    movsxdifnidn    wq, wd
    add             wq, mmsize - 1         ; round width up to a full vector
    and             wq, ~(mmsize - 1)
%if %1 == 8
    add           dstq, wq
%else ; %1 != 8: 16-bit output samples
    lea           dstq, [dstq+wq*2]
%endif ; %1 == 8
%if %1 == 16
    lea           srcq, [srcq+wq*4]        ; 32-bit input samples
%else ; %1 != 16
    lea           srcq, [srcq+wq*2]
%endif ; %1 == 16
    neg             wq                     ; count up from -w to 0

%if %1 == 8
    pxor            m4, m4                 ; zero

    ; create registers holding dither
    movq            m3, [ditherq]          ; dither
    test       offsetd, offsetd
    jz              .no_rot
%if mmsize == 16
    punpcklqdq      m3, m3
%endif ; mmsize == 16
    PALIGNR         m3, m3, 3, m2          ; rotate dither by the offset
.no_rot:
%if mmsize == 8
    mova            m2, m3
    punpckhbw       m3, m4                 ; byte->word
    punpcklbw       m2, m4                 ; byte->word
%else
    punpcklbw       m3, m4
    mova            m2, m3
%endif
%elif %1 == 9
    pxor            m4, m4
    mova            m3, [pw_512]           ; clip maximum
    mova            m2, [pw_32]            ; rounding constant
%elif %1 == 10
    pxor            m4, m4
    mova            m3, [pw_1024]          ; clip maximum
    mova            m2, [pw_16]            ; rounding constant
%else ; %1 == 16
%if cpuflag(sse4) ; sse4/avx
    mova            m4, [pd_4]
%else ; mmx/sse2: rounding constant pre-biased for the signed pack
    mova            m4, [pd_4min0x40000]
    mova            m5, [minshort]
%endif ; mmx/sse2/sse4/avx
%endif ; %1 == ..

    ; actual pixel scaling
%if mmsize == 8
    yuv2plane1_mainloop %1, a
%else ; mmsize == 16
    test          dstq, 15
    jnz .unaligned
    yuv2plane1_mainloop %1, a
    REP_RET
.unaligned:
    yuv2plane1_mainloop %1, u
%endif ; mmsize == 8/16
    REP_RET
%endmacro
|
||||
|
||||
; Instantiate yuv2plane1 per bit depth and instruction set.
%if ARCH_X86_32
INIT_MMX mmx
yuv2plane1_fn  8, 0, 5
yuv2plane1_fn 16, 0, 3

INIT_MMX mmxext
yuv2plane1_fn  9, 0, 3
yuv2plane1_fn 10, 0, 3
%endif

INIT_XMM sse2
yuv2plane1_fn  8, 5, 5
yuv2plane1_fn  9, 5, 3
yuv2plane1_fn 10, 5, 3
yuv2plane1_fn 16, 6, 3

INIT_XMM sse4
yuv2plane1_fn 16, 5, 3

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
yuv2plane1_fn  8, 5, 5
yuv2plane1_fn  9, 5, 3
yuv2plane1_fn 10, 5, 3
yuv2plane1_fn 16, 5, 3
%endif
|
||||
200
externals/ffmpeg/libswscale/x86/rgb2rgb.c
vendored
Executable file
200
externals/ffmpeg/libswscale/x86/rgb2rgb.c
vendored
Executable file
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
* software RGB to RGB converter
|
||||
* pluralize by software PAL8 to RGB converter
|
||||
* software YUV to YUV converter
|
||||
* software YUV to RGB converter
|
||||
* Written by Nick Kurshev.
|
||||
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/bswap.h"
|
||||
#include "libswscale/rgb2rgb.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL;
|
||||
#define mask16b mask15b
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
|
||||
|
||||
DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2YOffset);
|
||||
DECLARE_ALIGNED(8, extern const uint64_t, ff_w1111);
|
||||
DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
|
||||
|
||||
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
|
||||
|
||||
// Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one.

/*
 * Template instantiation machinery: rgb2rgb_template.c is included once per
 * SIMD flavour.  The COMPILE_TEMPLATE_* macros select the instruction set
 * used inside the template, and RENAME() suffixes every function it defines
 * so each inclusion produces a distinct set of symbols (e.g. foo_mmx,
 * foo_sse2, ...).
 */
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_SSE2 0
#define COMPILE_TEMPLATE_AVX 0

//MMX versions
#undef RENAME
#define RENAME(a) a ## _mmx
#include "rgb2rgb_template.c"

// MMXEXT versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "rgb2rgb_template.c"

//SSE2 versions (note: MMXEXT stays enabled from the previous inclusion)
#undef RENAME
#undef COMPILE_TEMPLATE_SSE2
#define COMPILE_TEMPLATE_SSE2 1
#define RENAME(a) a ## _sse2
#include "rgb2rgb_template.c"

//AVX versions (builds on the MMXEXT+SSE2 settings above)
#undef RENAME
#undef COMPILE_TEMPLATE_AVX
#define COMPILE_TEMPLATE_AVX 1
#define RENAME(a) a ## _avx
#include "rgb2rgb_template.c"

//3DNOW versions -- reset everything, then enable only AMD3DNOW
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_AVX
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_SSE2 0
#define COMPILE_TEMPLATE_AVX 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3dnow
#include "rgb2rgb_template.c"

/*
  RGB15->RGB16 original by Strepto/Astral
  ported to gcc & bugfixed : A'rpi
  MMXEXT, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
*/

#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
/* Prototypes for the external (yasm/nasm-assembled) implementations in
 * rgb_2_rgb.asm; each reorders the four bytes of every 32-bit pixel. */
void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_2103_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);

/* The uyvytoyuv422 asm is only assembled for x86-64 (see rgb_2_rgb.asm). */
#if ARCH_X86_64
void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                          const uint8_t *src, int width, int height,
                          int lumStride, int chromStride, int srcStride);
void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                          const uint8_t *src, int width, int height,
                          int lumStride, int chromStride, int srcStride);
#endif

/*
 * Install the best available x86 implementations into the global rgb2rgb
 * function pointers.  Checks run from least to most capable instruction
 * set, so a later (faster) variant deliberately overwrites an earlier one
 * when the CPU supports it (e.g. SSSE3 shuffle_bytes_2103 replaces the
 * MMXEXT one; AVX uyvytoyuv422 replaces SSE2).
 */
av_cold void rgb2rgb_init_x86(void)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_INLINE_ASM
    /* Inline-asm template variants (see rgb2rgb_template.c inclusions). */
    if (INLINE_MMX(cpu_flags))
        rgb2rgb_init_mmx();
    if (INLINE_AMD3DNOW(cpu_flags))
        rgb2rgb_init_3dnow();
    if (INLINE_MMXEXT(cpu_flags))
        rgb2rgb_init_mmxext();
    if (INLINE_SSE2(cpu_flags))
        rgb2rgb_init_sse2();
    if (INLINE_AVX(cpu_flags))
        rgb2rgb_init_avx();
#endif /* HAVE_INLINE_ASM */

    /* Externally-assembled variants. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
#if ARCH_X86_64
        uyvytoyuv422 = ff_uyvytoyuv422_sse2;
#endif
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        shuffle_bytes_0321 = ff_shuffle_bytes_0321_ssse3;
        shuffle_bytes_2103 = ff_shuffle_bytes_2103_ssse3;
        shuffle_bytes_1230 = ff_shuffle_bytes_1230_ssse3;
        shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
        shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
    }
    if (EXTERNAL_AVX(cpu_flags)) {
#if ARCH_X86_64
        uyvytoyuv422 = ff_uyvytoyuv422_avx;
#endif
    }
}
|
||||
2545
externals/ffmpeg/libswscale/x86/rgb2rgb_template.c
vendored
Executable file
2545
externals/ffmpeg/libswscale/x86/rgb2rgb_template.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
300
externals/ffmpeg/libswscale/x86/rgb_2_rgb.asm
vendored
Executable file
300
externals/ffmpeg/libswscale/x86/rgb_2_rgb.asm
vendored
Executable file
@@ -0,0 +1,300 @@
|
||||
;******************************************************************************
|
||||
;* Copyright Nick Kurshev
|
||||
;* Copyright Michael (michaelni@gmx.at)
|
||||
;* Copyright 2018 Jokyo Images
|
||||
;* Copyright Ivo van Poorten
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
|
||||
pb_mask_shuffle2103_mmx times 8 dw 255
|
||||
pb_shuffle2103: db 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15
|
||||
pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13
|
||||
pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
|
||||
pb_shuffle3012: db 3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14
|
||||
pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
|
||||
SECTION .text
|
||||
|
||||
%macro RSHIFT_COPY 3
; %1 dst ; %2 src ; %3 shift
; Copy %2 into %1 byte-shifted right by %3.  AVX has a three-operand
; psrldq, so copy+shift collapse into one instruction; on pre-AVX we must
; copy first and then shift the destination in place.
%if cpuflag(avx)
    psrldq  %1, %2, %3
%else
    mova    %1, %2
    RSHIFT  %1, %3
%endif
%endmacro
||||
|
||||
;------------------------------------------------------------------------------
; shuffle_bytes_2103_mmxext (const uint8_t *src, uint8_t *dst, int src_size)
;
; Reorder every 4-byte group 0123 -> 2103 (swap bytes 0 and 2, keep 1 and 3).
; A scalar head loop handles the leading bytes so the remainder is a
; multiple of mmsize*2, then the MMX loop does 16 bytes per iteration.
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
    mova   m6, [pb_mask_shuffle2103_mmx] ; 0x00FF per word: selects even bytes
    mova   m7, m6
    psllq  m7, 8                         ; 0xFF00 per word: selects odd bytes

    movsxdifnidn wq, wd
    mov    xq, wq

    ; index from the end of the buffers with a negative counter
    add    srcq, wq
    add    dstq, wq
    neg    wq

;calc scalar loop
    and    xq, mmsize*2 - 4              ; bytes to handle before the SIMD loop
    je     .loop_simd

.loop_scalar:
    mov    tmpb, [srcq + wq + 2]
    mov    [dstq+wq + 0], tmpb
    mov    tmpb, [srcq + wq + 1]
    mov    [dstq+wq + 1], tmpb
    mov    tmpb, [srcq + wq + 0]
    mov    [dstq+wq + 2], tmpb
    mov    tmpb, [srcq + wq + 3]
    mov    [dstq+wq + 3], tmpb
    add    wq, 4
    sub    xq, 4
    jg     .loop_scalar

;check if src_size < mmsize * 2
    cmp    wq, 0
    jge    .end

.loop_simd:
    movu   m0, [srcq+wq]
    movu   m1, [srcq+wq+8]

    ; 177 = 0xB1: swap the two words inside each dword, so each word-swapped
    ; copy has the wanted low bytes (2 and 0) in the even byte positions
    pshufw m3, m0, 177
    pshufw m5, m1, 177

    pand   m0, m7                        ; keep odd bytes (1, 3) of original
    pand   m3, m6                        ; take even bytes from swapped copy

    pand   m1, m7
    pand   m5, m6

    por    m0, m3                        ; merge -> 2103 order
    por    m1, m5

    movu   [dstq+wq], m0
    movu   [dstq+wq + 8], m1

    add    wq, mmsize*2
    jl     .loop_simd

.end:
    RET
|
||||
|
||||
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------
; %1-4 index shuffle
; Generic 4-byte permutation: every group 0123 becomes %1 %2 %3 %4.
; Scalar head loop aligns the remaining length to mmsize, then pshufb does
; a full register per iteration using the matching pb_shuffle#### constant.
%macro SHUFFLE_BYTES 4
cglobal shuffle_bytes_%1%2%3%4, 3, 5, 2, src, dst, w, tmp, x
    VBROADCASTI128 m0, [pb_shuffle%1%2%3%4] ; per-lane byte permutation mask
    movsxdifnidn wq, wd
    mov    xq, wq

    ; index from the end of the buffers with a negative counter
    add    srcq, wq
    add    dstq, wq
    neg    wq

;calc scalar loop
    and    xq, mmsize-4                  ; bytes to handle before the SIMD loop
    je     .loop_simd

.loop_scalar:
    mov    tmpb, [srcq + wq + %1]
    mov    [dstq+wq + 0], tmpb
    mov    tmpb, [srcq + wq + %2]
    mov    [dstq+wq + 1], tmpb
    mov    tmpb, [srcq + wq + %3]
    mov    [dstq+wq + 2], tmpb
    mov    tmpb, [srcq + wq + %4]
    mov    [dstq+wq + 3], tmpb
    add    wq, 4
    sub    xq, 4
    jg     .loop_scalar

;check if src_size < mmsize
    cmp    wq, 0
    jge    .end

.loop_simd:
    movu   m1, [srcq+wq]
    pshufb m1, m0
    movu   [dstq+wq], m1
    add    wq, mmsize
    jl     .loop_simd

.end:
    RET
%endmacro

; SSSE3 instantiations: one symbol per permutation.
INIT_XMM ssse3
SHUFFLE_BYTES 2, 1, 0, 3
SHUFFLE_BYTES 0, 3, 2, 1
SHUFFLE_BYTES 1, 2, 3, 0
SHUFFLE_BYTES 3, 0, 1, 2
SHUFFLE_BYTES 3, 2, 1, 0
|
||||
|
||||
;-----------------------------------------------------------------------------------------------
; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
;              const uint8_t *src, int width, int height,
;              int lumStride, int chromStride, int srcStride)
;
; De-interleave packed UYVY into planar Y, U and V (4:2:2: one U/V pair per
; two Y samples).  Per line: a scalar head loop aligns the remainder to
; mmsize*2 pixels, then the SIMD loop consumes mmsize*4 source bytes per
; iteration.  Uses 9 register args, hence x86-64 only (see bottom of macro).
;-----------------------------------------------------------------------------------------------
%macro UYVY_TO_YUV422 0
cglobal uyvytoyuv422, 9, 14, 8, ydst, udst, vdst, src, w, h, lum_stride, chrom_stride, src_stride, wtwo, whalf, tmp, x, back_w
    pxor         m0, m0
    pcmpeqw      m1, m1
    psrlw        m1, 8                  ; m1 = 0x00FF per word: keeps even bytes

    movsxdifnidn wq, wd
    movsxdifnidn lum_strideq, lum_strided
    movsxdifnidn chrom_strideq, chrom_strided
    movsxdifnidn src_strideq, src_strided

    mov          back_wq, wq            ; preserved width, restored per line
    mov          whalfq, wq
    shr          whalfq, 1              ; whalf = width / 2

    ; point at the end of each row; loop counters run negative -> 0
    lea          srcq, [srcq + wq * 2]
    add          ydstq, wq
    add          udstq, whalfq
    add          vdstq, whalfq

.loop_line:
    mov          xq, wq
    mov          wtwoq, wq
    add          wtwoq, wtwoq           ; wtwo = width * 2

    neg          wq
    neg          wtwoq
    neg          whalfq

    ;calc scalar loop count
    and          xq, mmsize * 2 - 1
    je           .loop_simd

.loop_scalar:
    ; source byte order per pixel pair: U Y V Y
    mov          tmpb, [srcq + wtwoq + 0]
    mov          [udstq + whalfq], tmpb

    mov          tmpb, [srcq + wtwoq + 1]
    mov          [ydstq + wq], tmpb

    mov          tmpb, [srcq + wtwoq + 2]
    mov          [vdstq + whalfq], tmpb

    mov          tmpb, [srcq + wtwoq + 3]
    mov          [ydstq + wq + 1], tmpb

    add          wq, 2
    add          wtwoq, 4
    add          whalfq, 1
    sub          xq, 2
    jg           .loop_scalar

    ; check if simd loop is need
    cmp          wq, 0
    jge          .end_line

.loop_simd:
    movu         m2, [srcq + wtwoq             ]
    movu         m3, [srcq + wtwoq + mmsize    ]
    movu         m4, [srcq + wtwoq + mmsize * 2]
    movu         m5, [srcq + wtwoq + mmsize * 3]

    ; extract y part 1
    RSHIFT_COPY  m6, m2, 1              ; UYVY UYVY -> YVYU YVY...
    pand         m6, m1; YxYx YxYx...

    RSHIFT_COPY  m7, m3, 1              ; UYVY UYVY -> YVYU YVY...
    pand         m7, m1                 ; YxYx YxYx...

    packuswb     m6, m7                 ; YYYY YYYY...
    movu         [ydstq + wq], m6

    ; extract y part 2
    RSHIFT_COPY  m6, m4, 1              ; UYVY UYVY -> YVYU YVY...
    pand         m6, m1; YxYx YxYx...

    RSHIFT_COPY  m7, m5, 1              ; UYVY UYVY -> YVYU YVY...
    pand         m7, m1                 ; YxYx YxYx...

    packuswb     m6, m7                 ; YYYY YYYY...
    movu         [ydstq + wq + mmsize], m6

    ; extract uv
    pand         m2, m1                 ; UxVx...
    pand         m3, m1                 ; UxVx...
    pand         m4, m1                 ; UxVx...
    pand         m5, m1                 ; UxVx...

    packuswb     m2, m3                 ; UVUV...
    packuswb     m4, m5                 ; UVUV...

    ; U
    pand         m6, m2, m1             ; UxUx...
    pand         m7, m4, m1             ; UxUx...

    packuswb     m6, m7                 ; UUUU
    movu         [udstq + whalfq], m6


    ; V
    psrlw        m2, 8                  ; VxVx...
    psrlw        m4, 8                  ; VxVx...
    packuswb     m2, m4                 ; VVVV
    movu         [vdstq + whalfq], m2

    add          whalfq, mmsize
    add          wtwoq, mmsize * 4
    add          wq, mmsize * 2
    jl           .loop_simd

.end_line:
    ; advance all pointers by one row
    add          srcq, src_strideq
    add          ydstq, lum_strideq
    add          udstq, chrom_strideq
    add          vdstq, chrom_strideq

    ;restore initial state of line variable
    mov          wq, back_wq
    mov          xq, wq
    mov          whalfq, wq
    shr          whalfq, 1              ; whalf = width / 2
    sub          hd, 1
    jg           .loop_line

    RET
%endmacro

%if ARCH_X86_64
INIT_XMM sse2
UYVY_TO_YUV422

INIT_XMM avx
UYVY_TO_YUV422
%endif
|
||||
423
externals/ffmpeg/libswscale/x86/scale.asm
vendored
Executable file
423
externals/ffmpeg/libswscale/x86/scale.asm
vendored
Executable file
@@ -0,0 +1,423 @@
|
||||
;******************************************************************************
|
||||
;* x86-optimized horizontal line scaling functions
|
||||
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
|
||||
max_19bit_int: times 4 dd 0x7ffff
|
||||
max_19bit_flt: times 4 dd 524287.0
|
||||
minshort: times 8 dw 0x8000
|
||||
unicoeff: times 4 dd 0x20000000
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
; horizontal line scaling
;
; void hscale<source_width>to<intermediate_nbits>_<filterSize>_<opt>
;                               (SwsContext *c, int{16,32}_t *dst,
;                                int dstW, const uint{8,16}_t *src,
;                                const int16_t *filter,
;                                const int32_t *filterPos, int filterSize);
;
; Scale one horizontal line. Input is either 8-bit width or 16-bit width
; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
; downscale before multiplying). Filter is 14 bits. Output is either 15 bits
; (in int16_t) or 19 bits (in int32_t), as given in $intermediate_nbits. Each
; output pixel is generated from $filterSize input pixels, the position of
; the first pixel is given in filterPos[nOutputPixel].
;-----------------------------------------------------------------------------

; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, n_args, n_xmm
; %3 is 4, 8 or X (run-time filter size); %4 adds the X4/X8 split for the
; X case.  Register roles in the fixed-size paths: wq = negated output
; counter, pos0q/pos1q = filterPos values, m3 = zero (8-bit input),
; m6/m7 = signed-bias constants (16-bit input), m2 = 19-bit clip max.
%macro SCALE_FUNC 6
%ifnidn %3, X
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1
%else
cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
%endif
%if ARCH_X86_64
    movsxd        wq, wd
%define mov32 movsxd
%else ; x86-32
%define mov32 mov
%endif ; x86-64
%if %2 == 19
%if mmsize == 8 ; mmx
    mova          m2, [max_19bit_int]
%elif cpuflag(sse4)
    mova          m2, [max_19bit_int]
%else ; ssse3/sse2
    mova          m2, [max_19bit_flt]
%endif ; mmx/sse2/ssse3/sse4
%endif ; %2 == 19
%if %1 == 16
    mova          m6, [minshort]
    mova          m7, [unicoeff]
%elif %1 == 8
    pxor          m3, m3
%endif ; %1 == 8/16

%if %1 == 8
%define movlh movd
%define movbh movh
%define srcmul 1
%else ; %1 == 9-16
%define movlh movq
%define movbh movu
%define srcmul 2
%endif ; %1 == 8/9-16

%ifnidn %3, X

    ; setup loop
%if %3 == 8
    shl           wq, 1                 ; this allows *16 (i.e. now *8) in lea instructions for the 8-tap filter
%define wshr 1
%else ; %3 == 4
%define wshr 0
%endif ; %3 == 8
    lea      filterq, [filterq+wq*8]
%if %2 == 15
    lea         dstq, [dstq+wq*(2>>wshr)]
%else ; %2 == 19
    lea         dstq, [dstq+wq*(4>>wshr)]
%endif ; %2 == 15/19
    lea      fltposq, [fltposq+wq*(4>>wshr)]
    neg           wq

.loop:
%if %3 == 4 ; filterSize == 4 scaling
    ; load 2x4 or 4x4 source pixels into m0/m1
    mov32      pos0q, dword [fltposq+wq*4+ 0] ; filterPos[0]
    mov32      pos1q, dword [fltposq+wq*4+ 4] ; filterPos[1]
    movlh         m0, [srcq+pos0q*srcmul]     ; src[filterPos[0] + {0,1,2,3}]
%if mmsize == 8
    movlh         m1, [srcq+pos1q*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
%else ; mmsize == 16
%if %1 > 8
    movhps        m0, [srcq+pos1q*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
%else ; %1 == 8
    movd          m4, [srcq+pos1q*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
%endif
    mov32      pos0q, dword [fltposq+wq*4+ 8] ; filterPos[2]
    mov32      pos1q, dword [fltposq+wq*4+12] ; filterPos[3]
    movlh         m1, [srcq+pos0q*srcmul]     ; src[filterPos[2] + {0,1,2,3}]
%if %1 > 8
    movhps        m1, [srcq+pos1q*srcmul]     ; src[filterPos[3] + {0,1,2,3}]
%else ; %1 == 8
    movd          m5, [srcq+pos1q*srcmul]     ; src[filterPos[3] + {0,1,2,3}]
    punpckldq     m0, m4
    punpckldq     m1, m5
%endif ; %1 == 8
%endif ; mmsize == 8/16
%if %1 == 8
    punpcklbw     m0, m3                      ; byte -> word
    punpcklbw     m1, m3                      ; byte -> word
%endif ; %1 == 8

    ; multiply with filter coefficients
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
             ; add back 0x8000 * sum(coeffs) after the horizontal add
    psubw         m0, m6
    psubw         m1, m6
%endif ; %1 == 16
    pmaddwd       m0, [filterq+wq*8+mmsize*0] ; *= filter[{0,1,..,6,7}]
    pmaddwd       m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}]

    ; add up horizontally (4 srcpix * 4 coefficients -> 1 dstpix)
%if mmsize == 8 ; mmx
    movq          m4, m0
    punpckldq     m0, m1
    punpckhdq     m4, m1
    paddd         m0, m4
%elif notcpuflag(ssse3) ; sse2
    mova          m4, m0
    shufps        m0, m1, 10001000b
    shufps        m4, m1, 11011101b
    paddd         m0, m4
%else ; ssse3/sse4
    phaddd        m0, m1                ; filter[{ 0, 1, 2, 3}]*src[filterPos[0]+{0,1,2,3}],
                                        ; filter[{ 4, 5, 6, 7}]*src[filterPos[1]+{0,1,2,3}],
                                        ; filter[{ 8, 9,10,11}]*src[filterPos[2]+{0,1,2,3}],
                                        ; filter[{12,13,14,15}]*src[filterPos[3]+{0,1,2,3}]
%endif ; mmx/sse2/ssse3/sse4
%else ; %3 == 8, i.e. filterSize == 8 scaling
    ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
    mov32      pos0q, dword [fltposq+wq*2+0]  ; filterPos[0]
    mov32      pos1q, dword [fltposq+wq*2+4]  ; filterPos[1]
    movbh         m0, [srcq+ pos0q   *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
%if mmsize == 8
    movbh         m1, [srcq+(pos0q+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
    movbh         m4, [srcq+ pos1q   *srcmul] ; src[filterPos[1] + {0,1,2,3}]
    movbh         m5, [srcq+(pos1q+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
%else ; mmsize == 16
    movbh         m1, [srcq+ pos1q   *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
    mov32      pos0q, dword [fltposq+wq*2+8]  ; filterPos[2]
    mov32      pos1q, dword [fltposq+wq*2+12] ; filterPos[3]
    movbh         m4, [srcq+ pos0q   *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
    movbh         m5, [srcq+ pos1q   *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
%endif ; mmsize == 8/16
%if %1 == 8
    punpcklbw     m0, m3                      ; byte -> word
    punpcklbw     m1, m3                      ; byte -> word
    punpcklbw     m4, m3                      ; byte -> word
    punpcklbw     m5, m3                      ; byte -> word
%endif ; %1 == 8

    ; multiply
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
             ; add back 0x8000 * sum(coeffs) after the horizontal add
    psubw         m0, m6
    psubw         m1, m6
    psubw         m4, m6
    psubw         m5, m6
%endif ; %1 == 16
    pmaddwd       m0, [filterq+wq*8+mmsize*0] ; *= filter[{0,1,..,6,7}]
    pmaddwd       m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}]
    pmaddwd       m4, [filterq+wq*8+mmsize*2] ; *= filter[{16,17,..,22,23}]
    pmaddwd       m5, [filterq+wq*8+mmsize*3] ; *= filter[{24,25,..,30,31}]

    ; add up horizontally (8 srcpix * 8 coefficients -> 1 dstpix)
%if mmsize == 8
    paddd         m0, m1
    paddd         m4, m5
    movq          m1, m0
    punpckldq     m0, m4
    punpckhdq     m1, m4
    paddd         m0, m1
%elif notcpuflag(ssse3) ; sse2
%if %1 == 8
%define mex m6
%else
%define mex m3
%endif
    ; emulate horizontal add as transpose + vertical add
    mova          mex, m0
    punpckldq     m0, m1
    punpckhdq     mex, m1
    paddd         m0, mex
    mova          m1, m4
    punpckldq     m4, m5
    punpckhdq     m1, m5
    paddd         m4, m1
    mova          m1, m0
    punpcklqdq    m0, m4
    punpckhqdq    m1, m4
    paddd         m0, m1
%else ; ssse3/sse4
    ; FIXME if we rearrange the filter in pairs of 4, we can
    ; load pixels likewise and use 2 x paddd + phaddd instead
    ; of 3 x phaddd here, faster on older cpus
    phaddd        m0, m1
    phaddd        m4, m5
    phaddd        m0, m4                ; filter[{ 0, 1,..., 6, 7}]*src[filterPos[0]+{0,1,...,6,7}],
                                        ; filter[{ 8, 9,...,14,15}]*src[filterPos[1]+{0,1,...,6,7}],
                                        ; filter[{16,17,...,22,23}]*src[filterPos[2]+{0,1,...,6,7}],
                                        ; filter[{24,25,...,30,31}]*src[filterPos[3]+{0,1,...,6,7}]
%endif ; mmx/sse2/ssse3/sse4
%endif ; %3 == 4/8

%else ; %3 == X, i.e. any filterSize scaling

%ifidn %4, X4
%define dlt 4
%else ; %4 == X || %4 == X8
%define dlt 0
%endif ; %4 ==/!= X4
%if ARCH_X86_64
%define srcq    r8
%define pos1q   r7
%define srcendq r9
    movsxd  fltsizeq, fltsized          ; filterSize
    lea      srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
%else ; x86-32
%define srcq    srcmemq
%define pos1q   dstq
%define srcendq r6m
    lea        pos0q, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
    mov      srcendq, pos0q
%endif ; x86-32/64
    lea      fltposq, [fltposq+wq*4]
%if %2 == 15
    lea         dstq, [dstq+wq*2]
%else ; %2 == 19
    lea         dstq, [dstq+wq*4]
%endif ; %2 == 15/19
    movifnidn  dstmp, dstq
    neg           wq

.loop:
    mov32      pos0q, dword [fltposq+wq*4+0] ; filterPos[0]
    mov32      pos1q, dword [fltposq+wq*4+4] ; filterPos[1]
    ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
    pxor          m4, m4                ; m4/m5 accumulate the two output pixels
    pxor          m5, m5
    mov         srcq, srcmemmp

.innerloop:
    ; load 2x4 (mmx) or 2x8 (sse) source pixels into m0/m1 -> m4/m5
    movbh         m0, [srcq+ pos0q     *srcmul] ; src[filterPos[0] + {0,1,2,3(,4,5,6,7)}]
    movbh         m1, [srcq+(pos1q+dlt)*srcmul] ; src[filterPos[1] + {0,1,2,3(,4,5,6,7)}]
%if %1 == 8
    punpcklbw     m0, m3
    punpcklbw     m1, m3
%endif ; %1 == 8

    ; multiply
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
             ; add back 0x8000 * sum(coeffs) after the horizontal add
    psubw         m0, m6
    psubw         m1, m6
%endif ; %1 == 16
    pmaddwd       m0, [filterq]                 ; filter[{0,1,2,3(,4,5,6,7)}]
    pmaddwd       m1, [filterq+(fltsizeq+dlt)*2]; filter[filtersize+{0,1,2,3(,4,5,6,7)}]
    paddd         m4, m0
    paddd         m5, m1
    add      filterq, mmsize
    add         srcq, srcmul*mmsize/2
    cmp         srcq, srcendq           ; while (src += 4) < &src[filterSize]
    jl .innerloop

%ifidn %4, X4
    ; X4: filterSize is 4 beyond a multiple of 8 -- handle the split tail
    mov32      pos1q, dword [fltposq+wq*4+4] ; filterPos[1]
    movlh         m0, [srcq+ pos0q *srcmul]  ; split last 4 srcpx of dstpx[0]
    sub        pos1q, fltsizeq               ; and first 4 srcpx of dstpx[1]
%if %1 > 8
    movhps        m0, [srcq+(pos1q+dlt)*srcmul]
%else ; %1 == 8
    movd          m1, [srcq+(pos1q+dlt)*srcmul]
    punpckldq     m0, m1
%endif ; %1 == 8
%if %1 == 8
    punpcklbw     m0, m3
%endif ; %1 == 8
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll
             ; add back 0x8000 * sum(coeffs) after the horizontal add
    psubw         m0, m6
%endif ; %1 == 16
    pmaddwd       m0, [filterq]
%endif ; %4 == X4

    lea      filterq, [filterq+(fltsizeq+dlt)*2]

%if mmsize == 8 ; mmx
    movq          m0, m4
    punpckldq     m4, m5
    punpckhdq     m0, m5
    paddd         m0, m4
%else ; mmsize == 16
%if notcpuflag(ssse3) ; sse2
    mova          m1, m4
    punpcklqdq    m4, m5
    punpckhqdq    m1, m5
    paddd         m4, m1
%else ; ssse3/sse4
    phaddd        m4, m5
%endif ; sse2/ssse3/sse4
%ifidn %4, X4
    paddd         m4, m0
%endif ; %3 == X4
%if notcpuflag(ssse3) ; sse2
    pshufd        m4, m4, 11011000b
    movhlps       m0, m4
    paddd         m0, m4
%else ; ssse3/sse4
    phaddd        m4, m4
    SWAP 0, 4
%endif ; sse2/ssse3/sse4
%endif ; mmsize == 8/16
%endif ; %3 ==/!= X

%if %1 == 16 ; add 0x8000 * sum(coeffs), i.e. back from signed -> unsigned
    paddd         m0, m7
%endif ; %1 == 16

    ; clip, store
    psrad         m0, 14 + %1 - %2
%ifidn %3, X
    movifnidn   dstq, dstmp
%endif ; %3 == X
%if %2 == 15
    packssdw      m0, m0
%ifnidn %3, X
    movh [dstq+wq*(2>>wshr)], m0
%else ; %3 == X
    movd [dstq+wq*2], m0
%endif ; %3 ==/!= X
%else ; %2 == 19
    PMINSD        m0, m2, m4            ; clamp to the 19-bit maximum
%ifnidn %3, X
    mova [dstq+wq*(4>>wshr)], m0
%else ; %3 == X
    movq [dstq+wq*4], m0
%endif ; %3 ==/!= X
%endif ; %2 == 15/19
%ifnidn %3, X
    add           wq, (mmsize<<wshr)/4  ; both 8tap and 4tap really only do 4 pixels (or for mmx: 2 pixels)
                                        ; per iteration. see "shl wq,1" above as for why we do this
%else ; %3 == X
    add           wq, 2
%endif ; %3 ==/!= X
    jl .loop
    REP_RET
%endmacro
|
||||
|
||||
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm
; Instantiate SCALE_FUNC for all filter sizes of one (width, nbits) pair:
; fixed 4-tap, fixed 8-tap, and the run-time-size X variants (one for MMX,
; an X4/X8 split for SSE register widths).
%macro SCALE_FUNCS 3
SCALE_FUNC %1, %2, 4, 4,  6, %3
SCALE_FUNC %1, %2, 8, 8,  6, %3
%if mmsize == 8
SCALE_FUNC %1, %2, X, X,  7, %3
%else
SCALE_FUNC %1, %2, X, X4, 7, %3
SCALE_FUNC %1, %2, X, X8, 7, %3
%endif
%endmacro
|
||||
|
||||
; SCALE_FUNCS2 8_xmm_args, 9to10_xmm_args, 16_xmm_args
; Instantiate the full set of source widths for both output depths.
; The 15-bit output functions are skipped for SSE4: SSE4 only adds PMINSD,
; which the 19-bit clip path needs, so 15-bit falls back to earlier flavours.
%macro SCALE_FUNCS2 3
%if notcpuflag(sse4)
SCALE_FUNCS  8, 15, %1
SCALE_FUNCS  9, 15, %2
SCALE_FUNCS 10, 15, %2
SCALE_FUNCS 12, 15, %2
SCALE_FUNCS 14, 15, %2
SCALE_FUNCS 16, 15, %3
%endif ; !sse4
SCALE_FUNCS  8, 19, %1
SCALE_FUNCS  9, 19, %2
SCALE_FUNCS 10, 19, %2
SCALE_FUNCS 12, 19, %2
SCALE_FUNCS 14, 19, %2
SCALE_FUNCS 16, 19, %3
%endmacro

; Per-CPU-flavour instantiations; the three numbers are the XMM register
; counts passed through to cglobal for each source-width group.
%if ARCH_X86_32
INIT_MMX mmx
SCALE_FUNCS2 0, 0, 0
%endif
INIT_XMM sse2
SCALE_FUNCS2 7, 6, 8
INIT_XMM ssse3
SCALE_FUNCS2 6, 6, 8
INIT_XMM sse4
SCALE_FUNCS2 6, 6, 8
|
||||
583
externals/ffmpeg/libswscale/x86/swscale.c
vendored
Executable file
583
externals/ffmpeg/libswscale/x86/swscale.c
vendored
Executable file
@@ -0,0 +1,583 @@
|
||||
/*
|
||||
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
#include "config.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
|
||||
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
|
||||
0x0103010301030103LL,
|
||||
0x0200020002000200LL,};
|
||||
|
||||
const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
|
||||
0x0602060206020602LL,
|
||||
0x0004000400040004LL,};
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#define DITHER1XBPP
|
||||
|
||||
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
|
||||
|
||||
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
|
||||
|
||||
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
|
||||
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
|
||||
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
|
||||
|
||||
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
|
||||
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
|
||||
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
|
||||
|
||||
|
||||
//MMX versions
|
||||
#if HAVE_MMX_INLINE
|
||||
#undef RENAME
|
||||
#define COMPILE_TEMPLATE_MMXEXT 0
|
||||
#define RENAME(a) a ## _mmx
|
||||
#include "swscale_template.c"
|
||||
#endif
|
||||
|
||||
// MMXEXT versions
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#undef RENAME
|
||||
#undef COMPILE_TEMPLATE_MMXEXT
|
||||
#define COMPILE_TEMPLATE_MMXEXT 1
|
||||
#define RENAME(a) a ## _mmxext
|
||||
#include "swscale_template.c"
|
||||
#endif
|
||||
|
||||
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
|
||||
{
|
||||
const int dstH= c->dstH;
|
||||
const int flags= c->flags;
|
||||
|
||||
SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
|
||||
SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
|
||||
SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
|
||||
|
||||
int hasAlpha = c->needAlpha;
|
||||
int32_t *vLumFilterPos= c->vLumFilterPos;
|
||||
int32_t *vChrFilterPos= c->vChrFilterPos;
|
||||
int16_t *vLumFilter= c->vLumFilter;
|
||||
int16_t *vChrFilter= c->vChrFilter;
|
||||
int32_t *lumMmxFilter= c->lumMmxFilter;
|
||||
int32_t *chrMmxFilter= c->chrMmxFilter;
|
||||
int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
|
||||
const int vLumFilterSize= c->vLumFilterSize;
|
||||
const int vChrFilterSize= c->vChrFilterSize;
|
||||
const int chrDstY= dstY>>c->chrDstVSubSample;
|
||||
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
|
||||
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
|
||||
|
||||
c->blueDither= ff_dither8[dstY&1];
|
||||
if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
|
||||
c->greenDither= ff_dither8[dstY&1];
|
||||
else
|
||||
c->greenDither= ff_dither4[dstY&1];
|
||||
c->redDither= ff_dither8[(dstY+1)&1];
|
||||
if (dstY < dstH - 2) {
|
||||
const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
|
||||
const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
|
||||
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
|
||||
|
||||
int i;
|
||||
if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
|
||||
const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
|
||||
|
||||
int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
|
||||
for (i = 0; i < neg; i++)
|
||||
tmpY[i] = lumSrcPtr[neg];
|
||||
for ( ; i < end; i++)
|
||||
tmpY[i] = lumSrcPtr[i];
|
||||
for ( ; i < vLumFilterSize; i++)
|
||||
tmpY[i] = tmpY[i-1];
|
||||
lumSrcPtr = tmpY;
|
||||
|
||||
if (alpSrcPtr) {
|
||||
const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
|
||||
for (i = 0; i < neg; i++)
|
||||
tmpA[i] = alpSrcPtr[neg];
|
||||
for ( ; i < end; i++)
|
||||
tmpA[i] = alpSrcPtr[i];
|
||||
for ( ; i < vLumFilterSize; i++)
|
||||
tmpA[i] = tmpA[i - 1];
|
||||
alpSrcPtr = tmpA;
|
||||
}
|
||||
}
|
||||
if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
|
||||
const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
|
||||
int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
|
||||
for (i = 0; i < neg; i++) {
|
||||
tmpU[i] = chrUSrcPtr[neg];
|
||||
}
|
||||
for ( ; i < end; i++) {
|
||||
tmpU[i] = chrUSrcPtr[i];
|
||||
}
|
||||
for ( ; i < vChrFilterSize; i++) {
|
||||
tmpU[i] = tmpU[i - 1];
|
||||
}
|
||||
chrUSrcPtr = tmpU;
|
||||
}
|
||||
|
||||
if (flags & SWS_ACCURATE_RND) {
|
||||
int s= APCK_SIZE / 8;
|
||||
for (i=0; i<vLumFilterSize; i+=2) {
|
||||
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
|
||||
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
|
||||
lumMmxFilter[s*i+APCK_COEF/4 ]=
|
||||
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
|
||||
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
|
||||
if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
|
||||
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
|
||||
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
|
||||
alpMmxFilter[s*i+APCK_COEF/4 ]=
|
||||
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
|
||||
}
|
||||
}
|
||||
for (i=0; i<vChrFilterSize; i+=2) {
|
||||
*(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
|
||||
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
|
||||
chrMmxFilter[s*i+APCK_COEF/4 ]=
|
||||
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
|
||||
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
|
||||
}
|
||||
} else {
|
||||
for (i=0; i<vLumFilterSize; i++) {
|
||||
*(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
|
||||
lumMmxFilter[4*i+2]=
|
||||
lumMmxFilter[4*i+3]=
|
||||
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
|
||||
if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
|
||||
*(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
|
||||
alpMmxFilter[4*i+2]=
|
||||
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
|
||||
}
|
||||
}
|
||||
for (i=0; i<vChrFilterSize; i++) {
|
||||
*(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
|
||||
chrMmxFilter[4*i+2]=
|
||||
chrMmxFilter[4*i+3]=
|
||||
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if HAVE_MMXEXT
|
||||
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
|
||||
const int16_t **src, uint8_t *dest, int dstW,
|
||||
const uint8_t *dither, int offset)
|
||||
{
|
||||
if(((uintptr_t)dest) & 15){
|
||||
yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
|
||||
return;
|
||||
}
|
||||
filterSize--;
|
||||
#define MAIN_FUNCTION \
|
||||
"pxor %%xmm0, %%xmm0 \n\t" \
|
||||
"punpcklbw %%xmm0, %%xmm3 \n\t" \
|
||||
"movd %4, %%xmm1 \n\t" \
|
||||
"punpcklwd %%xmm1, %%xmm1 \n\t" \
|
||||
"punpckldq %%xmm1, %%xmm1 \n\t" \
|
||||
"punpcklqdq %%xmm1, %%xmm1 \n\t" \
|
||||
"psllw $3, %%xmm1 \n\t" \
|
||||
"paddw %%xmm1, %%xmm3 \n\t" \
|
||||
"psraw $4, %%xmm3 \n\t" \
|
||||
"movdqa %%xmm3, %%xmm4 \n\t" \
|
||||
"movdqa %%xmm3, %%xmm7 \n\t" \
|
||||
"movl %3, %%ecx \n\t" \
|
||||
"mov %0, %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
".p2align 4 \n\t" /* FIXME Unroll? */\
|
||||
"1: \n\t"\
|
||||
"movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
|
||||
"movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
|
||||
"movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
|
||||
"add $16, %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
|
||||
"pmulhw %%xmm0, %%xmm2 \n\t"\
|
||||
"pmulhw %%xmm0, %%xmm5 \n\t"\
|
||||
"paddw %%xmm2, %%xmm3 \n\t"\
|
||||
"paddw %%xmm5, %%xmm4 \n\t"\
|
||||
" jnz 1b \n\t"\
|
||||
"psraw $3, %%xmm3 \n\t"\
|
||||
"psraw $3, %%xmm4 \n\t"\
|
||||
"packuswb %%xmm4, %%xmm3 \n\t"\
|
||||
"movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
|
||||
"add $16, %%"FF_REG_c" \n\t"\
|
||||
"cmp %2, %%"FF_REG_c" \n\t"\
|
||||
"movdqa %%xmm7, %%xmm3 \n\t" \
|
||||
"movdqa %%xmm7, %%xmm4 \n\t" \
|
||||
"mov %0, %%"FF_REG_d" \n\t"\
|
||||
"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
|
||||
"jb 1b \n\t"
|
||||
|
||||
if (offset) {
|
||||
__asm__ volatile(
|
||||
"movq %5, %%xmm3 \n\t"
|
||||
"movdqa %%xmm3, %%xmm4 \n\t"
|
||||
"psrlq $24, %%xmm3 \n\t"
|
||||
"psllq $40, %%xmm4 \n\t"
|
||||
"por %%xmm4, %%xmm3 \n\t"
|
||||
MAIN_FUNCTION
|
||||
:: "g" (filter),
|
||||
"r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
|
||||
"m"(filterSize), "m"(((uint64_t *) dither)[0])
|
||||
: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
|
||||
"%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
|
||||
);
|
||||
} else {
|
||||
__asm__ volatile(
|
||||
"movq %5, %%xmm3 \n\t"
|
||||
MAIN_FUNCTION
|
||||
:: "g" (filter),
|
||||
"r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
|
||||
"m"(filterSize), "m"(((uint64_t *) dither)[0])
|
||||
: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
|
||||
"%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
|
||||
);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
|
||||
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
|
||||
SwsContext *c, int16_t *data, \
|
||||
int dstW, const uint8_t *src, \
|
||||
const int16_t *filter, \
|
||||
const int32_t *filterPos, int filterSize)
|
||||
|
||||
#define SCALE_FUNCS(filter_n, opt) \
|
||||
SCALE_FUNC(filter_n, 8, 15, opt); \
|
||||
SCALE_FUNC(filter_n, 9, 15, opt); \
|
||||
SCALE_FUNC(filter_n, 10, 15, opt); \
|
||||
SCALE_FUNC(filter_n, 12, 15, opt); \
|
||||
SCALE_FUNC(filter_n, 14, 15, opt); \
|
||||
SCALE_FUNC(filter_n, 16, 15, opt); \
|
||||
SCALE_FUNC(filter_n, 8, 19, opt); \
|
||||
SCALE_FUNC(filter_n, 9, 19, opt); \
|
||||
SCALE_FUNC(filter_n, 10, 19, opt); \
|
||||
SCALE_FUNC(filter_n, 12, 19, opt); \
|
||||
SCALE_FUNC(filter_n, 14, 19, opt); \
|
||||
SCALE_FUNC(filter_n, 16, 19, opt)
|
||||
|
||||
#define SCALE_FUNCS_MMX(opt) \
|
||||
SCALE_FUNCS(4, opt); \
|
||||
SCALE_FUNCS(8, opt); \
|
||||
SCALE_FUNCS(X, opt)
|
||||
|
||||
#define SCALE_FUNCS_SSE(opt) \
|
||||
SCALE_FUNCS(4, opt); \
|
||||
SCALE_FUNCS(8, opt); \
|
||||
SCALE_FUNCS(X4, opt); \
|
||||
SCALE_FUNCS(X8, opt)
|
||||
|
||||
#if ARCH_X86_32
|
||||
SCALE_FUNCS_MMX(mmx);
|
||||
#endif
|
||||
SCALE_FUNCS_SSE(sse2);
|
||||
SCALE_FUNCS_SSE(ssse3);
|
||||
SCALE_FUNCS_SSE(sse4);
|
||||
|
||||
#define VSCALEX_FUNC(size, opt) \
|
||||
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
|
||||
const int16_t **src, uint8_t *dest, int dstW, \
|
||||
const uint8_t *dither, int offset)
|
||||
#define VSCALEX_FUNCS(opt) \
|
||||
VSCALEX_FUNC(8, opt); \
|
||||
VSCALEX_FUNC(9, opt); \
|
||||
VSCALEX_FUNC(10, opt)
|
||||
|
||||
#if ARCH_X86_32
|
||||
VSCALEX_FUNCS(mmxext);
|
||||
#endif
|
||||
VSCALEX_FUNCS(sse2);
|
||||
VSCALEX_FUNCS(sse4);
|
||||
VSCALEX_FUNC(16, sse4);
|
||||
VSCALEX_FUNCS(avx);
|
||||
|
||||
#define VSCALE_FUNC(size, opt) \
|
||||
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
|
||||
const uint8_t *dither, int offset)
|
||||
#define VSCALE_FUNCS(opt1, opt2) \
|
||||
VSCALE_FUNC(8, opt1); \
|
||||
VSCALE_FUNC(9, opt2); \
|
||||
VSCALE_FUNC(10, opt2); \
|
||||
VSCALE_FUNC(16, opt1)
|
||||
|
||||
#if ARCH_X86_32
|
||||
VSCALE_FUNCS(mmx, mmxext);
|
||||
#endif
|
||||
VSCALE_FUNCS(sse2, sse2);
|
||||
VSCALE_FUNC(16, sse4);
|
||||
VSCALE_FUNCS(avx, avx);
|
||||
|
||||
#define INPUT_Y_FUNC(fmt, opt) \
|
||||
void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
|
||||
const uint8_t *unused1, const uint8_t *unused2, \
|
||||
int w, uint32_t *unused)
|
||||
#define INPUT_UV_FUNC(fmt, opt) \
|
||||
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
|
||||
const uint8_t *unused0, \
|
||||
const uint8_t *src1, \
|
||||
const uint8_t *src2, \
|
||||
int w, uint32_t *unused)
|
||||
#define INPUT_FUNC(fmt, opt) \
|
||||
INPUT_Y_FUNC(fmt, opt); \
|
||||
INPUT_UV_FUNC(fmt, opt)
|
||||
#define INPUT_FUNCS(opt) \
|
||||
INPUT_FUNC(uyvy, opt); \
|
||||
INPUT_FUNC(yuyv, opt); \
|
||||
INPUT_UV_FUNC(nv12, opt); \
|
||||
INPUT_UV_FUNC(nv21, opt); \
|
||||
INPUT_FUNC(rgba, opt); \
|
||||
INPUT_FUNC(bgra, opt); \
|
||||
INPUT_FUNC(argb, opt); \
|
||||
INPUT_FUNC(abgr, opt); \
|
||||
INPUT_FUNC(rgb24, opt); \
|
||||
INPUT_FUNC(bgr24, opt)
|
||||
|
||||
#if ARCH_X86_32
|
||||
INPUT_FUNCS(mmx);
|
||||
#endif
|
||||
INPUT_FUNCS(sse2);
|
||||
INPUT_FUNCS(ssse3);
|
||||
INPUT_FUNCS(avx);
|
||||
|
||||
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
if (INLINE_MMX(cpu_flags))
|
||||
sws_init_swscale_mmx(c);
|
||||
#endif
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
if (INLINE_MMXEXT(cpu_flags))
|
||||
sws_init_swscale_mmxext(c);
|
||||
if (cpu_flags & AV_CPU_FLAG_SSE3){
|
||||
if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
|
||||
c->yuv2planeX = yuv2yuvX_sse3;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
|
||||
if (c->srcBpc == 8) { \
|
||||
hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
|
||||
ff_hscale8to19_ ## filtersize ## _ ## opt1; \
|
||||
} else if (c->srcBpc == 9) { \
|
||||
hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
|
||||
ff_hscale9to19_ ## filtersize ## _ ## opt1; \
|
||||
} else if (c->srcBpc == 10) { \
|
||||
hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
|
||||
ff_hscale10to19_ ## filtersize ## _ ## opt1; \
|
||||
} else if (c->srcBpc == 12) { \
|
||||
hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
|
||||
ff_hscale12to19_ ## filtersize ## _ ## opt1; \
|
||||
} else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
|
||||
hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
|
||||
ff_hscale14to19_ ## filtersize ## _ ## opt1; \
|
||||
} else { /* c->srcBpc == 16 */ \
|
||||
av_assert0(c->srcBpc == 16);\
|
||||
hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
|
||||
ff_hscale16to19_ ## filtersize ## _ ## opt1; \
|
||||
} \
|
||||
} while (0)
|
||||
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
|
||||
switch (filtersize) { \
|
||||
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
|
||||
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
|
||||
default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
|
||||
}
|
||||
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
|
||||
switch(c->dstBpc){ \
|
||||
case 16: do_16_case; break; \
|
||||
case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
|
||||
case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
|
||||
case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
|
||||
}
|
||||
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
|
||||
switch(c->dstBpc){ \
|
||||
case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
|
||||
case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
|
||||
case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
|
||||
case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
|
||||
default: av_assert0(c->dstBpc>8); \
|
||||
}
|
||||
#define case_rgb(x, X, opt) \
|
||||
case AV_PIX_FMT_ ## X: \
|
||||
c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
|
||||
if (!c->chrSrcHSubSample) \
|
||||
c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
|
||||
break
|
||||
#if ARCH_X86_32
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
|
||||
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
|
||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
|
||||
|
||||
switch (c->srcFormat) {
|
||||
case AV_PIX_FMT_YA8:
|
||||
c->lumToYV12 = ff_yuyvToY_mmx;
|
||||
if (c->needAlpha)
|
||||
c->alpToYV12 = ff_uyvyToY_mmx;
|
||||
break;
|
||||
case AV_PIX_FMT_YUYV422:
|
||||
c->lumToYV12 = ff_yuyvToY_mmx;
|
||||
c->chrToYV12 = ff_yuyvToUV_mmx;
|
||||
break;
|
||||
case AV_PIX_FMT_UYVY422:
|
||||
c->lumToYV12 = ff_uyvyToY_mmx;
|
||||
c->chrToYV12 = ff_uyvyToUV_mmx;
|
||||
break;
|
||||
case AV_PIX_FMT_NV12:
|
||||
c->chrToYV12 = ff_nv12ToUV_mmx;
|
||||
break;
|
||||
case AV_PIX_FMT_NV21:
|
||||
c->chrToYV12 = ff_nv21ToUV_mmx;
|
||||
break;
|
||||
case_rgb(rgb24, RGB24, mmx);
|
||||
case_rgb(bgr24, BGR24, mmx);
|
||||
case_rgb(bgra, BGRA, mmx);
|
||||
case_rgb(rgba, RGBA, mmx);
|
||||
case_rgb(abgr, ABGR, mmx);
|
||||
case_rgb(argb, ARGB, mmx);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
|
||||
}
|
||||
#endif /* ARCH_X86_32 */
|
||||
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
|
||||
switch (filtersize) { \
|
||||
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
|
||||
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
|
||||
default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
|
||||
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
|
||||
break; \
|
||||
}
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
|
||||
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
|
||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
|
||||
HAVE_ALIGNED_STACK || ARCH_X86_64);
|
||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
|
||||
|
||||
switch (c->srcFormat) {
|
||||
case AV_PIX_FMT_YA8:
|
||||
c->lumToYV12 = ff_yuyvToY_sse2;
|
||||
if (c->needAlpha)
|
||||
c->alpToYV12 = ff_uyvyToY_sse2;
|
||||
break;
|
||||
case AV_PIX_FMT_YUYV422:
|
||||
c->lumToYV12 = ff_yuyvToY_sse2;
|
||||
c->chrToYV12 = ff_yuyvToUV_sse2;
|
||||
break;
|
||||
case AV_PIX_FMT_UYVY422:
|
||||
c->lumToYV12 = ff_uyvyToY_sse2;
|
||||
c->chrToYV12 = ff_uyvyToUV_sse2;
|
||||
break;
|
||||
case AV_PIX_FMT_NV12:
|
||||
c->chrToYV12 = ff_nv12ToUV_sse2;
|
||||
break;
|
||||
case AV_PIX_FMT_NV21:
|
||||
c->chrToYV12 = ff_nv21ToUV_sse2;
|
||||
break;
|
||||
case_rgb(rgb24, RGB24, sse2);
|
||||
case_rgb(bgr24, BGR24, sse2);
|
||||
case_rgb(bgra, BGRA, sse2);
|
||||
case_rgb(rgba, RGBA, sse2);
|
||||
case_rgb(abgr, ABGR, sse2);
|
||||
case_rgb(argb, ARGB, sse2);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
|
||||
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
|
||||
switch (c->srcFormat) {
|
||||
case_rgb(rgb24, RGB24, ssse3);
|
||||
case_rgb(bgr24, BGR24, ssse3);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||
/* Xto15 don't need special sse4 functions */
|
||||
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
|
||||
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
|
||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
|
||||
if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
|
||||
HAVE_ALIGNED_STACK || ARCH_X86_64);
|
||||
if (c->dstBpc == 16 && !isBE(c->dstFormat))
|
||||
c->yuv2plane1 = ff_yuv2plane1_16_sse4;
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
|
||||
HAVE_ALIGNED_STACK || ARCH_X86_64);
|
||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
|
||||
|
||||
switch (c->srcFormat) {
|
||||
case AV_PIX_FMT_YUYV422:
|
||||
c->chrToYV12 = ff_yuyvToUV_avx;
|
||||
break;
|
||||
case AV_PIX_FMT_UYVY422:
|
||||
c->chrToYV12 = ff_uyvyToUV_avx;
|
||||
break;
|
||||
case AV_PIX_FMT_NV12:
|
||||
c->chrToYV12 = ff_nv12ToUV_avx;
|
||||
break;
|
||||
case AV_PIX_FMT_NV21:
|
||||
c->chrToYV12 = ff_nv21ToUV_avx;
|
||||
break;
|
||||
case_rgb(rgb24, RGB24, avx);
|
||||
case_rgb(bgr24, BGR24, avx);
|
||||
case_rgb(bgra, BGRA, avx);
|
||||
case_rgb(rgba, RGBA, avx);
|
||||
case_rgb(abgr, ABGR, avx);
|
||||
case_rgb(argb, ARGB, avx);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
1577
externals/ffmpeg/libswscale/x86/swscale_template.c
vendored
Executable file
1577
externals/ffmpeg/libswscale/x86/swscale_template.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
31
externals/ffmpeg/libswscale/x86/w64xmmtest.c
vendored
Executable file
31
externals/ffmpeg/libswscale/x86/w64xmmtest.c
vendored
Executable file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* check XMM registers for clobbers on Win64
|
||||
* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/x86/w64xmmtest.h"
|
||||
#include "libswscale/swscale.h"
|
||||
|
||||
wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
|
||||
const int srcStride[], int srcSliceY, int srcSliceH,
|
||||
uint8_t *const dst[], const int dstStride[]))
|
||||
{
|
||||
testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY,
|
||||
srcSliceH, dst, dstStride);
|
||||
}
|
||||
155
externals/ffmpeg/libswscale/x86/yuv2rgb.c
vendored
Executable file
155
externals/ffmpeg/libswscale/x86/yuv2rgb.c
vendored
Executable file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* software YUV to RGB converter
|
||||
*
|
||||
* Copyright (C) 2009 Konstantin Shishkov
|
||||
*
|
||||
* MMX/MMXEXT template stuff (needed for fast movntq support),
|
||||
* 1,4,8bpp support and context / deglobalize stuff
|
||||
* by Michael Niedermayer (michaelni@gmx.at)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libswscale/rgb2rgb.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
|
||||
#if HAVE_X86ASM
|
||||
|
||||
#define DITHER1XBPP // only for MMX
|
||||
|
||||
/* hope these constant values are cache line aligned */
|
||||
DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
|
||||
DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
|
||||
|
||||
//MMX versions
|
||||
#if HAVE_MMX
|
||||
#undef RENAME
|
||||
#undef COMPILE_TEMPLATE_MMXEXT
|
||||
#define COMPILE_TEMPLATE_MMXEXT 0
|
||||
#define RENAME(a) a ## _mmx
|
||||
#include "yuv2rgb_template.c"
|
||||
#endif /* HAVE_MMX */
|
||||
|
||||
// MMXEXT versions
|
||||
#if HAVE_MMXEXT
|
||||
#undef RENAME
|
||||
#undef COMPILE_TEMPLATE_MMXEXT
|
||||
#define COMPILE_TEMPLATE_MMXEXT 1
|
||||
#define RENAME(a) a ## _mmxext
|
||||
#include "yuv2rgb_template.c"
|
||||
#endif /* HAVE_MMXEXT */
|
||||
|
||||
//SSSE3 versions
|
||||
#if HAVE_SSSE3
|
||||
#undef RENAME
|
||||
#undef COMPILE_TEMPLATE_MMXEXT
|
||||
#define COMPILE_TEMPLATE_MMXEXT 0
|
||||
#define RENAME(a) a ## _ssse3
|
||||
#include "yuv2rgb_template.c"
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_X86ASM */
|
||||
|
||||
av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
|
||||
{
|
||||
#if HAVE_X86ASM
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
switch (c->dstFormat) {
|
||||
case AV_PIX_FMT_RGB32:
|
||||
if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
|
||||
#if CONFIG_SWSCALE_ALPHA
|
||||
return yuva420_rgb32_ssse3;
|
||||
#endif
|
||||
break;
|
||||
} else
|
||||
return yuv420_rgb32_ssse3;
|
||||
case AV_PIX_FMT_BGR32:
|
||||
if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
|
||||
#if CONFIG_SWSCALE_ALPHA
|
||||
return yuva420_bgr32_ssse3;
|
||||
#endif
|
||||
break;
|
||||
} else
|
||||
return yuv420_bgr32_ssse3;
|
||||
case AV_PIX_FMT_RGB24:
|
||||
return yuv420_rgb24_ssse3;
|
||||
case AV_PIX_FMT_BGR24:
|
||||
return yuv420_bgr24_ssse3;
|
||||
case AV_PIX_FMT_RGB565:
|
||||
return yuv420_rgb16_ssse3;
|
||||
case AV_PIX_FMT_RGB555:
|
||||
return yuv420_rgb15_ssse3;
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
switch (c->dstFormat) {
|
||||
case AV_PIX_FMT_RGB24:
|
||||
return yuv420_rgb24_mmxext;
|
||||
case AV_PIX_FMT_BGR24:
|
||||
return yuv420_bgr24_mmxext;
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
switch (c->dstFormat) {
|
||||
case AV_PIX_FMT_RGB32:
|
||||
if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
|
||||
#if CONFIG_SWSCALE_ALPHA
|
||||
return yuva420_rgb32_mmx;
|
||||
#endif
|
||||
break;
|
||||
} else
|
||||
return yuv420_rgb32_mmx;
|
||||
case AV_PIX_FMT_BGR32:
|
||||
if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
|
||||
#if CONFIG_SWSCALE_ALPHA
|
||||
return yuva420_bgr32_mmx;
|
||||
#endif
|
||||
break;
|
||||
} else
|
||||
return yuv420_bgr32_mmx;
|
||||
case AV_PIX_FMT_RGB24:
|
||||
return yuv420_rgb24_mmx;
|
||||
case AV_PIX_FMT_BGR24:
|
||||
return yuv420_bgr24_mmx;
|
||||
case AV_PIX_FMT_RGB565:
|
||||
return yuv420_rgb16_mmx;
|
||||
case AV_PIX_FMT_RGB555:
|
||||
return yuv420_rgb15_mmx;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* HAVE_X86ASM */
|
||||
return NULL;
|
||||
}
|
||||
195
externals/ffmpeg/libswscale/x86/yuv2rgb_template.c
vendored
Executable file
195
externals/ffmpeg/libswscale/x86/yuv2rgb_template.c
vendored
Executable file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* software YUV to RGB converter
|
||||
*
|
||||
* Copyright (C) 2001-2007 Michael Niedermayer
|
||||
* (c) 2010 Konstantin Shishkov
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
#define YUV2RGB_LOOP(depth) \
|
||||
h_size = (c->dstW + 7) & ~7; \
|
||||
if (h_size * depth > FFABS(dstStride[0])) \
|
||||
h_size -= 8; \
|
||||
\
|
||||
vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
|
||||
\
|
||||
for (y = 0; y < srcSliceH; y++) { \
|
||||
uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
|
||||
const uint8_t *py = src[0] + y * srcStride[0]; \
|
||||
const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
|
||||
const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
|
||||
x86_reg index = -h_size / 2; \
|
||||
|
||||
/* Prototypes for the SIMD conversion loops implemented in x86/yuv_2_rgb.asm.
 *
 * Calling convention shared by all variants:
 *   - index            negative element counter; the asm adds to it each
 *                      iteration and loops while it stays negative (js .loop0)
 *   - image            destination row pointer
 *   - pu_index/pv_index/py_2index (and pa_2index for the alpha variants)
 *                      source row pointers pre-biased by the caller
 *                      (pu - index, py - 2*index, ...) so that ptr[index]
 *                      addresses the current pixel inside the asm loop
 *   - pointer_c_dither points at c->redDither, the first entry of the
 *                      per-context dither/coefficient table that the asm
 *                      reads with fixed byte offsets
 */
extern void RENAME(ff_yuv_420_rgb24)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                     const uint8_t *py_2index);
extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                     const uint8_t *py_2index);

/* The 15/16/32 bpp entry points are only declared (and their wrappers only
 * built) in the non-MMXEXT template pass; the MMXEXT pass adds RGB24/BGR24
 * only — matching the instantiation list at the end of yuv_2_rgb.asm. */
#if !COMPILE_TEMPLATE_MMXEXT
extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                     const uint8_t *py_2index);
extern void RENAME(ff_yuv_420_rgb16)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                     const uint8_t *py_2index);
extern void RENAME(ff_yuv_420_rgb32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                     const uint8_t *py_2index);
extern void RENAME(ff_yuv_420_bgr32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                     const uint8_t *py_2index);
extern void RENAME(ff_yuva_420_rgb32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                      const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                      const uint8_t *py_2index, const uint8_t *pa_2index);
extern void RENAME(ff_yuva_420_bgr32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                      const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                      const uint8_t *py_2index, const uint8_t *pa_2index);
|
||||
|
||||
/* YUV420P -> RGB555 slice converter (SwsFunc-shaped wrapper around the asm
 * loop).  Returns the number of source lines consumed (srcSliceH).
 * Note: YUV2RGB_LOOP(2) opens a per-line for-loop (closed by the bare '}'
 * below) that sets up image/py/pu/pv and the negative 'index' counter. */
static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
                                       int srcStride[],
                                       int srcSliceY, int srcSliceH,
                                       uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(2)

#ifdef DITHER1XBPP
        /* Per-line ordered-dither values; 5-bit channels all use ff_dither8,
         * with red offset by one line against blue/green. */
        c->blueDither  = ff_dither8[y & 1];
        c->greenDither = ff_dither8[y & 1];
        c->redDither   = ff_dither8[(y + 1) & 1];
#endif

        /* Pointers are pre-biased by index so the asm can address with
         * [ptr + index] while counting index up to zero. */
        RENAME(ff_yuv_420_rgb15)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
/* YUV420P -> RGB565 slice converter.  Identical structure to yuv420_rgb15,
 * except green (6 bits here) dithers with ff_dither4 instead of ff_dither8.
 * Returns srcSliceH. */
static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
                                       int srcStride[],
                                       int srcSliceY, int srcSliceH,
                                       uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(2)

#ifdef DITHER1XBPP
        c->blueDither  = ff_dither8[y & 1];
        c->greenDither = ff_dither4[y & 1];     /* 6-bit green: finer dither */
        c->redDither   = ff_dither8[(y + 1) & 1];
#endif

        RENAME(ff_yuv_420_rgb16)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
/* YUV420P -> packed 32-bit RGB slice converter (no dithering at this depth).
 * Returns srcSliceH. */
static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
                                       int srcStride[],
                                       int srcSliceY, int srcSliceH,
                                       uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(4)

        RENAME(ff_yuv_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
/* YUV420P -> packed 32-bit BGR slice converter.  Same structure as
 * yuv420_rgb32; channel order is handled inside the asm.  Returns srcSliceH. */
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
                                       int srcStride[],
                                       int srcSliceY, int srcSliceH,
                                       uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(4)

        RENAME(ff_yuv_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
/* YUVA420P -> packed 32-bit RGB with real alpha carried from src[3].
 * Returns srcSliceH. */
static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;
    YUV2RGB_LOOP(4)

        /* Alpha row for this line, biased like the luma pointer (full
         * resolution, hence the -2*index below). */
        const uint8_t *pa = src[3] + y * srcStride[3];
        RENAME(ff_yuva_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
/* YUVA420P -> packed 32-bit BGR with real alpha carried from src[3].
 * Mirror of yuva420_rgb32.  Returns srcSliceH. */
static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(4)

        const uint8_t *pa = src[3] + y * srcStride[3];
        RENAME(ff_yuva_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
    }
    return srcSliceH;
}
|
||||
#endif
|
||||
|
||||
/* YUV420P -> packed RGB24 slice converter.  Defined outside the
 * !COMPILE_TEMPLATE_MMXEXT guard: built in both template passes.
 * Returns srcSliceH. */
static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
                                       int srcStride[],
                                       int srcSliceY, int srcSliceH,
                                       uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(3)

        RENAME(ff_yuv_420_rgb24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
/* YUV420P -> packed BGR24 slice converter; mirror of yuv420_rgb24 and, like
 * it, built in both template passes.  Returns srcSliceH. */
static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
                                       int srcStride[],
                                       int srcSliceY, int srcSliceH,
                                       uint8_t *dst[], int dstStride[])
{
    int y, h_size, vshift;

    YUV2RGB_LOOP(3)

        RENAME(ff_yuv_420_bgr24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
    }
    return srcSliceH;
}
|
||||
|
||||
383
externals/ffmpeg/libswscale/x86/yuv_2_rgb.asm
vendored
Executable file
383
externals/ffmpeg/libswscale/x86/yuv_2_rgb.asm
vendored
Executable file
@@ -0,0 +1,383 @@
|
||||
;******************************************************************************
|
||||
;* software YUV to RGB converter
|
||||
;*
|
||||
;* Copyright (C) 2001-2007 Michael Niedermayer
|
||||
;* (c) 2010 Konstantin Shishkov
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; below variables are named like mask_dwXY, which means to preserve dword No.X & No.Y
mask_dw036 : db -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0
mask_dw147 : db  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1
mask_dw25  : db  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0
; pshufb control vectors used by the SSSE3 RGB24 packing path
rgb24_shuf1: db  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15,  4,  5, 10, 11
rgb24_shuf2: db 10, 11,  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15,  4,  5
rgb24_shuf3: db  4,  5, 10, 11,  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15
pw_00ff: times 8 dw 255      ; selects the even (low) luma bytes
pb_f8:   times 16 db 248     ; keep top 5 bits of a channel (RGB15/16)
pb_e0:   times 16 db 224     ; low green bits shifted into place (RGB15/16)
pb_03:   times 16 db 3       ; green high-bit mask for RGB15
pb_07:   times 16 db 7       ; green high-bit mask for RGB16

; word masks used by the MMXEXT RGB24 shuffle/merge
mask_1101: dw -1, -1,  0, -1
mask_0010: dw  0,  0, -1,  0
mask_0110: dw  0, -1, -1,  0
mask_1001: dw -1,  0,  0, -1
mask_0100: dw  0, -1,  0,  0

SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
;
|
||||
; YUV420/YUVA420 to RGB/BGR 15/16/24/32
|
||||
; R = Y + ((vrCoff * (v - 128)) >> 8)
|
||||
; G = Y - ((ugCoff * (u - 128) + vgCoff * (v - 128)) >> 8)
|
||||
; B = Y + ((ubCoff * (u - 128)) >> 8)
|
||||
;
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; MOV_H2L reg — move the high half of a SIMD register into its low half
; (64-bit logical shift for MMX, 8-byte lane shift for XMM).
%macro MOV_H2L 1
%if mmsize == 8
    psrlq %1, 32
%else ; mmsize == 16
    psrldq %1, 8
%endif
%endmacro
|
||||
|
||||
;-----------------------------------------------------------------------------
; yuv2rgb_fn  %1=yuv|yuva, %2=rgb|bgr, %3=15|16|24|32
; Emits cglobal %1_420_%2%3, the inner conversion loop called by the C
; wrappers.  'index' is negative on entry and counts up; the loop runs
; while it stays negative (js .loop0).  pointer_c_dither points at the
; context dither/coefficient table (c->redDither onwards).
;-----------------------------------------------------------------------------
%macro yuv2rgb_fn 3

; NOTE(review): for %1==yuv with %3==32 neither branch below (re)defines
; 'parameters'/'GPR_num' in this expansion — those %defines persist from an
; earlier instantiation of this macro.  Confirm the instantiation order at
; the bottom of the file keeps a 6-GPR define live before any yuv/32 use.
%if %3 == 32
%ifidn %1, yuva
%define parameters index, image, pu_index, pv_index, pointer_c_dither, py_2index, pa_2index
%define GPR_num 7
%endif
%else
%define parameters index, image, pu_index, pv_index, pointer_c_dither, py_2index
%define GPR_num 6
%endif

; Register-role aliases: m6 holds interleaved luma, m0/m1 hold chroma and
; later the blue/red results (swapped for bgr), m2 green, m3 alpha.
%define m_green m2
%define m_alpha m3
%define m_y m6
%define m_u m0
%define m_v m1
%ifidn %2, rgb
%define m_red m1
%define m_blue m0
%else
%define m_red m0
%define m_blue m1
%endif

; Coefficient/offset sources: memory operands for MMX and 32-bit SSE
; (not enough registers), broadcast into m8-m15 on x86-64 SSE.
%if mmsize == 8
%define time_num 1
%define reg_num 8
%define y_offset [pointer_c_ditherq + 8  * 8]
%define u_offset [pointer_c_ditherq + 9  * 8]
%define v_offset [pointer_c_ditherq + 10 * 8]
%define ug_coff  [pointer_c_ditherq + 7  * 8]
%define vg_coff  [pointer_c_ditherq + 6  * 8]
%define y_coff   [pointer_c_ditherq + 3  * 8]
%define ub_coff  [pointer_c_ditherq + 5  * 8]
%define vr_coff  [pointer_c_ditherq + 4  * 8]
%elif mmsize == 16
%define time_num 2
%if ARCH_X86_32
%define reg_num 8
%define my_offset [pointer_c_ditherq + 8  * 8]
%define mu_offset [pointer_c_ditherq + 9  * 8]
%define mv_offset [pointer_c_ditherq + 10 * 8]
%define mug_coff  [pointer_c_ditherq + 7  * 8]
%define mvg_coff  [pointer_c_ditherq + 6  * 8]
%define my_coff   [pointer_c_ditherq + 3  * 8]
%define mub_coff  [pointer_c_ditherq + 5  * 8]
%define mvr_coff  [pointer_c_ditherq + 4  * 8]
%else ; ARCH_X86_64
%define reg_num 16
%define y_offset m8
%define u_offset m9
%define v_offset m10
%define ug_coff  m11
%define vg_coff  m12
%define y_coff   m13
%define ub_coff  m14
%define vr_coff  m15
%endif ; ARCH_X86_32/64
%endif ; coeff define mmsize == 8/16

cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters

%if ARCH_X86_64
    movsxd indexq, indexd
%if mmsize == 16
    ; Hoist the coefficient loads out of the loop into m8-m15.
    VBROADCASTSD y_offset, [pointer_c_ditherq + 8  * 8]
    VBROADCASTSD u_offset, [pointer_c_ditherq + 9  * 8]
    VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8]
    VBROADCASTSD ug_coff,  [pointer_c_ditherq + 7  * 8]
    VBROADCASTSD vg_coff,  [pointer_c_ditherq + 6  * 8]
    VBROADCASTSD y_coff,   [pointer_c_ditherq + 3  * 8]
    VBROADCASTSD ub_coff,  [pointer_c_ditherq + 5  * 8]
    VBROADCASTSD vr_coff,  [pointer_c_ditherq + 4  * 8]
%endif
%endif
    movu m_y, [py_2indexq + 2 * indexq]
    movh m_u, [pu_indexq  +     indexq]
    movh m_v, [pv_indexq  +     indexq]
.loop0:
    ; Unpack: m0/m1 = zero-extended chroma words, m6 = even luma bytes,
    ; m7 = odd luma bytes; all scaled by 8 (<<3) for the pmulhw math.
    pxor      m4, m4
    mova      m7, m6
    punpcklbw m0, m4
    punpcklbw m1, m4
    mova      m2, [pw_00ff]
    pand      m6, m2
    psrlw     m7, 8
    psllw     m0, 3
    psllw     m1, 3
    psllw     m6, 3
    psllw     m7, 3
%if (ARCH_X86_32 && mmsize == 16)
    ; 32-bit SSE: reload/broadcast coefficients each iteration (no m8-m15).
    VBROADCASTSD m2, mu_offset
    VBROADCASTSD m3, mv_offset
    VBROADCASTSD m4, my_offset
    psubsw m0, m2                ; U = U - 128
    psubsw m1, m3                ; V = V - 128
    psubw  m6, m4
    psubw  m7, m4
    VBROADCASTSD m2, mug_coff
    VBROADCASTSD m3, mvg_coff
    VBROADCASTSD m4, my_coff
    VBROADCASTSD m5, mub_coff
    pmulhw m2, m0
    pmulhw m3, m1
    pmulhw m6, m4
    pmulhw m7, m4
    pmulhw m0, m5
    VBROADCASTSD m4, mvr_coff
    pmulhw m1, m4
%else ; ARCH_X86_64 || mmsize == 8
    psubsw m0, u_offset          ; U = U - 128
    psubsw m1, v_offset          ; V = V - 128
    psubw  m6, y_offset
    psubw  m7, y_offset
    mova   m2, m0
    mova   m3, m1
    pmulhw m2, ug_coff
    pmulhw m3, vg_coff
    pmulhw m6, y_coff
    pmulhw m7, y_coff
    pmulhw m0, ub_coff
    pmulhw m1, vr_coff
%endif
    ; Combine luma and chroma terms into even/odd pixel channels.
    paddsw m2, m3
    mova   m3, m7
    mova   m5, m7
    paddsw m3, m0                ; B1 B3 B5 B7 ...
    paddsw m5, m1                ; R1 R3 R5 R7 ...
    paddsw m7, m2                ; G1 G3 G4 G7 ...
    paddsw m0, m6                ; B0 B2 B4 B6 ...
    paddsw m1, m6                ; R0 R2 R4 R6 ...
    paddsw m2, m6                ; G0 G2 G4 G6 ...

%if %3 == 24 ; PACK RGB24
%define depth 3
    packuswb m0, m3              ; R0 R2 R4 R6 ... R1 R3 R5 R7 ...
    packuswb m1, m5              ; B0 B2 B4 B6 ... B1 B3 B5 B7 ...
    packuswb m2, m7              ; G0 G2 G4 G6 ... G1 G3 G5 G7 ...
    mova     m3, m_red
    mova     m6, m_blue
    MOV_H2L  m_red
    punpcklbw m3, m2             ; R0 G0 R2 G2 R4 G4 R6 G6 R8 G8 ...
    punpcklbw m6, m_red          ; B0 R1 B2 R3 B4 R5 B6 R7 B8 R9 ...
    mova      m5, m3
    punpckhbw m2, m_blue         ; G1 B1 G3 B3 G5 B5 G7 B7 G9 B9 ...
%if mmsize == 8
    punpcklwd m3, m6             ; R0 G0 B0 R1 R2 G2 B2 R3
    punpckhwd m5, m6             ; R4 G4 B4 R5 R6 G6 B6 R7
%if cpuflag(mmxext)
    ; Shuffle the word pairs into byte-contiguous RGB triplets and merge
    ; through the mask_* tables; stores are non-temporal.
    pshufw m1, m2, 0xc6
    pshufw m6, m3, 0x84
    pshufw m7, m5, 0x38
    pand   m6, [mask_1101]       ; R0 G0 B0 R1 -- -- R2 G2
    movq   m0, m1
    pand   m7, [mask_0110]       ; -- -- R6 G6 B6 R7 -- --
    movq   m2, m1
    pand   m1, [mask_0100]       ; -- -- G3 B3 -- -- -- --
    psrlq  m3, 48                ; B2 R3 -- -- -- -- -- --
    pand   m0, [mask_0010]       ; -- -- -- -- G1 B1 -- --
    psllq  m5, 32                ; -- -- -- -- R4 G4 B4 R5
    pand   m2, [mask_1001]       ; G5 B5 -- -- -- -- G7 B7
    por    m1, m3
    por    m0, m6
    por    m1, m5
    por    m2, m7
    movntq [imageq],      m0
    movntq [imageq + 8],  m1
    movntq [imageq + 16], m2
%else ; cpuflag(mmx)
    ; Plain MMX: emit the 24 bytes with overlapping 4-byte stores.
    movd  [imageq],      m3      ; R0 G0 R2 G2
    movd  [imageq + 4],  m2      ; G1 B1
    psrlq m3, 32
    psrlq m2, 16
    movd  [imageq + 6],  m3      ; R2 G2 B2 R3
    movd  [imageq + 10], m2      ; G3 B3
    psrlq m2, 16
    movd  [imageq + 12], m5      ; R4 G4 B4 R5
    movd  [imageq + 16], m2      ; G5 B5
    psrlq m5, 32
    movd  [imageq + 20], m2      ; -- -- G7 B7
    movd  [imageq + 18], m5      ; R6 G6 B6 R7
%endif ; mmsize = 8
%else ; mmsize == 16
    ; SSSE3: pshufb into rotated channel order, then mask-merge three
    ; 16-byte output vectors.
    pshufb m3, [rgb24_shuf1]     ; r0 g0 r6 g6 r12 g12 r2 g2 r8 g8 r14 g14 r4 g4 r10 g10
    pshufb m6, [rgb24_shuf2]     ; b10 r11 b0 r1 b6 r7 b12 r13 b2 r3 b8 r9 b14 r15 b4 r5
    pshufb m2, [rgb24_shuf3]     ; g5 b5 g11 b11 g1 b1 g7 b7 g13 b13 g3 b3 g9 b9 g15 b15
    mova   m7, [mask_dw036]
    mova   m4, [mask_dw147]
    mova   m5, [mask_dw25]
    pand   m0, m7, m3            ; r0 g0 --- --- --- --- r2 g2 --- --- --- --- r4 g4 --- ---
    pand   m1, m4, m6            ; --- --- b0 r1 --- --- --- --- b2 r3 --- --- --- --- b4 r5
    por    m0, m1
    pand   m1, m5, m2            ; --- --- --- --- g1 b1 --- --- --- --- g3 b3 --- --- --- ---
    por    m0, m1                ; r0 g0 b0 r1 g1 b1 r2 g2 b2 r3 g3 b3 r4 g4 b4 r5
    pand   m1, m7, m2            ; g5 b5 --- --- --- --- g7 b7 --- --- --- --- g9 b9 --- ---
    pand   m7, m6                ; b10 r11 --- --- --- --- b12 r13 --- --- --- --- b14 r15 --- ---
    pand   m6, m5                ; --- --- --- --- b6 r7 --- --- --- --- b8 r9 --- --- --- ---
    por    m1, m6
    pand   m6, m4, m3            ; --- --- r6 g6 --- --- --- --- r8 g8 --- --- --- --- r10 g10
    pand   m2, m4                ; --- --- g11 b11 --- --- --- --- g13 b13 --- --- --- --- g15 b15
    pand   m3, m5                ; --- --- --- --- r12 g12 --- --- --- --- r14 g14 --- --- --- ---
    por    m2, m7
    por    m1, m6                ; g5 b5 r6 g6 b6 r7 g7 b7 r8 g8 b8 r9 g9 b9 r10 g10
    por    m2, m3                ; b10 r11 g11 b11 r12 g12 b12 r13 g13 b13 r14 g14 b14 r15 g15 b15
    mova  [imageq],      m0
    mova  [imageq + 16], m1
    mova  [imageq + 32], m2
%endif ; mmsize = 16
%else ; PACK RGB15/16/32
    packuswb m0, m1
    packuswb m3, m5
    packuswb m2, m2
    mova     m1, m0
    packuswb m7, m7
    punpcklbw m0, m3             ; B0 B1 B2 B3 ... B7
    punpckhbw m1, m3             ; R0 R1 R2 R3 ... R7
    punpcklbw m2, m7             ; G0 G1 G2 G3 ... G7
%if %3 == 32 ; PACK RGB32
%define depth 4
%ifidn %1, yuv
    pcmpeqd m3, m3               ; Set alpha empty
%else
    mova    m3, [pa_2indexq + 2 * indexq] ; Load alpha
%endif
    ; Interleave B/G and R/A, then words, to produce 4 output vectors.
    mova      m5, m_blue
    mova      m6, m_red
    punpckhbw m5, m_green
    punpcklbw m_blue, m_green
    punpckhbw m6, m_alpha
    punpcklbw m_red, m_alpha
    mova      m_green, m_blue
    mova      m_alpha, m5
    punpcklwd m_blue, m_red
    punpckhwd m_green, m_red
    punpcklwd m5, m6
    punpckhwd m_alpha, m6
    mova [imageq + 0],             m_blue
    mova [imageq + 8  * time_num], m_green
    mova [imageq + 16 * time_num], m5
    mova [imageq + 24 * time_num], m_alpha
%else ; PACK RGB15/16
%define depth 2
%if cpuflag(ssse3)
%define red_dither m3
%define green_dither m4
%define blue_dither m5
    VBROADCASTSD red_dither,   [pointer_c_ditherq + 0 * 8]
    VBROADCASTSD green_dither, [pointer_c_ditherq + 1 * 8]
    VBROADCASTSD blue_dither,  [pointer_c_ditherq + 2 * 8]
%else ; cpuflag(mmx/mmxext)
%define blue_dither  [pointer_c_ditherq + 2 * 8]
%define green_dither [pointer_c_ditherq + 1 * 8]
%define red_dither   [pointer_c_ditherq + 0 * 8]
%endif
%if %3 == 15
%define gmask pb_03
%define isRGB15 1
%else
%define gmask pb_07
%define isRGB15 0
%endif
    ; Dither, truncate to 5/6/5 (or 5/5/5) and pack into 16-bit pixels.
    paddusb m0, blue_dither
    paddusb m2, green_dither
    paddusb m1, red_dither
    pand    m0, [pb_f8]
    pand    m1, [pb_f8]
    mova    m3, m2
    psllw   m2, 3 - isRGB15
    psrlw   m3, 5 + isRGB15
    psrlw   m0, 3
    psrlw   m1, isRGB15
    pand    m2, [pb_e0]
    pand    m3, [gmask]
    por     m0, m2
    por     m1, m3
    mova    m2, m0
    punpcklbw m0, m1
    punpckhbw m2, m1
    mova [imageq],                m0
    mova [imageq + 8 * time_num], m2
%endif ; PACK RGB15/16
%endif ; PACK RGB15/16/32

    ; Advance all pointers/counters; loop while index is still negative.
    movu m_y, [py_2indexq + 2 * indexq + 8 * time_num]
    movh m_v, [pv_indexq  +     indexq + 4 * time_num]
    movh m_u, [pu_indexq  +     indexq + 4 * time_num]
    add imageq, 8 * depth * time_num
    add indexq, 4 * time_num
    js .loop0

    REP_RET

%endmacro
|
||||
|
||||
; Instantiate the converters per SIMD flavour.  Note the MMXEXT pass adds
; only the RGB24/BGR24 variants, matching the #if !COMPILE_TEMPLATE_MMXEXT
; guard around the other wrappers on the C side.
INIT_MMX mmx
yuv2rgb_fn yuv,  rgb, 24
yuv2rgb_fn yuv,  bgr, 24
yuv2rgb_fn yuv,  rgb, 32
yuv2rgb_fn yuv,  bgr, 32
yuv2rgb_fn yuva, rgb, 32
yuv2rgb_fn yuva, bgr, 32
yuv2rgb_fn yuv,  rgb, 15
yuv2rgb_fn yuv,  rgb, 16

INIT_MMX mmxext
yuv2rgb_fn yuv, rgb, 24
yuv2rgb_fn yuv, bgr, 24

INIT_XMM ssse3
yuv2rgb_fn yuv,  rgb, 24
yuv2rgb_fn yuv,  bgr, 24
yuv2rgb_fn yuv,  rgb, 32
yuv2rgb_fn yuv,  bgr, 32
yuv2rgb_fn yuva, rgb, 32
yuv2rgb_fn yuva, bgr, 32
yuv2rgb_fn yuv,  rgb, 15
yuv2rgb_fn yuv,  rgb, 16
|
||||
1000
externals/ffmpeg/libswscale/yuv2rgb.c
vendored
Executable file
1000
externals/ffmpeg/libswscale/yuv2rgb.c
vendored
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user