early-access version 1680

2021-05-13 11:45:27 +02:00
parent 1434d96e7d
commit 66ed389c6f
311 changed files with 6452 additions and 2597 deletions
--- a/externals/ffmpeg/libswscale/aarch64/swscale_unscaled.c
+++ b/externals/ffmpeg/libswscale/aarch64/swscale_unscaled.c
@@ -42,15 +42,14 @@ static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[],
                                           uint8_t *dst[], int dstStride[]) {               \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
                                                                                            \
-    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH,                                        \
-                                 dst[0] + srcSliceY * dstStride[0], dstStride[0],           \
-                                 src[0], srcStride[0],                                      \
-                                 src[1], srcStride[1],                                      \
-                                 src[2], srcStride[2],                                      \
-                                 yuv2rgb_table,                                             \
-                                 c->yuv2rgb_y_offset >> 6,                                  \
-                                 c->yuv2rgb_y_coeff);                                       \
-    return 0;                                                                               \
+    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH,                                 \
+                                        dst[0] + srcSliceY * dstStride[0], dstStride[0],    \
+                                        src[0], srcStride[0],                               \
+                                        src[1], srcStride[1],                               \
+                                        src[2], srcStride[2],                               \
+                                        yuv2rgb_table,                                      \
+                                        c->yuv2rgb_y_offset >> 6,                           \
+                                        c->yuv2rgb_y_coeff);                                \
 }                                                                                           \

 #define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)                                             \
@@ -76,14 +75,12 @@ static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[],
                                           uint8_t *dst[], int dstStride[]) {               \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
                                                                                            \
-    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH,                                        \
-                                 dst[0] + srcSliceY * dstStride[0], dstStride[0],           \
-                                 src[0], srcStride[0], src[1], srcStride[1],                \
-                                 yuv2rgb_table,                                             \
-                                 c->yuv2rgb_y_offset >> 6,                                  \
-                                 c->yuv2rgb_y_coeff);                                       \
-                                                                                            \
-    return 0;                                                                               \
+    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH,                                 \
+                                        dst[0] + srcSliceY * dstStride[0], dstStride[0],    \
+                                        src[0], srcStride[0], src[1], srcStride[1],         \
+                                        yuv2rgb_table,                                      \
+                                        c->yuv2rgb_y_offset >> 6,                           \
+                                        c->yuv2rgb_y_coeff);                                \
 }                                                                                           \

 #define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)                                               \
--- a/externals/ffmpeg/libswscale/aarch64/yuv2rgb_neon.S
+++ b/externals/ffmpeg/libswscale/aarch64/yuv2rgb_neon.S
@@ -142,6 +142,7 @@
 .macro declare_func ifmt ofmt
 function ff_\ifmt\()_to_\ofmt\()_neon, export=1
    load_args_\ifmt
+    mov                 w9, w1
 1:
    mov                 w8, w0                                          // w8 = width
 2:
@@ -193,6 +194,7 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1
    increment_\ifmt
    subs                w1, w1, #1                                      // height -= 1
    b.gt                1b
+    mov                 w0, w9
    ret
 endfunc
 .endm
--- a/externals/ffmpeg/libswscale/input.c
+++ b/externals/ffmpeg/libswscale/input.c
@@ -244,7 +244,8 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
 #define input_pixel(i) ((origin == AV_PIX_FMT_RGBA ||                      \
                         origin == AV_PIX_FMT_BGRA ||                      \
                         origin == AV_PIX_FMT_ARGB ||                      \
-                         origin == AV_PIX_FMT_ABGR)                        \
+                         origin == AV_PIX_FMT_ABGR ||                      \
+                         origin == AV_PIX_FMT_X2RGB10)                     \
                        ? AV_RN32A(&src[(i) * 4])                       \
                        : (isBE(origin) ? AV_RB16(&src[(i) * 2])        \
                                        : AV_RL16(&src[(i) * 2])))
@@ -391,6 +392,7 @@ rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,
 rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
 rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
 rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_X2RGB10LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)

 static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
                         const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
@@ -1341,6 +1343,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
        case AV_PIX_FMT_RGB444BE:
            c->chrToYV12 = rgb12beToUV_half_c;
            break;
+        case AV_PIX_FMT_X2RGB10LE:
+            c->chrToYV12 = rgb30leToUV_half_c;
+            break;
        }
    } else {
        switch (srcFormat) {
@@ -1422,6 +1427,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
        case AV_PIX_FMT_RGB444BE:
            c->chrToYV12 = rgb12beToUV_c;
            break;
+        case AV_PIX_FMT_X2RGB10LE:
+            c->chrToYV12 = rgb30leToUV_c;
+            break;
        }
    }

@@ -1701,6 +1709,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
    case AV_PIX_FMT_Y210LE:
        c->lumToYV12 = y210le_Y_c;
        break;
+    case AV_PIX_FMT_X2RGB10LE:
+        c->lumToYV12 =rgb30leToY_c;
+        break;
    }
    if (c->needAlpha) {
        if (is16BPS(srcFormat) || isNBPS(srcFormat)) {
--- a/externals/ffmpeg/libswscale/output.c
+++ b/externals/ffmpeg/libswscale/output.c
@@ -180,7 +180,8 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
    }
 }

-static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+static void yuv2p016cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                         const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest8, int chrDstW)
 {
@@ -188,7 +189,7 @@ static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
    const int32_t **uSrc = (const int32_t **)chrUSrc;
    const int32_t **vSrc = (const int32_t **)chrVSrc;
    int shift = 15;
-    int big_endian = c->dstFormat == AV_PIX_FMT_P016BE;
+    int big_endian = dstFormat == AV_PIX_FMT_P016BE;
    int i, j;

    for (i = 0; i < chrDstW; i++) {
@@ -402,12 +403,11 @@ static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
    }
 }

-static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
-                        const int16_t **chrUSrc, const int16_t **chrVSrc,
-                        uint8_t *dest, int chrDstW)
+static void yuv2nv12cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                         const int16_t *chrFilter, int chrFilterSize,
+                         const int16_t **chrUSrc, const int16_t **chrVSrc,
+                         uint8_t *dest, int chrDstW)
 {
-    enum AVPixelFormat dstFormat = c->dstFormat;
-    const uint8_t *chrDither = c->chrDither8;
    int i;

    if (dstFormat == AV_PIX_FMT_NV12 ||
@@ -477,13 +477,14 @@ static void yuv2p010lX_c(const int16_t *filter, int filterSize,
    }
 }

-static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+static void yuv2p010cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                         const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest8, int chrDstW)
 {
    uint16_t *dest = (uint16_t*)dest8;
    int shift = 17;
-    int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
+    int big_endian = dstFormat == AV_PIX_FMT_P010BE;
    int i, j;

    for (i = 0; i < chrDstW; i++) {
@@ -1602,6 +1603,13 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
+    } else if (target == AV_PIX_FMT_X2RGB10) {
+        uint32_t *dest = (uint32_t *) _dest;
+        const uint32_t *r = (const uint32_t *) _r;
+        const uint32_t *g = (const uint32_t *) _g;
+        const uint32_t *b = (const uint32_t *) _b;
+        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
+        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
    } else /* 8/4 bits */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
@@ -1839,6 +1847,7 @@ YUV2RGBWRAPPER(yuv2rgb,,  12,    AV_PIX_FMT_RGB444,    0)
 YUV2RGBWRAPPER(yuv2rgb,,   8,    AV_PIX_FMT_RGB8,      0)
 YUV2RGBWRAPPER(yuv2rgb,,   4,    AV_PIX_FMT_RGB4,      0)
 YUV2RGBWRAPPER(yuv2rgb,,   4b,   AV_PIX_FMT_RGB4_BYTE, 0)
+YUV2RGBWRAPPER(yuv2, rgb, x2rgb10, AV_PIX_FMT_X2RGB10, 0)

 static av_always_inline void yuv2rgb_write_full(SwsContext *c,
    uint8_t *dest, int i, int Y, int A, int U, int V,
@@ -2974,6 +2983,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
            break;
+        case AV_PIX_FMT_X2RGB10LE:
+        case AV_PIX_FMT_X2RGB10BE:
+            *yuv2packed1 = yuv2x2rgb10_1_c;
+            *yuv2packed2 = yuv2x2rgb10_2_c;
+            *yuv2packedX = yuv2x2rgb10_X_c;
+            break;
        }
    }
    switch (dstFormat) {
--- a/externals/ffmpeg/libswscale/swscale_internal.h
+++ b/externals/ffmpeg/libswscale/swscale_internal.h
@@ -119,7 +119,8 @@ typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
 * Write one line of horizontally scaled chroma to interleaved output
 * with multi-point vertical scaling between input pixels.
 *
- * @param c             SWS scaling context
+ * @param dstFormat     destination pixel format
+ * @param chrDither     ordered dither array of type uint8_t and size 8
 * @param chrFilter     vertical chroma scaling coefficients, 12 bits [0,4096]
 * @param chrUSrc       scaled chroma (U) source data, 15 bits for 8-10-bit
 *                      output, 19 bits for 16-bit output (in int32_t)
@@ -130,7 +131,8 @@ typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
 *                      output, this is in uint16_t
 * @param dstW          width of chroma planes
 */
-typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
+typedef void (*yuv2interleavedX_fn)(enum AVPixelFormat dstFormat,
+                                    const uint8_t *chrDither,
                                    const int16_t *chrFilter,
                                    int chrFilterSize,
                                    const int16_t **chrUSrc,
--- a/externals/ffmpeg/libswscale/tests/swscale.c
+++ b/externals/ffmpeg/libswscale/tests/swscale.c
@@ -422,7 +422,9 @@ bad_option:
    for (y = 0; y < H; y++)
        for (x = 0; x < W * 4; x++)
            rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
-    sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
+    res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
+    if (res < 0 || res != (H / 12))
+        goto error;
    sws_freeContext(sws);
    av_free(rgb_data);

--- a/externals/ffmpeg/libswscale/utils.c
+++ b/externals/ffmpeg/libswscale/utils.c
@@ -271,6 +271,7 @@ static const FormatEntry format_entries[] = {
    [AV_PIX_FMT_NV24]        = { 1, 1 },
    [AV_PIX_FMT_NV42]        = { 1, 1 },
    [AV_PIX_FMT_Y210LE]      = { 1, 0 },
+    [AV_PIX_FMT_X2RGB10LE]   = { 1, 1 },
 };

 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -352,13 +353,14 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
    emms_c(); // FIXME should not be required but IS (even for non-MMX versions)

    // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
-    FF_ALLOC_ARRAY_OR_GOTO(NULL, *filterPos, (dstW + 3), sizeof(**filterPos), fail);
+    if (!FF_ALLOC_TYPED_ARRAY(*filterPos, dstW + 3))
+        goto nomem;

    if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled
        int i;
        filterSize = 1;
-        FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter,
-                                dstW, sizeof(*filter) * filterSize, fail);
+        if (!FF_ALLOCZ_TYPED_ARRAY(filter, dstW * filterSize))
+            goto nomem;

        for (i = 0; i < dstW; i++) {
            filter[i * filterSize] = fone;
@@ -368,8 +370,8 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
        int i;
        int64_t xDstInSrc;
        filterSize = 1;
-        FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
-                               dstW, sizeof(*filter) * filterSize, fail);
+        if (!FF_ALLOC_TYPED_ARRAY(filter, dstW * filterSize))
+            goto nomem;

        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
        for (i = 0; i < dstW; i++) {
@@ -384,8 +386,8 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
        int i;
        int64_t xDstInSrc;
        filterSize = 2;
-        FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
-                               dstW, sizeof(*filter) * filterSize, fail);
+        if (!FF_ALLOC_TYPED_ARRAY(filter, dstW * filterSize))
+            goto nomem;

        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
        for (i = 0; i < dstW; i++) {
@@ -425,9 +427,8 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
        filterSize = FFMIN(filterSize, srcW - 2);
        filterSize = FFMAX(filterSize, 1);

-        FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
-                               dstW, sizeof(*filter) * filterSize, fail);
-
+        if (!FF_ALLOC_TYPED_ARRAY(filter, dstW * filterSize))
+            goto nomem;
        xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - (filterSize - 2) * (1LL<<16)) / (1 << 17);
@@ -525,8 +526,8 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
    if (dstFilter)
        filter2Size += dstFilter->length - 1;
    av_assert0(filter2Size > 0);
-    FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter2, dstW, filter2Size * sizeof(*filter2), fail);
-
+    if (!FF_ALLOCZ_TYPED_ARRAY(filter2, dstW * filter2Size))
+        goto nomem;
    for (i = 0; i < dstW; i++) {
        int j, k;

@@ -611,7 +612,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
    av_assert0(filterSize > 0);
    filter = av_malloc_array(dstW, filterSize * sizeof(*filter));
    if (!filter)
-        goto fail;
+        goto nomem;
    if (filterSize >= MAX_FILTER_SIZE * 16 /
                      ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16)) {
        ret = RETCODE_USE_CASCADE;
@@ -684,8 +685,8 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,

    // Note the +1 is for the MMX scaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
-    FF_ALLOCZ_ARRAY_OR_GOTO(NULL, *outFilter,
-                            (dstW + 3), *outFilterSize * sizeof(int16_t), fail);
+    if (!FF_ALLOCZ_TYPED_ARRAY(*outFilter, *outFilterSize * (dstW + 3)))
+        goto nomem;

    /* normalize & store in outFilter */
    for (i = 0; i < dstW; i++) {
@@ -721,10 +722,13 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
    }

    ret = 0;
-
+    goto done;
+nomem:
+    ret = AVERROR(ENOMEM);
 fail:
    if(ret < 0)
        av_log(NULL, ret == RETCODE_USE_CASCADE ? AV_LOG_DEBUG : AV_LOG_ERROR, "sws: initFilter failed\n");
+done:
    av_free(filter);
    av_free(filter2);
    return ret;
@@ -1417,7 +1421,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
    c->chrDstW = AV_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
    c->chrDstH = AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample);

-    FF_ALLOCZ_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
+    if (!FF_ALLOCZ_TYPED_ARRAY(c->formatConvBuffer, FFALIGN(srcW * 2 + 78, 16) * 2))
+        goto nomem;

    c->srcBpc = desc_src->comp[0].depth;
    if (c->srcBpc < 8)
@@ -1486,7 +1491,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                                                srcW, srcH, tmpFmt,
                                                flags, NULL, NULL, c->param);
        if (!c->cascaded_context[0]) {
-            return -1;
+            return AVERROR(ENOMEM);
        }

        c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFmt,
@@ -1494,7 +1499,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                                                flags, srcFilter, dstFilter, c->param);

        if (!c->cascaded_context[1])
-            return -1;
+            return AVERROR(ENOMEM);

        c2 = c->cascaded_context[1];
        c2->is_internal_gamma = 1;
@@ -1507,10 +1512,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
        // to properly create the gamma convert FilterDescriptor
        // we have to re-initialize it
        ff_free_filters(c2);
-        if (ff_init_filters(c2) < 0) {
+        if ((ret = ff_init_filters(c2)) < 0) {
            sws_freeContext(c2);
            c->cascaded_context[1] = NULL;
-            return -1;
+            return ret;
        }

        c->cascaded_context[2] = NULL;
@@ -1524,7 +1529,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                                                dstW, dstH, dstFormat,
                                                flags, NULL, NULL, c->param);
            if (!c->cascaded_context[2])
-                return -1;
+                return AVERROR(ENOMEM);
        }
        return 0;
    }
@@ -1543,13 +1548,13 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                                                    srcW, srcH, tmpFormat,
                                                    flags, srcFilter, NULL, c->param);
            if (!c->cascaded_context[0])
-                return -1;
+                return AVERROR(ENOMEM);

            c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFormat,
                                                    dstW, dstH, dstFormat,
                                                    flags, NULL, dstFilter, c->param);
            if (!c->cascaded_context[1])
-                return -1;
+                return AVERROR(ENOMEM);
            return 0;
        }
    }
@@ -1570,41 +1575,42 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
    if (CONFIG_SWSCALE_ALPHA && isALPHA(srcFormat) && !isALPHA(dstFormat)) {
        enum AVPixelFormat tmpFormat = alphaless_fmt(srcFormat);

-        if (tmpFormat != AV_PIX_FMT_NONE && c->alphablend != SWS_ALPHA_BLEND_NONE)
-        if (!unscaled ||
-            dstFormat != tmpFormat ||
-            usesHFilter || usesVFilter ||
-            c->srcRange != c->dstRange
-        ) {
-            c->cascaded_mainindex = 1;
-            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
-                                srcW, srcH, tmpFormat, 64);
-            if (ret < 0)
-                return ret;
+        if (tmpFormat != AV_PIX_FMT_NONE && c->alphablend != SWS_ALPHA_BLEND_NONE) {
+            if (!unscaled ||
+                dstFormat != tmpFormat ||
+                usesHFilter || usesVFilter ||
+                c->srcRange != c->dstRange
+            ) {
+                c->cascaded_mainindex = 1;
+                ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
+                                     srcW, srcH, tmpFormat, 64);
+                if (ret < 0)
+                    return ret;

-            c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, srcFormat,
-                                                        srcW, srcH, tmpFormat,
-                                                        flags, c->param);
-            if (!c->cascaded_context[0])
-                return -1;
-            c->cascaded_context[0]->alphablend = c->alphablend;
-            ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
-            if (ret < 0)
-                return ret;
+                c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, srcFormat,
+                                                            srcW, srcH, tmpFormat,
+                                                            flags, c->param);
+                if (!c->cascaded_context[0])
+                    return AVERROR(EINVAL);
+                c->cascaded_context[0]->alphablend = c->alphablend;
+                ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
+                if (ret < 0)
+                    return ret;

-            c->cascaded_context[1] = sws_alloc_set_opts(srcW, srcH, tmpFormat,
-                                                        dstW, dstH, dstFormat,
-                                                        flags, c->param);
-            if (!c->cascaded_context[1])
-                return -1;
+                c->cascaded_context[1] = sws_alloc_set_opts(srcW, srcH, tmpFormat,
+                                                            dstW, dstH, dstFormat,
+                                                            flags, c->param);
+                if (!c->cascaded_context[1])
+                    return AVERROR(EINVAL);

-            c->cascaded_context[1]->srcRange = c->srcRange;
-            c->cascaded_context[1]->dstRange = c->dstRange;
-            ret = sws_init_context(c->cascaded_context[1], srcFilter , dstFilter);
-            if (ret < 0)
-                return ret;
+                c->cascaded_context[1]->srcRange = c->srcRange;
+                c->cascaded_context[1]->dstRange = c->dstRange;
+                ret = sws_init_context(c->cascaded_context[1], srcFilter , dstFilter);
+                if (ret < 0)
+                    return ret;

-            return 0;
+                return 0;
+            }
        }
    }

@@ -1657,10 +1663,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                return AVERROR(ENOMEM);
            }

-            FF_ALLOCZ_OR_GOTO(c, c->hLumFilter,    (dstW           / 8 + 8) * sizeof(int16_t), fail);
-            FF_ALLOCZ_OR_GOTO(c, c->hChrFilter,    (c->chrDstW     / 4 + 8) * sizeof(int16_t), fail);
-            FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW       / 2 / 8 + 8) * sizeof(int32_t), fail);
-            FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
+            if (!FF_ALLOCZ_TYPED_ARRAY(c->hLumFilter,    dstW           / 8 + 8) ||
+                !FF_ALLOCZ_TYPED_ARRAY(c->hChrFilter,    c->chrDstW     / 4 + 8) ||
+                !FF_ALLOCZ_TYPED_ARRAY(c->hLumFilterPos, dstW       / 2 / 8 + 8) ||
+                !FF_ALLOCZ_TYPED_ARRAY(c->hChrFilterPos, c->chrDstW / 2 / 4 + 8))
+                goto nomem;

            ff_init_hscaler_mmxext(      dstW, c->lumXInc, c->lumMmxextFilterCode,
                                c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
@@ -1671,6 +1678,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
            if (   mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1
                || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) {
                av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n");
+                ret = AVERROR(EINVAL);
                goto fail;
            }
 #endif
@@ -1728,8 +1736,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
            goto fail;

 #if HAVE_ALTIVEC
-        FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH,    fail);
-        FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);
+        if (!FF_ALLOC_TYPED_ARRAY(c->vYCoeffsBank, c->vLumFilterSize * c->dstH) ||
+            !FF_ALLOC_TYPED_ARRAY(c->vCCoeffsBank, c->vChrFilterSize * c->chrDstH))
+            goto nomem;

        for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
            int j;
@@ -1748,7 +1757,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
    }

    for (i = 0; i < 4; i++)
-        FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail);
+        if (!FF_ALLOCZ_TYPED_ARRAY(c->dither_error[i], c->dstW + 2))
+            goto nomem;

    c->needAlpha = (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) ? 1 : 0;

@@ -1838,6 +1848,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,

    c->swscale = ff_getSwsFunc(c);
    return ff_init_filters(c);
+nomem:
+    ret = AVERROR(ENOMEM);
 fail: // FIXME replace things by appropriate error codes
    if (ret == RETCODE_USE_CASCADE)  {
        int tmpW = sqrt(srcW * (int64_t)dstW);
@@ -1859,16 +1871,16 @@ fail: // FIXME replace things by appropriate error codes
                                                tmpW, tmpH, tmpFormat,
                                                flags, srcFilter, NULL, c->param);
        if (!c->cascaded_context[0])
-            return -1;
+            return AVERROR(ENOMEM);

        c->cascaded_context[1] = sws_getContext(tmpW, tmpH, tmpFormat,
                                                dstW, dstH, dstFormat,
                                                flags, NULL, dstFilter, c->param);
        if (!c->cascaded_context[1])
-            return -1;
+            return AVERROR(ENOMEM);
        return 0;
    }
-    return -1;
+    return ret;
 }

 SwsContext *sws_alloc_set_opts(int srcW, int srcH, enum AVPixelFormat srcFormat,
--- a/externals/ffmpeg/libswscale/version.h
+++ b/externals/ffmpeg/libswscale/version.h
@@ -27,7 +27,7 @@
 #include "libavutil/version.h"

 #define LIBSWSCALE_VERSION_MAJOR   5
-#define LIBSWSCALE_VERSION_MINOR   7
+#define LIBSWSCALE_VERSION_MINOR   8
 #define LIBSWSCALE_VERSION_MICRO 100

 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
--- a/externals/ffmpeg/libswscale/vscale.c
+++ b/externals/ffmpeg/libswscale/vscale.c
@@ -92,7 +92,7 @@ static int chr_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int slice
        uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : chrSliceY * inst->filter_size);

        if (c->yuv2nv12cX) {
-            inst->pfn.yuv2interleavedX(c, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW);
+            inst->pfn.yuv2interleavedX(c->dstFormat, c->chrDither8, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW);
        } else if (inst->filter_size == 1) {
            inst->pfn.yuv2planar1((const int16_t*)src1[0], dst1[0], dstW, c->chrDither8, 0);
            inst->pfn.yuv2planar1((const int16_t*)src2[0], dst2[0], dstW, c->chrDither8, 3);
--- a/externals/ffmpeg/libswscale/x86/output.asm
+++ b/externals/ffmpeg/libswscale/x86/output.asm
@@ -2,6 +2,7 @@
 ;* x86-optimized vertical line scaling functions
 ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
 ;*                    Kieran Kunhya <kieran@kunhya.com>
+;*           (c) 2020 Nelson Gomez <nelson.gomez@microsoft.com>
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -22,7 +23,7 @@

 %include "libavutil/x86/x86util.asm"

-SECTION_RODATA
+SECTION_RODATA 32

 minshort:      times 8 dw 0x8000
 yuv2yuvX_16_start:  times 4 dd 0x4000 - 0x40000000
@@ -34,9 +35,20 @@ pd_4:          times 4 dd 4
 pd_4min0x40000:times 4 dd 4 - (0x40000)
 pw_16:         times 8 dw 16
 pw_32:         times 8 dw 32
+pd_255:        times 8 dd 255
 pw_512:        times 8 dw 512
 pw_1024:       times 8 dw 1024

+yuv2nv12_shuffle_mask: times 2 db 0,  4,  8, 12, \
+                                 -1, -1, -1, -1, \
+                                 -1, -1, -1, -1, \
+                                 -1, -1, -1, -1
+yuv2nv21_shuffle_mask: times 2 db 4,  0, 12,  8, \
+                                 -1, -1, -1, -1, \
+                                 -1, -1, -1, -1, \
+                                 -1, -1, -1, -1
+yuv2nv12_permute_mask: dd 0, 4, 1, 2, 3, 5, 6, 7
+
 SECTION .text

 ;-----------------------------------------------------------------------------
@@ -423,3 +435,115 @@ yuv2plane1_fn  9, 5, 3
 yuv2plane1_fn 10, 5, 3
 yuv2plane1_fn 16, 5, 3
 %endif
+
+%undef movsx
+
+;-----------------------------------------------------------------------------
+; AVX2 yuv2nv12cX implementation
+;
+; void ff_yuv2nv12cX_avx2(enum AVPixelFormat format, const uint8_t *dither,
+;                         const int16_t *filter, int filterSize,
+;                         const int16_t **u, const int16_t **v,
+;                         uint8_t *dst, int dstWidth)
+;
+; void ff_yuv2nv21cX_avx2(enum AVPixelFormat format, const uint8_t *dither,
+;                         const int16_t *filter, int filterSize,
+;                         const int16_t **u, const int16_t **v,
+;                         uint8_t *dst, int dstWidth)
+;-----------------------------------------------------------------------------
+
+%if ARCH_X86_64
+%macro yuv2nv12cX_fn 1
+cglobal %1cX, 8, 11, 13, tmp1, dither, filter, filterSize, u, v, dst, dstWidth
+
+    mov tmp1q, qword [ditherq]
+    movq xm0, tmp1q
+    ror tmp1q, 24
+    movq xm1, tmp1q
+
+    pmovzxbd m0, xm0
+    pslld m0, m0, 12                        ; ditherLo
+    pmovzxbd m1, xm1
+    pslld m1, m1, 12                        ; ditherHi
+
+    pxor m9, m9                             ; uint8_min dwords
+    mova m10, [pd_255]                      ; uint8_max dwords
+    mova m11, [%1_shuffle_mask]             ; shuffle_mask
+    mova m12, [yuv2nv12_permute_mask]       ; permute mask
+
+    DEFINE_ARGS tmp1, tmp2, filter, filterSize, u, v, dst, dstWidth
+
+    xor r8q, r8q
+
+nv12_outer_%1:
+    mova m2, m0                             ; resultLo
+    mova m3, m1                             ; resultHi
+    xor r9q, r9q
+
+nv12_inner_%1:
+    movsx r10d, word [filterq + (2 * r9q)]
+    movd xm4, r10d
+    vpbroadcastd m4, xm4                    ; filter
+
+    mov tmp1q, [uq + (gprsize * r9q)]
+    mova xm7, oword [tmp1q + 2 * r8q]
+
+    mov tmp2q, [vq + (gprsize * r9q)]
+    mova xm8, oword [tmp2q + 2 * r8q]
+
+    punpcklwd xm5, xm7, xm8
+    pmovsxwd m5, xm5                        ; multiplicandsLo
+    punpckhwd xm6, xm7, xm8
+    pmovsxwd m6, xm6                        ; multiplicandsHi
+
+    pmulld m7, m5, m4                       ; mulResultLo
+    pmulld m8, m6, m4                       ; mulResultHi
+    paddd m2, m2, m7                        ; resultLo += mulResultLo
+    paddd m3, m3, m8                        ; resultHi += mulResultHi
+
+    inc r9d
+    cmp r9d, filterSized
+    jl nv12_inner_%1
+    ; end of inner loop
+
+    psrad m2, m2, 19
+    psrad m3, m3, 19
+
+    ; Vectorized av_clip_uint8
+    pmaxsd m2, m2, m9
+    pmaxsd m3, m3, m9
+    pminsd m2, m2, m10
+    pminsd m3, m3, m10
+
+    ; At this point we have clamped uint8s arranged in this order:
+    ;     m2: u1  0  0  0  v1  0  0  0  [...]
+    ;     m3: u5  0  0  0  v5  0  0  0  [...]
+    ;
+    ; First, we shuffle the bytes to make the bytes semi-contiguous.
+    ; AVX-2 doesn't have cross-lane shuffling, so we'll end up with:
+    ;     m2: u1  v1  u2  v2  0  0  0  0  0  0  0  0  u3  v3  u4  v4
+    ;     m3: u5  v5  u6  v6  0  0  0  0  0  0  0  0  u7  v7  u8  v8
+    pshufb m2, m2, m11
+    pshufb m3, m3, m11
+
+    ; To fix the cross-lane shuffling issue, we'll then use cross-lane
+    ; permutation to combine the two segments
+    vpermd m2, m12, m2
+    vpermd m3, m12, m3
+
+    ; Now we have the final results in the lower 8 bytes of each register
+    movq [dstq], xm2
+    movq [dstq + 8], xm3
+
+    add r8d, 8
+    add dstq, 16
+
+    cmp r8d, dstWidthd
+    jl nv12_outer_%1
+    RET
+%endmacro
+
+INIT_YMM avx2
+yuv2nv12cX_fn yuv2nv12
+yuv2nv12cX_fn yuv2nv21
+%endif ; ARCH_X86_64
--- a/externals/ffmpeg/libswscale/x86/swscale.c
+++ b/externals/ffmpeg/libswscale/x86/swscale.c
@@ -380,6 +380,17 @@ INPUT_FUNCS(sse2);
 INPUT_FUNCS(ssse3);
 INPUT_FUNCS(avx);

+#if ARCH_X86_64
+#define YUV2NV_DECL(fmt, opt) \
+void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
+                                  const int16_t *filter, int filterSize, \
+                                  const int16_t **u, const int16_t **v, \
+                                  uint8_t *dst, int dstWidth)
+
+YUV2NV_DECL(nv12, avx2);
+YUV2NV_DECL(nv21, avx2);
+#endif
+
 av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 {
    int cpu_flags = av_get_cpu_flags();
@@ -580,4 +591,21 @@ switch(c->dstBpc){ \
            break;
        }
    }
+
+#if ARCH_X86_64
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        switch (c->dstFormat) {
+        case AV_PIX_FMT_NV12:
+        case AV_PIX_FMT_NV24:
+            c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
+            break;
+        case AV_PIX_FMT_NV21:
+        case AV_PIX_FMT_NV42:
+            c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
+            break;
+        default:
+            break;
+        }
+    }
+#endif
 }
--- a/externals/ffmpeg/libswscale/yuv2rgb.c
+++ b/externals/ffmpeg/libswscale/yuv2rgb.c
@@ -965,6 +965,28 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
        fill_table(c->table_bU, 1, cbu, y_table + yoffs);
        fill_gv_table(c->table_gV, 1, cgv);
        break;
+    case 30:
+        rbase = 20;
+        gbase = 10;
+        bbase = 0;
+        needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
+        if (!needAlpha)
+            abase = 30;
+        ALLOC_YUV_TABLE(table_plane_size * 3 * 4);
+        y_table32   = c->yuvTable;
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size; i++) {
+            unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table32[i]= (yval << rbase) + (needAlpha ? 0 : (255u << abase));
+            y_table32[i + table_plane_size] = yval << gbase;
+            y_table32[i + 2 * table_plane_size] = yval << bbase;
+            yb += cy;
+        }
+        fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
+        fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + table_plane_size);
+        fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2 * table_plane_size);
+        fill_gv_table(c->table_gV, 4, cgv);
+        break;
    case 32:
    case 64:
        base      = (c->dstFormat == AV_PIX_FMT_RGB32_1 ||