diff --git a/README.md b/README.md
index fec4d641b..778d2948a 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 2153.
+This is the source code for early-access 2156.
 
 ## Legal Notice
 
diff --git a/externals/FidelityFX-FSR/ffx-fsr/ffx_a.h b/externals/FidelityFX-FSR/ffx-fsr/ffx_a.h
new file mode 100755
index 000000000..d04bff55c
--- /dev/null
+++ b/externals/FidelityFX-FSR/ffx-fsr/ffx_a.h
@@ -0,0 +1,2656 @@
+//==============================================================================================================================
+//
+//                                               [A] SHADER PORTABILITY 1.20210629
+//
+//==============================================================================================================================
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+// MIT LICENSE
+// ===========
+// Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS").
+// -----------
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+// -----------
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+// Software.
+// -----------
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+// ABOUT
+// =====
+// Common central point for high-level shading language and C portability for various shader headers.
+//------------------------------------------------------------------------------------------------------------------------------
+// DEFINES
+// =======
+// A_CPU ..... Include the CPU related code.
+// A_GPU ..... Include the GPU related code.
+// A_GLSL .... Using GLSL.
+// A_HLSL .... Using HLSL.
+// A_HLSL_6_2  Using HLSL 6.2 with new 'uint16_t' and related types (requires '-enable-16bit-types').
+// A_NO_16_BIT_CAST Don't use instructions that are not available in SPIR-V (needed for running A_HLSL_6_2 on Vulkan)
+// A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default).
+// =======
+// A_BYTE .... Support 8-bit integer.
+// A_HALF .... Support 16-bit integer and floating point.
+// A_LONG .... Support 64-bit integer.
+// A_DUBL .... Support 64-bit floating point.
+// =======
+// A_WAVE .... Support wave-wide operations.
+//------------------------------------------------------------------------------------------------------------------------------
+// To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'.
+//------------------------------------------------------------------------------------------------------------------------------
+// SIMPLIFIED TYPE SYSTEM
+// ======================
+//  - All ints will be unsigned with exception of when signed is required.
+//  - Type naming simplified and shortened "A<type><#components>",
+//     - H = 16-bit float (half)
+//     - F = 32-bit float (float)
+//     - D = 64-bit float (double)
+//     - P = 1-bit integer (predicate, not using bool because 'B' is used for byte)
+//     - B = 8-bit integer (byte)
+//     - W = 16-bit integer (word)
+//     - U = 32-bit integer (unsigned)
+//     - L = 64-bit integer (long)
+//  - Using "AS<type><#components>" for signed when required.
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+//  - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops).
+//------------------------------------------------------------------------------------------------------------------------------
+// CHANGE LOG
+// ==========
+// 20200914 - Expanded wave ops and prx code.
+// 20200713 - Added [ZOL] section, fixed serious bugs in sRGB and Rec.709 color conversion code, etc.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                           COMMON
+//==============================================================================================================================
+#define A_2PI 6.28318530718 // 2*pi
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                             CPU
+//
+//
+//==============================================================================================================================
+#ifdef A_CPU
+ // Supporting user defined overrides: define A_RESTRICT and/or A_STATIC before this point to replace the defaults below.
+ #ifndef A_RESTRICT
+  #define A_RESTRICT __restrict // qualifier applied to every CPU vector argument/return pointer macro
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifndef A_STATIC
+  #define A_STATIC static // prefix placed in front of every CPU function definition in this header
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Same types across CPU and GPU. NOTE(review): the fixed-width names used below come from <stdint.h>; no include is visible here - confirm the includer provides it.
+ // Predicate uses 32-bit integer (C friendly bool).
+ typedef uint32_t AP1; // P: predicate
+ typedef float AF1; // F: 32-bit float
+ typedef double AD1; // D: 64-bit float
+ typedef uint8_t AB1; // B: 8-bit integer (byte)
+ typedef uint16_t AW1; // W: 16-bit integer (word)
+ typedef uint32_t AU1; // U: 32-bit integer (unsigned)
+ typedef uint64_t AL1; // L: 64-bit integer (long)
+ typedef int8_t ASB1; // AS*: signed counterparts, used only where signed is required
+ typedef int16_t ASW1;
+ typedef int32_t ASU1;
+ typedef int64_t ASL1;
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AD1_(a) ((AD1)(a)) // A<type>1_(a): C-style value cast of 'a' to the named scalar type
+ #define AF1_(a) ((AF1)(a))
+ #define AL1_(a) ((AL1)(a))
+ #define AU1_(a) ((AU1)(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASL1_(a) ((ASL1)(a)) // AS<type>1_(a): value cast of 'a' to the signed integer type
+ #define ASU1_(a) ((ASU1)(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AU1 AU1_AF1(AF1 a){union{AU1 u;AF1 f;}pun;pun.f=a;return pun.u;} // bit pattern of float 'a', read back through a union as AU1
+//------------------------------------------------------------------------------------------------------------------------------
+ #define A_TRUE 1 // truth constants; presumably the values stored in AP1 predicates - confirm against users
+ #define A_FALSE 0
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                                       CPU/GPU PORTING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Get CPU and GPU to share all setup code, without duplicate code paths.
+// This uses a lower-case prefix for special vector constructs.
+//  - In C restrict pointers are used.
+//  - In the shading language, in/inout/out arguments are used.
+// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]).
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                     VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ #define retAD2 AD1 *A_RESTRICT // ret*: CPU return type of vector ops - a restrict pointer to the vector's components
+ #define retAD3 AD1 *A_RESTRICT
+ #define retAD4 AD1 *A_RESTRICT
+ #define retAF2 AF1 *A_RESTRICT
+ #define retAF3 AF1 *A_RESTRICT
+ #define retAF4 AF1 *A_RESTRICT
+ #define retAL2 AL1 *A_RESTRICT
+ #define retAL3 AL1 *A_RESTRICT
+ #define retAL4 AL1 *A_RESTRICT
+ #define retAU2 AU1 *A_RESTRICT
+ #define retAU3 AU1 *A_RESTRICT
+ #define retAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define inAD2 AD1 *A_RESTRICT // in*: CPU stand-in for a shading-language 'in' vector argument - restrict pointer, not const-qualified
+ #define inAD3 AD1 *A_RESTRICT
+ #define inAD4 AD1 *A_RESTRICT
+ #define inAF2 AF1 *A_RESTRICT
+ #define inAF3 AF1 *A_RESTRICT
+ #define inAF4 AF1 *A_RESTRICT
+ #define inAL2 AL1 *A_RESTRICT
+ #define inAL3 AL1 *A_RESTRICT
+ #define inAL4 AL1 *A_RESTRICT
+ #define inAU2 AU1 *A_RESTRICT
+ #define inAU3 AU1 *A_RESTRICT
+ #define inAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define inoutAD2 AD1 *A_RESTRICT // inout*: CPU stand-in for a shading-language 'inout' vector argument - same pointer type as in*/out*
+ #define inoutAD3 AD1 *A_RESTRICT
+ #define inoutAD4 AD1 *A_RESTRICT
+ #define inoutAF2 AF1 *A_RESTRICT
+ #define inoutAF3 AF1 *A_RESTRICT
+ #define inoutAF4 AF1 *A_RESTRICT
+ #define inoutAL2 AL1 *A_RESTRICT
+ #define inoutAL3 AL1 *A_RESTRICT
+ #define inoutAL4 AL1 *A_RESTRICT
+ #define inoutAU2 AU1 *A_RESTRICT
+ #define inoutAU3 AU1 *A_RESTRICT
+ #define inoutAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define outAD2 AD1 *A_RESTRICT // out*: CPU stand-in for a shading-language 'out' vector argument - restrict pointer the callee writes through
+ #define outAD3 AD1 *A_RESTRICT
+ #define outAD4 AD1 *A_RESTRICT
+ #define outAF2 AF1 *A_RESTRICT
+ #define outAF3 AF1 *A_RESTRICT
+ #define outAF4 AF1 *A_RESTRICT
+ #define outAL2 AL1 *A_RESTRICT
+ #define outAL3 AL1 *A_RESTRICT
+ #define outAL4 AL1 *A_RESTRICT
+ #define outAU2 AU1 *A_RESTRICT
+ #define outAU3 AU1 *A_RESTRICT
+ #define outAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define varAD2(x) AD1 x[2] // var*(x): declares vector variable 'x' as a fixed-size array of N scalars
+ #define varAD3(x) AD1 x[3]
+ #define varAD4(x) AD1 x[4]
+ #define varAF2(x) AF1 x[2]
+ #define varAF3(x) AF1 x[3]
+ #define varAF4(x) AF1 x[4]
+ #define varAL2(x) AL1 x[2]
+ #define varAL3(x) AL1 x[3]
+ #define varAL4(x) AL1 x[4]
+ #define varAU2(x) AU1 x[2]
+ #define varAU3(x) AU1 x[3]
+ #define varAU4(x) AU1 x[4]
+//------------------------------------------------------------------------------------------------------------------------------
+ #define initAD2(x,y) {x,y} // init*(...): brace-enclosed initializer list of the vector's components
+ #define initAD3(x,y,z) {x,y,z}
+ #define initAD4(x,y,z,w) {x,y,z,w}
+ #define initAF2(x,y) {x,y}
+ #define initAF3(x,y,z) {x,y,z}
+ #define initAF4(x,y,z,w) {x,y,z,w}
+ #define initAL2(x,y) {x,y}
+ #define initAL3(x,y,z) {x,y,z}
+ #define initAL4(x,y,z,w) {x,y,z,w}
+ #define initAU2(x,y) {x,y}
+ #define initAU3(x,y,z) {x,y,z}
+ #define initAU4(x,y,z,w) {x,y,z,w}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     SCALAR RETURN OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+//  - Replace transcendentals with manual versions. 
+//==============================================================================================================================
+ #ifdef A_GCC // Absolute value. The *S* integer variants take and return the unsigned type but operate on its signed cast.
+  A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);}
+  A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);}
+  A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));}
+  A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));}
+ #else
+  A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);}
+  A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);}
+  A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));}
+  A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(llabs((long long)ASL1_(a)));} // llabs, not labs: 'long' may be 32-bit (e.g. MSVC) and would truncate
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC // Cosine: compiler builtin when A_GCC is defined, C math library call otherwise.
+  A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);}
+  A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);}
+ #else
+  A_STATIC AD1 ACosD1(AD1 a){return cos(a);}
+  A_STATIC AF1 ACosF1(AF1 a){return cosf(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];} // dot product: per-component products summed left to right
+ A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];}
+ A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];}
+ A_STATIC AF1 ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];}
+ A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];}
+ A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];}
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC // Base-2 exponential (2^a): compiler builtin when A_GCC is defined, C math library call otherwise.
+  A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);}
+  A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);}
+ #else
+  A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);}
+  A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC // Floor: compiler builtin when A_GCC is defined, C math library call otherwise.
+  A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);}
+  A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);}
+ #else
+  A_STATIC AD1 AFloorD1(AD1 a){return floor(a);}
+  A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);} // lerp a+(b-a)*c written in the 'b*c+(-a*c+a)' form
+ A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);} // for finite inputs c=0 yields a and c=1 yields b
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC // Base-2 logarithm: compiler builtin when A_GCC is defined, C math library call otherwise.
+  A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);}
+  A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);}
+ #else
+  A_STATIC AD1 ALog2D1(AD1 a){return log2(a);}
+  A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 AMaxD1(AD1 a,AD1 b){if(a>b)return a;return b;} // larger of a,b; b is returned whenever 'a>b' is false
+ A_STATIC AF1 AMaxF1(AF1 a,AF1 b){if(a>b)return a;return b;}
+ A_STATIC AL1 AMaxL1(AL1 a,AL1 b){if(a>b)return a;return b;}
+ A_STATIC AU1 AMaxU1(AU1 a,AU1 b){if(a>b)return a;return b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // These follow the convention that A integer types don't have signedness until they are operated on: operands are cast to signed only to compare.
+ A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){if(ASL1_(a)>ASL1_(b))return a;return b;}
+ A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){if(ASU1_(a)>ASU1_(b))return a;return b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 AMinD1(AD1 a,AD1 b){if(a<b)return a;return b;} // smaller of a,b; b is returned whenever 'a<b' is false
+ A_STATIC AF1 AMinF1(AF1 a,AF1 b){if(a<b)return a;return b;}
+ A_STATIC AL1 AMinL1(AL1 a,AL1 b){if(a<b)return a;return b;}
+ A_STATIC AU1 AMinU1(AU1 a,AU1 b){if(a<b)return a;return b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AL1 AMinSL1(AL1 a,AL1 b){if(ASL1_(a)<ASL1_(b))return a;return b;} // signed compare on the unsigned carrier type
+ A_STATIC AU1 AMinSU1(AU1 a,AU1 b){if(ASU1_(a)<ASU1_(b))return a;return b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ARcpD1(AD1 a){return 1.0/a;} // reciprocal 1/a; no check for a==0
+ A_STATIC AF1 ARcpF1(AF1 a){return 1.0f/a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AL1 AShrSL1(AL1 a,AL1 b){return AL1_(ASL1_(a)>>ASL1_(b));} // right shift done on the signed casts of a and b, result cast back to unsigned
+ A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));} // NOTE(review): sign-extending only where the compiler shifts signed values arithmetically
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC // Sine: compiler builtin when A_GCC is defined, C math library call otherwise.
+  A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);}
+  A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);}
+ #else
+  A_STATIC AD1 ASinD1(AD1 a){return sin(a);}
+  A_STATIC AF1 ASinF1(AF1 a){return sinf(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC // Square root: compiler builtin when A_GCC is defined, C math library call otherwise.
+  A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);}
+  A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);}
+ #else
+  A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);}
+  A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                               SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ A_STATIC AD1 AClampD1(AD1 x,AD1 n,AD1 m){AD1 capped=AMinD1(x,m);return AMaxD1(n,capped);} // min with m first, then max with n
+ A_STATIC AF1 AClampF1(AF1 x,AF1 n,AF1 m){AF1 capped=AMinF1(x,m);return AMaxF1(n,capped);}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 AFractD1(AD1 a){AD1 whole=AFloorD1(a);return a-whole;} // a minus floor(a)
+ A_STATIC AF1 AFractF1(AF1 a){AF1 whole=AFloorF1(a);return a-whole;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 APowD1(AD1 a,AD1 b){AD1 e=b*ALog2D1(a);return AExp2D1(e);} // a^b evaluated as 2^(b*log2(a))
+ A_STATIC AF1 APowF1(AF1 a,AF1 b){AF1 e=b*ALog2F1(a);return AExp2F1(e);}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ARsqD1(AD1 a){AD1 root=ASqrtD1(a);return ARcpD1(root);} // reciprocal of the square root
+ A_STATIC AF1 ARsqF1(AF1 a){AF1 root=ASqrtF1(a);return ARcpF1(root);}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ASatD1(AD1 a){AD1 floored=AMaxD1(0.0,a);return AMinD1(1.0,floored);} // max with 0 first, then min with 1
+ A_STATIC AF1 ASatF1(AF1 a){AF1 floored=AMaxF1(0.0f,a);return AMinF1(1.0f,floored);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){AU1 i;for(i=0;i<2;i++)d[i]=AAbsD1(a[i]);return d;} // d=|a| per component; returns d
+ A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){AU1 i;for(i=0;i<3;i++)d[i]=AAbsD1(a[i]);return d;}
+ A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){AU1 i;for(i=0;i<4;i++)d[i]=AAbsD1(a[i]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){AU1 i;for(i=0;i<2;i++)d[i]=AAbsF1(a[i]);return d;}
+ A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){AU1 i;for(i=0;i<3;i++)d[i]=AAbsF1(a[i]);return d;}
+ A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){AU1 i;for(i=0;i<4;i++)d[i]=AAbsF1(a[i]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){AU1 i;for(i=0;i<2;i++)d[i]=a[i]+b[i];return d;} // d=a+b per component; returns d
+ A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){AU1 i;for(i=0;i<3;i++)d[i]=a[i]+b[i];return d;}
+ A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){AU1 i;for(i=0;i<4;i++)d[i]=a[i]+b[i];return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){AU1 i;for(i=0;i<2;i++)d[i]=a[i]+b[i];return d;}
+ A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){AU1 i;for(i=0;i<3;i++)d[i]=a[i]+b[i];return d;}
+ A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){AU1 i;for(i=0;i<4;i++)d[i]=a[i]+b[i];return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){AU1 i;for(i=0;i<2;i++)d[i]=a[i]+b;return d;} // d=a+b with scalar b added to every component; returns d
+ A_STATIC retAD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){AU1 i;for(i=0;i<3;i++)d[i]=a[i]+b;return d;}
+ A_STATIC retAD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){AU1 i;for(i=0;i<4;i++)d[i]=a[i]+b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){AU1 i;for(i=0;i<2;i++)d[i]=a[i]+b;return d;}
+ A_STATIC retAF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){AU1 i;for(i=0;i<3;i++)d[i]=a[i]+b;return d;}
+ A_STATIC retAF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){AU1 i;for(i=0;i<4;i++)d[i]=a[i]+b;return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opACpyD2(outAD2 d,inAD2 a){AU1 i;for(i=0;i<2;i++)d[i]=a[i];return d;} // copies a into d component by component; returns d
+ A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){AU1 i;for(i=0;i<3;i++)d[i]=a[i];return d;}
+ A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){AU1 i;for(i=0;i<4;i++)d[i]=a[i];return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opACpyF2(outAF2 d,inAF2 a){AU1 i;for(i=0;i<2;i++)d[i]=a[i];return d;}
+ A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){AU1 i;for(i=0;i<3;i++)d[i]=a[i];return d;}
+ A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){AU1 i;for(i=0;i<4;i++)d[i]=a[i];return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){AU1 i;for(i=0;i<2;i++)d[i]=ALerpD1(a[i],b[i],c[i]);return d;} // per-component ALerp with per-component weight c; returns d
+ A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){AU1 i;for(i=0;i<3;i++)d[i]=ALerpD1(a[i],b[i],c[i]);return d;}
+ A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){AU1 i;for(i=0;i<4;i++)d[i]=ALerpD1(a[i],b[i],c[i]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){AU1 i;for(i=0;i<2;i++)d[i]=ALerpF1(a[i],b[i],c[i]);return d;}
+ A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){AU1 i;for(i=0;i<3;i++)d[i]=ALerpF1(a[i],b[i],c[i]);return d;}
+ A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){AU1 i;for(i=0;i<4;i++)d[i]=ALerpF1(a[i],b[i],c[i]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){AU1 i;for(i=0;i<2;i++)d[i]=ALerpD1(a[i],b[i],c);return d;} // per-component ALerp with one scalar weight c; returns d
+ A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){AU1 i;for(i=0;i<3;i++)d[i]=ALerpD1(a[i],b[i],c);return d;}
+ A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){AU1 i;for(i=0;i<4;i++)d[i]=ALerpD1(a[i],b[i],c);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){AU1 i;for(i=0;i<2;i++)d[i]=ALerpF1(a[i],b[i],c);return d;}
+ A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){AU1 i;for(i=0;i<3;i++)d[i]=ALerpF1(a[i],b[i],c);return d;}
+ A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){AU1 i;for(i=0;i<4;i++)d[i]=ALerpF1(a[i],b[i],c);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){AU1 i;for(i=0;i<2;i++)d[i]=AMaxD1(a[i],b[i]);return d;} // per-component AMax of a and b; returns d
+ A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){AU1 i;for(i=0;i<3;i++)d[i]=AMaxD1(a[i],b[i]);return d;}
+ A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){AU1 i;for(i=0;i<4;i++)d[i]=AMaxD1(a[i],b[i]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){AU1 i;for(i=0;i<2;i++)d[i]=AMaxF1(a[i],b[i]);return d;}
+ A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){AU1 i;for(i=0;i<3;i++)d[i]=AMaxF1(a[i],b[i]);return d;}
+ A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){AU1 i;for(i=0;i<4;i++)d[i]=AMaxF1(a[i],b[i]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){AU1 i;for(i=0;i<2;i++)d[i]=AMinD1(a[i],b[i]);return d;} // per-component AMin of a and b; returns d
+ A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){AU1 i;for(i=0;i<3;i++)d[i]=AMinD1(a[i],b[i]);return d;}
+ A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){AU1 i;for(i=0;i<4;i++)d[i]=AMinD1(a[i],b[i]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){AU1 i;for(i=0;i<2;i++)d[i]=AMinF1(a[i],b[i]);return d;}
+ A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){AU1 i;for(i=0;i<3;i++)d[i]=AMinF1(a[i],b[i]);return d;}
+ A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){AU1 i;for(i=0;i<4;i++)d[i]=AMinF1(a[i],b[i]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){int i;for(i=0;i<2;++i){d[i]=a[i]*b[i];}return d;} // d=a*b per component; returns d.
+ A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){int i;for(i=0;i<3;++i){d[i]=a[i]*b[i];}return d;}
+ A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){int i;for(i=0;i<4;++i){d[i]=a[i]*b[i];}return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){int i;for(i=0;i<2;++i){d[i]=a[i]*b[i];}return d;} // Float variant of the above.
+ A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){int i;for(i=0;i<3;++i){d[i]=a[i]*b[i];}return d;}
+ A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){int i;for(i=0;i<4;++i){d[i]=a[i]*b[i];}return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){int i;for(i=0;i<2;++i){d[i]=a[i]*b;}return d;} // d=a*b with scalar b; returns d.
+ A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){int i;for(i=0;i<3;++i){d[i]=a[i]*b;}return d;}
+ A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){int i;for(i=0;i<4;++i){d[i]=a[i]*b;}return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){int i;for(i=0;i<2;++i){d[i]=a[i]*b;}return d;} // Float variant of the above.
+ A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){int i;for(i=0;i<3;++i){d[i]=a[i]*b;}return d;}
+ A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){int i;for(i=0;i<4;++i){d[i]=a[i]*b;}return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){int i;for(i=0;i<2;++i){d[i]=-a[i];}return d;} // d=-a per component; returns d.
+ A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){int i;for(i=0;i<3;++i){d[i]=-a[i];}return d;}
+ A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){int i;for(i=0;i<4;++i){d[i]=-a[i];}return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){int i;for(i=0;i<2;++i){d[i]=-a[i];}return d;} // Float variant of the above.
+ A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){int i;for(i=0;i<3;++i){d[i]=-a[i];}return d;}
+ A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){int i;for(i=0;i<4;++i){d[i]=-a[i];}return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){int i;for(i=0;i<2;++i){d[i]=ARcpD1(a[i]);}return d;} // d=ARcpD1(a) per component; returns d.
+ A_STATIC retAD3 opARcpD3(outAD3 d,inAD3 a){int i;for(i=0;i<3;++i){d[i]=ARcpD1(a[i]);}return d;}
+ A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){int i;for(i=0;i<4;++i){d[i]=ARcpD1(a[i]);}return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opARcpF2(outAF2 d,inAF2 a){int i;for(i=0;i<2;++i){d[i]=ARcpF1(a[i]);}return d;} // Float variant of the above.
+ A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){int i;for(i=0;i<3;++i){d[i]=ARcpF1(a[i]);}return d;}
+ A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){int i;for(i=0;i<4;++i){d[i]=ARcpF1(a[i]);}return d;}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     HALF FLOAT PACKING
+//==============================================================================================================================
+ // Convert float to half (in lower 16-bits of output, upper 16 bits are always zero).
+ // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+ // Table driven: i=top 9 bits of the float (sign:exponent), result=base[i]+(mantissa>>shift[i]). Supports denormals.
+ // Conversion rules are to make computations possibly "safer" on the GPU,
+ //  -INF & -NaN -> -65504 (0xfbff)
+ //  +INF & +NaN -> +65504 (0x7bff)
+ A_STATIC AU1 AU1_AH1_AF1(AF1 f){
+  static const AW1 base[512]={
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100,
+   0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,
+   0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100,
+   0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00,
+   0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff};
+  static const AB1 shift[512]={
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
+   0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
+   0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
+   0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
+   0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18};
+  union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);} // Union reads the float's raw bits; i picks the table entry.
+//------------------------------------------------------------------------------------------------------------------------------
+ // Used to output packed constant: a[0] as half in the low 16 bits, a[1] as half in the high 16 bits.
+ A_STATIC AU1 AU1_AH2_AF2(inAF2 a){AU1 lo=AU1_AH1_AF1(a[0]);AU1 hi=AU1_AH1_AF1(a[1]);return lo+(hi<<16);}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                            GLSL
+//
+//
+//==============================================================================================================================
+#if defined(A_GLSL) && defined(A_GPU)
+ #ifndef A_SKIP_EXT // Define A_SKIP_EXT to omit every '#extension' directive in this block.
+  #ifdef A_HALF // 16-bit storage and explicit 16-bit arithmetic types.
+   #extension GL_EXT_shader_16bit_storage:require
+   #extension GL_EXT_shader_explicit_arithmetic_types:require 
+  #endif
+//------------------------------------------------------------------------------------------------------------------------------
+  #ifdef A_LONG // 64-bit integers and 64-bit atomics (the atomics extension is NVIDIA specific).
+   #extension GL_ARB_gpu_shader_int64:require
+   #extension GL_NV_shader_atomic_int64:require
+  #endif
+//------------------------------------------------------------------------------------------------------------------------------
+  #ifdef A_WAVE // Subgroup operations: arithmetic, ballot, quad, shuffle.
+   #extension GL_KHR_shader_subgroup_arithmetic:require
+   #extension GL_KHR_shader_subgroup_ballot:require
+   #extension GL_KHR_shader_subgroup_quad:require
+   #extension GL_KHR_shader_subgroup_shuffle:require
+  #endif
+ #endif
+//==============================================================================================================================
+ #define AP1 bool // AP*: boolean scalar and vectors.
+ #define AP2 bvec2
+ #define AP3 bvec3
+ #define AP4 bvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AF1 float // AF*: 32-bit float scalar and vectors.
+ #define AF2 vec2
+ #define AF3 vec3
+ #define AF4 vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1 uint // AU*: 32-bit unsigned integer scalar and vectors.
+ #define AU2 uvec2
+ #define AU3 uvec3
+ #define AU4 uvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASU1 int // ASU*: 32-bit signed integer scalar and vectors.
+ #define ASU2 ivec2
+ #define ASU3 ivec3
+ #define ASU4 ivec4
+//==============================================================================================================================
+ #define AF1_AU1(x) uintBitsToFloat(AU1(x)) // AF*_AU*: reinterpret uint bits as float.
+ #define AF2_AU2(x) uintBitsToFloat(AU2(x))
+ #define AF3_AU3(x) uintBitsToFloat(AU3(x))
+ #define AF4_AU4(x) uintBitsToFloat(AU4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1_AF1(x) floatBitsToUint(AF1(x)) // AU*_AF*: reinterpret float bits as uint.
+ #define AU2_AF2(x) floatBitsToUint(AF2(x))
+ #define AU3_AF3(x) floatBitsToUint(AF3(x))
+ #define AU4_AF4(x) floatBitsToUint(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_AH1_AF1_x(AF1 a){return packHalf2x16(AF2(a,0.0));} // 'a' as half in the low 16 bits; the high 16 bits pack 0.0.
+ #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1_AH2_AF2 packHalf2x16 // Pack a float vector into one uint: two halfs, two 16-bit unorms, or four 8-bit unorms.
+ #define AU1_AW2Unorm_AF2 packUnorm2x16
+ #define AU1_AB4Unorm_AF4 packUnorm4x8
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AF2_AH2_AU1 unpackHalf2x16 // Unpack one uint into a float vector (inverse of the three packs).
+ #define AF2_AW2Unorm_AU1 unpackUnorm2x16
+ #define AF4_AB4Unorm_AU1 unpackUnorm4x8
+//==============================================================================================================================
+ AF1 AF1_x(AF1 a){return a;} // Scalar pass-through.
+ AF2 AF2_x(AF1 a){return AF2(a);} // Splat: every component is 'a'.
+ AF3 AF3_x(AF1 a){return AF3(a);}
+ AF4 AF4_x(AF1 a){return AF4(a);}
+ #define AF1_(a) AF1_x(AF1(a))
+ #define AF2_(a) AF2_x(AF1(a))
+ #define AF3_(a) AF3_x(AF1(a))
+ #define AF4_(a) AF4_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_x(AU1 a){return a;} // Scalar pass-through.
+ AU2 AU2_x(AU1 a){return AU2(a);} // Splat: every component is 'a'.
+ AU3 AU3_x(AU1 a){return AU3(a);}
+ AU4 AU4_x(AU1 a){return AU4(a);}
+ #define AU1_(a) AU1_x(AU1(a))
+ #define AU2_(a) AU2_x(AU1(a))
+ #define AU3_(a) AU3_x(AU1(a))
+ #define AU4_(a) AU4_x(AU1(a))
+//==============================================================================================================================
+ AU1 AAbsSU1(AU1 a){ASU1 s=ASU1(a);return AU1(abs(s));} // abs() of the value taken as signed, returned as uint.
+ AU2 AAbsSU2(AU2 a){ASU2 s=ASU2(a);return AU2(abs(s));}
+ AU3 AAbsSU3(AU3 a){ASU3 s=ASU3(a);return AU3(abs(s));}
+ AU4 AAbsSU4(AU4 a){ASU4 s=ASU4(a);return AU4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 ABfe(AU1 src,AU1 off,AU1 bits){return bitfieldExtract(src,ASU1(off),ASU1(bits));} // Extract 'bits' bits of 'src' starting at bit 'off'.
+ AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} // Bits of 'ins' where 'mask' is set, bits of 'src' elsewhere.
+ // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
+ AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){return bitfieldInsert(src,ins,0,ASU1(bits));} // Replace the low 'bits' bits of 'src' with those of 'ins'.
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_MED3_F32. Clamp x to the range [n,m] (component-wise for vectors).
+ AF1 AClampF1(AF1 x,AF1 n,AF1 m){return clamp(x,n,m);}
+ AF2 AClampF2(AF2 x,AF2 n,AF2 m){return clamp(x,n,m);}
+ AF3 AClampF3(AF3 x,AF3 n,AF3 m){return clamp(x,n,m);}
+ AF4 AClampF4(AF4 x,AF4 n,AF4 m){return clamp(x,n,m);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_FRACT_F32 (note DX frac() is different). Fractional part via GLSL fract(), i.e. x-floor(x).
+ AF1 AFractF1(AF1 x){return fract(x);}
+ AF2 AFractF2(AF2 x){return fract(x);}
+ AF3 AFractF3(AF3 x){return fract(x);}
+ AF4 AFractF4(AF4 x){return fract(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ALerpF1(AF1 x,AF1 y,AF1 a){return mix(x,y,a);} // Linear blend via GLSL mix(): x*(1-a)+y*a.
+ AF2 ALerpF2(AF2 x,AF2 y,AF2 a){return mix(x,y,a);}
+ AF3 ALerpF3(AF3 x,AF3 y,AF3 a){return mix(x,y,a);}
+ AF4 ALerpF4(AF4 x,AF4 y,AF4 a){return mix(x,y,a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_MAX3_F32. Largest of three values (component-wise for vectors).
+ AF1 AMax3F1(AF1 x,AF1 y,AF1 z){AF1 yz=max(y,z);return max(x,yz);}
+ AF2 AMax3F2(AF2 x,AF2 y,AF2 z){AF2 yz=max(y,z);return max(x,yz);}
+ AF3 AMax3F3(AF3 x,AF3 y,AF3 z){AF3 yz=max(y,z);return max(x,yz);}
+ AF4 AMax3F4(AF4 x,AF4 y,AF4 z){AF4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){ASU1 syz=max(ASU1(y),ASU1(z));return AU1(max(ASU1(x),syz));} // Max of three, compared as signed.
+ AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){ASU2 syz=max(ASU2(y),ASU2(z));return AU2(max(ASU2(x),syz));}
+ AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){ASU3 syz=max(ASU3(y),ASU3(z));return AU3(max(ASU3(x),syz));}
+ AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){ASU4 syz=max(ASU4(y),ASU4(z));return AU4(max(ASU4(x),syz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3U1(AU1 x,AU1 y,AU1 z){AU1 yz=max(y,z);return max(x,yz);} // Max of three, compared as unsigned.
+ AU2 AMax3U2(AU2 x,AU2 y,AU2 z){AU2 yz=max(y,z);return max(x,yz);}
+ AU3 AMax3U3(AU3 x,AU3 y,AU3 z){AU3 yz=max(y,z);return max(x,yz);}
+ AU4 AMax3U4(AU4 x,AU4 y,AU4 z){AU4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMaxSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(max(sa,sb));} // Max of two, compared as signed.
+ AU2 AMaxSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(max(sa,sb));}
+ AU3 AMaxSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(max(sa,sb));}
+ AU4 AMaxSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(max(sa,sb));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Clamp has an easier pattern match for med3 when some ordering is known.
+ // V_MED3_F32. Median of three: lo=min(x,y), hi=max(x,y), result=max(lo,min(hi,z)).
+ AF1 AMed3F1(AF1 x,AF1 y,AF1 z){AF1 lo=min(x,y);AF1 hi=max(x,y);return max(lo,min(hi,z));}
+ AF2 AMed3F2(AF2 x,AF2 y,AF2 z){AF2 lo=min(x,y);AF2 hi=max(x,y);return max(lo,min(hi,z));}
+ AF3 AMed3F3(AF3 x,AF3 y,AF3 z){AF3 lo=min(x,y);AF3 hi=max(x,y);return max(lo,min(hi,z));}
+ AF4 AMed3F4(AF4 x,AF4 y,AF4 z){AF4 lo=min(x,y);AF4 hi=max(x,y);return max(lo,min(hi,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_MIN3_F32. Smallest of three values (component-wise for vectors).
+ AF1 AMin3F1(AF1 x,AF1 y,AF1 z){AF1 yz=min(y,z);return min(x,yz);}
+ AF2 AMin3F2(AF2 x,AF2 y,AF2 z){AF2 yz=min(y,z);return min(x,yz);}
+ AF3 AMin3F3(AF3 x,AF3 y,AF3 z){AF3 yz=min(y,z);return min(x,yz);}
+ AF4 AMin3F4(AF4 x,AF4 y,AF4 z){AF4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){ASU1 syz=min(ASU1(y),ASU1(z));return AU1(min(ASU1(x),syz));} // Min of three, compared as signed.
+ AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){ASU2 syz=min(ASU2(y),ASU2(z));return AU2(min(ASU2(x),syz));}
+ AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){ASU3 syz=min(ASU3(y),ASU3(z));return AU3(min(ASU3(x),syz));}
+ AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){ASU4 syz=min(ASU4(y),ASU4(z));return AU4(min(ASU4(x),syz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3U1(AU1 x,AU1 y,AU1 z){AU1 yz=min(y,z);return min(x,yz);} // Min of three, compared as unsigned.
+ AU2 AMin3U2(AU2 x,AU2 y,AU2 z){AU2 yz=min(y,z);return min(x,yz);}
+ AU3 AMin3U3(AU3 x,AU3 y,AU3 z){AU3 yz=min(y,z);return min(x,yz);}
+ AU4 AMin3U4(AU4 x,AU4 y,AU4 z){AU4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMinSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(min(sa,sb));} // Min of two, compared as signed.
+ AU2 AMinSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(min(sa,sb));}
+ AU3 AMinSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(min(sa,sb));}
+ AU4 AMinSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(min(sa,sb));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently.
+ // V_COS_F32. The input is scaled by A_2PI before calling cos().
+ AF1 ANCosF1(AF1 x){AF1 scale=AF1_(A_2PI);return cos(x*scale);}
+ AF2 ANCosF2(AF2 x){AF2 scale=AF2_(A_2PI);return cos(x*scale);}
+ AF3 ANCosF3(AF3 x){AF3 scale=AF3_(A_2PI);return cos(x*scale);}
+ AF4 ANCosF4(AF4 x){AF4 scale=AF4_(A_2PI);return cos(x*scale);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently.
+ // V_SIN_F32. The input is scaled by A_2PI before calling sin().
+ AF1 ANSinF1(AF1 x){AF1 scale=AF1_(A_2PI);return sin(x*scale);}
+ AF2 ANSinF2(AF2 x){AF2 scale=AF2_(A_2PI);return sin(x*scale);}
+ AF3 ANSinF3(AF3 x){AF3 scale=AF3_(A_2PI);return sin(x*scale);}
+ AF4 ANSinF4(AF4 x){AF4 scale=AF4_(A_2PI);return sin(x*scale);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARcpF1(AF1 x){AF1 one=AF1_(1.0);return one/x;} // Reciprocal, 1/x.
+ AF2 ARcpF2(AF2 x){AF2 one=AF2_(1.0);return one/x;}
+ AF3 ARcpF3(AF3 x){AF3 one=AF3_(1.0);return one/x;}
+ AF4 ARcpF4(AF4 x){AF4 one=AF4_(1.0);return one/x;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARsqF1(AF1 x){AF1 one=AF1_(1.0);return one/sqrt(x);} // Reciprocal square root, 1/sqrt(x).
+ AF2 ARsqF2(AF2 x){AF2 one=AF2_(1.0);return one/sqrt(x);}
+ AF3 ARsqF3(AF3 x){AF3 one=AF3_(1.0);return one/sqrt(x);}
+ AF4 ARsqF4(AF4 x){AF4 one=AF4_(1.0);return one/sqrt(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ASatF1(AF1 x){AF1 lo=AF1_(0.0);AF1 hi=AF1_(1.0);return clamp(x,lo,hi);} // Saturate: clamp x to [0,1].
+ AF2 ASatF2(AF2 x){AF2 lo=AF2_(0.0);AF2 hi=AF2_(1.0);return clamp(x,lo,hi);}
+ AF3 ASatF3(AF3 x){AF3 lo=AF3_(0.0);AF3 hi=AF3_(1.0);return clamp(x,lo,hi);}
+ AF4 ASatF4(AF4 x){AF4 lo=AF4_(0.0);AF4 hi=AF4_(1.0);return clamp(x,lo,hi);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AShrSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(sa>>sb);} // Shift right performed on the signed type, returned as uint.
+ AU2 AShrSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(sa>>sb);}
+ AU3 AShrSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(sa>>sb);}
+ AU4 AShrSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(sa>>sb);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          GLSL BYTE
+//==============================================================================================================================
+ #ifdef A_BYTE
+  #define AB1 uint8_t // AB*: 8-bit unsigned integer scalar and vectors.
+  #define AB2 u8vec2
+  #define AB3 u8vec3
+  #define AB4 u8vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASB1 int8_t // ASB*: 8-bit signed integer scalar and vectors.
+  #define ASB2 i8vec2
+  #define ASB3 i8vec3
+  #define ASB4 i8vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  AB1 AB1_x(AB1 a){return a;} // Scalar pass-through.
+  AB2 AB2_x(AB1 a){return AB2(a);} // Splat: every component is 'a'.
+  AB3 AB3_x(AB1 a){return AB3(a);}
+  AB4 AB4_x(AB1 a){return AB4(a);}
+  #define AB1_(a) AB1_x(AB1(a))
+  #define AB2_(a) AB2_x(AB1(a))
+  #define AB3_(a) AB3_x(AB1(a))
+  #define AB4_(a) AB4_x(AB1(a))
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          GLSL HALF
+//==============================================================================================================================
+ #ifdef A_HALF
+  #define AH1 float16_t // AH*: 16-bit float (half) scalar and vectors.
+  #define AH2 f16vec2
+  #define AH3 f16vec3
+  #define AH4 f16vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AW1 uint16_t // AW*: 16-bit unsigned integer (word) scalar and vectors.
+  #define AW2 u16vec2
+  #define AW3 u16vec3
+  #define AW4 u16vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASW1 int16_t // ASW*: 16-bit signed integer scalar and vectors.
+  #define ASW2 i16vec2
+  #define ASW3 i16vec3
+  #define ASW4 i16vec4
+//==============================================================================================================================
+  #define AH2_AU1(x) unpackFloat2x16(AU1(x)) // One uint -> two halfs.
+  AH4 AH4_AU2_x(AU2 x){return AH4(unpackFloat2x16(x.x),unpackFloat2x16(x.y));} // x.x fills .xy, x.y fills .zw.
+  #define AH4_AU2(x) AH4_AU2_x(AU2(x))
+  #define AW2_AU1(x) unpackUint2x16(AU1(x)) // One uint -> two 16-bit uints.
+  #define AW4_AU2(x) unpackUint4x16(pack64(AU2(x)))
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AU1_AH2(x) packFloat2x16(AH2(x)) // Two halfs -> one uint.
+  AU2 AU2_AH4_x(AH4 x){return AU2(packFloat2x16(x.xy),packFloat2x16(x.zw));} // .xy packs into .x, .zw packs into .y.
+  #define AU2_AH4(x) AU2_AH4_x(AH4(x))
+  #define AU1_AW2(x) packUint2x16(AW2(x)) // Two 16-bit uints -> one uint.
+  #define AU2_AW4(x) unpack32(packUint4x16(AW4(x)))
+//==============================================================================================================================
+  #define AW1_AH1(x) halfBitsToUint16(AH1(x)) // AW*_AH*: reinterpret half bits as 16-bit uint.
+  #define AW2_AH2(x) halfBitsToUint16(AH2(x))
+  #define AW3_AH3(x) halfBitsToUint16(AH3(x))
+  #define AW4_AH4(x) halfBitsToUint16(AH4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AH1_AW1(x) uint16BitsToHalf(AW1(x)) // AH*_AW*: reinterpret 16-bit uint bits as half.
+  #define AH2_AW2(x) uint16BitsToHalf(AW2(x))
+  #define AH3_AW3(x) uint16BitsToHalf(AW3(x))
+  #define AH4_AW4(x) uint16BitsToHalf(AW4(x))
+//==============================================================================================================================
+  AH1 AH1_x(AH1 a){return a;} // Scalar pass-through.
+  AH2 AH2_x(AH1 a){return AH2(a);} // Splat: every component is 'a'.
+  AH3 AH3_x(AH1 a){return AH3(a);}
+  AH4 AH4_x(AH1 a){return AH4(a);}
+  #define AH1_(a) AH1_x(AH1(a))
+  #define AH2_(a) AH2_x(AH1(a))
+  #define AH3_(a) AH3_x(AH1(a))
+  #define AH4_(a) AH4_x(AH1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AW1_x(AW1 a){return a;} // Scalar pass-through.
+  AW2 AW2_x(AW1 a){return AW2(a);} // Splat: every component is 'a'.
+  AW3 AW3_x(AW1 a){return AW3(a);}
+  AW4 AW4_x(AW1 a){return AW4(a);}
+  #define AW1_(a) AW1_x(AW1(a))
+  #define AW2_(a) AW2_x(AW1(a))
+  #define AW3_(a) AW3_x(AW1(a))
+  #define AW4_(a) AW4_x(AW1(a))
+//==============================================================================================================================
+  AW1 AAbsSW1(AW1 a){ASW1 s=ASW1(a);return AW1(abs(s));} // abs() of the value taken as signed 16-bit, returned as AW.
+  AW2 AAbsSW2(AW2 a){ASW2 s=ASW2(a);return AW2(abs(s));}
+  AW3 AAbsSW3(AW3 a){ASW3 s=ASW3(a);return AW3(abs(s));}
+  AW4 AAbsSW4(AW4 a){ASW4 s=ASW4(a);return AW4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AClampH1(AH1 x,AH1 n,AH1 m){return clamp(x,n,m);} // Clamp x to the range [n,m].
+  AH2 AClampH2(AH2 x,AH2 n,AH2 m){return clamp(x,n,m);}
+  AH3 AClampH3(AH3 x,AH3 n,AH3 m){return clamp(x,n,m);}
+  AH4 AClampH4(AH4 x,AH4 n,AH4 m){return clamp(x,n,m);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AFractH1(AH1 x){return fract(x);} // Fractional part via GLSL fract(), i.e. x-floor(x).
+  AH2 AFractH2(AH2 x){return fract(x);}
+  AH3 AFractH3(AH3 x){return fract(x);}
+  AH4 AFractH4(AH4 x){return fract(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return mix(x,y,a);} // Linear blend via GLSL mix(): x*(1-a)+y*a.
+  AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return mix(x,y,a);}
+  AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return mix(x,y,a);}
+  AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return mix(x,y,a);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // No packed version of max3. Largest of three values, built from two max() calls.
+  AH1 AMax3H1(AH1 x,AH1 y,AH1 z){AH1 yz=max(y,z);return max(x,yz);}
+  AH2 AMax3H2(AH2 x,AH2 y,AH2 z){AH2 yz=max(y,z);return max(x,yz);}
+  AH3 AMax3H3(AH3 x,AH3 y,AH3 z){AH3 yz=max(y,z);return max(x,yz);}
+  AH4 AMax3H4(AH4 x,AH4 y,AH4 z){AH4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));} // Signed 16-bit max: compare as ASW so bit 15 is the sign (ASU would zero-extend).
+  AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));}
+  AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));}
+  AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // No packed version of min3. Smallest of three values, built from two min() calls.
+  AH1 AMin3H1(AH1 x,AH1 y,AH1 z){AH1 yz=min(y,z);return min(x,yz);}
+  AH2 AMin3H2(AH2 x,AH2 y,AH2 z){AH2 yz=min(y,z);return min(x,yz);}
+  AH3 AMin3H3(AH3 x,AH3 y,AH3 z){AH3 yz=min(y,z);return min(x,yz);}
+  AH4 AMin3H4(AH4 x,AH4 y,AH4 z){AH4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));} // Signed 16-bit min: compare as ASW so bit 15 is the sign (ASU would zero-extend).
+  AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));}
+  AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));}
+  AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ARcpH1(AH1 x){return AH1_(1.0)/x;}
+  AH2 ARcpH2(AH2 x){return AH2_(1.0)/x;}
+  AH3 ARcpH3(AH3 x){return AH3_(1.0)/x;}
+  AH4 ARcpH4(AH4 x){return AH4_(1.0)/x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ARsqH1(AH1 x){return AH1_(1.0)/sqrt(x);}
+  AH2 ARsqH2(AH2 x){return AH2_(1.0)/sqrt(x);}
+  AH3 ARsqH3(AH3 x){return AH3_(1.0)/sqrt(x);}
+  AH4 ARsqH4(AH4 x){return AH4_(1.0)/sqrt(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ASatH1(AH1 x){return clamp(x,AH1_(0.0),AH1_(1.0));}
+  AH2 ASatH2(AH2 x){return clamp(x,AH2_(0.0),AH2_(1.0));}
+  AH3 ASatH3(AH3 x){return clamp(x,AH3_(0.0),AH3_(1.0));}
+  AH4 ASatH4(AH4 x){return clamp(x,AH4_(0.0),AH4_(1.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} // Right shift performed on the signed (ASW*) view of 'a',
+  AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} // then converted back to the unsigned carrier type.
+  AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} // Vector forms shift each component by the matching
+  AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} // component of 'b'.
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         GLSL DOUBLE
+//==============================================================================================================================
+ #ifdef A_DUBL
+  #define AD1 double
+  #define AD2 dvec2
+  #define AD3 dvec3
+  #define AD4 dvec4
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 AD1_x(AD1 a){return AD1(a);}
+  AD2 AD2_x(AD1 a){return AD2(a,a);}
+  AD3 AD3_x(AD1 a){return AD3(a,a,a);}
+  AD4 AD4_x(AD1 a){return AD4(a,a,a,a);}
+  #define AD1_(a) AD1_x(AD1(a))
+  #define AD2_(a) AD2_x(AD1(a))
+  #define AD3_(a) AD3_x(AD1(a))
+  #define AD4_(a) AD4_x(AD1(a))
+//==============================================================================================================================
+  AD1 AFractD1(AD1 x){return fract(x);}
+  AD2 AFractD2(AD2 x){return fract(x);}
+  AD3 AFractD3(AD3 x){return fract(x);}
+  AD4 AFractD4(AD4 x){return fract(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return mix(x,y,a);}
+  AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return mix(x,y,a);}
+  AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return mix(x,y,a);}
+  AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return mix(x,y,a);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARcpD1(AD1 x){return AD1_(1.0)/x;}
+  AD2 ARcpD2(AD2 x){return AD2_(1.0)/x;}
+  AD3 ARcpD3(AD3 x){return AD3_(1.0)/x;}
+  AD4 ARcpD4(AD4 x){return AD4_(1.0)/x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARsqD1(AD1 x){return AD1_(1.0)/sqrt(x);}
+  AD2 ARsqD2(AD2 x){return AD2_(1.0)/sqrt(x);}
+  AD3 ARsqD3(AD3 x){return AD3_(1.0)/sqrt(x);}
+  AD4 ARsqD4(AD4 x){return AD4_(1.0)/sqrt(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ASatD1(AD1 x){return clamp(x,AD1_(0.0),AD1_(1.0));}
+  AD2 ASatD2(AD2 x){return clamp(x,AD2_(0.0),AD2_(1.0));}
+  AD3 ASatD3(AD3 x){return clamp(x,AD3_(0.0),AD3_(1.0));}
+  AD4 ASatD4(AD4 x){return clamp(x,AD4_(0.0),AD4_(1.0));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         GLSL LONG
+//==============================================================================================================================
+ #ifdef A_LONG
+  #define AL1 uint64_t
+  #define AL2 u64vec2
+  #define AL3 u64vec3
+  #define AL4 u64vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASL1 int64_t
+  #define ASL2 i64vec2
+  #define ASL3 i64vec3
+  #define ASL4 i64vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AL1_AU2(x) packUint2x32(AU2(x))
+  #define AU2_AL1(x) unpackUint2x32(AL1(x))
+//------------------------------------------------------------------------------------------------------------------------------
+  AL1 AL1_x(AL1 a){return AL1(a);}
+  AL2 AL2_x(AL1 a){return AL2(a,a);}
+  AL3 AL3_x(AL1 a){return AL3(a,a,a);}
+  AL4 AL4_x(AL1 a){return AL4(a,a,a,a);}
+  #define AL1_(a) AL1_x(AL1(a))
+  #define AL2_(a) AL2_x(AL1(a))
+  #define AL3_(a) AL3_x(AL1(a))
+  #define AL4_(a) AL4_x(AL1(a))
+//==============================================================================================================================
+  AL1 AAbsSL1(AL1 a){return AL1(abs(ASL1(a)));}
+  AL2 AAbsSL2(AL2 a){return AL2(abs(ASL2(a)));}
+  AL3 AAbsSL3(AL3 a){return AL3(abs(ASL3(a)));}
+  AL4 AAbsSL4(AL4 a){return AL4(abs(ASL4(a)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASL1(a),ASL1(b)));} // Signed max of 64-bit values held in unsigned types.
+  AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASL2(a),ASL2(b)));} // Compared as ASL* (64-bit signed) rather than ASU*:
+  AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASL3(a),ASL3(b)));} // narrowing to 32 bits would discard the upper half
+  AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASL4(a),ASL4(b)));} // of each operand before the comparison.
+//------------------------------------------------------------------------------------------------------------------------------
+  AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASL1(a),ASL1(b)));} // Signed min of 64-bit values held in unsigned types.
+  AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASL2(a),ASL2(b)));} // Compared as ASL* (64-bit signed) rather than ASU*:
+  AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASL3(a),ASL3(b)));} // narrowing to 32 bits would discard the upper half
+  AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASL4(a),ASL4(b)));} // of each operand before the comparison.
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      WAVE OPERATIONS
+//==============================================================================================================================
+ #ifdef A_WAVE
+  // Where 'x' must be a compile time literal.
+  AF1 AWaveXorF1(AF1 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AF2 AWaveXorF2(AF2 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AF3 AWaveXorF3(AF3 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AF4 AWaveXorF4(AF4 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AU1 AWaveXorU1(AU1 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AU2 AWaveXorU2(AU2 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AU3 AWaveXorU3(AU3 v,AU1 x){return subgroupShuffleXor(v,x);}
+  AU4 AWaveXorU4(AU4 v,AU1 x){return subgroupShuffleXor(v,x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  #ifdef A_HALF
+   AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(subgroupShuffleXor(AU1_AH2(v),x));} // 16-bit variants: convert to AU1/AU2
+   AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(subgroupShuffleXor(AU2_AH4(v),x));} // with the AU*_A* helpers, shuffle the
+   AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(subgroupShuffleXor(AU1_AW2(v),x));} // 32-bit value(s), then convert back
+   AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(subgroupShuffleXor(AU2_AW4(v),x));} // to the 16-bit vector type.
+  #endif
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                            HLSL
+//
+//
+//==============================================================================================================================
+#if defined(A_HLSL) && defined(A_GPU)
+ #ifdef A_HLSL_6_2
+  #define AP1 bool
+  #define AP2 bool2
+  #define AP3 bool3
+  #define AP4 bool4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AF1 float32_t
+  #define AF2 float32_t2
+  #define AF3 float32_t3
+  #define AF4 float32_t4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AU1 uint32_t
+  #define AU2 uint32_t2
+  #define AU3 uint32_t3
+  #define AU4 uint32_t4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASU1 int32_t
+  #define ASU2 int32_t2
+  #define ASU3 int32_t3
+  #define ASU4 int32_t4
+ #else
+  #define AP1 bool
+  #define AP2 bool2
+  #define AP3 bool3
+  #define AP4 bool4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AF1 float
+  #define AF2 float2
+  #define AF3 float3
+  #define AF4 float4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AU1 uint
+  #define AU2 uint2
+  #define AU3 uint3
+  #define AU4 uint4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASU1 int
+  #define ASU2 int2
+  #define ASU3 int3
+  #define ASU4 int4
+ #endif
+//==============================================================================================================================
+ #define AF1_AU1(x) asfloat(AU1(x))
+ #define AF2_AU2(x) asfloat(AU2(x))
+ #define AF3_AU3(x) asfloat(AU3(x))
+ #define AF4_AU4(x) asfloat(AU4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1_AF1(x) asuint(AF1(x))
+ #define AU2_AF2(x) asuint(AF2(x))
+ #define AU3_AF3(x) asuint(AF3(x))
+ #define AU4_AF4(x) asuint(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_AH1_AF1_x(AF1 a){return f32tof16(a);}
+ #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_AH2_AF2_x(AF2 a){return f32tof16(a.x)|(f32tof16(a.y)<<16);} // a.x -> low 16 bits, a.y -> high 16 bits.
+ #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a)) 
+ #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 AF2_AH2_AU1_x(AU1 x){return AF2(f16tof32(x&0xFFFF),f16tof32(x>>16));} // .x from low 16 bits, .y from high 16 bits.
+ #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x))
+//==============================================================================================================================
+ AF1 AF1_x(AF1 a){return AF1(a);}
+ AF2 AF2_x(AF1 a){return AF2(a,a);}
+ AF3 AF3_x(AF1 a){return AF3(a,a,a);}
+ AF4 AF4_x(AF1 a){return AF4(a,a,a,a);}
+ #define AF1_(a) AF1_x(AF1(a))
+ #define AF2_(a) AF2_x(AF1(a))
+ #define AF3_(a) AF3_x(AF1(a))
+ #define AF4_(a) AF4_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_x(AU1 a){return AU1(a);}
+ AU2 AU2_x(AU1 a){return AU2(a,a);}
+ AU3 AU3_x(AU1 a){return AU3(a,a,a);}
+ AU4 AU4_x(AU1 a){return AU4(a,a,a,a);}
+ #define AU1_(a) AU1_x(AU1(a))
+ #define AU2_(a) AU2_x(AU1(a))
+ #define AU3_(a) AU3_x(AU1(a))
+ #define AU4_(a) AU4_x(AU1(a))
+//==============================================================================================================================
+ AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} // abs() applied to the signed (ASU*) view of 'a', then converted
+ AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} // back to the unsigned carrier type.
+ AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));}
+ AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 mask=(1u<<bits)-1;return (src>>off)&mask;} // Extract 'bits' bits of 'src' starting at bit 'off'. NOTE(review): bits>=32 presumably breaks the mask - confirm.
+ AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} // Take 'ins' where 'mask' bits are set, 'src' elsewhere.
+ AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 mask=(1u<<bits)-1;return (ins&mask)|(src&(~mask));} // Replace the low 'bits' bits of 'src' with those of 'ins'.
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AClampF1(AF1 x,AF1 n,AF1 m){return max(n,min(x,m));}
+ AF2 AClampF2(AF2 x,AF2 n,AF2 m){return max(n,min(x,m));}
+ AF3 AClampF3(AF3 x,AF3 n,AF3 m){return max(n,min(x,m));}
+ AF4 AClampF4(AF4 x,AF4 n,AF4 m){return max(n,min(x,m));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AFractF1(AF1 x){return x-floor(x);} // Fractional part computed as x minus floor(x), not via frac().
+ AF2 AFractF2(AF2 x){return x-floor(x);} // NOTE(review): presumably for the same reason given on the half
+ AF3 AFractF3(AF3 x){return x-floor(x);} // versions (DX frac() differs) - confirm.
+ AF4 AFractF4(AF4 x){return x-floor(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ALerpF1(AF1 x,AF1 y,AF1 a){return lerp(x,y,a);}
+ AF2 ALerpF2(AF2 x,AF2 y,AF2 a){return lerp(x,y,a);}
+ AF3 ALerpF3(AF3 x,AF3 y,AF3 a){return lerp(x,y,a);}
+ AF4 ALerpF4(AF4 x,AF4 y,AF4 a){return lerp(x,y,a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AMax3F1(AF1 x,AF1 y,AF1 z){return max(x,max(y,z));}
+ AF2 AMax3F2(AF2 x,AF2 y,AF2 z){return max(x,max(y,z));}
+ AF3 AMax3F3(AF3 x,AF3 y,AF3 z){return max(x,max(y,z));}
+ AF4 AMax3F4(AF4 x,AF4 y,AF4 z){return max(x,max(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){return AU1(max(ASU1(x),max(ASU1(y),ASU1(z))));}
+ AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){return AU2(max(ASU2(x),max(ASU2(y),ASU2(z))));}
+ AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){return AU3(max(ASU3(x),max(ASU3(y),ASU3(z))));}
+ AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){return AU4(max(ASU4(x),max(ASU4(y),ASU4(z))));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3U1(AU1 x,AU1 y,AU1 z){return max(x,max(y,z));}
+ AU2 AMax3U2(AU2 x,AU2 y,AU2 z){return max(x,max(y,z));}
+ AU3 AMax3U3(AU3 x,AU3 y,AU3 z){return max(x,max(y,z));}
+ AU4 AMax3U4(AU4 x,AU4 y,AU4 z){return max(x,max(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMaxSU1(AU1 a,AU1 b){return AU1(max(ASU1(a),ASU1(b)));}
+ AU2 AMaxSU2(AU2 a,AU2 b){return AU2(max(ASU2(a),ASU2(b)));}
+ AU3 AMaxSU3(AU3 a,AU3 b){return AU3(max(ASU3(a),ASU3(b)));}
+ AU4 AMaxSU4(AU4 a,AU4 b){return AU4(max(ASU4(a),ASU4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AMed3F1(AF1 x,AF1 y,AF1 z){return max(min(x,y),min(max(x,y),z));} // Median of three: z limited to [min(x,y),max(x,y)].
+ AF2 AMed3F2(AF2 x,AF2 y,AF2 z){return max(min(x,y),min(max(x,y),z));} // Vector forms operate per component.
+ AF3 AMed3F3(AF3 x,AF3 y,AF3 z){return max(min(x,y),min(max(x,y),z));}
+ AF4 AMed3F4(AF4 x,AF4 y,AF4 z){return max(min(x,y),min(max(x,y),z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AMin3F1(AF1 x,AF1 y,AF1 z){return min(x,min(y,z));}
+ AF2 AMin3F2(AF2 x,AF2 y,AF2 z){return min(x,min(y,z));}
+ AF3 AMin3F3(AF3 x,AF3 y,AF3 z){return min(x,min(y,z));}
+ AF4 AMin3F4(AF4 x,AF4 y,AF4 z){return min(x,min(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){return AU1(min(ASU1(x),min(ASU1(y),ASU1(z))));}
+ AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){return AU2(min(ASU2(x),min(ASU2(y),ASU2(z))));}
+ AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){return AU3(min(ASU3(x),min(ASU3(y),ASU3(z))));}
+ AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){return AU4(min(ASU4(x),min(ASU4(y),ASU4(z))));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3U1(AU1 x,AU1 y,AU1 z){return min(x,min(y,z));}
+ AU2 AMin3U2(AU2 x,AU2 y,AU2 z){return min(x,min(y,z));}
+ AU3 AMin3U3(AU3 x,AU3 y,AU3 z){return min(x,min(y,z));}
+ AU4 AMin3U4(AU4 x,AU4 y,AU4 z){return min(x,min(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMinSU1(AU1 a,AU1 b){return AU1(min(ASU1(a),ASU1(b)));}
+ AU2 AMinSU2(AU2 a,AU2 b){return AU2(min(ASU2(a),ASU2(b)));}
+ AU3 AMinSU3(AU3 a,AU3 b){return AU3(min(ASU3(a),ASU3(b)));}
+ AU4 AMinSU4(AU4 a,AU4 b){return AU4(min(ASU4(a),ASU4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ANCosF1(AF1 x){return cos(x*AF1_(A_2PI));} // Cosine of x scaled by A_2PI (defined outside this section);
+ AF2 ANCosF2(AF2 x){return cos(x*AF2_(A_2PI));} // presumably x is a normalized angle where 1.0 is a full turn -
+ AF3 ANCosF3(AF3 x){return cos(x*AF3_(A_2PI));} // TODO confirm against the A_2PI definition.
+ AF4 ANCosF4(AF4 x){return cos(x*AF4_(A_2PI));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ANSinF1(AF1 x){return sin(x*AF1_(A_2PI));} // Sine of x scaled by A_2PI (defined outside this section);
+ AF2 ANSinF2(AF2 x){return sin(x*AF2_(A_2PI));} // presumably x is a normalized angle where 1.0 is a full turn -
+ AF3 ANSinF3(AF3 x){return sin(x*AF3_(A_2PI));} // TODO confirm against the A_2PI definition.
+ AF4 ANSinF4(AF4 x){return sin(x*AF4_(A_2PI));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARcpF1(AF1 x){return rcp(x);}
+ AF2 ARcpF2(AF2 x){return rcp(x);}
+ AF3 ARcpF3(AF3 x){return rcp(x);}
+ AF4 ARcpF4(AF4 x){return rcp(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARsqF1(AF1 x){return rsqrt(x);}
+ AF2 ARsqF2(AF2 x){return rsqrt(x);}
+ AF3 ARsqF3(AF3 x){return rsqrt(x);}
+ AF4 ARsqF4(AF4 x){return rsqrt(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ASatF1(AF1 x){return saturate(x);}
+ AF2 ASatF2(AF2 x){return saturate(x);}
+ AF3 ASatF3(AF3 x){return saturate(x);}
+ AF4 ASatF4(AF4 x){return saturate(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AShrSU1(AU1 a,AU1 b){return AU1(ASU1(a)>>ASU1(b));} // Right shift performed on the signed (ASU*) view of 'a',
+ AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} // then converted back to the unsigned carrier type.
+ AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} // Vector forms shift each component by the matching
+ AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} // component of 'b'.
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          HLSL BYTE
+//==============================================================================================================================
+ #ifdef A_BYTE
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          HLSL HALF
+//==============================================================================================================================
+ #ifdef A_HALF
+  #ifdef A_HLSL_6_2
+   #define AH1 float16_t
+   #define AH2 float16_t2
+   #define AH3 float16_t3
+   #define AH4 float16_t4
+//------------------------------------------------------------------------------------------------------------------------------
+   #define AW1 uint16_t
+   #define AW2 uint16_t2
+   #define AW3 uint16_t3
+   #define AW4 uint16_t4
+//------------------------------------------------------------------------------------------------------------------------------
+   #define ASW1 int16_t
+   #define ASW2 int16_t2
+   #define ASW3 int16_t3
+   #define ASW4 int16_t4
+  #else
+   #define AH1 min16float
+   #define AH2 min16float2
+   #define AH3 min16float3
+   #define AH4 min16float4
+//------------------------------------------------------------------------------------------------------------------------------
+   #define AW1 min16uint
+   #define AW2 min16uint2
+   #define AW3 min16uint3
+   #define AW4 min16uint4
+//------------------------------------------------------------------------------------------------------------------------------
+   #define ASW1 min16int
+   #define ASW2 min16int2
+   #define ASW3 min16int3
+   #define ASW4 min16int4
+  #endif
+//==============================================================================================================================
+  // Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
+  // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
+  AH2 AH2_AU1_x(AU1 x){AF2 t=f16tof32(AU2(x&0xFFFF,x>>16));return AH2(t);} // .x from low 16 bits, .y from high 16 bits.
+  AH4 AH4_AU2_x(AU2 x){return AH4(AH2_AU1_x(x.x),AH2_AU1_x(x.y));} // x.x supplies .xy, x.y supplies .zw.
+  AW2 AW2_AU1_x(AU1 x){AU2 t=AU2(x&0xFFFF,x>>16);return AW2(t);} // .x from low 16 bits, .y from high 16 bits.
+  AW4 AW4_AU2_x(AU2 x){return AW4(AW2_AU1_x(x.x),AW2_AU1_x(x.y));} // x.x supplies .xy, x.y supplies .zw.
+  #define AH2_AU1(x) AH2_AU1_x(AU1(x))
+  #define AH4_AU2(x) AH4_AU2_x(AU2(x))
+  #define AW2_AU1(x) AW2_AU1_x(AU1(x))
+  #define AW4_AU2(x) AW4_AU2_x(AU2(x))
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 AU1_AH2_x(AH2 x){return f32tof16(x.x)+(f32tof16(x.y)<<16);} // x.x -> low 16 bits, x.y -> high 16 bits ('+' assumes f32tof16 leaves the upper 16 bits zero).
+  AU2 AU2_AH4_x(AH4 x){return AU2(AU1_AH2_x(x.xy),AU1_AH2_x(x.zw));} // .xy packed into result.x, .zw into result.y.
+  AU1 AU1_AW2_x(AW2 x){return AU1(x.x)+(AU1(x.y)<<16);} // x.x -> low 16 bits, x.y -> high 16 bits.
+  AU2 AU2_AW4_x(AW4 x){return AU2(AU1_AW2_x(x.xy),AU1_AW2_x(x.zw));} // .xy packed into result.x, .zw into result.y.
+  #define AU1_AH2(x) AU1_AH2_x(AH2(x))
+  #define AU2_AH4(x) AU2_AH4_x(AH4(x))
+  #define AU1_AW2(x) AU1_AW2_x(AW2(x))
+  #define AU2_AW4(x) AU2_AW4_x(AW4(x))
+//==============================================================================================================================
+  #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST)
+   #define AW1_AH1(x) asuint16(x)
+   #define AW2_AH2(x) asuint16(x)
+   #define AW3_AH3(x) asuint16(x)
+   #define AW4_AH4(x) asuint16(x)
+  #else
+   #define AW1_AH1(a) AW1(f32tof16(AF1(a)))
+   #define AW2_AH2(a) AW2(AW1_AH1((a).x),AW1_AH1((a).y))
+   #define AW3_AH3(a) AW3(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z))
+   #define AW4_AH4(a) AW4(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z),AW1_AH1((a).w))
+  #endif
+//------------------------------------------------------------------------------------------------------------------------------
+  #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST)
+   #define AH1_AW1(x) asfloat16(x)
+   #define AH2_AW2(x) asfloat16(x)
+   #define AH3_AW3(x) asfloat16(x)
+   #define AH4_AW4(x) asfloat16(x)
+  #else
+   #define AH1_AW1(a) AH1(f16tof32(AU1(a)))
+   #define AH2_AW2(a) AH2(AH1_AW1((a).x),AH1_AW1((a).y))
+   #define AH3_AW3(a) AH3(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z))
+   #define AH4_AW4(a) AH4(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z),AH1_AW1((a).w))
+  #endif
+//==============================================================================================================================
+  AH1 AH1_x(AH1 a){return AH1(a);}
+  AH2 AH2_x(AH1 a){return AH2(a,a);}
+  AH3 AH3_x(AH1 a){return AH3(a,a,a);}
+  AH4 AH4_x(AH1 a){return AH4(a,a,a,a);}
+  #define AH1_(a) AH1_x(AH1(a))
+  #define AH2_(a) AH2_x(AH1(a))
+  #define AH3_(a) AH3_x(AH1(a))
+  #define AH4_(a) AH4_x(AH1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AW1_x(AW1 a){return AW1(a);}
+  AW2 AW2_x(AW1 a){return AW2(a,a);}
+  AW3 AW3_x(AW1 a){return AW3(a,a,a);}
+  AW4 AW4_x(AW1 a){return AW4(a,a,a,a);}
+  #define AW1_(a) AW1_x(AW1(a))
+  #define AW2_(a) AW2_x(AW1(a))
+  #define AW3_(a) AW3_x(AW1(a))
+  #define AW4_(a) AW4_x(AW1(a))
+//==============================================================================================================================
+  AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));}
+  AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));}
+  AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));}
+  AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AClampH1(AH1 x,AH1 n,AH1 m){return max(n,min(x,m));}
+  AH2 AClampH2(AH2 x,AH2 n,AH2 m){return max(n,min(x,m));}
+  AH3 AClampH3(AH3 x,AH3 n,AH3 m){return max(n,min(x,m));}
+  AH4 AClampH4(AH4 x,AH4 n,AH4 m){return max(n,min(x,m));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_FRACT_F16 (note DX frac() is different).
+  AH1 AFractH1(AH1 x){return x-floor(x);}
+  AH2 AFractH2(AH2 x){return x-floor(x);}
+  AH3 AFractH3(AH3 x){return x-floor(x);}
+  AH4 AFractH4(AH4 x){return x-floor(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return lerp(x,y,a);}
+  AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return lerp(x,y,a);}
+  AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return lerp(x,y,a);}
+  AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return lerp(x,y,a);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));}
+  AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));}
+  AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));}
+  AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));} // Signed max of 16-bit values held in unsigned types.
+  AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));} // Compared as ASW* (16-bit signed) rather than ASU*:
+  AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));} // widening to 32 bits converts by value, so 0xFFFF
+  AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));} // would compare as 65535 instead of -1.
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));}
+  AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));}
+  AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));}
+  AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));} // Signed min of 16-bit values held in unsigned types.
+  AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));} // Compared as ASW* (16-bit signed) rather than ASU*:
+  AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));} // widening to 32 bits converts by value, so 0xFFFF
+  AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));} // would compare as 65535 instead of -1.
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ARcpH1(AH1 x){return rcp(x);}
+  AH2 ARcpH2(AH2 x){return rcp(x);}
+  AH3 ARcpH3(AH3 x){return rcp(x);}
+  AH4 ARcpH4(AH4 x){return rcp(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ARsqH1(AH1 x){return rsqrt(x);}
+  AH2 ARsqH2(AH2 x){return rsqrt(x);}
+  AH3 ARsqH3(AH3 x){return rsqrt(x);}
+  AH4 ARsqH4(AH4 x){return rsqrt(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ASatH1(AH1 x){return saturate(x);}
+  AH2 ASatH2(AH2 x){return saturate(x);}
+  AH3 ASatH3(AH3 x){return saturate(x);}
+  AH4 ASatH4(AH4 x){return saturate(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));}
+  AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));}
+  AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));}
+  AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         HLSL DOUBLE
+//==============================================================================================================================
+ #ifdef A_DUBL
+  #ifdef A_HLSL_6_2 // AD1..AD4 name the 1- to 4-component double types; with A_HLSL_6_2 they map to float64_t*.
+   #define AD1 float64_t
+   #define AD2 float64_t2
+   #define AD3 float64_t3
+   #define AD4 float64_t4
+  #else // Without A_HLSL_6_2 the same names map to double, double2, double3, double4.
+   #define AD1 double
+   #define AD2 double2
+   #define AD3 double3
+   #define AD4 double4
+  #endif // A_HLSL_6_2
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 AD1_x(AD1 a){AD1 r=AD1(a);return r;} // AD*_x: replicate one AD1 scalar into every component of the result.
+  AD2 AD2_x(AD1 a){AD2 r=AD2(a,a);return r;}
+  AD3 AD3_x(AD1 a){AD3 r=AD3(a,a,a);return r;}
+  AD4 AD4_x(AD1 a){AD4 r=AD4(a,a,a,a);return r;}
+  #define AD1_(a) AD1_x(AD1(a))
+  #define AD2_(a) AD2_x(AD1(a))
+  #define AD3_(a) AD3_x(AD1(a))
+  #define AD4_(a) AD4_x(AD1(a))
+//==============================================================================================================================
+  AD1 AFractD1(AD1 a){AD1 whole=floor(a);return a-whole;} // AFractD*: 'a' minus floor(a).
+  AD2 AFractD2(AD2 a){AD2 whole=floor(a);return a-whole;}
+  AD3 AFractD3(AD3 a){AD3 whole=floor(a);return a-whole;}
+  AD4 AFractD4(AD4 a){AD4 whole=floor(a);return a-whole;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ALerpD1(AD1 x,AD1 y,AD1 a){AD1 r=lerp(x,y,a);return r;} // ALerpD*: forwards (x,y,a) unchanged to the 'lerp' intrinsic.
+  AD2 ALerpD2(AD2 x,AD2 y,AD2 a){AD2 r=lerp(x,y,a);return r;}
+  AD3 ALerpD3(AD3 x,AD3 y,AD3 a){AD3 r=lerp(x,y,a);return r;}
+  AD4 ALerpD4(AD4 x,AD4 y,AD4 a){AD4 r=lerp(x,y,a);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARcpD1(AD1 x){AD1 r=rcp(x);return r;} // ARcpD*: double reciprocal, forwarded to the 'rcp' intrinsic.
+  AD2 ARcpD2(AD2 x){AD2 r=rcp(x);return r;}
+  AD3 ARcpD3(AD3 x){AD3 r=rcp(x);return r;}
+  AD4 ARcpD4(AD4 x){AD4 r=rcp(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARsqD1(AD1 x){AD1 r=rsqrt(x);return r;} // ARsqD*: double reciprocal square root, forwarded to the 'rsqrt' intrinsic.
+  AD2 ARsqD2(AD2 x){AD2 r=rsqrt(x);return r;}
+  AD3 ARsqD3(AD3 x){AD3 r=rsqrt(x);return r;}
+  AD4 ARsqD4(AD4 x){AD4 r=rsqrt(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ASatD1(AD1 x){AD1 r=saturate(x);return r;} // ASatD*: double value passed through the 'saturate' intrinsic.
+  AD2 ASatD2(AD2 x){AD2 r=saturate(x);return r;}
+  AD3 ASatD3(AD3 x){AD3 r=saturate(x);return r;}
+  AD4 ASatD4(AD4 x){AD4 r=saturate(x);return r;}
+ #endif
+//==============================================================================================================================
+//                                                         HLSL WAVE
+//==============================================================================================================================
+ #ifdef A_WAVE
+  // AWaveXor*: fetch 'v' from the lane at (own lane index XOR 'x'), where 'x' must be a compile time literal.
+  AF1 AWaveXorF1(AF1 v,AU1 x){AF1 r=WaveReadLaneAt(v,WaveGetLaneIndex()^x);return r;}
+  AF2 AWaveXorF2(AF2 v,AU1 x){AF2 r=WaveReadLaneAt(v,WaveGetLaneIndex()^x);return r;}
+  AF3 AWaveXorF3(AF3 v,AU1 x){AF3 r=WaveReadLaneAt(v,WaveGetLaneIndex()^x);return r;}
+  AF4 AWaveXorF4(AF4 v,AU1 x){AF4 r=WaveReadLaneAt(v,WaveGetLaneIndex()^x);return r;}
+  AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} // Unsigned lane-XOR reads; U2..U4 are named by component count like F1..F4.
+  AU2 AWaveXorU2(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU2 AWaveXorU1(AU2 v,AU1 x){return AWaveXorU2(v,x);} // Old overload kept for callers.
+  AU3 AWaveXorU3(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU3 AWaveXorU1(AU3 v,AU1 x){return AWaveXorU3(v,x);} // Old overload kept for callers.
+  AU4 AWaveXorU4(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU4 AWaveXorU1(AU4 v,AU1 x){return AWaveXorU4(v,x);} // Old overload kept for callers.
+//------------------------------------------------------------------------------------------------------------------------------
+  #ifdef A_HALF
+   AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(WaveReadLaneAt(AU1_AH2(v),WaveGetLaneIndex()^x));} // 16-bit vectors are converted to AU1/AU2 for the lane read, then back.
+   AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(WaveReadLaneAt(AU2_AH4(v),WaveGetLaneIndex()^x));}
+   AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(WaveReadLaneAt(AU1_AW2(v),WaveGetLaneIndex()^x));}
+   AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(WaveReadLaneAt(AU2_AW4(v),WaveGetLaneIndex()^x));} // Four words go through an AU2 carrier, matching AWaveXorH4.
+  #endif
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                          GPU COMMON
+//
+//
+//==============================================================================================================================
+#ifdef A_GPU
+ // Negative and positive infinity, produced by passing the bit patterns 0xff800000 and 0x7f800000 through AF1_AU1.
+ #define A_INFP_F AF1_AU1(0x7f800000u)
+ #define A_INFN_F AF1_AU1(0xff800000u)
+//------------------------------------------------------------------------------------------------------------------------------
+ // ACpySgnF*: OR the sign bit (0x80000000) of 's' into the bits of 'd'; 'd' is expected to be positive.
+ AF1 ACpySgnF1(AF1 d,AF1 s){AU1 db=AU1_AF1(d);AU1 sb=AU1_AF1(s);return AF1_AU1(db|(sb&AU1_(0x80000000u)));}
+ AF2 ACpySgnF2(AF2 d,AF2 s){AU2 db=AU2_AF2(d);AU2 sb=AU2_AF2(s);return AF2_AU2(db|(sb&AU2_(0x80000000u)));}
+ AF3 ACpySgnF3(AF3 d,AF3 s){AU3 db=AU3_AF3(d);AU3 sb=AU3_AF3(s);return AF3_AU3(db|(sb&AU3_(0x80000000u)));}
+ AF4 ACpySgnF4(AF4 d,AF4 s){AU4 db=AU4_AF4(d);AU4 sb=AU4_AF4(s);return AF4_AU4(db|(sb&AU4_(0x80000000u)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // ASignedF*: single-operation "is negative" mask (handy as a lerp weight for branch free logic),
+ //  m=NaN := 0
+ //  m>=0  := 0
+ //  m<0   := 1
+ // It relies on how saturate() treats the product with -INF,
+ //  saturate(+a*(-INF)==-INF) := 0
+ //  saturate( 0*(-INF)== NaN) := 0
+ //  saturate(-a*(-INF)==+INF) := 1
+ AF1 ASignedF1(AF1 m){AF1 p=m*AF1_(A_INFN_F);return ASatF1(p);}
+ AF2 ASignedF2(AF2 m){AF2 p=m*AF2_(A_INFN_F);return ASatF2(p);}
+ AF3 ASignedF3(AF3 m){AF3 p=m*AF3_(A_INFN_F);return ASatF3(p);}
+ AF4 ASignedF4(AF4 m){AF4 p=m*AF4_(A_INFN_F);return ASatF4(p);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AGtZeroF1(AF1 m){AF1 p=m*AF1_(A_INFP_F);return ASatF1(p);} // AGtZeroF*: saturate(m*(+INF)), the +INF counterpart of ASignedF*.
+ AF2 AGtZeroF2(AF2 m){AF2 p=m*AF2_(A_INFP_F);return ASatF2(p);}
+ AF3 AGtZeroF3(AF3 m){AF3 p=m*AF3_(A_INFP_F);return ASatF3(p);}
+ AF4 AGtZeroF4(AF4 m){AF4 p=m*AF4_(A_INFP_F);return ASatF4(p);}
+//==============================================================================================================================
+ #ifdef A_HALF
+  #ifdef A_HLSL_6_2 // Half infinities from the patterns 0x7c00 / 0xfc00; this path casts the literal to uint16_t first.
+   #define A_INFP_H AH1_AW1((uint16_t)0x7c00u)
+   #define A_INFN_H AH1_AW1((uint16_t)0xfc00u)
+  #else // Other paths hand the plain literal to AH1_AW1.
+   #define A_INFP_H AH1_AW1(0x7c00u)
+   #define A_INFN_H AH1_AW1(0xfc00u)
+  #endif // A_HLSL_6_2
+
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ACpySgnH1(AH1 d,AH1 s){AW1 db=AW1_AH1(d);AW1 sb=AW1_AH1(s);return AH1_AW1(db|(sb&AW1_(0x8000u)));} // ACpySgnH*: OR bit 15 of 's' into the bits of 'd'.
+  AH2 ACpySgnH2(AH2 d,AH2 s){AW2 db=AW2_AH2(d);AW2 sb=AW2_AH2(s);return AH2_AW2(db|(sb&AW2_(0x8000u)));}
+  AH3 ACpySgnH3(AH3 d,AH3 s){AW3 db=AW3_AH3(d);AW3 sb=AW3_AH3(s);return AH3_AW3(db|(sb&AW3_(0x8000u)));}
+  AH4 ACpySgnH4(AH4 d,AH4 s){AW4 db=AW4_AH4(d);AW4 sb=AW4_AH4(s);return AH4_AW4(db|(sb&AW4_(0x8000u)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 ASignedH1(AH1 m){AH1 p=m*AH1_(A_INFN_H);return ASatH1(p);} // ASignedH*: saturate(m*(-INF)) on half values.
+  AH2 ASignedH2(AH2 m){AH2 p=m*AH2_(A_INFN_H);return ASatH2(p);}
+  AH3 ASignedH3(AH3 m){AH3 p=m*AH3_(A_INFN_H);return ASatH3(p);}
+  AH4 ASignedH4(AH4 m){AH4 p=m*AH4_(A_INFN_H);return ASatH4(p);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AGtZeroH1(AH1 m){AH1 p=m*AH1_(A_INFP_H);return ASatH1(p);} // AGtZeroH*: saturate(m*(+INF)) on half values.
+  AH2 AGtZeroH2(AH2 m){AH2 p=m*AH2_(A_INFP_H);return ASatH2(p);}
+  AH3 AGtZeroH3(AH3 m){AH3 p=m*AH3_(A_INFP_H);return ASatH3(p);}
+  AH4 AGtZeroH4(AH4 m){AH4 p=m*AH4_(A_INFP_H);return ASatH4(p);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                [FIS] FLOAT INTEGER SORTABLE
+//------------------------------------------------------------------------------------------------------------------------------
+// Float to integer sortable.
+//  - If sign bit=0, flip the sign bit (positives).
+//  - If sign bit=1, flip all bits     (negatives).
+// Integer sortable to float.
+//  - If sign bit=1, flip the sign bit (positives).
+//  - If sign bit=0, flip all bits     (negatives).
+// Has nice side effects.
+//  - Larger integers are more positive values.
+//  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+// Burns 3 ops for conversion {shift,or,xor}.
+//==============================================================================================================================
+ AU1 AFisToU1(AU1 x){AU1 s=AShrSU1(x,AU1_(31));return x^(s|AU1_(0x80000000));} // Float bits -> sortable integer: XOR with (sign smear | sign bit).
+ AU1 AFisFromU1(AU1 x){AU1 s=AShrSU1(x,AU1_(31));return x^((~s)|AU1_(0x80000000));} // Sortable integer -> float bits: same with the smear inverted.
+//------------------------------------------------------------------------------------------------------------------------------
+ // Just adjust high 16-bit value (upper half holds a 16-bit float); the 0xffff0000 mask leaves the low 16 bits untouched so To/From round-trip.
+ AU1 AFisToHiU1(AU1 x){return x^((( AShrSU1(x,AU1_(15)))&AU1_(0xffff0000u))|AU1_(0x80000000));}
+ AU1 AFisFromHiU1(AU1 x){return x^(((~AShrSU1(x,AU1_(15)))&AU1_(0xffff0000u))|AU1_(0x80000000));}
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_HALF
+  AW1 AFisToW1(AW1 x){AW1 s=AShrSW1(x,AW1_(15));return x^(s|AW1_(0x8000));} // 16-bit form: XOR with (AShrSW1 by 15 | 0x8000).
+  AW1 AFisFromW1(AW1 x){AW1 s=AShrSW1(x,AW1_(15));return x^((~s)|AW1_(0x8000));} // Inverse direction: the shifted value is complemented first.
+//------------------------------------------------------------------------------------------------------------------------------
+  AW2 AFisToW2(AW2 x){AW2 s=AShrSW2(x,AW2_(15));return x^(s|AW2_(0x8000));} // Two-component version of AFisToW1.
+  AW2 AFisFromW2(AW2 x){AW2 s=AShrSW2(x,AW2_(15));return x^((~s)|AW2_(0x8000));} // Two-component version of AFisFromW1.
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      [PERM] V_PERM_B32
+//------------------------------------------------------------------------------------------------------------------------------
+// Support for V_PERM_B32 started in the 3rd generation of GCN.
+//------------------------------------------------------------------------------------------------------------------------------
+// yyyyxxxx - The 'i' input.
+// 76543210
+// ========
+// HGFEDCBA - Naming on permutation.
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+//  - Make sure compiler optimizes this.
+//==============================================================================================================================
+ #ifdef A_HALF
+  AU1 APerm0E0A(AU2 i){AU1 lo=(i.x    )&0xffu;AU1 hi=(i.y<<16)&0xff0000u;return lo|hi;} // APerm0?0?: one byte of i.x -> bits 0..7, one byte of i.y -> bits 16..23.
+  AU1 APerm0F0B(AU2 i){AU1 lo=(i.x>> 8)&0xffu;AU1 hi=(i.y<< 8)&0xff0000u;return lo|hi;}
+  AU1 APerm0G0C(AU2 i){AU1 lo=(i.x>>16)&0xffu;AU1 hi=(i.y    )&0xff0000u;return lo|hi;}
+  AU1 APerm0H0D(AU2 i){AU1 lo=(i.x>>24)&0xffu;AU1 hi=(i.y>> 8)&0xff0000u;return lo|hi;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 APermHGFA(AU2 i){AU1 ins=(i.x    )&0x000000ffu;AU1 keep=i.y&0xffffff00u;return ins|keep;} // Keep three bytes of i.y, insert byte A or C of i.x into the fourth.
+  AU1 APermHGFC(AU2 i){AU1 ins=(i.x>>16)&0x000000ffu;AU1 keep=i.y&0xffffff00u;return ins|keep;}
+  AU1 APermHGAE(AU2 i){AU1 ins=(i.x<< 8)&0x0000ff00u;AU1 keep=i.y&0xffff00ffu;return ins|keep;}
+  AU1 APermHGCE(AU2 i){AU1 ins=(i.x>> 8)&0x0000ff00u;AU1 keep=i.y&0xffff00ffu;return ins|keep;}
+  AU1 APermHAFE(AU2 i){AU1 ins=(i.x<<16)&0x00ff0000u;AU1 keep=i.y&0xff00ffffu;return ins|keep;}
+  AU1 APermHCFE(AU2 i){AU1 ins=(i.x    )&0x00ff0000u;AU1 keep=i.y&0xff00ffffu;return ins|keep;}
+  AU1 APermAGFE(AU2 i){AU1 ins=(i.x<<24)&0xff000000u;AU1 keep=i.y&0x00ffffffu;return ins|keep;}
+  AU1 APermCGFE(AU2 i){AU1 ins=(i.x<< 8)&0xff000000u;AU1 keep=i.y&0x00ffffffu;return ins|keep;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 APermGCEA(AU2 i){AU1 ca=(i.x)&0x00ff00ffu;AU1 ge=(i.y<<8)&0xff00ff00u;return ca|ge;} // Interleave bytes C,A of i.x with bytes G,E of i.y.
+  AU1 APermGECA(AU2 i){AU1 a=(i.x)&0xffu;AU1 c=(i.x>>8)&0xff00u;AU1 e=(i.y<<16)&0xff0000u;AU1 g=(i.y<<8)&0xff000000u;return(a|c|e|g);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                               [BUC] BYTE UNSIGNED CONVERSION
+//------------------------------------------------------------------------------------------------------------------------------
+// Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation.
+// Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively.
+//------------------------------------------------------------------------------------------------------------------------------
+// OPCODE NOTES
+// ============
+// GCN does not do UNORM or SNORM for bytes in opcodes.
+//  - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float.
+//  - V_CVT_PKACCUM_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer).
+// V_PERM_B32 does byte packing with ability to zero fill bytes as well.
+//  - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo. 
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops.
+// ====   =====
+//    0 : 0
+//    1 : 1
+//     ...
+//  255 : 255
+//      : 256 (just outside the encoding range)
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32.
+// ====   =====
+//    0 : 0
+//    1 : 1/512
+//    2 : 1/256
+//     ...
+//   64 : 1/8
+//  128 : 1/4
+//  255 : 255/512
+//      : 1/2 (just outside the encoding range)
+//------------------------------------------------------------------------------------------------------------------------------
+// OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES
+// ============================================
+// r=ABuc0FromU1(i)
+//   V_CVT_F32_UBYTE0 r,i
+// --------------------------------------------
+// r=ABuc0ToU1(d,i)
+//   V_CVT_PKACCUM_U8_F32 r,i,0,d
+// --------------------------------------------
+// d=ABuc0FromU2(i)
+//   Where 'k0' is an SGPR with 0x0E0A
+//   Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits
+//   V_PERM_B32 d,i.x,i.y,k0
+//   V_PK_FMA_F16 d,d,k1.x,0
+// --------------------------------------------
+// r=ABuc0ToU2(d,i)
+//   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+//   Where 'k1' is an SGPR with 0x????
+//   Where 'k2' is an SGPR with 0x????
+//   V_PK_FMA_F16 i,i,k0.x,0
+//   V_PERM_B32 r.x,i,i,k1
+//   V_PERM_B32 r.y,i,i,k2
+//==============================================================================================================================
+ // Peak range for 32-bit and 16-bit operations: the float that byte 255 maps to in the two BYTE : FLOAT tables above.
+ #define A_BUC_32 (255.0)
+ #define A_BUC_16 (255.0/512.0)
+//==============================================================================================================================
+ #if 1
+  // ABuc{0,1,2,3}ToU1: insert float 'i' as byte 0..3 of 'd'; designed to be one V_CVT_PKACCUM_U8_F32.
+  // The min against 255 stays because the pattern match to V_CVT_PKACCUM_U8_F32 requires it.
+  AU1 ABuc0ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xffffff00u)|((b    )&(0x000000ffu));}
+  AU1 ABuc1ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xffff00ffu)|((b<< 8)&(0x0000ff00u));}
+  AU1 ABuc2ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xff00ffffu)|((b<<16)&(0x00ff0000u));}
+  AU1 ABuc3ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0x00ffffffu)|((b<<24)&(0xff000000u));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // ABuc{0,1,2,3}FromU1: extract byte 0..3 of 'i' and convert it to float; designed to be one V_CVT_F32_UBYTE*.
+  AF1 ABuc0FromU1(AU1 i){AU1 b=(i    )&255u;return AF1(b);}
+  AF1 ABuc1FromU1(AU1 i){AU1 b=(i>> 8)&255u;return AF1(b);}
+  AF1 ABuc2FromU1(AU1 i){AU1 b=(i>>16)&255u;return AF1(b);}
+  AF1 ABuc3FromU1(AU1 i){AU1 b=(i>>24)&255u;return AF1(b);}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+  // From {x0,x1} and {y0,y1} build {{x0,y0},{x1,y1}}: both inputs are scaled by 1/32768, then byte-interleaved with APermGCEA.
+  AW2 ABuc01ToW2(AH2 x,AH2 y){AH2 k=AH2_(1.0/32768.0);AU1 xb=AU1_AW2(AW2_AH2(x*k));AU1 yb=AU1_AW2(AW2_AH2(y*k));
+   return AW2_AU1(APermGCEA(AU2(xb,yb)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // ABuc{0,1,2,3}ToU2: scale 'i' by 1/32768, then insert its two low bytes into byte 0..3 of d.x and d.y (3 ops: SOA to AOS plus conversion).
+  AU2 ABuc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)));AU1 r0=APermHGFA(AU2(d.x,b));
+   AU1 r1=APermHGFC(AU2(d.y,b));return AU2(r0,r1);}
+  AU2 ABuc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)));AU1 r0=APermHGAE(AU2(d.x,b));
+   AU1 r1=APermHGCE(AU2(d.y,b));return AU2(r0,r1);}
+  AU2 ABuc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)));AU1 r0=APermHAFE(AU2(d.x,b));
+   AU1 r1=APermHCFE(AU2(d.y,b));return AU2(r0,r1);}
+  AU2 ABuc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)));AU1 r0=APermAGFE(AU2(d.x,b));
+   AU1 r1=APermCGFE(AU2(d.y,b));return AU2(r0,r1);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // ABuc{0,1,2,3}FromU2: pick byte 0..3 of i.x and i.y with APerm0?0?, view the pair as halves, scale by 32768 (2 ops: AOS to SOA plus conversion).
+  AH2 ABuc0FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0E0A(i)));return h*AH2_(32768.0);}
+  AH2 ABuc1FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0F0B(i)));return h*AH2_(32768.0);}
+  AH2 ABuc2FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0G0C(i)));return h*AH2_(32768.0);}
+  AH2 ABuc3FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0H0D(i)));return h*AH2_(32768.0);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                 [BSC] BYTE SIGNED CONVERSION
+//------------------------------------------------------------------------------------------------------------------------------
+// Similar to [BUC].
+// Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively.
+//------------------------------------------------------------------------------------------------------------------------------
+// ENCODING (without zero-based encoding)
+// ========
+//   0 = unused (can be used to mean something else)
+//   1 = lowest value 
+// 128 = exact zero center (becomes 0 with zero-based encoding, see below)
+// 255 = highest value
+//------------------------------------------------------------------------------------------------------------------------------
+// Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+// This is useful if there is a desire for cleared values to decode as zero.
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32.
+// ====   =====
+//    0 : -128/512 (unused)
+//    1 : -127/512
+//    2 : -126/512
+//     ...
+//  128 : 0 
+//     ... 
+//  255 : 127/512
+//      : 1/4 (just outside the encoding range)
+//==============================================================================================================================
+ // Peak range for 32-bit and 16-bit operations: the magnitude reached by byte 255 once the 128 center is removed.
+ #define A_BSC_32 (127.0)
+ #define A_BSC_16 (127.0/512.0)
+//==============================================================================================================================
+ #if 1
+  AU1 ABsc0ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xffffff00u)|((b    )&(0x000000ffu));} // Bias by +128, clamp to 255, insert as byte 0..3 of 'd'.
+  AU1 ABsc1ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xffff00ffu)|((b<< 8)&(0x0000ff00u));}
+  AU1 ABsc2ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xff00ffffu)|((b<<16)&(0x00ff0000u));}
+  AU1 ABsc3ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0x00ffffffu)|((b<<24)&(0xff000000u));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 ABsc0ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0xffffff00u)|((b    )&(0x000000ffu));return r^0x00000080u;} // Zero-based: flip the byte's MSB.
+  AU1 ABsc1ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0xffff00ffu)|((b<< 8)&(0x0000ff00u));return r^0x00008000u;}
+  AU1 ABsc2ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0xff00ffffu)|((b<<16)&(0x00ff0000u));return r^0x00800000u;}
+  AU1 ABsc3ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);AU1 r=(d&0x00ffffffu)|((b<<24)&(0xff000000u));return r^0x80000000u;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 ABsc0FromU1(AU1 i){AU1 b=(i    )&255u;return AF1(b)-128.0;} // Extract byte 0..3 of 'i', convert to float, remove the 128 bias.
+  AF1 ABsc1FromU1(AU1 i){AU1 b=(i>> 8)&255u;return AF1(b)-128.0;}
+  AF1 ABsc2FromU1(AU1 i){AU1 b=(i>>16)&255u;return AF1(b)-128.0;}
+  AF1 ABsc3FromU1(AU1 i){AU1 b=(i>>24)&255u;return AF1(b)-128.0;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 ABsc0FromZbU1(AU1 i){AU1 b=((i    )&255u)^0x80u;return AF1(b)-128.0;} // Zero-based decode: flip the byte's MSB back, then remove the 128 bias.
+  AF1 ABsc1FromZbU1(AU1 i){AU1 b=((i>> 8)&255u)^0x80u;return AF1(b)-128.0;}
+  AF1 ABsc2FromZbU1(AU1 i){AU1 b=((i>>16)&255u)^0x80u;return AF1(b)-128.0;}
+  AF1 ABsc3FromZbU1(AU1 i){AU1 b=((i>>24)&255u)^0x80u;return AF1(b)-128.0;}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+  // From {x0,x1} and {y0,y1} build {{x0,y0},{x1,y1}}: each input is scaled by 1/32768 and offset by 0.25/32768 before the byte interleave.
+  AW2 ABsc01ToW2(AH2 x,AH2 y){AH2 xs=x*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AH2 ys=y*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);
+   AU1 xb=AU1_AW2(AW2_AH2(xs));AU1 yb=AU1_AW2(AW2_AH2(ys));return AW2_AU1(APermGCEA(AU2(xb,yb)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU2 ABsc0ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 b=AU1_AW2(AW2_AH2(s));
+   AU1 r0=APermHGFA(AU2(d.x,b));AU1 r1=APermHGFC(AU2(d.y,b));return AU2(r0,r1);} // Scale and offset 'i', then insert its bytes into byte 0..3 of d.x/d.y.
+  AU2 ABsc1ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 b=AU1_AW2(AW2_AH2(s));
+   AU1 r0=APermHGAE(AU2(d.x,b));AU1 r1=APermHGCE(AU2(d.y,b));return AU2(r0,r1);}
+  AU2 ABsc2ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 b=AU1_AW2(AW2_AH2(s));
+   AU1 r0=APermHAFE(AU2(d.x,b));AU1 r1=APermHCFE(AU2(d.y,b));return AU2(r0,r1);}
+  AU2 ABsc3ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 b=AU1_AW2(AW2_AH2(s));
+   AU1 r0=APermAGFE(AU2(d.x,b));AU1 r1=APermCGFE(AU2(d.y,b));return AU2(r0,r1);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU2 ABsc0ToZbU2(AU2 d,AH2 i){AU1 raw=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)));
+   AU1 b=raw^0x00800080u;return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} // Zero-based: XOR 0x00800080 flips the MSB of both packed bytes.
+  AU2 ABsc1ToZbU2(AU2 d,AH2 i){AU1 raw=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)));
+   AU1 b=raw^0x00800080u;return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));}
+  AU2 ABsc2ToZbU2(AU2 d,AH2 i){AU1 raw=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)));
+   AU1 b=raw^0x00800080u;return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));}
+  AU2 ABsc3ToZbU2(AU2 d,AH2 i){AU1 raw=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)));
+   AU1 b=raw^0x00800080u;return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH2 ABsc0FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0E0A(i)));return h*AH2_(32768.0)-AH2_(0.25);} // Pick byte 0..3 of i.x/i.y, scale by 32768, subtract the 0.25 center.
+  AH2 ABsc1FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0F0B(i)));return h*AH2_(32768.0)-AH2_(0.25);}
+  AH2 ABsc2FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0G0C(i)));return h*AH2_(32768.0)-AH2_(0.25);}
+  AH2 ABsc3FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0H0D(i)));return h*AH2_(32768.0)-AH2_(0.25);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH2 ABsc0FromZbU2(AU2 i){AU1 p=APerm0E0A(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);} // Zero-based decode: flip both byte MSBs first.
+  AH2 ABsc1FromZbU2(AU2 i){AU1 p=APerm0F0B(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);}
+  AH2 ABsc2FromZbU2(AU2 i){AU1 p=APerm0G0C(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);}
+  AH2 ABsc3FromZbU2(AU2 i){AU1 p=APerm0H0D(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     HALF APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// These support only positive inputs.
+// Did not see value yet in specialization for range.
+// Using quick testing, ended up mostly getting the same "best" approximation for various ranges.
+// With hardware that can co-execute transcendentals, the value in approximations could be less than expected.
+// However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total.
+// And co-execution would require a compiler interleaving a lot of independent work for packed usage.
+//------------------------------------------------------------------------------------------------------------------------------
+// The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total).
+// Same with sqrt(), as this could be x*rsq() (7 ops).
+//==============================================================================================================================
+ #ifdef A_HALF
+  // Square root estimate: minimize squared error across full positive range, 2 ops (shift the half bits right by one, add 0x1de2).
+  // With the 0x1de2 constant, inputs in {0 to 1} map to outputs < 1.
+  AH1 APrxLoSqrtH1(AH1 a){AW1 bits=AW1_AH1(a);return AH1_AW1((bits>>AW1_(1))+AW1_(0x1de2));}
+  AH2 APrxLoSqrtH2(AH2 a){AW2 bits=AW2_AH2(a);return AH2_AW2((bits>>AW2_(1))+AW2_(0x1de2));}
+  AH3 APrxLoSqrtH3(AH3 a){AW3 bits=AW3_AH3(a);return AH3_AW3((bits>>AW3_(1))+AW3_(0x1de2));}
+  AH4 APrxLoSqrtH4(AH4 a){AW4 bits=AW4_AH4(a);return AH4_AW4((bits>>AW4_(1))+AW4_(0x1de2));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Reciprocal, lower precision estimation, 1 op (0x7784 minus the half bits).
+  // Minimize squared error across {smallest normal to 16384.0}.
+  AH1 APrxLoRcpH1(AH1 a){AW1 bits=AW1_AH1(a);return AH1_AW1(AW1_(0x7784)-bits);}
+  AH2 APrxLoRcpH2(AH2 a){AW2 bits=AW2_AH2(a);return AH2_AW2(AW2_(0x7784)-bits);}
+  AH3 APrxLoRcpH3(AH3 a){AW3 bits=AW3_AH3(a);return AH3_AW3(AW3_(0x7784)-bits);}
+  AH4 APrxLoRcpH4(AH4 a){AW4 bits=AW4_AH4(a);return AH4_AW4(AW4_(0x7784)-bits);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Reciprocal, medium precision: bit-trick estimate (0x778d minus bits) refined by one Newton Raphson iteration est*(2-est*a), 3 ops.
+  AH1 APrxMedRcpH1(AH1 a){AW1 bits=AW1_AH1(a);AH1 est=AH1_AW1(AW1_(0x778d)-bits);return est*(-est*a+AH1_(2.0));}
+  AH2 APrxMedRcpH2(AH2 a){AW2 bits=AW2_AH2(a);AH2 est=AH2_AW2(AW2_(0x778d)-bits);return est*(-est*a+AH2_(2.0));}
+  AH3 APrxMedRcpH3(AH3 a){AW3 bits=AW3_AH3(a);AH3 est=AH3_AW3(AW3_(0x778d)-bits);return est*(-est*a+AH3_(2.0));}
+  AH4 APrxMedRcpH4(AH4 a){AW4 bits=AW4_AH4(a);AH4 est=AH4_AW4(AW4_(0x778d)-bits);return est*(-est*a+AH4_(2.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Reciprocal square root estimate (0x59a3 minus the half bits shifted right by one): minimize squared error across {smallest normal to 16384.0}, 2 ops.
+  AH1 APrxLoRsqH1(AH1 a){AW1 bits=AW1_AH1(a);return AH1_AW1(AW1_(0x59a3)-(bits>>AW1_(1)));}
+  AH2 APrxLoRsqH2(AH2 a){AW2 bits=AW2_AH2(a);return AH2_AW2(AW2_(0x59a3)-(bits>>AW2_(1)));}
+  AH3 APrxLoRsqH3(AH3 a){AW3 bits=AW3_AH3(a);return AH3_AW3(AW3_(0x59a3)-(bits>>AW3_(1)));}
+  AH4 APrxLoRsqH4(AH4 a){AW4 bits=AW4_AH4(a);return AH4_AW4(AW4_(0x59a3)-(bits>>AW4_(1)));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    FLOAT APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN",
+//  - Idea dates back to SGI, then to Quake 3, etc.
+//  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+//     - sqrt(x)=rsqrt(x)*x
+//     - rcp(x)=rsqrt(x)*rsqrt(x) for positive x
+//  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+//------------------------------------------------------------------------------------------------------------------------------
+// These below are from perhaps less complete searching for optimal.
+// Used FP16 normal range for testing with +4096 32-bit step size for sampling error.
+// So these match up well with the half approximations.
+//==============================================================================================================================
+ AF1 APrxLoSqrtF1(AF1 a){AU1 halved=AU1_AF1(a)>>AU1_(1);return AF1_AU1(halved+AU1_(0x1fbc4639));} // Bit pattern shifted right by one, plus bias constant.
+ AF1 APrxLoRcpF1(AF1 a){AU1 bits=AU1_(0x7ef07ebb)-AU1_AF1(a);return AF1_AU1(bits);} // Constant minus bit pattern.
+ AF1 APrxMedRcpF1(AF1 a){AU1 bits=AU1_(0x7ef19fff)-AU1_AF1(a);AF1 est=AF1_AU1(bits);return est*(-est*a+AF1_(2.0));} // Seed estimate, then one est*(2-est*a) refinement.
+ AF1 APrxLoRsqF1(AF1 a){AU1 halved=AU1_AF1(a)>>AU1_(1);return AF1_AU1(AU1_(0x5f347d74)-halved);} // Constant minus half the bit pattern.
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 APrxLoSqrtF2(AF2 a){AU2 halved=AU2_AF2(a)>>AU2_(1);return AF2_AU2(halved+AU2_(0x1fbc4639));} // 2-wide forms of the scalar bit-pattern approximations.
+ AF2 APrxLoRcpF2(AF2 a){AU2 bits=AU2_(0x7ef07ebb)-AU2_AF2(a);return AF2_AU2(bits);}
+ AF2 APrxMedRcpF2(AF2 a){AU2 bits=AU2_(0x7ef19fff)-AU2_AF2(a);AF2 est=AF2_AU2(bits);return est*(-est*a+AF2_(2.0));}
+ AF2 APrxLoRsqF2(AF2 a){AU2 halved=AU2_AF2(a)>>AU2_(1);return AF2_AU2(AU2_(0x5f347d74)-halved);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF3 APrxLoSqrtF3(AF3 a){AU3 halved=AU3_AF3(a)>>AU3_(1);return AF3_AU3(halved+AU3_(0x1fbc4639));} // 3-wide forms of the scalar bit-pattern approximations.
+ AF3 APrxLoRcpF3(AF3 a){AU3 bits=AU3_(0x7ef07ebb)-AU3_AF3(a);return AF3_AU3(bits);}
+ AF3 APrxMedRcpF3(AF3 a){AU3 bits=AU3_(0x7ef19fff)-AU3_AF3(a);AF3 est=AF3_AU3(bits);return est*(-est*a+AF3_(2.0));}
+ AF3 APrxLoRsqF3(AF3 a){AU3 halved=AU3_AF3(a)>>AU3_(1);return AF3_AU3(AU3_(0x5f347d74)-halved);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF4 APrxLoSqrtF4(AF4 a){AU4 halved=AU4_AF4(a)>>AU4_(1);return AF4_AU4(halved+AU4_(0x1fbc4639));} // 4-wide forms of the scalar bit-pattern approximations.
+ AF4 APrxLoRcpF4(AF4 a){AU4 bits=AU4_(0x7ef07ebb)-AU4_AF4(a);return AF4_AU4(bits);}
+ AF4 APrxMedRcpF4(AF4 a){AU4 bits=AU4_(0x7ef19fff)-AU4_AF4(a);AF4 est=AF4_AU4(bits);return est*(-est*a+AF4_(2.0));}
+ AF4 APrxLoRsqF4(AF4 a){AU4 halved=AU4_AF4(a)>>AU4_(1);return AF4_AU4(AU4_(0x5f347d74)-halved);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    PQ APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// PQ is very close to x^(1/8). The functions below use the fast float approximation method to do
+// PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%.
+//==============================================================================================================================
+// Helpers
+ AF1 Quart(AF1 a) { AF1 sq = a * a; return sq * sq; } // a^4 by squaring twice.
+ AF1 Oct(AF1 a) { AF1 sq = a * a; AF1 qd = sq * sq; return qd * qd; } // a^8 by squaring three times.
+ AF2 Quart(AF2 a) { AF2 sq = a * a; return sq * sq; }
+ AF2 Oct(AF2 a) { AF2 sq = a * a; AF2 qd = sq * sq; return qd * qd; }
+ AF3 Quart(AF3 a) { AF3 sq = a * a; return sq * sq; }
+ AF3 Oct(AF3 a) { AF3 sq = a * a; AF3 qd = sq * sq; return qd * qd; }
+ AF4 Quart(AF4 a) { AF4 sq = a * a; return sq * sq; }
+ AF4 Oct(AF4 a) { AF4 sq = a * a; AF4 qd = sq * sq; return qd * qd; }
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF1 APrxPQToGamma2(AF1 a) { AF1 sq = a * a; return sq * sq; } // 4th power.
+ AF1 APrxPQToLinear(AF1 a) { AF1 sq = a * a; AF1 qd = sq * sq; return qd * qd; } // 8th power.
+ AF1 APrxLoGamma2ToPQ(AF1 a) { AU1 quartered = AU1_AF1(a) >> AU1_(2); return AF1_AU1(quartered + AU1_(0x2F9A4E46)); } // Fast 4th root on the bit pattern.
+ AF1 APrxMedGamma2ToPQ(AF1 a) { AF1 seed = AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); AF1 s2 = seed * seed; AF1 s4 = s2 * s2; return seed - seed * (s4 - a) / (AF1_(4.0) * s4); } // Fast 4th root plus one correction step.
+ AF1 APrxHighGamma2ToPQ(AF1 a) { AF1 root2 = sqrt(a); return sqrt(root2); } // 4th root via two sqrt calls.
+ AF1 APrxLoLinearToPQ(AF1 a) { AU1 eighth = AU1_AF1(a) >> AU1_(3); return AF1_AU1(eighth + AU1_(0x378D8723)); } // Fast 8th root on the bit pattern.
+ AF1 APrxMedLinearToPQ(AF1 a) { AF1 seed = AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); AF1 s2 = seed * seed; AF1 s4 = s2 * s2; AF1 s8 = s4 * s4; return seed - seed * (s8 - a) / (AF1_(8.0) * s8); } // Fast 8th root plus one correction step.
+ AF1 APrxHighLinearToPQ(AF1 a) { AF1 root2 = sqrt(a); AF1 root4 = sqrt(root2); return sqrt(root4); } // 8th root via three sqrt calls.
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF2 APrxPQToGamma2(AF2 a) { AF2 sq = a * a; return sq * sq; } // 2-wide forms of the scalar PQ approximations.
+ AF2 APrxPQToLinear(AF2 a) { AF2 sq = a * a; AF2 qd = sq * sq; return qd * qd; }
+ AF2 APrxLoGamma2ToPQ(AF2 a) { AU2 quartered = AU2_AF2(a) >> AU2_(2); return AF2_AU2(quartered + AU2_(0x2F9A4E46)); }
+ AF2 APrxMedGamma2ToPQ(AF2 a) { AF2 seed = AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); AF2 s2 = seed * seed; AF2 s4 = s2 * s2; return seed - seed * (s4 - a) / (AF1_(4.0) * s4); }
+ AF2 APrxHighGamma2ToPQ(AF2 a) { AF2 root2 = sqrt(a); return sqrt(root2); }
+ AF2 APrxLoLinearToPQ(AF2 a) { AU2 eighth = AU2_AF2(a) >> AU2_(3); return AF2_AU2(eighth + AU2_(0x378D8723)); }
+ AF2 APrxMedLinearToPQ(AF2 a) { AF2 seed = AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); AF2 s2 = seed * seed; AF2 s4 = s2 * s2; AF2 s8 = s4 * s4; return seed - seed * (s8 - a) / (AF1_(8.0) * s8); }
+ AF2 APrxHighLinearToPQ(AF2 a) { AF2 root2 = sqrt(a); AF2 root4 = sqrt(root2); return sqrt(root4); }
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF3 APrxPQToGamma2(AF3 a) { AF3 sq = a * a; return sq * sq; } // 3-wide forms of the scalar PQ approximations.
+ AF3 APrxPQToLinear(AF3 a) { AF3 sq = a * a; AF3 qd = sq * sq; return qd * qd; }
+ AF3 APrxLoGamma2ToPQ(AF3 a) { AU3 quartered = AU3_AF3(a) >> AU3_(2); return AF3_AU3(quartered + AU3_(0x2F9A4E46)); }
+ AF3 APrxMedGamma2ToPQ(AF3 a) { AF3 seed = AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46)); AF3 s2 = seed * seed; AF3 s4 = s2 * s2; return seed - seed * (s4 - a) / (AF1_(4.0) * s4); }
+ AF3 APrxHighGamma2ToPQ(AF3 a) { AF3 root2 = sqrt(a); return sqrt(root2); }
+ AF3 APrxLoLinearToPQ(AF3 a) { AU3 eighth = AU3_AF3(a) >> AU3_(3); return AF3_AU3(eighth + AU3_(0x378D8723)); }
+ AF3 APrxMedLinearToPQ(AF3 a) { AF3 seed = AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723)); AF3 s2 = seed * seed; AF3 s4 = s2 * s2; AF3 s8 = s4 * s4; return seed - seed * (s8 - a) / (AF1_(8.0) * s8); }
+ AF3 APrxHighLinearToPQ(AF3 a) { AF3 root2 = sqrt(a); AF3 root4 = sqrt(root2); return sqrt(root4); }
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF4 APrxPQToGamma2(AF4 a) { AF4 sq = a * a; return sq * sq; } // 4-wide forms of the scalar PQ approximations.
+ AF4 APrxPQToLinear(AF4 a) { AF4 sq = a * a; AF4 qd = sq * sq; return qd * qd; }
+ AF4 APrxLoGamma2ToPQ(AF4 a) { AU4 quartered = AU4_AF4(a) >> AU4_(2); return AF4_AU4(quartered + AU4_(0x2F9A4E46)); }
+ AF4 APrxMedGamma2ToPQ(AF4 a) { AF4 seed = AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46)); AF4 s2 = seed * seed; AF4 s4 = s2 * s2; return seed - seed * (s4 - a) / (AF1_(4.0) * s4); }
+ AF4 APrxHighGamma2ToPQ(AF4 a) { AF4 root2 = sqrt(a); return sqrt(root2); }
+ AF4 APrxLoLinearToPQ(AF4 a) { AU4 eighth = AU4_AF4(a) >> AU4_(3); return AF4_AU4(eighth + AU4_(0x378D8723)); }
+ AF4 APrxMedLinearToPQ(AF4 a) { AF4 seed = AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723)); AF4 s2 = seed * seed; AF4 s4 = s2 * s2; AF4 s8 = s4 * s4; return seed - seed * (s8 - a) / (AF1_(8.0) * s8); }
+ AF4 APrxHighLinearToPQ(AF4 a) { AF4 root2 = sqrt(a); AF4 root4 = sqrt(root2); return sqrt(root4); }
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    PARABOLIC SIN & COS
+//------------------------------------------------------------------------------------------------------------------------------
+// Approximate answers to transcendental questions.
+//------------------------------------------------------------------------------------------------------------------------------
+//==============================================================================================================================
+ #if 1
+  // Valid input range is {-1 to 1} representing {0 to 2 pi}.
+  // Output range is {-1/4 to 1/4} representing {-1 to 1}.
+  AF1 APSinF1(AF1 x){AF1 mag=abs(x);return x*mag-x;} // MAD. Parabola x*|x|-x.
+  AF2 APSinF2(AF2 x){AF2 mag=abs(x);return x*mag-x;}
+  AF1 APCosF1(AF1 x){AF1 ph=AFractF1(x*AF1_(0.5)+AF1_(0.75));ph=ph*AF1_(2.0)-AF1_(1.0);return APSinF1(ph);} // 3x MAD, FRACT. Phase-shift the input, then reuse the sine parabola.
+  AF2 APCosF2(AF2 x){AF2 ph=AFractF2(x*AF2_(0.5)+AF2_(0.75));ph=ph*AF2_(2.0)-AF2_(1.0);return APSinF2(ph);}
+  AF2 APSinCosF1(AF1 x){AF1 ph=AFractF1(x*AF1_(0.5)+AF1_(0.75));ph=ph*AF1_(2.0)-AF1_(1.0);AF2 pair=AF2(x,ph);return APSinF2(pair);} // Returns {sin,cos} from one packed evaluation.
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_HALF
+  // For a packed {sin,cos} pair,
+  //  - Native takes 16 clocks and 4 issue slots (no packed transcendentals).
+  //  - Parabolic takes 8 clocks and 8 issue slots (only fract is non-packed).
+  AH1 APSinH1(AH1 x){AH1 mag=abs(x);return x*mag-x;} // Parabola x*|x|-x.
+  AH2 APSinH2(AH2 x){AH2 mag=abs(x);return x*mag-x;} // AND,FMA
+  AH1 APCosH1(AH1 x){AH1 ph=AFractH1(x*AH1_(0.5)+AH1_(0.75));ph=ph*AH1_(2.0)-AH1_(1.0);return APSinH1(ph);} // Phase-shift the input, then reuse the sine parabola.
+  AH2 APCosH2(AH2 x){AH2 ph=AFractH2(x*AH2_(0.5)+AH2_(0.75));ph=ph*AH2_(2.0)-AH2_(1.0);return APSinH2(ph);} // 3x FMA, 2xFRACT, AND
+  AH2 APSinCosH1(AH1 x){AH1 ph=AFractH1(x*AH1_(0.5)+AH1_(0.75));ph=ph*AH1_(2.0)-AH1_(1.0);AH2 pair=AH2(x,ph);return APSinH2(pair);} // Returns {sin,cos} from one packed evaluation.
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     [ZOL] ZERO ONE LOGIC
+//------------------------------------------------------------------------------------------------------------------------------
+// Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit.
+//------------------------------------------------------------------------------------------------------------------------------
+// 0 := false
+// 1 := true
+//------------------------------------------------------------------------------------------------------------------------------
+// AndNot(x,y)   -> !(x&y) .... One op.
+// AndOr(x,y,z)  -> (x&y)|z ... One op.
+// GtZero(x)     -> x>0.0 ..... One op.
+// Sel(x,y,z)    -> x?y:z ..... Two ops, has no precision loss.
+// Signed(x)     -> x<0.0 ..... One op.
+// ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer.
+//------------------------------------------------------------------------------------------------------------------------------
+// OPTIMIZATION NOTES
+// ==================
+// - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'.
+//   For example 'a.xy*k.xx+k.yy'.
+//==============================================================================================================================
+ #if 1
+  AU1 AZolAndU1(AU1 x,AU1 y){AU1 both=min(x,y);return both;} // Zero/one AND expressed as min.
+  AU2 AZolAndU2(AU2 x,AU2 y){AU2 both=min(x,y);return both;}
+  AU3 AZolAndU3(AU3 x,AU3 y){AU3 both=min(x,y);return both;}
+  AU4 AZolAndU4(AU4 x,AU4 y){AU4 both=min(x,y);return both;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 AZolNotU1(AU1 x){AU1 lowBit=AU1_(1);return x^lowBit;} // Zero/one NOT: toggle bit 0.
+  AU2 AZolNotU2(AU2 x){AU2 lowBit=AU2_(1);return x^lowBit;}
+  AU3 AZolNotU3(AU3 x){AU3 lowBit=AU3_(1);return x^lowBit;}
+  AU4 AZolNotU4(AU4 x){AU4 lowBit=AU4_(1);return x^lowBit;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 AZolOrU1(AU1 x,AU1 y){AU1 either=max(x,y);return either;} // Zero/one OR expressed as max.
+  AU2 AZolOrU2(AU2 x,AU2 y){AU2 either=max(x,y);return either;}
+  AU3 AZolOrU3(AU3 x,AU3 y){AU3 either=max(x,y);return either;}
+  AU4 AZolOrU4(AU4 x,AU4 y){AU4 either=max(x,y);return either;}
+//==============================================================================================================================
+  AU1 AZolF1ToU1(AF1 x){AU1 asInt=AU1(x);return asInt;} // Value conversion float -> uint.
+  AU2 AZolF2ToU2(AF2 x){AU2 asInt=AU2(x);return asInt;}
+  AU3 AZolF3ToU3(AF3 x){AU3 asInt=AU3(x);return asInt;}
+  AU4 AZolF4ToU4(AF4 x){AU4 asInt=AU4(x);return asInt;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // 2 ops, denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled).
+  AU1 AZolNotF1ToU1(AF1 x){AF1 inverted=AF1_(1.0)-x;return AU1(inverted);} // NOT in float (1-x), then convert to uint.
+  AU2 AZolNotF2ToU2(AF2 x){AF2 inverted=AF2_(1.0)-x;return AU2(inverted);}
+  AU3 AZolNotF3ToU3(AF3 x){AF3 inverted=AF3_(1.0)-x;return AU3(inverted);}
+  AU4 AZolNotF4ToU4(AF4 x){AF4 inverted=AF4_(1.0)-x;return AU4(inverted);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolU1ToF1(AU1 x){AF1 asFloat=AF1(x);return asFloat;} // Value conversion uint -> float.
+  AF2 AZolU2ToF2(AU2 x){AF2 asFloat=AF2(x);return asFloat;}
+  AF3 AZolU3ToF3(AU3 x){AF3 asFloat=AF3(x);return asFloat;}
+  AF4 AZolU4ToF4(AU4 x){AF4 asFloat=AF4(x);return asFloat;}
+//==============================================================================================================================
+  AF1 AZolAndF1(AF1 x,AF1 y){AF1 both=min(x,y);return both;} // Zero/one AND expressed as min.
+  AF2 AZolAndF2(AF2 x,AF2 y){AF2 both=min(x,y);return both;}
+  AF3 AZolAndF3(AF3 x,AF3 y){AF3 both=min(x,y);return both;}
+  AF4 AZolAndF4(AF4 x,AF4 y){AF4 both=min(x,y);return both;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolAndNotF1(AF1 x,AF1 y){return (-x)*y+AF1_(1.0);} AF1 ASolAndNotF1(AF1 x,AF1 y){return AZolAndNotF1(x,y);} // !(x&y) as 1-x*y. 'ASol' is the original misspelled name, kept for existing callers.
+  AF2 AZolAndNotF2(AF2 x,AF2 y){return (-x)*y+AF2_(1.0);} AF2 ASolAndNotF2(AF2 x,AF2 y){return AZolAndNotF2(x,y);}
+  AF3 AZolAndNotF3(AF3 x,AF3 y){return (-x)*y+AF3_(1.0);} AF3 ASolAndNotF3(AF3 x,AF3 y){return AZolAndNotF3(x,y);}
+  AF4 AZolAndNotF4(AF4 x,AF4 y){return (-x)*y+AF4_(1.0);} AF4 ASolAndNotF4(AF4 x,AF4 y){return AZolAndNotF4(x,y);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){AF1 mad=x*y+z;return ASatF1(mad);} // (x&y)|z as saturate(x*y+z).
+  AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){AF2 mad=x*y+z;return ASatF2(mad);}
+  AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){AF3 mad=x*y+z;return ASatF3(mad);}
+  AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){AF4 mad=x*y+z;return ASatF4(mad);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolGtZeroF1(AF1 x){AF1 blown=x*AF1_(A_INFP_F);return ASatF1(blown);} // Scale by A_INFP_F, then saturate.
+  AF2 AZolGtZeroF2(AF2 x){AF2 blown=x*AF2_(A_INFP_F);return ASatF2(blown);}
+  AF3 AZolGtZeroF3(AF3 x){AF3 blown=x*AF3_(A_INFP_F);return ASatF3(blown);}
+  AF4 AZolGtZeroF4(AF4 x){AF4 blown=x*AF4_(A_INFP_F);return ASatF4(blown);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolNotF1(AF1 x){AF1 one=AF1_(1.0);return one-x;} // Zero/one NOT as 1-x.
+  AF2 AZolNotF2(AF2 x){AF2 one=AF2_(1.0);return one-x;}
+  AF3 AZolNotF3(AF3 x){AF3 one=AF3_(1.0);return one-x;}
+  AF4 AZolNotF4(AF4 x){AF4 one=AF4_(1.0);return one-x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolOrF1(AF1 x,AF1 y){AF1 either=max(x,y);return either;} // Zero/one OR expressed as max.
+  AF2 AZolOrF2(AF2 x,AF2 y){AF2 either=max(x,y);return either;}
+  AF3 AZolOrF3(AF3 x,AF3 y){AF3 either=max(x,y);return either;}
+  AF4 AZolOrF4(AF4 x,AF4 y){AF4 either=max(x,y);return either;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 keepZ=(-x)*z+z;AF1 picked=x*y+keepZ;return picked;} // x?y:z for x in {0,1}: keepZ is z when x=0 and 0 when x=1.
+  AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 keepZ=(-x)*z+z;AF2 picked=x*y+keepZ;return picked;}
+  AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 keepZ=(-x)*z+z;AF3 picked=x*y+keepZ;return picked;}
+  AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 keepZ=(-x)*z+z;AF4 picked=x*y+keepZ;return picked;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolSignedF1(AF1 x){AF1 blown=x*AF1_(A_INFN_F);return ASatF1(blown);} // Scale by A_INFN_F, then saturate.
+  AF2 AZolSignedF2(AF2 x){AF2 blown=x*AF2_(A_INFN_F);return ASatF2(blown);}
+  AF3 AZolSignedF3(AF3 x){AF3 blown=x*AF3_(A_INFN_F);return ASatF3(blown);}
+  AF4 AZolSignedF4(AF4 x){AF4 blown=x*AF4_(A_INFN_F);return ASatF4(blown);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AZolZeroPassF1(AF1 x,AF1 y){return AF1_AU1((AU1_AF1(x)!=AU1_(0))?AU1_(0):AU1_AF1(y));} // Any non-zero bit pattern in 'x' yields bits 0; otherwise the bits of 'y' pass through untouched.
+  AF2 AZolZeroPassF2(AF2 x,AF2 y){return AF2_AU2((AU2_AF2(x)!=AU2_(0))?AU2_(0):AU2_AF2(y));} // NOTE(review): vector forms rely on how the target language evaluates '!=' and '?:' on vectors - confirm per-component selection.
+  AF3 AZolZeroPassF3(AF3 x,AF3 y){return AF3_AU3((AU3_AF3(x)!=AU3_(0))?AU3_(0):AU3_AF3(y));}
+  AF4 AZolZeroPassF4(AF4 x,AF4 y){return AF4_AU4((AU4_AF4(x)!=AU4_(0))?AU4_(0):AU4_AF4(y));}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+  AW1 AZolAndW1(AW1 x,AW1 y){AW1 both=min(x,y);return both;} // Zero/one AND expressed as min.
+  AW2 AZolAndW2(AW2 x,AW2 y){AW2 both=min(x,y);return both;}
+  AW3 AZolAndW3(AW3 x,AW3 y){AW3 both=min(x,y);return both;}
+  AW4 AZolAndW4(AW4 x,AW4 y){AW4 both=min(x,y);return both;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AZolNotW1(AW1 x){AW1 lowBit=AW1_(1);return x^lowBit;} // Zero/one NOT: toggle bit 0.
+  AW2 AZolNotW2(AW2 x){AW2 lowBit=AW2_(1);return x^lowBit;}
+  AW3 AZolNotW3(AW3 x){AW3 lowBit=AW3_(1);return x^lowBit;}
+  AW4 AZolNotW4(AW4 x){AW4 lowBit=AW4_(1);return x^lowBit;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AW1 AZolOrW1(AW1 x,AW1 y){AW1 either=max(x,y);return either;} // Zero/one OR expressed as max.
+  AW2 AZolOrW2(AW2 x,AW2 y){AW2 either=max(x,y);return either;}
+  AW3 AZolOrW3(AW3 x,AW3 y){AW3 either=max(x,y);return either;}
+  AW4 AZolOrW4(AW4 x,AW4 y){AW4 either=max(x,y);return either;}
+//==============================================================================================================================
+  // Uses denormal trick.
+  AW1 AZolH1ToW1(AH1 x){AH1 tiny=AH1_AW1(AW1_(1));return AW1_AH1(x*tiny);} // Multiply by the half whose bit pattern is integer 1, then reinterpret as integer.
+  AW2 AZolH2ToW2(AH2 x){AH2 tiny=AH2_AW2(AW2_(1));return AW2_AH2(x*tiny);}
+  AW3 AZolH3ToW3(AH3 x){AH3 tiny=AH3_AW3(AW3_(1));return AW3_AH3(x*tiny);}
+  AW4 AZolH4ToW4(AH4 x){AH4 tiny=AH4_AW4(AW4_(1));return AW4_AH4(x*tiny);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // AMD arch lacks a packed conversion opcode.
+  AH1 AZolW1ToH1(AW1 x){return AH1_AW1(x*AW1_AH1(AH1_(1.0)));} // Integer multiply by the bit pattern of half 1.0, then reinterpret as half.
+  AH2 AZolW2ToH2(AW2 x){return AH2_AW2(x*AW2_AH2(AH2_(1.0)));}
+  AH3 AZolW3ToH3(AW3 x){return AH3_AW3(x*AW3_AH3(AH3_(1.0)));} AH3 AZolW1ToH3(AW3 x){return AZolW3ToH3(x);} // 'AZolW1ToH3' is the original misnamed entry point, kept for existing callers.
+  AH4 AZolW4ToH4(AW4 x){return AH4_AW4(x*AW4_AH4(AH4_(1.0)));} AH4 AZolW2ToH4(AW4 x){return AZolW4ToH4(x);} // 'AZolW2ToH4' is the original misnamed entry point, kept for existing callers.
+//==============================================================================================================================
+  AH1 AZolAndH1(AH1 x,AH1 y){AH1 both=min(x,y);return both;} // Zero/one AND expressed as min.
+  AH2 AZolAndH2(AH2 x,AH2 y){AH2 both=min(x,y);return both;}
+  AH3 AZolAndH3(AH3 x,AH3 y){AH3 both=min(x,y);return both;}
+  AH4 AZolAndH4(AH4 x,AH4 y){AH4 both=min(x,y);return both;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolAndNotH1(AH1 x,AH1 y){return (-x)*y+AH1_(1.0);} AH1 ASolAndNotH1(AH1 x,AH1 y){return AZolAndNotH1(x,y);} // !(x&y) as 1-x*y. 'ASol' is the original misspelled name, kept for existing callers.
+  AH2 AZolAndNotH2(AH2 x,AH2 y){return (-x)*y+AH2_(1.0);} AH2 ASolAndNotH2(AH2 x,AH2 y){return AZolAndNotH2(x,y);}
+  AH3 AZolAndNotH3(AH3 x,AH3 y){return (-x)*y+AH3_(1.0);} AH3 ASolAndNotH3(AH3 x,AH3 y){return AZolAndNotH3(x,y);}
+  AH4 AZolAndNotH4(AH4 x,AH4 y){return (-x)*y+AH4_(1.0);} AH4 ASolAndNotH4(AH4 x,AH4 y){return AZolAndNotH4(x,y);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolAndOrH1(AH1 x,AH1 y,AH1 z){AH1 mad=x*y+z;return ASatH1(mad);} // (x&y)|z as saturate(x*y+z).
+  AH2 AZolAndOrH2(AH2 x,AH2 y,AH2 z){AH2 mad=x*y+z;return ASatH2(mad);}
+  AH3 AZolAndOrH3(AH3 x,AH3 y,AH3 z){AH3 mad=x*y+z;return ASatH3(mad);}
+  AH4 AZolAndOrH4(AH4 x,AH4 y,AH4 z){AH4 mad=x*y+z;return ASatH4(mad);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolGtZeroH1(AH1 x){AH1 blown=x*AH1_(A_INFP_H);return ASatH1(blown);} // Scale by A_INFP_H, then saturate.
+  AH2 AZolGtZeroH2(AH2 x){AH2 blown=x*AH2_(A_INFP_H);return ASatH2(blown);}
+  AH3 AZolGtZeroH3(AH3 x){AH3 blown=x*AH3_(A_INFP_H);return ASatH3(blown);}
+  AH4 AZolGtZeroH4(AH4 x){AH4 blown=x*AH4_(A_INFP_H);return ASatH4(blown);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolNotH1(AH1 x){AH1 one=AH1_(1.0);return one-x;} // Zero/one NOT as 1-x.
+  AH2 AZolNotH2(AH2 x){AH2 one=AH2_(1.0);return one-x;}
+  AH3 AZolNotH3(AH3 x){AH3 one=AH3_(1.0);return one-x;}
+  AH4 AZolNotH4(AH4 x){AH4 one=AH4_(1.0);return one-x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolOrH1(AH1 x,AH1 y){AH1 either=max(x,y);return either;} // Zero/one OR expressed as max.
+  AH2 AZolOrH2(AH2 x,AH2 y){AH2 either=max(x,y);return either;}
+  AH3 AZolOrH3(AH3 x,AH3 y){AH3 either=max(x,y);return either;}
+  AH4 AZolOrH4(AH4 x,AH4 y){AH4 either=max(x,y);return either;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolSelH1(AH1 x,AH1 y,AH1 z){AH1 keepZ=(-x)*z+z;AH1 picked=x*y+keepZ;return picked;} // x?y:z for x in {0,1}: keepZ is z when x=0 and 0 when x=1.
+  AH2 AZolSelH2(AH2 x,AH2 y,AH2 z){AH2 keepZ=(-x)*z+z;AH2 picked=x*y+keepZ;return picked;}
+  AH3 AZolSelH3(AH3 x,AH3 y,AH3 z){AH3 keepZ=(-x)*z+z;AH3 picked=x*y+keepZ;return picked;}
+  AH4 AZolSelH4(AH4 x,AH4 y,AH4 z){AH4 keepZ=(-x)*z+z;AH4 picked=x*y+keepZ;return picked;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AZolSignedH1(AH1 x){AH1 blown=x*AH1_(A_INFN_H);return ASatH1(blown);} // Scale by A_INFN_H, then saturate.
+  AH2 AZolSignedH2(AH2 x){AH2 blown=x*AH2_(A_INFN_H);return ASatH2(blown);}
+  AH3 AZolSignedH3(AH3 x){AH3 blown=x*AH3_(A_INFN_H);return ASatH3(blown);}
+  AH4 AZolSignedH4(AH4 x){AH4 blown=x*AH4_(A_INFN_H);return ASatH4(blown);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      COLOR CONVERSIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are all linear to/from some other space (where 'linear' has been shortened out of the function name).
+// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'.
+// These are branch free implementations.
+// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion.
+//------------------------------------------------------------------------------------------------------------------------------
+// TRANSFER FUNCTIONS
+// ==================
+// 709 ..... Rec709 used for some HDTVs
+// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native
+// Pq ...... PQ native for HDR10
+// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type
+// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations)
+// Three ... Gamma 3.0, less fast, but good for HDR.
+//------------------------------------------------------------------------------------------------------------------------------
+// KEEPING TO SPEC
+// ===============
+// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+//  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+//  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+// Also there is a slight step in the transition regions.
+// Precision of the coefficients in the spec being the likely cause.
+// Main usage case of the sRGB code is to do the linear->sRGB conversion in a compute shader before store.
+// This is to work around lack of hardware (typically only ROP does the conversion for free).
+// To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free).
+// So this header keeps with the spec.
+// For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear.
+// Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear.
+//------------------------------------------------------------------------------------------------------------------------------
+// FOR PQ
+// ======
+// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2.
+// All constants are only specified to FP32 precision.
+// External PQ source reference,
+//  - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl
+//------------------------------------------------------------------------------------------------------------------------------
+// PACKED VERSIONS
+// ===============
+// These are the A*H2() functions.
+// There are no PQ functions as FP16 seemed to not have enough precision for the conversion.
+// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors.
+// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least).
+//------------------------------------------------------------------------------------------------------------------------------
+// NOTES
+// =====
+// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case.
+//==============================================================================================================================
+ #if 1
+  AF1 ATo709F1(AF1 c){AF3 seg=AF3(0.018*4.5,4.5,0.45);AF2 crv=AF2(1.099,-0.099);AF1 lin=c*seg.y;AF1 bent=pow(c,seg.z)*crv.x+crv.y;
+   return clamp(seg.x,lin,bent);} // clamp() of the breakpoint between the linear and power segments.
+  AF2 ATo709F2(AF2 c){AF3 seg=AF3(0.018*4.5,4.5,0.45);AF2 crv=AF2(1.099,-0.099);AF2 lin=c*seg.yy;AF2 bent=pow(c,seg.zz)*crv.xx+crv.yy;
+   return clamp(seg.xx,lin,bent);}
+  AF3 ATo709F3(AF3 c){AF3 seg=AF3(0.018*4.5,4.5,0.45);AF2 crv=AF2(1.099,-0.099);AF3 lin=c*seg.yyy;AF3 bent=pow(c,seg.zzz)*crv.xxx+crv.yyy;
+   return clamp(seg.xxx,lin,bent);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma().
+  AF1 AToGammaF1(AF1 c,AF1 rcpX){AF1 expo=AF1_(rcpX);return pow(c,expo);} // c raised to the caller-supplied reciprocal gamma.
+  AF2 AToGammaF2(AF2 c,AF1 rcpX){AF2 expo=AF2_(rcpX);return pow(c,expo);}
+  AF3 AToGammaF3(AF3 c,AF1 rcpX){AF3 expo=AF3_(rcpX);return pow(c,expo);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302)); // Linear {0.0-1.0} to PQ: rational function of x^0.159302, raised to 78.8438.
+   return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));}
+  AF2 AToPqF2(AF2 x){AF2 p=pow(x,AF2_(0.159302));
+   return pow((AF2_(0.835938)+AF2_(18.8516)*p)/(AF2_(1.0)+AF2_(18.6875)*p),AF2_(78.8438));} AF2 AToPqF1(AF2 x){return AToPqF2(x);} // Original F1-named 2-wide overload, kept for existing callers.
+  AF3 AToPqF3(AF3 x){AF3 p=pow(x,AF3_(0.159302));
+   return pow((AF3_(0.835938)+AF3_(18.8516)*p)/(AF3_(1.0)+AF3_(18.6875)*p),AF3_(78.8438));} AF3 AToPqF1(AF3 x){return AToPqF3(x);} // Original F1-named 3-wide overload, kept for existing callers.
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AToSrgbF1(AF1 c){AF3 seg=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 crv=AF2(1.055,-0.055);AF1 lin=c*seg.y;AF1 bent=pow(c,seg.z)*crv.x+crv.y;
+   return clamp(seg.x,lin,bent);} // clamp() of the breakpoint between the linear and power segments.
+  AF2 AToSrgbF2(AF2 c){AF3 seg=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 crv=AF2(1.055,-0.055);AF2 lin=c*seg.yy;AF2 bent=pow(c,seg.zz)*crv.xx+crv.yy;
+   return clamp(seg.xx,lin,bent);}
+  AF3 AToSrgbF3(AF3 c){AF3 seg=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 crv=AF2(1.055,-0.055);AF3 lin=c*seg.yyy;AF3 bent=pow(c,seg.zzz)*crv.xxx+crv.yyy;
+   return clamp(seg.xxx,lin,bent);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AToTwoF1(AF1 c){AF1 root=sqrt(c);return root;} // Linear to gamma 2.0 is a square root.
+  AF2 AToTwoF2(AF2 c){AF2 root=sqrt(c);return root;}
+  AF3 AToTwoF3(AF3 c){AF3 root=sqrt(c);return root;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AToThreeF1(AF1 c){AF1 third=AF1_(1.0/3.0);return pow(c,third);} // Linear to gamma 3.0 is a cube root via pow.
+  AF2 AToThreeF2(AF2 c){AF2 third=AF2_(1.0/3.0);return pow(c,third);}
+  AF3 AToThreeF3(AF3 c){AF3 third=AF3_(1.0/3.0);return pow(c,third);}
+ #endif
+//==============================================================================================================================
+ #if 1
+  // Unfortunately median won't work here.
+  AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); // j.x is the breakpoint in the ENCODED domain (0.018*4.5), because it is compared against encoded 'c'.
+   return AZolSelF1(AZolSignedF1(c-j.x  ),c*j.y  ,pow(c*k.x  +k.y  ,j.z  ));} // c<j.x selects the linear segment c/4.5, otherwise the power segment.
+  AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+   return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+  AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+   return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AFromGammaF1(AF1 c,AF1 x){AF1 expo=AF1_(x);return pow(c,expo);} // c raised to the caller-supplied gamma.
+  AF2 AFromGammaF2(AF2 c,AF1 x){AF2 expo=AF2_(x);return pow(c,expo);}
+  AF3 AFromGammaF3(AF3 c,AF1 x){AF3 expo=AF3_(x);return pow(c,expo);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833)); // PQ {0.0-1.0} to linear: rational function of x^0.0126833 (numerator saturated), raised to 6.27739.
+   return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));}
+  AF2 AFromPqF2(AF2 x){AF2 p=pow(x,AF2_(0.0126833));
+   return pow(ASatF2(p-AF2_(0.835938))/(AF2_(18.8516)-AF2_(18.6875)*p),AF2_(6.27739));} AF2 AFromPqF1(AF2 x){return AFromPqF2(x);} // Original F1-named 2-wide overload, kept for existing callers.
+  AF3 AFromPqF3(AF3 x){AF3 p=pow(x,AF3_(0.0126833));
+   return pow(ASatF3(p-AF3_(0.835938))/(AF3_(18.8516)-AF3_(18.6875)*p),AF3_(6.27739));} AF3 AFromPqF1(AF3 x){return AFromPqF3(x);} // Original F1-named 3-wide overload, kept for existing callers.
+//------------------------------------------------------------------------------------------------------------------------------
+  // Unfortunately median won't work here.
+  AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); // j.x is the breakpoint in the ENCODED domain, because it is compared against encoded 'c'.
+   return AZolSelF1(AZolSignedF1(c-j.x  ),c*j.y  ,pow(c*k.x  +k.y  ,j.z  ));} // c<j.x selects the linear segment c/12.92, otherwise the power segment.
+  AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+   return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+  AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+   return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AFromTwoF1(AF1 c){AF1 sq=c*c;return sq;} // Gamma 2.0 to linear is a square.
+  AF2 AFromTwoF2(AF2 c){AF2 sq=c*c;return sq;}
+  AF3 AFromTwoF3(AF3 c){AF3 sq=c*c;return sq;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 AFromThreeF1(AF1 c){AF1 sq=c*c;return sq*c;} // Gamma 3.0 to linear is a cube.
+  AF2 AFromThreeF2(AF2 c){AF2 sq=c*c;return sq*c;}
+  AF3 AFromThreeF3(AF3 c){AF3 sq=c*c;return sq*c;}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+  AH1 ATo709H1(AH1 c){AH3 seg=AH3(0.018*4.5,4.5,0.45);AH2 crv=AH2(1.099,-0.099);AH1 lin=c*seg.y;AH1 bent=pow(c,seg.z)*crv.x+crv.y;
+   return clamp(seg.x,lin,bent);} // clamp() of the breakpoint between the linear and power segments.
+  AH2 ATo709H2(AH2 c){AH3 seg=AH3(0.018*4.5,4.5,0.45);AH2 crv=AH2(1.099,-0.099);AH2 lin=c*seg.yy;AH2 bent=pow(c,seg.zz)*crv.xx+crv.yy;
+   return clamp(seg.xx,lin,bent);}
+  AH3 ATo709H3(AH3 c){AH3 seg=AH3(0.018*4.5,4.5,0.45);AH2 crv=AH2(1.099,-0.099);AH3 lin=c*seg.yyy;AH3 bent=pow(c,seg.zzz)*crv.xxx+crv.yyy;
+   return clamp(seg.xxx,lin,bent);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AToGammaH1(AH1 c,AH1 rcpX){AH1 expo=AH1_(rcpX);return pow(c,expo);} // c raised to the caller-supplied reciprocal gamma.
+  AH2 AToGammaH2(AH2 c,AH1 rcpX){AH2 expo=AH2_(rcpX);return pow(c,expo);}
+  AH3 AToGammaH3(AH3 c,AH1 rcpX){AH3 expo=AH3_(rcpX);return pow(c,expo);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AToSrgbH1(AH1 c){AH3 seg=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 crv=AH2(1.055,-0.055);AH1 lin=c*seg.y;AH1 bent=pow(c,seg.z)*crv.x+crv.y;
+   return clamp(seg.x,lin,bent);} // clamp() of the breakpoint between the linear and power segments.
+  AH2 AToSrgbH2(AH2 c){AH3 seg=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 crv=AH2(1.055,-0.055);AH2 lin=c*seg.yy;AH2 bent=pow(c,seg.zz)*crv.xx+crv.yy;
+   return clamp(seg.xx,lin,bent);}
+  AH3 AToSrgbH3(AH3 c){AH3 seg=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 crv=AH2(1.055,-0.055);AH3 lin=c*seg.yyy;AH3 bent=pow(c,seg.zzz)*crv.xxx+crv.yyy;
+   return clamp(seg.xxx,lin,bent);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AToTwoH1(AH1 c){AH1 r=sqrt(c);return r;} // Square root of c (per component for the vector forms).
+  AH2 AToTwoH2(AH2 c){AH2 r=sqrt(c);return r;}
+  AH3 AToTwoH3(AH3 c){AH3 r=sqrt(c);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AToThreeH1(AH1 c){return pow(c,AH1_(1.0/3.0));} AH1 AToThreeF1(AH1 c){return AToThreeH1(c);} // Cube root; H-suffixed name added.
+  AH2 AToThreeH2(AH2 c){return pow(c,AH2_(1.0/3.0));} AH2 AToThreeF2(AH2 c){return AToThreeH2(c);} // F-named half overloads are the
+  AH3 AToThreeH3(AH3 c){return pow(c,AH3_(1.0/3.0));} AH3 AToThreeF3(AH3 c){return AToThreeH3(c);} // original names, kept for callers.
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+  AH1 AFrom709H1(AH1 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); // Knee 0.081=0.018*4.5 (encoded domain).
+   return AZolSelH1(AZolSignedH1(c-j.x  ),c*j.y  ,pow(c*k.x  +k.y  ,j.z  ));}
+  AH2 AFrom709H2(AH2 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); // Was 0.081/4.5: a linear-domain knee vs encoded c.
+   return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+  AH3 AFrom709H3(AH3 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); // NOTE(review): assumes AZolSel picks c*j.y when c<j.x.
+   return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AFromGammaH1(AH1 c,AH1 x){AH1 e=AH1_(x);return pow(c,e);} // Raises c to the power x (x replicated per lane).
+  AH2 AFromGammaH2(AH2 c,AH1 x){AH2 e=AH2_(x);return pow(c,e);}
+  AH3 AFromGammaH3(AH3 c,AH1 x){AH3 e=AH3_(x);return pow(c,e);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AFromSrgbH1(AH1 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); // Knee 0.04045 is in the encoded domain of c.
+   return AZolSelH1(AZolSignedH1(c-j.x  ),c*j.y  ,pow(c*k.x  +k.y  ,j.z  ));} AH1 AHromSrgbF1(AH1 c){return AFromSrgbH1(c);}
+  AH2 AFromSrgbH2(AH2 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); // Was 0.04045/12.92: a linear-domain knee.
+   return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} AH2 AHromSrgbF2(AH2 c){return AFromSrgbH2(c);}
+  AH3 AFromSrgbH3(AH3 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); // AHromSrgbF*: old misspelled names, now wrappers.
+   return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} AH3 AHromSrgbF3(AH3 c){return AFromSrgbH3(c);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AFromTwoH1(AH1 c){AH1 sq=c*c;return sq;} // Squares c; AToTwoH* above takes the square root.
+  AH2 AFromTwoH2(AH2 c){AH2 sq=c*c;return sq;}
+  AH3 AFromTwoH3(AH3 c){AH3 sq=c*c;return sq;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AH1 AFromThreeH1(AH1 c){AH1 sq=c*c;return sq*c;} // Cubes c, evaluated as (c*c)*c.
+  AH2 AFromThreeH2(AH2 c){AH2 sq=c*c;return sq*c;}
+  AH3 AFromThreeH3(AH3 c){AH3 sq=c*c;return sq*c;}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          CS REMAP
+//==============================================================================================================================
+ // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear.
+ //  543210
+ //  ======
+ //  ..xxx.
+ //  yy...y
+ AU2 ARmp8x8(AU1 a){AU1 x=ABfe(a,1u,3u);AU1 y=ABfiM(ABfe(a,3u,3u),a,1u);return AU2(x,y);} // x and y follow the bit map drawn above.
+//==============================================================================================================================
+ // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions.
+ //  543210
+ //  ======
+ //  .xx..x
+ //  y..yy.
+ // Details,
+ //  LANE TO 8x8 MAPPING
+ //  ===================
+ //  00 01 08 09 10 11 18 19 
+ //  02 03 0a 0b 12 13 1a 1b
+ //  04 05 0c 0d 14 15 1c 1d
+ //  06 07 0e 0f 16 17 1e 1f 
+ //  20 21 28 29 30 31 38 39 
+ //  22 23 2a 2b 32 33 3a 3b
+ //  24 25 2c 2d 34 35 3c 3d
+ //  26 27 2e 2f 36 37 3e 3f 
+ AU2 ARmpRed8x8(AU1 a){AU1 x=ABfiM(ABfe(a,2u,3u),a,1u);AU1 y=ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u);return AU2(x,y);} // See lane table above.
+//==============================================================================================================================
+ #ifdef A_HALF
+  AW2 ARmp8x8H(AU1 a){AU1 x=ABfe(a,1u,3u);AU1 y=ABfiM(ABfe(a,3u,3u),a,1u);return AW2(x,y);} // Same bit ops as ARmp8x8, returned as AW2.
+  AW2 ARmpRed8x8H(AU1 a){AU1 x=ABfiM(ABfe(a,2u,3u),a,1u);AU1 y=ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u);return AW2(x,y);} // As ARmpRed8x8.
+ #endif
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                                          REFERENCE
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// IEEE FLOAT RULES
+// ================
+//  - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1
+//  - {+/-}0 * {+/-}INF = NaN
+//  - -INF + (+INF) = NaN
+//  - {+/-}0 / {+/-}0 = NaN
+//  - {+/-}INF / {+/-}INF = NaN
+//  - a<0 := sqrt(a) = NaN (a=-0.0 is not <0: sqrt(-0.0) = -0.0, no NaN)
+//  - 0 == -0
+//  - 4/0 = +INF
+//  - 4/-0 = -INF
+//  - 4+INF = +INF
+//  - 4-INF = -INF
+//  - 4*(+INF) = +INF
+//  - 4*(-INF) = -INF
+//  - -4*(+INF) = -INF
+//  - sqrt(+INF) = +INF
+//------------------------------------------------------------------------------------------------------------------------------
+// FP16 ENCODING
+// =============
+// fedcba9876543210
+// ----------------
+// ......mmmmmmmmmm  10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals)
+// .eeeee..........  5-bit exponent
+// .00000..........  denormals
+// .00001..........  -14 exponent
+// .11110..........   15 exponent
+// .111110000000000  infinity
+// .11111nnnnnnnnnn  NaN with n!=0
+// s...............  sign
+//------------------------------------------------------------------------------------------------------------------------------
+// FP16/INT16 ALIASING DENORMAL
+// ============================
+// 11-bit unsigned integers alias with half float denormal/normal values,
+//     1 = 2^(-24) = 1/16777216 ....................... first denormal value
+//     2 = 2^(-23)
+//   ...
+//  1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value
+//  1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers
+//  2047 .............................................. last normal value that still maps to integers 
+// Scaling limits,
+//  2^15 = 32768 ...................................... largest power of 2 scaling
+// Largest pow2 conversion mapping is at *32768,
+//     1 : 2^(-9) = 1/512
+//     2 : 1/256
+//     4 : 1/128
+//     8 : 1/64
+//    16 : 1/32
+//    32 : 1/16
+//    64 : 1/8
+//   128 : 1/4
+//   256 : 1/2
+//   512 : 1
+//  1024 : 2
+//  2047 : a little less than 4
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                     GPU/CPU PORTABILITY
+//
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This is the GPU implementation.
+// See the CPU implementation for docs.
+//==============================================================================================================================
+#ifdef A_GPU
+ #define A_TRUE true // Under A_GPU: A_TRUE/A_FALSE expand to the true/false literals and A_STATIC (below) expands to nothing.
+ #define A_FALSE false
+ #define A_STATIC
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                     VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ #define retAD2 AD2 // retXn: spelling used for a vector return type; under A_GPU it expands to the vector type Xn itself.
+ #define retAD3 AD3
+ #define retAD4 AD4
+ #define retAF2 AF2
+ #define retAF3 AF3
+ #define retAF4 AF4
+ #define retAL2 AL2
+ #define retAL3 AL3
+ #define retAL4 AL4
+ #define retAU2 AU2
+ #define retAU3 AU3
+ #define retAU4 AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define inAD2 in AD2 // inXn: spelling used for a vector input parameter; under A_GPU it expands to 'in Xn'.
+ #define inAD3 in AD3
+ #define inAD4 in AD4
+ #define inAF2 in AF2
+ #define inAF3 in AF3
+ #define inAF4 in AF4
+ #define inAL2 in AL2
+ #define inAL3 in AL3
+ #define inAL4 in AL4
+ #define inAU2 in AU2
+ #define inAU3 in AU3
+ #define inAU4 in AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define inoutAD2 inout AD2 // inoutXn: spelling used for a read-write vector parameter; under A_GPU it expands to 'inout Xn'.
+ #define inoutAD3 inout AD3
+ #define inoutAD4 inout AD4
+ #define inoutAF2 inout AF2
+ #define inoutAF3 inout AF3
+ #define inoutAF4 inout AF4
+ #define inoutAL2 inout AL2
+ #define inoutAL3 inout AL3
+ #define inoutAL4 inout AL4
+ #define inoutAU2 inout AU2
+ #define inoutAU3 inout AU3
+ #define inoutAU4 inout AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define outAD2 out AD2 // outXn: spelling used for a vector output parameter; under A_GPU it expands to 'out Xn'.
+ #define outAD3 out AD3
+ #define outAD4 out AD4
+ #define outAF2 out AF2
+ #define outAF3 out AF3
+ #define outAF4 out AF4
+ #define outAL2 out AL2
+ #define outAL3 out AL3
+ #define outAL4 out AL4
+ #define outAU2 out AU2
+ #define outAU3 out AU3
+ #define outAU4 out AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define varAD2(x) AD2 x // varXn(x): declares a vector variable named x; under A_GPU it expands to 'Xn x'.
+ #define varAD3(x) AD3 x
+ #define varAD4(x) AD4 x
+ #define varAF2(x) AF2 x
+ #define varAF3(x) AF3 x
+ #define varAF4(x) AF4 x
+ #define varAL2(x) AL2 x
+ #define varAL3(x) AL3 x
+ #define varAL4(x) AL4 x
+ #define varAU2(x) AU2 x
+ #define varAU3(x) AU3 x
+ #define varAU4(x) AU4 x
+//------------------------------------------------------------------------------------------------------------------------------
+ #define initAD2(x,y) AD2(x,y) // initXn(...): builds a vector from n components; under A_GPU it expands to the constructor call Xn(...).
+ #define initAD3(x,y,z) AD3(x,y,z)
+ #define initAD4(x,y,z,w) AD4(x,y,z,w)
+ #define initAF2(x,y) AF2(x,y)
+ #define initAF3(x,y,z) AF3(x,y,z)
+ #define initAF4(x,y,z,w) AF4(x,y,z,w)
+ #define initAL2(x,y) AL2(x,y)
+ #define initAL3(x,y,z) AL3(x,y,z)
+ #define initAL4(x,y,z,w) AL4(x,y,z,w)
+ #define initAU2(x,y) AU2(x,y)
+ #define initAU3(x,y,z) AU3(x,y,z)
+ #define initAU4(x,y,z,w) AU4(x,y,z,w)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     SCALAR RETURN OPS
+//==============================================================================================================================
+ #define AAbsD1(a) abs(AD1(a)) // Scalar wrappers: the argument is cast to the type named by the suffix (AD1 or AF1) before the call.
+ #define AAbsF1(a) abs(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ACosD1(a) cos(AD1(a))
+ #define ACosF1(a) cos(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ADotD2(a,b) dot(AD2(a),AD2(b)) // Dot products take vectors but yield a scalar; both operands are cast to the same type.
+ #define ADotD3(a,b) dot(AD3(a),AD3(b))
+ #define ADotD4(a,b) dot(AD4(a),AD4(b))
+ #define ADotF2(a,b) dot(AF2(a),AF2(b))
+ #define ADotF3(a,b) dot(AF3(a),AF3(b))
+ #define ADotF4(a,b) dot(AF4(a),AF4(b))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AExp2D1(a) exp2(AD1(a))
+ #define AExp2F1(a) exp2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AFloorD1(a) floor(AD1(a))
+ #define AFloorF1(a) floor(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ALog2D1(a) log2(AD1(a))
+ #define ALog2F1(a) log2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AMaxD1(a,b) max(a,b) // Unlike the wrappers above, the max/min macros pass their arguments through without a cast.
+ #define AMaxF1(a,b) max(a,b)
+ #define AMaxL1(a,b) max(a,b)
+ #define AMaxU1(a,b) max(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AMinD1(a,b) min(a,b)
+ #define AMinF1(a,b) min(a,b)
+ #define AMinL1(a,b) min(a,b)
+ #define AMinU1(a,b) min(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASinD1(a) sin(AD1(a))
+ #define ASinF1(a) sin(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASqrtD1(a) sqrt(AD1(a))
+ #define ASqrtF1(a) sqrt(AF1(a))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                               SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ #define APowD1(a,b) pow(AD1(a),AD1(b)) // Both operands cast to AD1; the exponent was cast to AF1, unlike every other D1 macro.
+ #define APowF1(a,b) pow(AF1(a),AF1(b)) // Both operands cast to AF1.
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ #ifdef A_DUBL
+  AD2 opAAbsD2(outAD2 d,inAD2 a){return d=abs(a);} // Stores abs(a) in d and returns the stored value.
+  AD3 opAAbsD3(outAD3 d,inAD3 a){return d=abs(a);}
+  AD4 opAAbsD4(outAD4 d,inAD4 a){return d=abs(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){return d=a+b;} // Stores a+b in d and returns the stored value.
+  AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){return d=a+b;}
+  AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){return d=a+b;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){return d=a+AD2_(b);} // Adds scalar b (widened with ADn_) to a; result stored in d.
+  AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){return d=a+AD3_(b);}
+  AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){return d=a+AD4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opACpyD2(outAD2 d,inAD2 a){return d=a;} // Copies a into d and returns the copy.
+  AD3 opACpyD3(outAD3 d,inAD3 a){return d=a;}
+  AD4 opACpyD4(outAD4 d,inAD4 a){return d=a;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){return d=ALerpD2(a,b,c);} // Stores ALerpDn(a,b,c) in d and returns it.
+  AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){return d=ALerpD3(a,b,c);}
+  AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){return d=ALerpD4(a,b,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){return d=ALerpD2(a,b,AD2_(c));} // As opALerpDn, with scalar c widened by ADn_.
+  AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){return d=ALerpD3(a,b,AD3_(c));}
+  AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){return d=ALerpD4(a,b,AD4_(c));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){return d=max(a,b);} // Stores max(a,b) in d and returns the stored value.
+  AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){return d=max(a,b);}
+  AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){return d=max(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){return d=min(a,b);} // Stores min(a,b) in d and returns the stored value.
+  AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){return d=min(a,b);}
+  AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){return d=min(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){return d=a*b;} // Stores the product a*b in d and returns the stored value.
+  AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){return d=a*b;}
+  AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){return d=a*b;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){return d=a*AD2_(b);} // Multiplies a by scalar b (widened with ADn_); result stored in d.
+  AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){return d=a*AD3_(b);}
+  AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){return d=a*AD4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opANegD2(outAD2 d,inAD2 a){return d=-a;} // Stores -a in d and returns the stored value.
+  AD3 opANegD3(outAD3 d,inAD3 a){return d=-a;}
+  AD4 opANegD4(outAD4 d,inAD4 a){return d=-a;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opARcpD2(outAD2 d,inAD2 a){return d=ARcpD2(a);} // Stores ARcpDn(a) in d and returns the stored value.
+  AD3 opARcpD3(outAD3 d,inAD3 a){return d=ARcpD3(a);}
+  AD4 opARcpD4(outAD4 d,inAD4 a){return d=ARcpD4(a);}
+ #endif
+//==============================================================================================================================
+ AF2 opAAbsF2(outAF2 d,inAF2 a){return d=abs(a);} // Stores abs(a) in d and returns the stored value.
+ AF3 opAAbsF3(outAF3 d,inAF3 a){return d=abs(a);}
+ AF4 opAAbsF4(outAF4 d,inAF4 a){return d=abs(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){return d=a+b;} // Stores a+b in d and returns the stored value.
+ AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){return d=a+b;}
+ AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){return d=a+b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){return d=a+AF2_(b);} // Adds scalar b (widened with AFn_) to a; result stored in d.
+ AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){return d=a+AF3_(b);}
+ AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){return d=a+AF4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opACpyF2(outAF2 d,inAF2 a){return d=a;} // Copies a into d and returns the copy.
+ AF3 opACpyF3(outAF3 d,inAF3 a){return d=a;}
+ AF4 opACpyF4(outAF4 d,inAF4 a){return d=a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){return d=ALerpF2(a,b,c);} // Stores ALerpFn(a,b,c) in d and returns it.
+ AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){return d=ALerpF3(a,b,c);}
+ AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){return d=ALerpF4(a,b,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){return d=ALerpF2(a,b,AF2_(c));} // As opALerpFn, with scalar c widened by AFn_.
+ AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){return d=ALerpF3(a,b,AF3_(c));}
+ AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){return d=ALerpF4(a,b,AF4_(c));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){return d=max(a,b);} // Stores max(a,b) in d and returns the stored value.
+ AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){return d=max(a,b);}
+ AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){return d=max(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){return d=min(a,b);} // Stores min(a,b) in d and returns the stored value.
+ AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){return d=min(a,b);}
+ AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){return d=min(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){return d=a*b;} // Stores the product a*b in d and returns the stored value.
+ AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){return d=a*b;}
+ AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){return d=a*b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){return d=a*AF2_(b);} // Multiplies a by scalar b (widened with AFn_); result stored in d.
+ AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){return d=a*AF3_(b);}
+ AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){return d=a*AF4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opANegF2(outAF2 d,inAF2 a){return d=-a;} // Stores -a in d and returns the stored value.
+ AF3 opANegF3(outAF3 d,inAF3 a){return d=-a;}
+ AF4 opANegF4(outAF4 d,inAF4 a){return d=-a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opARcpF2(outAF2 d,inAF2 a){return d=ARcpF2(a);} // Stores ARcpFn(a) in d and returns the stored value.
+ AF3 opARcpF3(outAF3 d,inAF3 a){return d=ARcpF3(a);}
+ AF4 opARcpF4(outAF4 d,inAF4 a){return d=ARcpF4(a);}
+#endif
diff --git a/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h b/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h
new file mode 100755
index 000000000..15ecfde5c
--- /dev/null
+++ b/externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h
@@ -0,0 +1,1199 @@
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                    AMD FidelityFX SUPER RESOLUTION [FSR 1] ::: SPATIAL SCALING & EXTRAS - v1.20210629
+//
+//
+//------------------------------------------------------------------------------------------------------------------------------
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//------------------------------------------------------------------------------------------------------------------------------
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//------------------------------------------------------------------------------------------------------------------------------
+// ABOUT
+// =====
+// FSR is a collection of algorithms relating to generating a higher resolution image.
+// This specific header focuses on single-image non-temporal image scaling, and related tools.
+// 
+// The core functions are EASU and RCAS:
+//  [EASU] Edge Adaptive Spatial Upsampling ....... 1x to 4x area range spatial scaling, clamped adaptive elliptical filter.
+//  [RCAS] Robust Contrast Adaptive Sharpening .... A non-scaling variation on CAS.
+// RCAS needs to be applied after EASU as a separate pass.
+// 
+// Optional utility functions are:
+//  [LFGA] Linear Film Grain Applicator ........... Tool to apply film grain after scaling.
+//  [SRTM] Simple Reversible Tone-Mapper .......... Linear HDR {0 to FP16_MAX} to {0 to 1} and back.
+//  [TEPD] Temporal Energy Preserving Dither ...... Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// See each individual sub-section for inline documentation.
+//------------------------------------------------------------------------------------------------------------------------------
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//------------------------------------------------------------------------------------------------------------------------------
+// FUNCTION PERMUTATIONS
+// =====================
+// *F() ..... Single item computation with 32-bit.
+// *H() ..... Single item computation with 16-bit, with packing (aka two 16-bit ops in parallel) when possible.
+// *Hx2() ... Processing two items in parallel with 16-bit, easier packing.
+//            Not all interfaces in this file have a *Hx2() form.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                        FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// EASU provides a high quality spatial-only scaling at relatively low cost.
+// Meaning EASU is appropriate for laptops and other low-end GPUs.
+// Quality from 1x to 4x area scaling is good.
+//------------------------------------------------------------------------------------------------------------------------------
+// The scaler uses a modified fast approximation to the standard lanczos(size=2) kernel.
+// EASU runs in a single pass, so it applies a directionally and anisotropically adaptive radial lanczos.
+// This is also kept as simple as possible to have minimum runtime.
+//------------------------------------------------------------------------------------------------------------------------------
+// The lanczos filter has negative lobes, so by itself it will introduce ringing.
+// To remove all ringing, the algorithm uses the nearest 2x2 input texels as a neighborhood,
+// and limits output to the minimum and maximum of that neighborhood.
+//------------------------------------------------------------------------------------------------------------------------------
+// Input image requirements:
+// 
+// Color needs to be encoded as 3 channel [red, green, blue] (e.g. XYZ not supported)
+// Each channel needs to be in the range [0, 1]
+// Any color primaries are supported
+// Display / tonemapping curve needs to be as if presenting to sRGB display or similar (e.g. Gamma 2.0)
+// There should be no banding in the input
+// There should be no high amplitude noise in the input
+// There should be no noise in the input that is not at input pixel granularity
+// For performance purposes, use 32bpp formats
+//------------------------------------------------------------------------------------------------------------------------------
+// Best to apply EASU at the end of the frame after tonemapping 
+// but before film grain or composite of the UI.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example of including this header for D3D HLSL :
+// 
+//  #define A_GPU 1
+//  #define A_HLSL 1
+//  #define A_HALF 1
+//  #include "ffx_a.h"
+//  #define FSR_EASU_H 1
+//  #define FSR_RCAS_H 1
+//  //declare input callbacks
+//  #include "ffx_fsr1.h"
+// 
+// Example of including this header for Vulkan GLSL :
+// 
+//  #define A_GPU 1
+//  #define A_GLSL 1
+//  #define A_HALF 1
+//  #include "ffx_a.h"
+//  #define FSR_EASU_H 1
+//  #define FSR_RCAS_H 1
+//  //declare input callbacks
+//  #include "ffx_fsr1.h"
+// 
+// Example of including this header for Vulkan HLSL :
+// 
+//  #define A_GPU 1
+//  #define A_HLSL 1
+//  #define A_HLSL_6_2 1
+//  #define A_NO_16_BIT_CAST 1
+//  #define A_HALF 1
+//  #include "ffx_a.h"
+//  #define FSR_EASU_H 1
+//  #define FSR_RCAS_H 1
+//  //declare input callbacks
+//  #include "ffx_fsr1.h"
+// 
+//  Example of declaring the required input callbacks for GLSL :
+//  The callbacks need to gather4 for each color channel using the specified texture coordinate 'p'.
+//  EASU uses gather4 to reduce position computation logic and for free Arrays of Structures to Structures of Arrays conversion.
+// 
+//  AH4 FsrEasuRH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,0));}
+//  AH4 FsrEasuGH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,1));}
+//  AH4 FsrEasuBH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,2));}
+//  ...
+//  The FsrEasuCon function needs to be called from the CPU or GPU to set up constants.
+//  The difference in viewport and input image size is there to support Dynamic Resolution Scaling.
+//  To use FsrEasuCon() on the CPU, define A_CPU before including ffx_a and ffx_fsr1.
+//  Including a GPU example here, the 'con0' through 'con3' values would be stored out to a constant buffer.
+//  AU4 con0,con1,con2,con3;
+//  FsrEasuCon(con0,con1,con2,con3,
+//    1920.0,1080.0,  // Viewport size (top left aligned) in the input image which is to be scaled.
+//    3840.0,2160.0,  // The size of the input image.
+//    2560.0,1440.0); // The output resolution.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+A_STATIC void FsrEasuCon(
+outAU4 con0, // Receives {scale.x, scale.y, offset.x, offset.y}: output pixel index -> viewport pixel position.
+outAU4 con1, // Receives {1/inputSize.x, 1/inputSize.y} and the offset from 'F' to gather4 center (0).
+outAU4 con2, // Receives the offsets from (0) to gather4 centers (1) and (2).
+outAU4 con3, // Receives the offset from (0) to gather4 center (3); components [2] and [3] are set to 0.
+// This is the rendered image resolution being upscaled
+AF1 inputViewportInPixelsX,
+AF1 inputViewportInPixelsY,
+// This is the resolution of the resource containing the input image (useful for dynamic resolution)
+AF1 inputSizeInPixelsX,
+AF1 inputSizeInPixelsY,
+// This is the display resolution which the input image gets upscaled to
+AF1 outputSizeInPixelsX,
+AF1 outputSizeInPixelsY){
+ // Output integer position to a pixel position in viewport.
+ con0[0]=AU1_AF1(inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)); // Viewport/output ratio in x.
+ con0[1]=AU1_AF1(inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)); // Viewport/output ratio in y.
+ con0[2]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5)); // ip*ratio+this == (ip+0.5)*ratio-0.5.
+ con0[3]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5)); // Same offset term for y.
+ // Viewport pixel position to normalized image space.
+ // This is used to get upper-left of 'F' tap.
+ con1[0]=AU1_AF1(ARcpF1(inputSizeInPixelsX));
+ con1[1]=AU1_AF1(ARcpF1(inputSizeInPixelsY));
+ // Centers of gather4, first offset from upper-left of 'F'.
+ //      +---+---+
+ //      |   |   |
+ //      +--(0)--+
+ //      | b | c |
+ //  +---F---+---+---+
+ //  | e | f | g | h |
+ //  +--(1)--+--(2)--+
+ //  | i | j | k | l |
+ //  +---+---+---+---+
+ //      | n | o |
+ //      +--(3)--+
+ //      |   |   |
+ //      +---+---+
+ con1[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); // 'F' -> (0): +1 input texel in x.
+ con1[3]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsY)); // 'F' -> (0): -1 input texel in y.
+ // These are from (0) instead of 'F'.
+ con2[0]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsX)); // (0) -> (1): -1 texel in x.
+ con2[1]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); // (0) -> (1): +2 texels in y.
+ con2[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); // (0) -> (2): +1 texel in x.
+ con2[3]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); // (0) -> (2): +2 texels in y.
+ con3[0]=AU1_AF1(AF1_( 0.0)*ARcpF1(inputSizeInPixelsX)); // (0) -> (3): no shift in x.
+ con3[1]=AU1_AF1(AF1_( 4.0)*ARcpF1(inputSizeInPixelsY)); // (0) -> (3): +4 texels in y.
+ con3[2]=con3[3]=0;} // Remaining two components are zero-filled.
+
+// Variant of FsrEasuCon() for when the input image sits at an offset inside the input image resource.
+A_STATIC void FsrEasuConOffset(
+    outAU4 con0, // Same contents as produced by FsrEasuCon(), except [2] and [3] also include the input offset.
+    outAU4 con1, // Filled by FsrEasuCon(), not modified afterwards.
+    outAU4 con2, // Filled by FsrEasuCon(), not modified afterwards.
+    outAU4 con3, // Filled by FsrEasuCon(), not modified afterwards.
+    // This is the rendered image resolution being upscaled
+    AF1 inputViewportInPixelsX,
+    AF1 inputViewportInPixelsY,
+    // This is the resolution of the resource containing the input image (useful for dynamic resolution)
+    AF1 inputSizeInPixelsX,
+    AF1 inputSizeInPixelsY,
+    // This is the display resolution which the input image gets upscaled to
+    AF1 outputSizeInPixelsX,
+    AF1 outputSizeInPixelsY,
+    // This is the input image offset into the resource containing it (useful for dynamic resolution)
+    AF1 inputOffsetInPixelsX,
+    AF1 inputOffsetInPixelsY) {
+    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); // Offset-free constants first.
+    con0[2] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsX * ARcpF1(outputSizeInPixelsX) - AF1_(0.5) + inputOffsetInPixelsX); // Overwrite: same term as FsrEasuCon() plus the x offset in pixels.
+    con0[3] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsY * ARcpF1(outputSizeInPixelsY) - AF1_(0.5) + inputOffsetInPixelsY); // Overwrite: same term as FsrEasuCon() plus the y offset in pixels.
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                   NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(FSR_EASU_F)
+ // Input callback prototypes, need to be implemented by calling shader
+ AF4 FsrEasuRF(AF2 p); // Red channel callback: FsrEasuF() calls it once per gather position and reads 4 texels from the result.
+ AF4 FsrEasuGF(AF2 p); // Green channel callback, called with the same positions.
+ AF4 FsrEasuBF(AF2 p); // Blue channel callback, called with the same positions.
+//------------------------------------------------------------------------------------------------------------------------------
+ // Filtering for a given tap for the scaler.
+ void FsrEasuTapF(
+ inout AF3 aC, // Accumulated color, with negative lobe (this tap adds c*w).
+ inout AF1 aW, // Accumulated weight (this tap adds w).
+ AF2 off, // Pixel offset from resolve position to tap.
+ AF2 dir, // Gradient direction, used as the rotation applied to 'off'.
+ AF2 len, // Per-axis scale applied to the rotated offset (anisotropy).
+ AF1 lob, // Negative lobe strength, multiplies distance^2 in the window term.
+ AF1 clp, // Clipping point, the upper bound applied to distance^2.
+ AF3 c){ // Tap color.
+  // Rotate offset by direction.
+  AF2 v;
+  v.x=(off.x*( dir.x))+(off.y*dir.y);
+  v.y=(off.x*(-dir.y))+(off.y*dir.x);
+  // Anisotropy.
+  v*=len;
+  // Compute distance^2.
+  AF1 d2=v.x*v.x+v.y*v.y;
+  // Limit to the window as at corner, 2 taps can easily be outside.
+  d2=min(d2,clp);
+  // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+  //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+  //  |_______________________________________|   |_______________|
+  //                   base                             window
+  // The general form of the 'base' is,
+  //  (a*(b*x^2-1)^2-(a-1))
+  // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+  AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0); // Inner 'base' term, 2/5*x^2-1 with x^2 == d2.
+  AF1 wA=lob*d2+AF1_(-1.0); // Inner 'window' term, with 'lob' taking the place of the 1/4 in the formula above.
+  wB*=wB;
+  wA*=wA;
+  wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0)); // Finish the 'base' term.
+  AF1 w=wB*wA; // Tap weight = base * window.
+  // Do weighted average.
+  aC+=c*w;aW+=w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Accumulate direction and length.
+ void FsrEasuSetF(
+ inout AF2 dir, // Accumulated weighted direction (x from dirX, y from dirY).
+ inout AF1 len, // Accumulated weighted length term (x and y contributions both added).
+ AF2 pp, // Fractional position used to build the bilinear weight.
+ AP1 biS,AP1 biT,AP1 biU,AP1 biV, // Select which bilinear weight (s, t, u or v) applies to this call.
+ AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){ // Lumas in the '+' pattern drawn below, 'c' being the center.
+  // Compute bilinear weight, branches factor out as predicates are compile-time immediates.
+  //  s t
+  //  u v
+  AF1 w = AF1_(0.0);
+  if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y);
+  if(biT)w=           pp.x *(AF1_(1.0)-pp.y);
+  if(biU)w=(AF1_(1.0)-pp.x)*           pp.y ;
+  if(biV)w=           pp.x *           pp.y ;
+  // Direction is the '+' diff.
+  //    a
+  //  b c d
+  //    e
+  // Then takes magnitude from abs average of both sides of 'c'.
+  // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+  AF1 dc=lD-lC;
+  AF1 cb=lC-lB;
+  AF1 lenX=max(abs(dc),abs(cb)); // Larger of the two one-sided horizontal differences.
+  lenX=APrxLoRcpF1(lenX); // Reciprocal via APrxLoRcpF1() (presumably a low-precision approximation, defined in ffx_a.h).
+  AF1 dirX=lD-lB; // Horizontal difference across 'c'.
+  dir.x+=dirX*w;
+  lenX=ASatF1(abs(dirX)*lenX); // |dirX| / max one-sided difference, passed through ASatF1().
+  lenX*=lenX;
+  len+=lenX*w;
+  // Repeat for the y axis.
+  AF1 ec=lE-lC;
+  AF1 ca=lC-lA;
+  AF1 lenY=max(abs(ec),abs(ca));
+  lenY=APrxLoRcpF1(lenY);
+  AF1 dirY=lE-lA; // Vertical difference across 'c'.
+  dir.y+=dirY*w;
+  lenY=ASatF1(abs(dirY)*lenY);
+  lenY*=lenY;
+  len+=lenY*w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrEasuF(
+ out AF3 pix, // Resulting color for output pixel 'ip'.
+ AU2 ip, // Integer pixel position in output.
+ AU4 con0, // Constants generated by FsrEasuCon().
+ AU4 con1, // .xy scales 'fp' to the gather coordinate space, .zw is the offset to the first gather.
+ AU4 con2, // .xy and .zw are the offsets from the first gather to the second and third.
+ AU4 con3){ // .xy is the offset from the first gather to the fourth (.zw is not read here).
+//------------------------------------------------------------------------------------------------------------------------------
+  // Get position of 'f'.
+  AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
+  AF2 fp=floor(pp); // Integer part, used to position the gathers.
+  pp-=fp; // 'pp' now holds only the fractional part.
+//------------------------------------------------------------------------------------------------------------------------------
+  // 12-tap kernel.
+  //    b c
+  //  e f g h
+  //  i j k l
+  //    n o
+  // Gather 4 ordering.
+  //  a b
+  //  r g
+  // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+  //    a b    <- unused (z)
+  //    r g
+  //  a b a b
+  //  r g r g
+  //    a b
+  //    r g    <- unused (z)
+  // Allowing dead-code removal to remove the 'z's.
+  AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
+  // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+  AF2 p1=p0+AF2_AU2(con2.xy);
+  AF2 p2=p0+AF2_AU2(con2.zw);
+  AF2 p3=p0+AF2_AU2(con3.xy);
+  AF4 bczzR=FsrEasuRF(p0); // Variable names spell the tap held in .x.y.z.w ('z' = unused).
+  AF4 bczzG=FsrEasuGF(p0);
+  AF4 bczzB=FsrEasuBF(p0);
+  AF4 ijfeR=FsrEasuRF(p1);
+  AF4 ijfeG=FsrEasuGF(p1);
+  AF4 ijfeB=FsrEasuBF(p1);
+  AF4 klhgR=FsrEasuRF(p2);
+  AF4 klhgG=FsrEasuGF(p2);
+  AF4 klhgB=FsrEasuBF(p2);
+  AF4 zzonR=FsrEasuRF(p3);
+  AF4 zzonG=FsrEasuGF(p3);
+  AF4 zzonB=FsrEasuBF(p3);
+//------------------------------------------------------------------------------------------------------------------------------
+  // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+  AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG);
+  AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG);
+  AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG);
+  AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG);
+  // Rename.
+  AF1 bL=bczzL.x;
+  AF1 cL=bczzL.y;
+  AF1 iL=ijfeL.x;
+  AF1 jL=ijfeL.y;
+  AF1 fL=ijfeL.z;
+  AF1 eL=ijfeL.w;
+  AF1 kL=klhgL.x;
+  AF1 lL=klhgL.y;
+  AF1 hL=klhgL.z;
+  AF1 gL=klhgL.w;
+  AF1 oL=zzonL.z;
+  AF1 nL=zzonL.w;
+  // Accumulate for bilinear interpolation.
+  AF2 dir=AF2_(0.0);
+  AF1 len=AF1_(0.0);
+  FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL); // '+' centered on f, weight s.
+  FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL); // '+' centered on g, weight t.
+  FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL); // '+' centered on j, weight u.
+  FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL); // '+' centered on k, weight v.
+//------------------------------------------------------------------------------------------------------------------------------
+  // Normalize with approximation, and cleanup close to zero.
+  AF2 dir2=dir*dir;
+  AF1 dirR=dir2.x+dir2.y;
+  AP1 zro=dirR<AF1_(1.0/32768.0); // True when the squared direction length is below 1/32768.
+  dirR=APrxLoRsqF1(dirR);
+  dirR=zro?AF1_(1.0):dirR; // In the near-zero case use a scale of 1 ...
+  dir.x=zro?AF1_(1.0):dir.x; // ... and replace dir.x with 1 (dir.y is left as accumulated).
+  dir*=AF2_(dirR);
+  // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+  len=len*AF1_(0.5);
+  len*=len;
+  // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+  AF1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpF1(max(abs(dir.x),abs(dir.y)));
+  // Anisotropic length after rotation,
+  //  x := 1.0 lerp to 'stretch' on edges
+  //  y := 1.0 lerp to 2x on edges
+  AF2 len2=AF2(AF1_(1.0)+(stretch-AF1_(1.0))*len,AF1_(1.0)+AF1_(-0.5)*len); // The y scale factor itself goes 1.0 -> 0.5 as len goes 0 -> 1.
+  // Based on the amount of 'edge',
+  // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+  AF1 lob=AF1_(0.5)+AF1_((1.0/4.0-0.04)-0.5)*len; // 0.5 at len==0, 0.21 at len==1.
+  // Set distance^2 clipping point to the end of the adjustable window.
+  AF1 clp=APrxLoRcpF1(lob);
+//------------------------------------------------------------------------------------------------------------------------------
+  // Accumulation mixed with min/max of 4 nearest.
+  //    b c
+  //  e f g h
+  //  i j k l
+  //    n o
+  AF3 min4=min(AMin3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
+               AF3(klhgR.x,klhgG.x,klhgB.x)); // Per-channel minimum of taps f, g, j, k.
+  AF3 max4=max(AMax3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
+               AF3(klhgR.x,klhgG.x,klhgB.x)); // Per-channel maximum of taps f, g, j, k.
+  // Accumulation.
+  AF3 aC=AF3_(0.0);
+  AF1 aW=AF1_(0.0);
+  FsrEasuTapF(aC,aW,AF2( 0.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.x,bczzG.x,bczzB.x)); // b
+  FsrEasuTapF(aC,aW,AF2( 1.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.y,bczzG.y,bczzB.y)); // c
+  FsrEasuTapF(aC,aW,AF2(-1.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.x,ijfeG.x,ijfeB.x)); // i
+  FsrEasuTapF(aC,aW,AF2( 0.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.y,ijfeG.y,ijfeB.y)); // j
+  FsrEasuTapF(aC,aW,AF2( 0.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.z,ijfeG.z,ijfeB.z)); // f
+  FsrEasuTapF(aC,aW,AF2(-1.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.w,ijfeG.w,ijfeB.w)); // e
+  FsrEasuTapF(aC,aW,AF2( 1.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.x,klhgG.x,klhgB.x)); // k
+  FsrEasuTapF(aC,aW,AF2( 2.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.y,klhgG.y,klhgB.y)); // l
+  FsrEasuTapF(aC,aW,AF2( 2.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.z,klhgG.z,klhgB.z)); // h
+  FsrEasuTapF(aC,aW,AF2( 1.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.w,klhgG.w,klhgB.w)); // g
+  FsrEasuTapF(aC,aW,AF2( 1.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.z,zzonG.z,zzonB.z)); // o
+  FsrEasuTapF(aC,aW,AF2( 0.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.w,zzonG.w,zzonB.w)); // n
+//------------------------------------------------------------------------------------------------------------------------------
+  // Normalize and dering.
+  pix=min(max4,max(min4,aC*AF3_(ARcpF1(aW))));} // Weighted average aC/aW, clamped to [min4, max4].
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_EASU_H)
+// Input callback prototypes, need to be implemented by calling shader
+ AH4 FsrEasuRH(AF2 p); // Red channel callback: FsrEasuH() calls it once per gather position and reads 4 texels from the result.
+ AH4 FsrEasuGH(AF2 p); // Green channel callback, called with the same positions.
+ AH4 FsrEasuBH(AF2 p); // Blue channel callback, called with the same positions.
+//------------------------------------------------------------------------------------------------------------------------------
+ // This runs 2 taps in parallel.
+ void FsrEasuTapH(
+ inout AH2 aCR,inout AH2 aCG,inout AH2 aCB, // Accumulated red/green/blue, one tap per component.
+ inout AH2 aW, // Accumulated weight, one tap per component.
+ AH2 offX,AH2 offY, // X offsets and Y offsets of the two taps.
+ AH2 dir, // Direction used to rotate the offsets (shared by both taps).
+ AH2 len, // Per-axis scale applied after rotation: .x scales vX, .y scales vY.
+ AH1 lob, // Negative lobe strength, multiplies distance^2 in the window term.
+ AH1 clp, // Upper bound applied to distance^2.
+ AH2 cR,AH2 cG,AH2 cB){ // Red/green/blue of the two taps.
+  AH2 vX,vY;
+  vX=offX*  dir.xx +offY*dir.yy; // Rotate both offsets by 'dir' (x result).
+  vY=offX*(-dir.yy)+offY*dir.xx; // Rotate both offsets by 'dir' (y result).
+  vX*=len.x;vY*=len.y;
+  AH2 d2=vX*vX+vY*vY; // Distance^2 for each of the two taps.
+  d2=min(d2,AH2_(clp));
+  AH2 wB=AH2_(2.0/5.0)*d2+AH2_(-1.0); // Inner 'base' term, same constants as FsrEasuTapF().
+  AH2 wA=AH2_(lob)*d2+AH2_(-1.0); // Inner 'window' term.
+  wB*=wB;
+  wA*=wA;
+  wB=AH2_(25.0/16.0)*wB+AH2_(-(25.0/16.0-1.0));
+  AH2 w=wB*wA; // Weights of the two taps.
+  aCR+=cR*w;aCG+=cG*w;aCB+=cB*w;aW+=w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // This runs 2 taps in parallel.
+ void FsrEasuSetH(
+ inout AH2 dirPX,inout AH2 dirPY, // Accumulated weighted x and y direction, one lane per '+' pattern.
+ inout AH2 lenP, // Accumulated weighted length term, one lane per '+' pattern.
+ AH2 pp, // Fractional position used to build the bilinear weights.
+ AP1 biST,AP1 biUV, // biST: weights use (1-pp.y); biUV: weights use pp.y.
+ AH2 lA,AH2 lB,AH2 lC,AH2 lD,AH2 lE){ // Lumas of two '+' patterns (a above, b left, c center, d right, e below).
+  AH2 w = AH2_(0.0);
+  if(biST)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_(AH1_(1.0)-pp.y); // {1-pp.x, pp.x} * (1-pp.y).
+  if(biUV)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_(          pp.y); // {1-pp.x, pp.x} * pp.y.
+  // ABS is not free in the packed FP16 path.
+  AH2 dc=lD-lC;
+  AH2 cb=lC-lB;
+  AH2 lenX=max(abs(dc),abs(cb)); // Larger of the two one-sided horizontal differences.
+  lenX=ARcpH2(lenX); // Note: ARcpH2() here, where FsrEasuSetF() uses APrxLoRcpF1().
+  AH2 dirX=lD-lB; // Horizontal difference across the center.
+  dirPX+=dirX*w;
+  lenX=ASatH2(abs(dirX)*lenX);
+  lenX*=lenX;
+  lenP+=lenX*w;
+  AH2 ec=lE-lC;
+  AH2 ca=lC-lA;
+  AH2 lenY=max(abs(ec),abs(ca)); // Larger of the two one-sided vertical differences.
+  lenY=ARcpH2(lenY);
+  AH2 dirY=lE-lA; // Vertical difference across the center.
+  dirPY+=dirY*w;
+  lenY=ASatH2(abs(dirY)*lenY);
+  lenY*=lenY;
+  lenP+=lenY*w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrEasuH(
+ out AH3 pix, // Resulting color for output pixel 'ip'.
+ AU2 ip, // Integer pixel position in output.
+ AU4 con0, // .xy scales 'ip', .zw is added to it (same use as in FsrEasuF()).
+ AU4 con1, // .xy scales 'fp' to the gather coordinate space, .zw is the offset to the first gather.
+ AU4 con2, // .xy and .zw are the offsets from the first gather to the second and third.
+ AU4 con3){ // .xy is the offset from the first gather to the fourth (.zw is not read here).
+//------------------------------------------------------------------------------------------------------------------------------
+  AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
+  AF2 fp=floor(pp); // Integer part, used to position the gathers.
+  pp-=fp; // 'pp' now holds only the fractional part.
+  AH2 ppp=AH2(pp); // AH2 copy of the fractional part for the 16-bit math below.
+//------------------------------------------------------------------------------------------------------------------------------
+  AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
+  AF2 p1=p0+AF2_AU2(con2.xy);
+  AF2 p2=p0+AF2_AU2(con2.zw);
+  AF2 p3=p0+AF2_AU2(con3.xy);
+  AH4 bczzR=FsrEasuRH(p0); // Variable names spell the tap held in .x.y.z.w ('z' = unused).
+  AH4 bczzG=FsrEasuGH(p0);
+  AH4 bczzB=FsrEasuBH(p0);
+  AH4 ijfeR=FsrEasuRH(p1);
+  AH4 ijfeG=FsrEasuGH(p1);
+  AH4 ijfeB=FsrEasuBH(p1);
+  AH4 klhgR=FsrEasuRH(p2);
+  AH4 klhgG=FsrEasuGH(p2);
+  AH4 klhgB=FsrEasuBH(p2);
+  AH4 zzonR=FsrEasuRH(p3);
+  AH4 zzonG=FsrEasuGH(p3);
+  AH4 zzonB=FsrEasuBH(p3);
+//------------------------------------------------------------------------------------------------------------------------------
+  AH4 bczzL=bczzB*AH4_(0.5)+(bczzR*AH4_(0.5)+bczzG); // Approximate luma times 2: 0.5*B+0.5*R+G.
+  AH4 ijfeL=ijfeB*AH4_(0.5)+(ijfeR*AH4_(0.5)+ijfeG);
+  AH4 klhgL=klhgB*AH4_(0.5)+(klhgR*AH4_(0.5)+klhgG);
+  AH4 zzonL=zzonB*AH4_(0.5)+(zzonR*AH4_(0.5)+zzonG);
+  AH1 bL=bczzL.x;
+  AH1 cL=bczzL.y;
+  AH1 iL=ijfeL.x;
+  AH1 jL=ijfeL.y;
+  AH1 fL=ijfeL.z;
+  AH1 eL=ijfeL.w;
+  AH1 kL=klhgL.x;
+  AH1 lL=klhgL.y;
+  AH1 hL=klhgL.z;
+  AH1 gL=klhgL.w;
+  AH1 oL=zzonL.z;
+  AH1 nL=zzonL.w;
+  // This part is different, accumulating 2 taps in parallel.
+  AH2 dirPX=AH2_(0.0);
+  AH2 dirPY=AH2_(0.0);
+  AH2 lenP=AH2_(0.0);
+  FsrEasuSetH(dirPX,dirPY,lenP,ppp,true, false,AH2(bL,cL),AH2(eL,fL),AH2(fL,gL),AH2(gL,hL),AH2(jL,kL)); // '+' patterns centered on f and g.
+  FsrEasuSetH(dirPX,dirPY,lenP,ppp,false,true ,AH2(fL,gL),AH2(iL,jL),AH2(jL,kL),AH2(kL,lL),AH2(nL,oL)); // '+' patterns centered on j and k.
+  AH2 dir=AH2(dirPX.r+dirPX.g,dirPY.r+dirPY.g); // Sum the two lanes into a single direction.
+  AH1 len=lenP.r+lenP.g; // Sum the two lanes into a single length.
+//------------------------------------------------------------------------------------------------------------------------------
+  AH2 dir2=dir*dir;
+  AH1 dirR=dir2.x+dir2.y;
+  AP1 zro=dirR<AH1_(1.0/32768.0); // True when the squared direction length is below 1/32768.
+  dirR=APrxLoRsqH1(dirR);
+  dirR=zro?AH1_(1.0):dirR; // In the near-zero case use a scale of 1 ...
+  dir.x=zro?AH1_(1.0):dir.x; // ... and replace dir.x with 1 (dir.y is left as accumulated).
+  dir*=AH2_(dirR);
+  len=len*AH1_(0.5);
+  len*=len;
+  AH1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpH1(max(abs(dir.x),abs(dir.y)));
+  AH2 len2=AH2(AH1_(1.0)+(stretch-AH1_(1.0))*len,AH1_(1.0)+AH1_(-0.5)*len); // x: 1.0 -> stretch, y: 1.0 -> 0.5 as len goes 0 -> 1.
+  AH1 lob=AH1_(0.5)+AH1_((1.0/4.0-0.04)-0.5)*len; // 0.5 at len==0, 0.21 at len==1.
+  AH1 clp=APrxLoRcpH1(lob); // Distance^2 clipping point passed to FsrEasuTapH().
+//------------------------------------------------------------------------------------------------------------------------------
+  // FP16 is different, using packed trick to do min and max in same operation.
+  AH2 bothR=max(max(AH2(-ijfeR.z,ijfeR.z),AH2(-klhgR.w,klhgR.w)),max(AH2(-ijfeR.y,ijfeR.y),AH2(-klhgR.x,klhgR.x))); // .x = -min, .y = max of f,g,j,k.
+  AH2 bothG=max(max(AH2(-ijfeG.z,ijfeG.z),AH2(-klhgG.w,klhgG.w)),max(AH2(-ijfeG.y,ijfeG.y),AH2(-klhgG.x,klhgG.x)));
+  AH2 bothB=max(max(AH2(-ijfeB.z,ijfeB.z),AH2(-klhgB.w,klhgB.w)),max(AH2(-ijfeB.y,ijfeB.y),AH2(-klhgB.x,klhgB.x)));
+  // This part is different for FP16, working pairs of taps at a time.
+  AH2 pR=AH2_(0.0);
+  AH2 pG=AH2_(0.0);
+  AH2 pB=AH2_(0.0);
+  AH2 pW=AH2_(0.0);
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0, 1.0)-ppp.xx,AH2(-1.0,-1.0)-ppp.yy,dir,len2,lob,clp,bczzR.xy,bczzG.xy,bczzB.xy); // b, c
+  FsrEasuTapH(pR,pG,pB,pW,AH2(-1.0, 0.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,ijfeR.xy,ijfeG.xy,ijfeB.xy); // i, j
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0,-1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,ijfeR.zw,ijfeG.zw,ijfeB.zw); // f, e
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 2.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,klhgR.xy,klhgG.xy,klhgB.xy); // k, l
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 2.0, 1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,klhgR.zw,klhgG.zw,klhgB.zw); // h, g
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 0.0)-ppp.xx,AH2( 2.0, 2.0)-ppp.yy,dir,len2,lob,clp,zzonR.zw,zzonG.zw,zzonB.zw); // o, n
+  AH3 aC=AH3(pR.x+pR.y,pG.x+pG.y,pB.x+pB.y); // Combine the two lanes of accumulated color.
+  AH1 aW=pW.x+pW.y; // Combine the two lanes of accumulated weight.
+//------------------------------------------------------------------------------------------------------------------------------
+  // Slightly different for FP16 version due to combined min and max.
+  pix=min(AH3(bothR.y,bothG.y,bothB.y),max(-AH3(bothR.x,bothG.x,bothB.x),aC*AH3_(ARcpH1(aW))));} // aC/aW clamped to [min, max].
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                      FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness.
+// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping.
+// RCAS also has a built in process to limit sharpening of what it detects as possible noise.
+// The RCAS sharpener does not support scaling, as it should be applied after EASU scaling.
+// Pass EASU output straight into RCAS, no color conversions necessary.
+//------------------------------------------------------------------------------------------------------------------------------
+// RCAS is based on the following logic.
+// RCAS uses a 5 tap filter in a cross pattern (same as CAS),
+//    w                n
+//  w 1 w  for taps  w m e 
+//    w                s
+// Where 'w' is the negative lobe weight.
+//  output = (w*(n+e+w+s)+m)/(4*w+1)
+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
+//  0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
+//  1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+//       0.25
+//  0.25  -1  0.25
+//       0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+//  GLSL example for the required callbacks :
+// 
+//  AH4 FsrRcasLoadH(ASW2 p){return AH4(imageLoad(imgSrc,ASU2(p)));}
+//  void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b)
+//  {
+//    //do any simple input color conversions here or leave empty if none needed
+//  }
+//  
+//  FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+//  Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+// 
+//  AU4 con;
+//  FsrRcasCon(con,
+//   0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+//  #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+//  #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      CONSTANT SETUP
+//==============================================================================================================================
+// Call to set up the required constant values (works on CPU or GPU); fills all four entries of 'con'.
+A_STATIC void FsrRcasCon(
+outAU4 con, // Receives the constants; pass as 'con' to FsrRcasF()/FsrRcasH()/FsrRcasHx2().
+// The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+AF1 sharpness){
+ // Transform from stops to linear value.
+ sharpness=AExp2F1(-sharpness);
+ varAF2(hSharp)=initAF2(sharpness,sharpness); // Linear sharpness duplicated in both components for the half paths.
+ con[0]=AU1_AF1(sharpness); // Read back by FsrRcasF() as AF1_AU1(con.x).
+ con[1]=AU1_AH2_AF2(hSharp); // Read back by FsrRcasH()/FsrRcasHx2() as AH2_AU1(con.y).x.
+ con[2]=0; // con[2] and con[3] are zeroed; the RCAS filters only read con.x and con.y.
+ con[3]=0;}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                   NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(FSR_RCAS_F)
+ // Input callback prototypes that need to be implemented by calling shader
+ AF4 FsrRcasLoadF(ASU2 p);
+ void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasF(
+ out AF1 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy.
+ out AF1 pixG,
+ out AF1 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  out AF1 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by FsrRcasCon().
+  // Sharpens the pixel at 'ip' using only the cross taps of the 3x3 pixel neighborhood.
+  //    b
+  //  d e f
+  //    h
+  ASU2 sp=ASU2(ip);
+  AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb;
+  AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   AF4 ee=FsrRcasLoadF(sp);
+   AF3 e=ee.rgb;pixA=ee.a; // Center alpha is copied to the output unmodified.
+  #else
+   AF3 e=FsrRcasLoadF(sp).rgb;
+  #endif
+  AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb;
+  AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb;
+  // Rename (32-bit) or regroup (16-bit).
+  AF1 bR=b.r;
+  AF1 bG=b.g;
+  AF1 bB=b.b;
+  AF1 dR=d.r;
+  AF1 dG=d.g;
+  AF1 dB=d.b;
+  AF1 eR=e.r;
+  AF1 eG=e.g;
+  AF1 eB=e.b;
+  AF1 fR=f.r;
+  AF1 fG=f.g;
+  AF1 fB=f.b;
+  AF1 hR=h.r;
+  AF1 hG=h.g;
+  AF1 hB=h.b;
+  // Run optional input transform (caller-supplied FsrRcasInputF(), applied to every tap in place).
+  FsrRcasInputF(bR,bG,bB);
+  FsrRcasInputF(dR,dG,dB);
+  FsrRcasInputF(eR,eG,eB);
+  FsrRcasInputF(fR,fG,fB);
+  FsrRcasInputF(hR,hG,hB);
+  // Luma times 2 (0.5*B+0.5*R+G).
+  AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG);
+  AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG);
+  AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG);
+  AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG);
+  AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG);
+  // Noise detection: highpass (ring average minus center) divided by the luma range of all five taps.
+  AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL;
+  nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL)));
+  nz=AF1_(-0.5)*nz+AF1_(1.0); // Becomes the lobe multiplier 1.0-0.5*nz; only consumed under FSR_RCAS_DENOISE.
+  // Min and max of ring (b,d,f,h per channel; the center tap is excluded).
+  AF1 mn4R=min(AMin3F1(bR,dR,fR),hR);
+  AF1 mn4G=min(AMin3F1(bG,dG,fG),hG);
+  AF1 mn4B=min(AMin3F1(bB,dB,fB),hB);
+  AF1 mx4R=max(AMax3F1(bR,dR,fR),hR);
+  AF1 mx4G=max(AMax3F1(bG,dG,fG),hG);
+  AF1 mx4B=max(AMax3F1(bB,dB,fB),hB);
+  // Immediate constants for peak range.
+  AF2 peakC=AF2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R);
+  AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G);
+  AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B);
+  AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
+  AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
+  AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
+  AF1 lobeR=max(-hitMinR,hitMaxR); // Per channel, take the larger of the two clip solutions.
+  AF1 lobeG=max(-hitMinG,hitMaxG);
+  AF1 lobeB=max(-hitMinB,hitMaxB);
+  AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x);
+  // The lobe above is clamped to {-FSR_RCAS_LIMIT to 0.0} and scaled by con.x. Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; // (lobe*(b+d+h+f)+e)/(4*lobe+1), per channel.
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+  return;} 
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                  NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H)
+ // Input callback prototypes that need to be implemented by calling shader
+ AH4 FsrRcasLoadH(ASW2 p);
+ void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasH(
+ out AH1 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy.
+ out AH1 pixG,
+ out AH1 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  out AH1 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by FsrRcasCon().
+  // Half-precision sharpening of the pixel at 'ip' using only the cross taps of the 3x3 pixel neighborhood.
+  //    b
+  //  d e f
+  //    h
+  ASW2 sp=ASW2(ip);
+  AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb;
+  AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   AH4 ee=FsrRcasLoadH(sp);
+   AH3 e=ee.rgb;pixA=ee.a; // Center alpha is copied to the output unmodified.
+  #else
+   AH3 e=FsrRcasLoadH(sp).rgb;
+  #endif
+  AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb;
+  AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb;
+  // Rename (32-bit) or regroup (16-bit).
+  AH1 bR=b.r;
+  AH1 bG=b.g;
+  AH1 bB=b.b;
+  AH1 dR=d.r;
+  AH1 dG=d.g;
+  AH1 dB=d.b;
+  AH1 eR=e.r;
+  AH1 eG=e.g;
+  AH1 eB=e.b;
+  AH1 fR=f.r;
+  AH1 fG=f.g;
+  AH1 fB=f.b;
+  AH1 hR=h.r;
+  AH1 hG=h.g;
+  AH1 hB=h.b;
+  // Run optional input transform (caller-supplied FsrRcasInputH(), applied to every tap in place).
+  FsrRcasInputH(bR,bG,bB);
+  FsrRcasInputH(dR,dG,dB);
+  FsrRcasInputH(eR,eG,eB);
+  FsrRcasInputH(fR,fG,fB);
+  FsrRcasInputH(hR,hG,hB);
+  // Luma times 2 (0.5*B+0.5*R+G).
+  AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG);
+  AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG);
+  AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG);
+  AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG);
+  AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG);
+  // Noise detection: highpass (ring average minus center) divided by the luma range of all five taps.
+  AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL;
+  nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL)));
+  nz=AH1_(-0.5)*nz+AH1_(1.0); // Becomes the lobe multiplier 1.0-0.5*nz; only consumed under FSR_RCAS_DENOISE.
+  // Min and max of ring (b,d,f,h per channel; the center tap is excluded).
+  AH1 mn4R=min(AMin3H1(bR,dR,fR),hR);
+  AH1 mn4G=min(AMin3H1(bG,dG,fG),hG);
+  AH1 mn4B=min(AMin3H1(bB,dB,fB),hB);
+  AH1 mx4R=max(AMax3H1(bR,dR,fR),hR);
+  AH1 mx4G=max(AMax3H1(bG,dG,fG),hG);
+  AH1 mx4B=max(AMax3H1(bB,dB,fB),hB);
+  // Immediate constants for peak range.
+  AH2 peakC=AH2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  AH1 hitMinR=mn4R*ARcpH1(AH1_(4.0)*mx4R);
+  AH1 hitMinG=mn4G*ARcpH1(AH1_(4.0)*mx4G);
+  AH1 hitMinB=mn4B*ARcpH1(AH1_(4.0)*mx4B);
+  AH1 hitMaxR=(peakC.x-mx4R)*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
+  AH1 hitMaxG=(peakC.x-mx4G)*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
+  AH1 hitMaxB=(peakC.x-mx4B)*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
+  AH1 lobeR=max(-hitMinR,hitMaxR); // Per channel, take the larger of the two clip solutions.
+  AH1 lobeG=max(-hitMinG,hitMaxG);
+  AH1 lobeB=max(-hitMinB,hitMaxB);
+  AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x;
+  // The lobe above is clamped to {-FSR_RCAS_LIMIT to 0.0} and scaled by the half sharpness in con.y. Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; // (lobe*(b+d+h+f)+e)/(4*lobe+1), per channel.
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2)
+ // Input callback prototypes that need to be implemented by the calling shader
+ AH4 FsrRcasLoadHx2(ASW2 p);
+ void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Converts packed Structures of Arrays (pixR/G/B) to Arrays of Structures for store: pix0 gets the .x lanes, pix1 the .y lanes.
+ void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){
+  #ifdef A_HLSL
+   // Invoke a slower path for DX only, since it won't allow uninitialized values.
+   pix0.a=pix1.a=0.0; // Without A_HLSL the alpha components of pix0/pix1 are not written here.
+  #endif
+  pix0.rgb=AH3(pixR.x,pixG.x,pixB.x);
+  pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasHx2(
+ // Output values are for 2 8x8 tiles in a 16x8 region.
+ //  pix<R,G,B>.x =  left 8x8 tile
+ //  pix<R,G,B>.y = right 8x8 tile
+ // This enables later processing to easily be packed as well.
+ out AH2 pixR,
+ out AH2 pixG,
+ out AH2 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  out AH2 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by FsrRcasCon().
+  // Non-scaling algorithm; uses only the cross taps (b,d,e,f,h) of the 3x3 pixel neighborhood, for two pixels at once.
+  ASW2 sp0=ASW2(ip);
+  AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb;
+  AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   AH4 ee0=FsrRcasLoadHx2(sp0);
+   AH3 e0=ee0.rgb;pixA.r=ee0.a; // First pixel's center alpha is copied out unmodified.
+  #else
+   AH3 e0=FsrRcasLoadHx2(sp0).rgb;
+  #endif
+  AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb;
+  AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb;
+  ASW2 sp1=sp0+ASW2(8,0); // Second pixel sits 8 texels to the right of 'ip' (the right 8x8 tile).
+  AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb;
+  AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   AH4 ee1=FsrRcasLoadHx2(sp1);
+   AH3 e1=ee1.rgb;pixA.g=ee1.a; // Second pixel's center alpha is copied out unmodified.
+  #else
+   AH3 e1=FsrRcasLoadHx2(sp1).rgb;
+  #endif
+  AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb;
+  AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb;
+  // Arrays of Structures to Structures of Arrays conversion (.x = first pixel, .y = second pixel).
+  AH2 bR=AH2(b0.r,b1.r);
+  AH2 bG=AH2(b0.g,b1.g);
+  AH2 bB=AH2(b0.b,b1.b);
+  AH2 dR=AH2(d0.r,d1.r);
+  AH2 dG=AH2(d0.g,d1.g);
+  AH2 dB=AH2(d0.b,d1.b);
+  AH2 eR=AH2(e0.r,e1.r);
+  AH2 eG=AH2(e0.g,e1.g);
+  AH2 eB=AH2(e0.b,e1.b);
+  AH2 fR=AH2(f0.r,f1.r);
+  AH2 fG=AH2(f0.g,f1.g);
+  AH2 fB=AH2(f0.b,f1.b);
+  AH2 hR=AH2(h0.r,h1.r);
+  AH2 hG=AH2(h0.g,h1.g);
+  AH2 hB=AH2(h0.b,h1.b);
+  // Run optional input transform (caller-supplied FsrRcasInputHx2(), applied to every tap in place).
+  FsrRcasInputHx2(bR,bG,bB);
+  FsrRcasInputHx2(dR,dG,dB);
+  FsrRcasInputHx2(eR,eG,eB);
+  FsrRcasInputHx2(fR,fG,fB);
+  FsrRcasInputHx2(hR,hG,hB);
+  // Luma times 2 (0.5*B+0.5*R+G).
+  AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG);
+  AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG);
+  AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG);
+  AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG);
+  AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG);
+  // Noise detection: highpass (ring average minus center) divided by the luma range of all five taps.
+  AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL;
+  nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL)));
+  nz=AH2_(-0.5)*nz+AH2_(1.0); // Becomes the lobe multiplier 1.0-0.5*nz; only consumed under FSR_RCAS_DENOISE.
+  // Min and max of ring (b,d,f,h per channel; the center tap is excluded).
+  AH2 mn4R=min(AMin3H2(bR,dR,fR),hR);
+  AH2 mn4G=min(AMin3H2(bG,dG,fG),hG);
+  AH2 mn4B=min(AMin3H2(bB,dB,fB),hB);
+  AH2 mx4R=max(AMax3H2(bR,dR,fR),hR);
+  AH2 mx4G=max(AMax3H2(bG,dG,fG),hG);
+  AH2 mx4B=max(AMax3H2(bB,dB,fB),hB);
+  // Immediate constants for peak range.
+  AH2 peakC=AH2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  AH2 hitMinR=mn4R*ARcpH2(AH2_(4.0)*mx4R);
+  AH2 hitMinG=mn4G*ARcpH2(AH2_(4.0)*mx4G);
+  AH2 hitMinB=mn4B*ARcpH2(AH2_(4.0)*mx4B);
+  AH2 hitMaxR=(peakC.x-mx4R)*ARcpH2(AH2_(4.0)*mn4R+peakC.y);
+  AH2 hitMaxG=(peakC.x-mx4G)*ARcpH2(AH2_(4.0)*mn4G+peakC.y);
+  AH2 hitMaxB=(peakC.x-mx4B)*ARcpH2(AH2_(4.0)*mn4B+peakC.y);
+  AH2 lobeR=max(-hitMinR,hitMaxR); // Per channel, take the larger of the two clip solutions.
+  AH2 lobeG=max(-hitMinG,hitMaxG);
+  AH2 lobeB=max(-hitMinB,hitMaxB);
+  AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x);
+  // The lobe above is clamped to {-FSR_RCAS_LIMIT to 0.0} and scaled by the half sharpness in con.y. Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; // (lobe*(b+d+h+f)+e)/(4*lobe+1), per channel and per pixel.
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+//   FsrLfga*(
+//    color, // In/out linear colorspace color {0 to 1} ranged.
+//    grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+//    amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,AF3_(grain),amount)'
+//==============================================================================================================================
+#if defined(A_GPU)
+ // Adds grain 't' scaled by amount 'a' to 'c' in place. Maximum grain is min(1-c,c), the minimum distance to the signal limit.
+ void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);}
+#endif
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)
+ // Half precision version (slower). Same formula as FsrLfgaF(): c+=(t*a)*min(1-c,c).
+ void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Packed half precision version (faster). Applies the same formula to each channel pair; 'a' is shared by both lanes.
+ void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){
+  cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+//  FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
+//  FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(A_GPU)
+ void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));} // c/(max(r,g,b)+1), same scale on all channels.
+ // Inverse of FsrSrtmF(): c/max(1/32768,1-max(r,g,b)). The extra max solves the c=1.0 case (which is a /0).
+ void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));}
+#endif
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)
+ void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));} // Half version: c/(max(r,g,b)+1).
+ void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));} // NOTE(review): 1/32768=2^-15 is below the smallest normal FP16 value (2^-14) - confirm denormal handling on target.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ // Packed version: each lane is scaled by 1/(max(r,g,b)+1) of its own pixel.
+  AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;}
+ void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ // Packed inverse: each lane is scaled by 1/max(1/32768,1-max(r,g,b)).
+  AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                       FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// Gamma 2.0 is used so that the conversion back to linear is just to square the color.
+// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively.
+// Given good non-biased temporal blue noise as dither input,
+// the output dither will temporally conserve energy.
+// This is done by choosing the linear nearest step point instead of perceptual nearest.
+// See code below for details.
+//------------------------------------------------------------------------------------------------------------------------------
+// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION
+// ===============================================
+// - Output is 'uint(floor(saturate(n)*255.0+0.5))'.
+// - Thus rounding is to nearest.
+// - NaN gets converted to zero.
+// - INF is clamped to {0.0 to 1.0}.
+//==============================================================================================================================
+#if defined(A_GPU)
+ // Hand tuned integer position to dither value, with more values than simple checkerboard.
+ // Only 32-bit has enough precision for this computation.
+ // Output is {0 to <1}.
+ AF1 FsrTepdDitF(AU2 p,AU1 f){
+  AF1 x=AF1_(p.x+f); // 'f' is added to the x coordinate (presumably a per-frame offset - confirm with caller).
+  AF1 y=AF1_(p.y);
+  // The 1.61803 golden ratio.
+  AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
+  // Number designed to provide a good visual pattern.
+  AF1 b=AF1_(1.0/3.69);
+  x=x*a+(y*b);
+  return AFractF1(x);} // Fractional part of (p.x+f)*a+p.y*b.
+//------------------------------------------------------------------------------------------------------------------------------
+ // This version is 8-bit gamma 2.0.
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC8F(inout AF3 c,AF1 dit){
+  AF3 n=sqrt(c); // Linear to gamma 2.0.
+  n=floor(n*AF3_(255.0))*AF3_(1.0/255.0); // Round down to a multiple of 1/255.
+  AF3 a=n*n; // Lower step, squared back to linear.
+  AF3 b=n+AF3_(1.0/255.0);b=b*b; // Next step up, squared back to linear.
+  // Ratio of 'a' to 'b' required to produce 'c'.
+  // APrxLoRcpF1() won't work here (at least for very high dynamic ranges).
+  // APrxMedRcpF1() is an IADD,FMA,MUL.
+  AF3 r=(c-b)*APrxMedRcpF3(a-b); // (c-b)/(a-b): 1 when c equals 'a', 0 when c equals 'b'.
+  // Use the ratio as a cutoff to choose 'a' or 'b' (one step of 1/255 is added where dit-r is selected by AGtZeroF3()).
+  // AGtZeroF1() is a MUL.
+  c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // This version is 10-bit gamma 2.0 (same steps as FsrTepdC8F() with a step size of 1/1023).
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC10F(inout AF3 c,AF1 dit){
+  AF3 n=sqrt(c); // Linear to gamma 2.0.
+  n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0); // Round down to a multiple of 1/1023.
+  AF3 a=n*n; // Lower step, squared back to linear.
+  AF3 b=n+AF3_(1.0/1023.0);b=b*b; // Next step up, squared back to linear.
+  AF3 r=(c-b)*APrxMedRcpF3(a-b); // (c-b)/(a-b): 1 when c equals 'a', 0 when c equals 'b'.
+  c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));} // 'r' is the cutoff against 'dit' for adding one step.
+#endif
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)
+ AH1 FsrTepdDitH(AU2 p,AU1 f){ // Same computation as FsrTepdDitF(), done in 32-bit float and converted to half on return.
+  AF1 x=AF1_(p.x+f);
+  AF1 y=AF1_(p.y);
+  AF1 a=AF1_((1.0+sqrt(5.0))/2.0); // Golden ratio.
+  AF1 b=AF1_(1.0/3.69);
+  x=x*a+(y*b);
+  return AH1(AFractF1(x));}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8H(inout AH3 c,AH1 dit){ // Half-precision counterpart of FsrTepdC8F() (8-bit, step 1/255).
+  AH3 n=sqrt(c); // Linear to gamma 2.0.
+  n=floor(n*AH3_(255.0))*AH3_(1.0/255.0); // Round down to a multiple of 1/255.
+  AH3 a=n*n; // Lower step, squared back to linear.
+  AH3 b=n+AH3_(1.0/255.0);b=b*b; // Next step up, squared back to linear.
+  AH3 r=(c-b)*APrxMedRcpH3(a-b); // (c-b)/(a-b): 1 when c equals 'a', 0 when c equals 'b'.
+  c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));} // 'r' is the cutoff against 'dit' for adding one step.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10H(inout AH3 c,AH1 dit){ // Half-precision counterpart of FsrTepdC10F() (10-bit, step 1/1023).
+  AH3 n=sqrt(c); // Linear to gamma 2.0.
+  n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0); // Round down to a multiple of 1/1023.
+  AH3 a=n*n; // Lower step, squared back to linear.
+  AH3 b=n+AH3_(1.0/1023.0);b=b*b; // Next step up, squared back to linear.
+  AH3 r=(c-b)*APrxMedRcpH3(a-b); // (c-b)/(a-b): 1 when c equals 'a', 0 when c equals 'b'.
+  c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));} // 'r' is the cutoff against 'dit' for adding one step.
+//==============================================================================================================================
+ // This computes dither for positions 'p' and 'p+{8,0}' (.x and .y of the result respectively).
+ AH2 FsrTepdDitHx2(AU2 p,AU1 f){
+  AF2 x;
+  x.x=AF1_(p.x+f);
+  x.y=x.x+AF1_(8.0); // Second position is 8 texels to the right; both share the same row 'p.y'.
+  AF1 y=AF1_(p.y);
+  AF1 a=AF1_((1.0+sqrt(5.0))/2.0); // Golden ratio.
+  AF1 b=AF1_(1.0/3.69);
+  x=x*AF2_(a)+AF2_(y*b);
+  return AH2(AFractF2(x));} // Computed in 32-bit float, converted to half on return.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ // Packed FsrTepdC8H(): two pixels per call, one 'dit' lane each.
+  AH2 nR=sqrt(cR); // Linear to gamma 2.0, per channel.
+  AH2 nG=sqrt(cG);
+  AH2 nB=sqrt(cB);
+  nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0); // Round down to a multiple of 1/255.
+  nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0);
+  nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0);
+  AH2 aR=nR*nR; // Lower step, squared back to linear.
+  AH2 aG=nG*nG;
+  AH2 aB=nB*nB;
+  AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR; // Next step up, squared back to linear.
+  AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG;
+  AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB;
+  AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); // (c-b)/(a-b): 1 when c equals 'a', 0 when c equals 'b'.
+  AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
+  AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
+  cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0)); // 'r' is the cutoff against 'dit' for adding one step.
+  cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0));
+  cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ // Packed FsrTepdC10H(): two pixels per call, one 'dit' lane each.
+  AH2 nR=sqrt(cR); // Linear to gamma 2.0, per channel.
+  AH2 nG=sqrt(cG);
+  AH2 nB=sqrt(cB);
+  nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0); // Round down to a multiple of 1/1023.
+  nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0);
+  nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0);
+  AH2 aR=nR*nR; // Lower step, squared back to linear.
+  AH2 aG=nG*nG;
+  AH2 aB=nB*nB;
+  AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR; // Next step up, squared back to linear.
+  AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG;
+  AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB;
+  AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); // (c-b)/(a-b): 1 when c equals 'a', 0 when c equals 'b'.
+  AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
+  AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
+  cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0)); // 'r' is the cutoff against 'dit' for adding one step.
+  cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0));
+  cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));}
+#endif
diff --git a/externals/FidelityFX-FSR/license.txt b/externals/FidelityFX-FSR/license.txt
new file mode 100755
index 000000000..324cba594
--- /dev/null
+++ b/externals/FidelityFX-FSR/license.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 4c38d8040..510c4e56d 100755
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -48,8 +48,8 @@ struct Rectangle {
     }
 
     [[nodiscard]] Rectangle<T> Scale(const float s) const {
-        return Rectangle{left, top, static_cast<T>(left + GetWidth() * s),
-                         static_cast<T>(top + GetHeight() * s)};
+        return Rectangle{left, top, static_cast<T>(static_cast<float>(left + GetWidth()) * s),
+                         static_cast<T>(static_cast<float>(top + GetHeight()) * s)};
     }
 };
 
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 9dd5e3efb..3bcaa072f 100755
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -47,7 +47,9 @@ void LogSettings() {
     log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
     log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
     log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
-    log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue());
+    log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
+    log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue());
+    log_setting("Renderer_AntiAliasing", values.anti_aliasing.GetValue());
     log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue());
     log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue());
     log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
@@ -105,6 +107,55 @@ float Volume() {
     return values.volume.GetValue() / 100.0f;
 }
 
+// Derives values.resolution_info from the currently selected resolution_setup preset.
+// A preset is stored as the rational factor up_scale / (1 << down_shift).
+void UpdateRescalingInfo() {
+    // Start from native resolution; each preset overrides only what differs.
+    u32 multiplier = 1;
+    u32 shift = 0;
+    switch (values.resolution_setup.GetValue()) {
+    case ResolutionSetup::Res1_2X:
+        shift = 1;
+        break;
+    case ResolutionSetup::Res3_4X:
+        multiplier = 3;
+        shift = 2;
+        break;
+    case ResolutionSetup::Res1X:
+        break;
+    case ResolutionSetup::Res2X:
+        multiplier = 2;
+        break;
+    case ResolutionSetup::Res3X:
+        multiplier = 3;
+        break;
+    case ResolutionSetup::Res4X:
+        multiplier = 4;
+        break;
+    case ResolutionSetup::Res5X:
+        multiplier = 5;
+        break;
+    case ResolutionSetup::Res6X:
+        multiplier = 6;
+        break;
+    default:
+        // Not expected to happen; native resolution is kept as the fallback.
+        UNREACHABLE();
+        break;
+    }
+    const u32 divisor = 1U << shift;
+
+    auto& info = values.resolution_info;
+    info.up_scale = multiplier;
+    info.down_shift = shift;
+    // Only the fractional presets (1/2x and 3/4x) use a non-zero shift.
+    info.downscale = shift != 0;
+    info.up_factor = static_cast<f32>(multiplier) / divisor;
+    info.down_factor = static_cast<f32>(divisor) / multiplier;
+    // Rescaling is inactive only for the exact 1x preset.
+    info.active = multiplier != 1 || shift != 0;
+}
+
 void RestoreGlobalState(bool is_powered_on) {
     // If a game is running, DO NOT restore the global settings state
     if (is_powered_on) {
diff --git a/src/common/settings.h b/src/common/settings.h
index 1e5e99aec..4a0e7346e 100755
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -52,6 +52,56 @@ enum class NvdecEmulation : u32 {
     GPU = 2,
 };
 
+enum class ResolutionSetup : u32 { // Presets consumed by UpdateRescalingInfo()
+    Res1_2X = 0, // 1/2x: up_scale 1, down_shift 1, downscale
+    Res3_4X = 1, // 3/4x: up_scale 3, down_shift 2, downscale
+    Res1X = 2,   // 1x: up_scale 1, down_shift 0
+    Res2X = 3,   // 2x: up_scale 2, down_shift 0
+    Res3X = 4,   // 3x: up_scale 3, down_shift 0
+    Res4X = 5,   // 4x: up_scale 4, down_shift 0
+    Res5X = 6,   // 5x: up_scale 5, down_shift 0
+    Res6X = 7,   // 6x: up_scale 6, down_shift 0
+};
+
+enum class ScalingFilter : u32 { // Value type of Values::scaling_filter
+    NearestNeighbor = 0,
+    Bilinear = 1, // default value of Values::scaling_filter
+    Bicubic = 2,
+    Gaussian = 3,
+    ScaleForce = 4,
+    Fsr = 5,
+    LastFilter = Fsr, // alias of the highest enumerator; presumably a range bound - confirm
+};
+
+enum class AntiAliasing : u32 { // Value type of Values::anti_aliasing
+    None = 0, // default value of Values::anti_aliasing
+    Fxaa = 1,
+    LastAA = Fxaa, // alias of the highest enumerator; presumably a range bound - confirm
+};
+
+// Resolved form of a ResolutionSetup preset, filled in by UpdateRescalingInfo().
+// The effective factor equals up_scale / (1 << down_shift).
+struct ResolutionScalingInfo {
+    u32 up_scale{1};
+    u32 down_shift{0};
+    f32 up_factor{1.0f};
+    f32 down_factor{1.0f};
+    bool active{false};
+    bool downscale{false};
+
+    // Integer scaling of a signed value: zero maps to zero, everything else to at least 1.
+    s32 ScaleUp(s32 value) const {
+        const s32 scaled{(value * static_cast<s32>(up_scale)) >> static_cast<s32>(down_shift)};
+        return value == 0 ? 0 : std::max(scaled, 1);
+    }
+
+    // Integer scaling of an unsigned value: zero maps to zero, everything else to at least 1.
+    u32 ScaleUp(u32 value) const {
+        const u32 scaled{(value * up_scale) >> down_shift};
+        return value == 0U ? 0U : std::max(scaled, 1U);
+    }
+};
+
 /** The BasicSetting class is a simple resource manager. It defines a label and default value
  * alongside the actual value of the setting for simpler and less-error prone use with frontend
  * configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -451,7 +501,10 @@ struct Values {
                                                          "disable_shader_loop_safety_checks"};
     Setting<int> vulkan_device{0, "vulkan_device"};
 
-    Setting<u16> resolution_factor{1, "resolution_factor"};
+    ResolutionScalingInfo resolution_info{};
+    Setting<ResolutionSetup> resolution_setup{ResolutionSetup::Res1X, "resolution_setup"};
+    Setting<ScalingFilter> scaling_filter{ScalingFilter::Bilinear, "scaling_filter"};
+    Setting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"};
     // *nix platforms may have issues with the borderless windowed fullscreen mode.
     // Default to exclusive fullscreen on these platforms for now.
     RangedSetting<FullscreenMode> fullscreen_mode{
@@ -596,6 +649,8 @@ std::string GetTimeZoneString();
 
 void LogSettings();
 
+void UpdateRescalingInfo();
+
 // Restore the global state of all applicable settings in the Values struct
 void RestoreGlobalState(bool is_powered_on);
 
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index 0832463d6..4b58b672a 100755
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -44,16 +44,13 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) {
     return res;
 }
 
-FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
-    u32 width, height;
+FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale) { // scale may be fractional
+    const bool is_docked = Settings::values.use_docked_mode.GetValue();
+    const u32 screen_width = is_docked ? ScreenDocked::Width : ScreenUndocked::Width;
+    const u32 screen_height = is_docked ? ScreenDocked::Height : ScreenUndocked::Height;
 
-    if (Settings::values.use_docked_mode.GetValue()) {
-        width = ScreenDocked::Width * res_scale;
-        height = ScreenDocked::Height * res_scale;
-    } else {
-        width = ScreenUndocked::Width * res_scale;
-        height = ScreenUndocked::Height * res_scale;
-    }
+    const u32 width = static_cast<u32>(static_cast<f32>(screen_width) * res_scale); // truncates
+    const u32 height = static_cast<u32>(static_cast<f32>(screen_height) * res_scale); // truncates
 
     return DefaultFrameLayout(width, height);
 }
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e2e3bbbb3..2e36c0163 100755
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -60,7 +60,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height);
  * Convenience method to get frame layout by resolution scale
  * @param res_scale resolution scale factor
  */
-FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale);
+FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale);
 
 /**
  * Convenience method to determine emulation aspect ratio
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index eccdcc20d..2767e4336 100755
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -801,15 +801,11 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext&
     rb.Push(ResultSuccess);
 
     if (Settings::values.use_docked_mode.GetValue()) {
-        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
-                static_cast<u32>(Settings::values.resolution_factor.GetValue()));
-        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
-                static_cast<u32>(Settings::values.resolution_factor.GetValue()));
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
     } else {
-        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
-                static_cast<u32>(Settings::values.resolution_factor.GetValue()));
-        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
-                static_cast<u32>(Settings::values.resolution_factor.GetValue()));
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
     }
 }
 
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 439e7e472..6f2b4007b 100755
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -543,11 +543,8 @@ private:
         switch (transaction) {
         case TransactionId::Connect: {
             IGBPConnectRequestParcel request{ctx.ReadBuffer()};
-            IGBPConnectResponseParcel response{
-                static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
-                                 Settings::values.resolution_factor.GetValue()),
-                static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
-                                 Settings::values.resolution_factor.GetValue())};
+            IGBPConnectResponseParcel response{static_cast<u32>(DisplayResolution::UndockedWidth),
+                                               static_cast<u32>(DisplayResolution::UndockedHeight)};
 
             buffer_queue.Connect();
 
@@ -777,15 +774,11 @@ private:
         rb.Push(ResultSuccess);
 
         if (Settings::values.use_docked_mode.GetValue()) {
-            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
-                    static_cast<u32>(Settings::values.resolution_factor.GetValue()));
-            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
-                    static_cast<u32>(Settings::values.resolution_factor.GetValue()));
+            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
+            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
         } else {
-            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
-                    static_cast<u32>(Settings::values.resolution_factor.GetValue()));
-            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
-                    static_cast<u32>(Settings::values.resolution_factor.GetValue()));
+            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
+            rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
         }
 
         rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games.
@@ -1065,10 +1058,8 @@ private:
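-        // This only returns the fixed values of 1280x720 and makes no distinguishing
-        // between docked and undocked dimensions. We take the liberty of applying
-        // the resolution scaling factor here.
+        // This only returns the fixed values of 1280x720 and makes no distinction
+        // between docked and undocked dimensions. The resolution scaling factor is
+        // intentionally not applied here.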
-        rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) *
-                static_cast<u32>(Settings::values.resolution_factor.GetValue()));
-        rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) *
-                static_cast<u32>(Settings::values.resolution_factor.GetValue()));
+        rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth));
+        rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight));
     }
 
     void SetLayerScalingMode(Kernel::HLERequestContext& ctx) {
@@ -1101,8 +1092,6 @@ private:
         LOG_WARNING(Service_VI, "(STUBBED) called");
 
         DisplayInfo display_info;
-        display_info.width *= static_cast<u64>(Settings::values.resolution_factor.GetValue());
-        display_info.height *= static_cast<u64>(Settings::values.resolution_factor.GetValue());
         ctx.WriteBuffer(&display_info, sizeof(DisplayInfo));
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(ResultSuccess);
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 191475f71..654db0b52 100755
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -229,8 +229,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
     AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue());
     AddField(field_type, "Renderer_Backend",
              TranslateRenderer(Settings::values.renderer_backend.GetValue()));
-    AddField(field_type, "Renderer_ResolutionFactor",
-             Settings::values.resolution_factor.GetValue());
     AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue());
     AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue());
     AddField(field_type, "Renderer_UseDiskShaderCache",
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index b5b7e5e83..bc3df80c8 100755
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC
     ir_opt/lower_fp16_to_fp32.cpp
     ir_opt/lower_int64_to_int32.cpp
     ir_opt/passes.h
+    ir_opt/rescaling_pass.cpp
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/texture_pass.cpp
     ir_opt/verification_pass.cpp
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
index 35503000c..669702553 100755
--- a/src/shader_recompiler/backend/bindings.h
+++ b/src/shader_recompiler/backend/bindings.h
@@ -14,6 +14,8 @@ struct Bindings {
     u32 storage_buffer{};
     u32 texture{};
     u32 image{};
+    u32 texture_scaling_index{};
+    u32 image_scaling_index{};
 };
 
 } // namespace Shader::Backend
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
index 069c019ad..8fd459dfe 100755
--- a/src/shader_recompiler/backend/glasm/emit_context.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -6,6 +6,7 @@
 
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
@@ -55,7 +56,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
     }
     if (!runtime_info.glasm_use_storage_buffers) {
         if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
-            Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
+            const size_t index{num + PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE};
+            Add("PARAM c[{}]={{program.local[0..{}]}};", index, index - 1);
         }
     }
     stage = program.stage;
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 4ce1c4f54..004658546 100755
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -448,6 +448,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I
         header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
         header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
     }
+    if (program.info.uses_rescaling_uniform) {
+        header += "PARAM scaling[1]={program.local[0..0]};";
+    }
     header += "TEMP ";
     for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
         header += fmt::format("R{},", index);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
index bcb55f062..292655acb 100755
--- a/src/shader_recompiler/backend/glasm/emit_glasm.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -13,6 +13,8 @@
 
 namespace Shader::Backend::GLASM {
 
+constexpr u32 PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE = 1; // local[0] backs "scaling"
+
 [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
                                     IR::Program& program, Bindings& bindings);
 
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index 09e3a9b82..d325d31c7 100755
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -608,6 +608,24 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re
     ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
 }
 
+// Sets the result to whether bit `index` of scaling[0].x is set (immediate indices only).
+void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
+    if (!index.IsImmediate()) {
+        throw NotImplementedException("Non-constant texture rescaling");
+    }
+    const u32 mask{1u << index.U32()};
+    ctx.Add("AND.U RC.x,scaling[0].x,{};SNE.S {},RC.x,0;", mask, ctx.reg_alloc.Define(inst));
+}
+
+// Sets the result to whether bit `index` of scaling[0].y is set (immediate indices only).
+void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
+    if (!index.IsImmediate()) {
+        throw NotImplementedException("Non-constant texture rescaling");
+    }
+    const u32 mask{1u << index.U32()};
+    ctx.Add("AND.U RC.x,scaling[0].y,{};SNE.S {},RC.x,0;", mask, ctx.reg_alloc.Define(inst));
+}
+
 void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                            ScalarU32 value) {
     ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 12afda43b..1f343bff5 100755
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -72,6 +72,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
 void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
 void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
 void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst);
 void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
 void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
 void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
@@ -303,6 +304,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
 void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
 void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
 void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
 void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
@@ -553,6 +556,8 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
 void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
 void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                     Register color);
+void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
+void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
 void EmitBindlessImageAtomicIAdd32(EmitContext&);
 void EmitBindlessImageAtomicSMin32(EmitContext&);
 void EmitBindlessImageAtomicUMin32(EmitContext&);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index f55c26b76..8aa494a4d 100755
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -90,6 +90,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
 }
 
+void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+    ctx.Add("DIV.S {}.x,{},{};", inst, a, b); // signed a / b, written to the .x component
+}
+
+void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+    ctx.Add("DIV.U {}.x,{},{};", inst, a, b); // unsigned a / b, written to the .x component
+}
+
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
     if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
         ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index ff64c6924..d41b0aac8 100755
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -210,6 +210,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
     ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
 }
 
+void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) {
+    ctx.Add("MOV.F {}.x,scaling[0].z;", inst); // copies the .z lane of the scaling PARAM
+}
+
 void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
     ctx.Add("MOV.S {}.x,0;", inst);
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 4e6f2c0fe..97bd59302 100755
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -393,6 +393,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
             DefineGenericOutput(index, program.invocations);
         }
     }
+    if (info.uses_rescaling_uniform) {
+        header += "layout(location=0) uniform vec4 scaling;";
+    }
     DefineConstantBuffers(bindings);
     DefineStorageBuffers(bindings);
     SetupImages(bindings);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 170db269a..4c26f3829 100755
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -445,6 +445,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
     ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
 }
 
+void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) {
+    ctx.AddF32("{}=scaling.z;", inst); // reads the z component of the "scaling" uniform
+}
+
 void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
     ctx.AddU32("{}=lmem[{}];", inst, word_offset);
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 447eb8e0a..2f78d0267 100755
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -612,6 +612,22 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value
                value);
 }
 
+// Yields true when bit `index` of the bitmask carried in scaling.x is set.
+void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
+    if (!index.IsImmediate()) {
+        throw NotImplementedException("Non-constant texture rescaling");
+    }
+    ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << index.U32());
+}
+
+// Yields true when bit `index` of the bitmask carried in scaling.y is set.
+void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) {
+    if (!index.IsImmediate()) {
+        throw NotImplementedException("Non-constant texture rescaling");
+    }
+    ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << index.U32());
+}
+
 void EmitBindlessImageSampleImplicitLod(EmitContext&) {
     NotImplemented();
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 5936d086f..f86502e4c 100755
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -85,6 +85,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
 void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
 void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
 void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst);
 void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
 void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
 void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
@@ -362,6 +363,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
 void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
 void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
 void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
 void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
@@ -627,6 +630,8 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                    std::string_view coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     std::string_view coords, std::string_view color);
+void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
+void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index);
 void EmitBindlessImageAtomicIAdd32(EmitContext&);
 void EmitBindlessImageAtomicSMin32(EmitContext&);
 void EmitBindlessImageAtomicUMin32(EmitContext&);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
index 38419f88f..88c1d4c5e 100755
--- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -78,6 +78,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
     ctx.AddU32("{}=uint({}*{});", inst, a, b);
 }
 
+void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+    ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b); // divide as int, store as uint
+}
+
+void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+    ctx.AddU32("{}={}/{};", inst, a, b); // operands are emitted as-is, without int() casts
+}
+
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
     ctx.AddU32("{}=uint(-({}));", inst, value);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2885e6799..7230f85de 100755
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -7,11 +7,14 @@
 #include <climits>
 #include <string_view>
 
+#include <boost/container/static_vector.hpp>
+
 #include <fmt/format.h>
 
 #include "common/common_types.h"
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/emit_context.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
@@ -456,8 +459,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
 
 EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
                          IR::Program& program, Bindings& bindings)
-    : Sirit::Module(profile_.supported_spirv), profile{profile_},
-      runtime_info{runtime_info_}, stage{program.stage} {
+    : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_},
+      stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index},
+      image_rescaling_index{bindings.image_scaling_index} {
     const bool is_unified{profile.unified_descriptor_binding};
     u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
     u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
@@ -474,10 +478,11 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     DefineStorageBuffers(program.info, storage_binding);
     DefineTextureBuffers(program.info, texture_binding);
     DefineImageBuffers(program.info, image_binding);
-    DefineTextures(program.info, texture_binding);
-    DefineImages(program.info, image_binding);
+    DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
+    DefineImages(program.info, image_binding, bindings.image_scaling_index);
     DefineAttributeMemAccess(program.info);
     DefineGlobalMemoryFunctions(program.info);
+    DefineRescalingInput(program.info);
 }
 
 EmitContext::~EmitContext() = default;
@@ -920,6 +925,73 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
         define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
 }
 
+// Declares the rescaling input, but only for shaders flagged as reading it.
+void EmitContext::DefineRescalingInput(const Info& info) {
+    if (info.uses_rescaling_uniform && profile.unified_descriptor_binding) {
+        DefineRescalingInputPushConstant();
+        return;
+    }
+    if (info.uses_rescaling_uniform) {
+        DefineRescalingInputUniformConstant();
+    }
+}
+
+void EmitContext::DefineRescalingInputPushConstant() { // declares the "ResolutionInfo" block
+    boost::container::static_vector<Id, 3> members{}; // at most three struct members
+    u32 member_index{0}; // running index of the next struct member
+
+    rescaling_textures_type = TypeArray(U32[1], Const(4u)); // array of four U32[1] elements
+    Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u);
+    members.push_back(rescaling_textures_type);
+    rescaling_textures_member_index = member_index++;
+
+    rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS));
+    Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u);
+    members.push_back(rescaling_images_type);
+    rescaling_images_member_index = member_index++;
+
+    if (stage != Stage::Compute) { // compute stages get no down factor member
+        members.push_back(F32[1]);
+        rescaling_downfactor_member_index = member_index++;
+    }
+    const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))};
+    Decorate(push_constant_struct, spv::Decoration::Block);
+    Name(push_constant_struct, "ResolutionInfo");
+
+    MemberDecorate(push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset,
+                   static_cast<u32>(offsetof(RescalingLayout, rescaling_textures)));
+    MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures");
+
+    MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset,
+                   static_cast<u32>(offsetof(RescalingLayout, rescaling_images)));
+    MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images");
+
+    if (stage != Stage::Compute) { // member offsets mirror the fields of RescalingLayout
+        MemberDecorate(push_constant_struct, rescaling_downfactor_member_index,
+                       spv::Decoration::Offset,
+                       static_cast<u32>(offsetof(RescalingLayout, down_factor)));
+        MemberName(push_constant_struct, rescaling_downfactor_member_index, "down_factor");
+    }
+    const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)};
+    rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant);
+    Name(rescaling_push_constants, "rescaling_push_constants");
+
+    if (profile.supported_spirv >= 0x00010400) { // version word 0x00010400 is SPIR-V 1.4
+        interfaces.push_back(rescaling_push_constants);
+    }
+}
+
+// Declares the rescaling data as an F32[4] UniformConstant variable decorated with location 0.
+void EmitContext::DefineRescalingInputUniformConstant() {
+    constexpr spv::StorageClass storage_class{spv::StorageClass::UniformConstant};
+    const Id vector_pointer{TypePointer(storage_class, F32[4])};
+    rescaling_uniform_constant = AddGlobalVariable(vector_pointer, storage_class);
+    Decorate(rescaling_uniform_constant, spv::Decoration::Location, 0u);
+    if (profile.supported_spirv >= 0x00010400) {
+        interfaces.push_back(rescaling_uniform_constant);
+    }
+}
+
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     if (info.constant_buffer_descriptors.empty()) {
         return;
@@ -1108,7 +1180,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
     }
 }
 
-void EmitContext::DefineTextures(const Info& info, u32& binding) {
+void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) {
     textures.reserve(info.texture_descriptors.size());
     for (const TextureDescriptor& desc : info.texture_descriptors) {
         const Id image_type{ImageType(*this, desc)};
@@ -1130,13 +1202,14 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
             interfaces.push_back(id);
         }
         ++binding;
+        ++scaling_index;
     }
     if (info.uses_atomic_image_u32) {
         image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
     }
 }
 
-void EmitContext::DefineImages(const Info& info, u32& binding) {
+void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) {
     images.reserve(info.image_descriptors.size());
     for (const ImageDescriptor& desc : info.image_descriptors) {
         if (desc.count != 1) {
@@ -1157,6 +1230,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) {
             interfaces.push_back(id);
         }
         ++binding;
+        ++scaling_index;
     }
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 847d0c0e6..fb24490de 100755
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -235,6 +235,16 @@ public:
     Id indexed_load_func{};
     Id indexed_store_func{};
 
+    Id rescaling_uniform_constant{};
+    Id rescaling_push_constants{};
+    Id rescaling_textures_type{};
+    Id rescaling_images_type{};
+    u32 rescaling_textures_member_index{};
+    u32 rescaling_images_member_index{};
+    u32 rescaling_downfactor_member_index{};
+    u32 texture_rescaling_index{};
+    u32 image_rescaling_index{};
+
     Id local_memory{};
 
     Id shared_memory_u8{};
@@ -299,10 +309,13 @@ private:
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextureBuffers(const Info& info, u32& binding);
     void DefineImageBuffers(const Info& info, u32& binding);
-    void DefineTextures(const Info& info, u32& binding);
-    void DefineImages(const Info& info, u32& binding);
+    void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
+    void DefineImages(const Info& info, u32& binding, u32& scaling_index);
     void DefineAttributeMemAccess(const Info& info);
     void DefineGlobalMemoryFunctions(const Info& info);
+    void DefineRescalingInput(const Info& info);
+    void DefineRescalingInputPushConstant();
+    void DefineRescalingInputUniformConstant();
 
     void DefineInputs(const IR::Program& program);
     void DefineOutputs(const IR::Program& program);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index db0c935fe..4b25534ce 100755
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -16,6 +16,19 @@
 
 namespace Shader::Backend::SPIRV {
 
+constexpr u32 NUM_TEXTURE_SCALING_WORDS = 4; // u32 words in RescalingLayout::rescaling_textures
+constexpr u32 NUM_IMAGE_SCALING_WORDS = 2; // u32 words in RescalingLayout::rescaling_images
+constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
+    NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS;
+
+struct RescalingLayout { // rescaling push constants; offsetof() of members feeds SPIR-V Offset
+    alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures; // 1 bit per index
+    alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images; // 1 bit per index
+    alignas(16) u32 down_factor; // loaded as F32 by EmitResolutionDownFactor
+};
+constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
+constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
+
 [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
                                          IR::Program& program, Bindings& bindings);
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 6f60c6574..b1e14ae1b 100755
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -542,6 +542,18 @@ Id EmitYDirection(EmitContext& ctx) {
     return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
 }
 
+Id EmitResolutionDownFactor(EmitContext& ctx) {
+    if (!ctx.profile.unified_descriptor_binding) {
+        // No unified bindings: the factor is component 2 of the F32[4] rescaling uniform.
+        const Id uniform{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
+        return ctx.OpCompositeExtract(ctx.F32[1], uniform, 2u);
+    }
+    const Id f32_pointer{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])};
+    const Id member{ctx.Const(ctx.rescaling_downfactor_member_index)};
+    const Id factor_pointer{ctx.OpAccessChain(f32_pointer, ctx.rescaling_push_constants, member)};
+    return ctx.OpLoad(ctx.F32[1], factor_pointer);
+}
+
 Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
     const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
     return ctx.OpLoad(ctx.U32[1], pointer);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 1d5364309..4d168a96d 100755
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -224,6 +224,36 @@ Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx
     Decorate(ctx, inst, sample);
     return ctx.OpCompositeExtract(result_type, sample, 1U);
 }
+
+// Emits a U1: bit (index + base_index) of u32 array member `member_index` of the push constants.
+Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_index) {
+    const Id u32_ptr{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])};
+    Id word_idx{}; // index of the 32-bit word that holds the bit
+    Id bit_mask{}; // single-bit mask inside that word
+    if (index.IsImmediate()) {
+        // A constant mask with BitwiseAnd lets Nvidia OpenGL use LOP32I.NZ, not BFE+ISETP.
+        const u32 index_value{index.U32() + base_index};
+        word_idx = ctx.Const(index_value / 32);
+        bit_mask = ctx.Const(1u << (index_value % 32));
+    } else { // dynamic index: the word must still be loaded, so derive both at run time
+        Id index_value{ctx.Def(index)};
+        if (base_index != 0) {
+            index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index));
+        }
+        word_idx = ctx.OpShiftRightLogical(ctx.U32[1], index_value, ctx.Const(5u));
+        const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))};
+        bit_mask = ctx.OpShiftLeftLogical(ctx.U32[1], ctx.Const(1u), bit_index);
+    }
+    const Id ptr{ctx.OpAccessChain(u32_ptr, ctx.rescaling_push_constants, member_index, word_idx)};
+    const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ptr), bit_mask)};
+    return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value);
+}
+
+Id BitTest(EmitContext& ctx, Id mask, Id bit) { // U1 result: is bit number `bit` of `mask` set?
+    const Id shifted{ctx.OpShiftRightLogical(ctx.U32[1], mask, bit)}; // mask >> bit
+    const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; // & 1
+    return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value);
+}
 } // Anonymous namespace
 
 Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -470,4 +500,28 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
     ctx.OpImageWrite(Image(ctx, index, info), coords, color);
 }
 
+Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) {
+    if (!ctx.profile.unified_descriptor_binding) {
+        // No unified bindings: test the bits of component 0 of the F32[4] rescaling uniform.
+        const Id uniform{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
+        const Id packed{ctx.OpCompositeExtract(ctx.F32[1], uniform, 0u)};
+        const Id bits{ctx.OpBitcast(ctx.U32[1], packed)};
+        return BitTest(ctx, bits, ctx.Def(index));
+    }
+    const Id textures_member{ctx.Const(ctx.rescaling_textures_member_index)};
+    return IsScaled(ctx, index, textures_member, ctx.texture_rescaling_index);
+}
+
+Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index) {
+    if (!ctx.profile.unified_descriptor_binding) {
+        // No unified bindings: test the bits of component 1 of the F32[4] rescaling uniform.
+        const Id uniform{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)};
+        const Id packed{ctx.OpCompositeExtract(ctx.F32[1], uniform, 1u)};
+        const Id bits{ctx.OpBitcast(ctx.U32[1], packed)};
+        return BitTest(ctx, bits, ctx.Def(index));
+    }
+    const Id images_member{ctx.Const(ctx.rescaling_images_member_index)};
+    return IsScaled(ctx, index, images_member, ctx.image_rescaling_index);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index c9db1c164..6cd22dd3e 100755
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -75,6 +75,7 @@ Id EmitInvocationId(EmitContext& ctx);
 Id EmitSampleId(EmitContext& ctx);
 Id EmitIsHelperInvocation(EmitContext& ctx);
 Id EmitYDirection(EmitContext& ctx);
+Id EmitResolutionDownFactor(EmitContext& ctx);
 Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
 void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
 Id EmitUndefU1(EmitContext& ctx);
@@ -283,6 +284,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
 Id EmitISub32(EmitContext& ctx, Id a, Id b);
 Id EmitISub64(EmitContext& ctx, Id a, Id b);
 Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
+Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
 Id EmitINeg32(EmitContext& ctx, Id value);
 Id EmitINeg64(EmitContext& ctx, Id value);
 Id EmitIAbs32(EmitContext& ctx, Id value);
@@ -510,6 +513,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
                      Id derivates, Id offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
+Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
+Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index);
 Id EmitBindlessImageAtomicIAdd32(EmitContext&);
 Id EmitBindlessImageAtomicSMin32(EmitContext&);
 Id EmitBindlessImageAtomicUMin32(EmitContext&);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 3501d7495..50277eec3 100755
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -72,6 +72,14 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpIMul(ctx.U32[1], a, b);
 }
 
+Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { // backend for IR opcode SDiv32
+    return ctx.OpSDiv(ctx.U32[1], a, b); // SPIR-V OpSDiv with the U32 scalar result type
+}
+
+Id EmitUDiv32(EmitContext& ctx, Id a, Id b) { // backend for IR opcode UDiv32
+    return ctx.OpUDiv(ctx.U32[1], a, b); // SPIR-V OpUDiv with the U32 scalar result type
+}
+
 Id EmitINeg32(EmitContext& ctx, Id value) {
     return ctx.OpSNegate(ctx.U32[1], value);
 }
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index 7c08b25ce..974efa4a0 100755
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -22,6 +22,11 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
     PrependNewInst(end(), op, args);
 }
 
+Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) {
+    // The pool creates the copy of base_inst; it is then linked in before insertion_point.
+    return instructions.insert(insertion_point, *inst_pool->Create(base_inst));
+}
+
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
                                       std::initializer_list<Value> args, u32 flags) {
     Inst* const inst{inst_pool->Create(op, flags)};
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 7e134b4c7..a56155ef4 100755
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -40,6 +40,9 @@ public:
     /// Appends a new instruction to the end of this basic block.
     void AppendNewInst(Opcode op, std::initializer_list<Value> args);
 
+    /// Prepends a copy of an instruction to this basic block before the insertion point.
+    iterator PrependNewInst(iterator insertion_point, const Inst& base_inst);
+
     /// Prepends a new instruction to this basic block before the insertion point.
     iterator PrependNewInst(iterator insertion_point, Opcode op,
                             std::initializer_list<Value> args = {}, u32 flags = 0);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 13159a68d..356f889ac 100755
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -375,6 +375,10 @@ F32 IREmitter::YDirection() {
     return Inst<F32>(Opcode::YDirection);
 }
 
+F32 IREmitter::ResolutionDownFactor() {
+    return Inst<F32>(Opcode::ResolutionDownFactor); // no operands, F32 result
+}
+
 U32 IREmitter::LaneId() {
     return Inst<U32>(Opcode::LaneId);
 }
@@ -1141,6 +1145,10 @@ U32 IREmitter::IMul(const U32& a, const U32& b) {
     return Inst<U32>(Opcode::IMul32, a, b);
 }
 
+U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { // 32-bit integer division
+    return Inst<U32>(is_signed ? Opcode::SDiv32 : Opcode::UDiv32, a, b); // opcode by signedness
+}
+
 U32U64 IREmitter::INeg(const U32U64& value) {
     switch (value.Type()) {
     case Type::U32:
@@ -1938,6 +1946,14 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
     return Inst(op, Flags{info}, handle, coords, value);
 }
 
+U1 IREmitter::IsTextureScaled(const U32& index) {
+    return Inst<U1>(Opcode::IsTextureScaled, index); // the rescaling pass passes a descriptor index
+}
+
+U1 IREmitter::IsImageScaled(const U32& index) {
+    return Inst<U1>(Opcode::IsImageScaled, index); // the rescaling pass passes a descriptor index
+}
+
 U1 IREmitter::VoteAll(const U1& value) {
     return Inst<U1>(Opcode::VoteAll, value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 1b89ca5a0..13eefa88b 100755
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -102,6 +102,8 @@ public:
     [[nodiscard]] U1 IsHelperInvocation();
     [[nodiscard]] F32 YDirection();
 
+    [[nodiscard]] F32 ResolutionDownFactor();
+
     [[nodiscard]] U32 LaneId();
 
     [[nodiscard]] U32 LoadGlobalU8(const U64& address);
@@ -207,6 +209,7 @@ public:
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+    [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
     [[nodiscard]] U32 IAbs(const U32& value);
     [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
@@ -356,6 +359,10 @@ public:
                                        TextureInstInfo info);
     [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
                                             const Value& value, TextureInstInfo info);
+
+    [[nodiscard]] U1 IsTextureScaled(const U32& index);
+    [[nodiscard]] U1 IsImageScaled(const U32& index);
+
     [[nodiscard]] U1 VoteAll(const U1& value);
     [[nodiscard]] U1 VoteAny(const U1& value);
     [[nodiscard]] U1 VoteEqual(const U1& value);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 3dfa5a880..25795c179 100755
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -46,6 +46,17 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
     }
 }
 
+// Copy constructor: duplicates the opcode, flags and arguments of a non-phi instruction.
+Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} {
+    if (base.op == Opcode::Phi) {
+        throw NotImplementedException("Copying phi node");
+    }
+    std::construct_at(&args);
+    for (size_t arg = 0; arg < base.NumArgs(); ++arg) {
+        SetArg(arg, base.Arg(arg));
+    }
+}
+
 Inst::~Inst() {
     if (op == Opcode::Phi) {
         std::destroy_at(&phi_args);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index d91098c80..6929919df 100755
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -62,6 +62,7 @@ OPCODE(InvocationId,                                        U32,
 OPCODE(SampleId,                                            U32,                                                                                            )
 OPCODE(IsHelperInvocation,                                  U1,                                                                                             )
 OPCODE(YDirection,                                          F32,                                                                                            )
+OPCODE(ResolutionDownFactor,                                F32,                                                                                            )
 
 // Undefined
 OPCODE(UndefU1,                                             U1,                                                                                             )
@@ -286,6 +287,8 @@ OPCODE(IAdd64,                                              U64,            U64,
 OPCODE(ISub32,                                              U32,            U32,            U32,                                                            )
 OPCODE(ISub64,                                              U64,            U64,            U64,                                                            )
 OPCODE(IMul32,                                              U32,            U32,            U32,                                                            )
+OPCODE(SDiv32,                                              U32,            U32,            U32,                                                            )
+OPCODE(UDiv32,                                              U32,            U32,            U32,                                                            )
 OPCODE(INeg32,                                              U32,            U32,                                                                            )
 OPCODE(INeg64,                                              U64,            U64,                                                                            )
 OPCODE(IAbs32,                                              U32,            U32,                                                                            )
@@ -490,6 +493,9 @@ OPCODE(ImageGradient,                                       F32x4,          Opaq
 OPCODE(ImageRead,                                           U32x4,          Opaque,         Opaque,                                                         )
 OPCODE(ImageWrite,                                          Void,           Opaque,         Opaque,         U32x4,                                          )
 
+OPCODE(IsTextureScaled,                                     U1,             U32,                                                                            )
+OPCODE(IsImageScaled,                                       U1,             U32,                                                                            )
+
 // Atomic Image operations
 
 OPCODE(BindlessImageAtomicIAdd32,                           U32,            U32,            Opaque,            U32,                                         )
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 334bb47aa..8cddde231 100755
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -116,10 +116,10 @@ public:
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
     explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
+    explicit Inst(const Inst& base);
     ~Inst();
 
     Inst& operator=(const Inst&) = delete;
-    Inst(const Inst&) = delete;
 
     Inst& operator=(Inst&&) = delete;
     Inst(Inst&&) = delete;
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 012d55357..5caf225a6 100755
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -177,6 +177,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     Optimization::TexturePass(env, program);
 
     Optimization::ConstantPropagationPass(program);
+
+    if (Settings::values.resolution_info.active) {
+        Optimization::RescalingPass(program);
+    }
     Optimization::DeadCodeEliminationPass(program);
     if (Settings::values.renderer_debug) {
         Optimization::VerificationPass(program);
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index f69e1c9cc..1e476d83d 100755
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -430,6 +430,11 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::IsHelperInvocation:
         info.uses_is_helper_invocation = true;
         break;
+    case IR::Opcode::ResolutionDownFactor:
+    case IR::Opcode::IsTextureScaled:
+    case IR::Opcode::IsImageScaled:
+        info.uses_rescaling_uniform = true;
+        break;
     case IR::Opcode::LaneId:
         info.uses_subgroup_invocation_id = true;
         break;
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 2f89b1ea0..f877c7ba0 100755
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -19,6 +19,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
 void LowerInt64ToInt32(IR::Program& program);
+void RescalingPass(IR::Program& program);
 void SsaRewritePass(IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program);
 void VerificationPass(const IR::Program& program);
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
new file mode 100755
index 000000000..a5fa4ee83
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -0,0 +1,295 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "common/settings.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) { // true for the two 2D types only
+    switch (type) {
+    case TextureType::Color1D:
+    case TextureType::ColorArray1D:
+    case TextureType::Color3D:
+    case TextureType::ColorCube:
+    case TextureType::ColorArrayCube:
+    case TextureType::Buffer:
+        return false;
+    case TextureType::Color2D:
+    case TextureType::ColorArray2D:
+        return true;
+    }
+    return false; // reached only for a value that is not listed above
+}
+
+// Marking step that RescalingPass runs before Visit on fragment programs. For a shuffle whose
+// first argument is BitCastU32F32(GetAttribute(PositionX or PositionY)), the GetAttribute has
+// its flags set to 0xDEADBEEF; Visit skips GetAttribute instructions carrying that value.
+void VisitMark(const IR::Inst& inst) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::ShuffleIndex:
+    case IR::Opcode::ShuffleUp:
+    case IR::Opcode::ShuffleDown:
+    case IR::Opcode::ShuffleButterfly:
+        break;
+    default:
+        // Only the four shuffle opcodes are inspected.
+        return;
+    }
+    const IR::Value shuffled{inst.Arg(0)};
+    if (shuffled.IsImmediate()) {
+        return;
+    }
+    const IR::Inst* const cast{shuffled.InstRecursive()};
+    if (cast->GetOpcode() != IR::Opcode::BitCastU32F32) {
+        return;
+    }
+    const IR::Value cast_source{cast->Arg(0)};
+    if (cast_source.IsImmediate()) {
+        return;
+    }
+    IR::Inst* const producer{cast_source.InstRecursive()};
+    if (producer->GetOpcode() != IR::Opcode::GetAttribute) {
+        return;
+    }
+    const IR::Attribute attr{producer->Arg(0).Attribute()};
+    // Tag the read so that Visit does not patch it.
+    if (attr == IR::Attribute::PositionX || attr == IR::Attribute::PositionY) {
+        producer->SetFlags<u32>(0xDEADBEEF);
+    }
+}
+
+// Redirects uses of a frag-coord GetAttribute to a fresh read multiplied by the down factor.
+void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    const IR::F32 factor{ir.ResolutionDownFactor()};
+    const IR::F32 raw_coord{ir.GetAttribute(inst.Arg(0).Attribute())};
+    inst.ReplaceUsesWith(IR::F32{ir.FPMul(raw_coord, factor)});
+}
+
+[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
+    IR::U32 scaled_value{value}; // becomes (value * up_scale) >> down_shift
+    if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
+        scaled_value = ir.IMul(scaled_value, ir.Imm32(up_scale)); // skipped when up_scale is 1
+    }
+    if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) {
+        scaled_value = ir.ShiftRightArithmetic(scaled_value, ir.Imm32(down_shift));
+    }
+    return IR::U32{ir.Select(is_scaled, scaled_value, value)}; // value itself when not scaled
+}
+
+[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
+                               const IR::Attribute attrib) { // base + (coord - floor) if scaled
+    const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)};
+    const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), up_factor)}; // value * up
+    const IR::F32 frag_coord{ir.GetAttribute(attrib)}; // coord: fresh read of `attrib`
+    const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)};
+    const IR::F32 floor{ir.FPMul(up_factor, ir.FPFloor(ir.FPMul(frag_coord, down_factor)))};
+    const IR::F16F32F64 deviation{ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor)))};
+    return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)};
+}
+
+[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
+    IR::U32 scaled_value{value}; // becomes (value << down_shift) / up_scale
+    if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) {
+        scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift));
+    }
+    if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
+        scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale)); // unsigned: IDiv's default
+    }
+    return IR::U32{ir.Select(is_scaled, scaled_value, value)}; // value itself when not scaled
+}
+
+// Patches ImageQueryDimensions for rescalable (2D and 2D array) textures so that the shader
+// sees downscaled sizes: components 0 and 1 of the result go through DownScale, which only
+// alters them when IsTextureScaled is true for the descriptor. Components 2 and 3 are
+// forwarded unchanged.
+void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
+    const auto info{inst.Flags<IR::TextureInstInfo>()};
+    if (!IsTextureTypeRescalable(info.type)) {
+        // Leave before emitting anything, like the other patch helpers in this pass do, so
+        // that no unused IsTextureScaled is created for texture types that are not patched.
+        return;
+    }
+
+    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+    IR::IREmitter ir{block, it};
+    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
+
+    // A copy of the query is placed before `inst`; the replacement value is built from the
+    // copy because every use of `inst` itself is redirected to that replacement below.
+    const IR::Value new_inst{&*block.PrependNewInst(it, inst)};
+    const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})};
+    const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})};
+
+    const IR::Value replacement{ir.CompositeConstruct(
+        width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))};
+    inst.ReplaceUsesWith(replacement);
+}
+
+// Applies Scale to components 0 and 1 of the integer vector held in argument `index` of `inst`.
+// NOTE(review): the ImageFetch callers also pass argument 2 (the offset) here; confirm that it
+// really has a component 2 for ColorArray2D, because that component is extracted below.
+void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
+                           size_t index) {
+    const IR::Value vector{inst.Arg(index)};
+    if (vector.IsEmpty()) {
+        return;
+    }
+    const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(vector, 0)})};
+    const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(vector, 1)})};
+    switch (inst.Flags<IR::TextureInstInfo>().type) {
+    case TextureType::ColorArray2D:
+        inst.SetArg(index, ir.CompositeConstruct(x, y, IR::U32{ir.CompositeExtract(vector, 2)}));
+        break;
+    case TextureType::Color2D:
+        inst.SetArg(index, ir.CompositeConstruct(x, y));
+        break;
+    case TextureType::Color1D:
+    case TextureType::ColorArray1D:
+    case TextureType::Color3D:
+    case TextureType::ColorCube:
+    case TextureType::ColorArrayCube:
+    case TextureType::Buffer:
+        // These types keep the argument as it is.
+        break;
+    }
+}
+
+// Runs the first two components of the coordinate vector (argument 1 of `inst`) through
+// SubScale, pairing component 0 with PositionX and component 1 with PositionY.
+void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
+    const IR::Value coords{inst.Arg(1)};
+    const IR::U32 x{ir.CompositeExtract(coords, 0)};
+    const IR::U32 y{ir.CompositeExtract(coords, 1)};
+    const IR::U32 sub_x{SubScale(ir, is_scaled, x, IR::Attribute::PositionX)};
+    const IR::U32 sub_y{SubScale(ir, is_scaled, y, IR::Attribute::PositionY)};
+    switch (inst.Flags<IR::TextureInstInfo>().type) {
+    case TextureType::ColorArray2D: {
+        const IR::U32 z{ir.CompositeExtract(coords, 2)};
+        inst.SetArg(1, ir.CompositeConstruct(sub_x, sub_y, z));
+        break;
+    }
+    case TextureType::Color2D:
+        inst.SetArg(1, ir.CompositeConstruct(sub_x, sub_y));
+        break;
+    case TextureType::Color1D:
+    case TextureType::ColorArray1D:
+    case TextureType::Color3D:
+    case TextureType::ColorCube:
+    case TextureType::ColorArrayCube:
+    case TextureType::Buffer:
+        // These types keep the coordinate argument as it is.
+        break;
+    }
+}
+
+// ImageFetch patch that Visit uses for fragment programs: sub-scales the coordinates.
+void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
+    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
+    if (IsTextureTypeRescalable(tex_info.type)) {
+        IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+        const IR::U1 scaled{ir.IsTextureScaled(ir.Imm32(tex_info.descriptor_index))};
+        SubScaleCoord(ir, inst, scaled);
+        // Argument 2 holds the ImageFetch offset, which is scaled as well.
+        ScaleIntegerComposite(ir, inst, scaled, 2);
+    }
+}
+
+// ImageRead patch that Visit uses for fragment programs: sub-scales the coordinates.
+void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
+    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
+    if (IsTextureTypeRescalable(tex_info.type)) {
+        IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+        const IR::U1 scaled{ir.IsImageScaled(ir.Imm32(tex_info.descriptor_index))};
+        SubScaleCoord(ir, inst, scaled);
+    }
+}
+
+// ImageFetch patch that Visit uses outside fragment programs: scales coordinates and offset.
+void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
+    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
+    if (IsTextureTypeRescalable(tex_info.type)) {
+        IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+        const IR::U1 scaled{ir.IsTextureScaled(ir.Imm32(tex_info.descriptor_index))};
+        ScaleIntegerComposite(ir, inst, scaled, 1);
+        // Argument 2 holds the ImageFetch offset.
+        ScaleIntegerComposite(ir, inst, scaled, 2);
+    }
+}
+
+// ImageRead patch that Visit uses outside fragment programs: scales the coordinates.
+void PatchImageRead(IR::Block& block, IR::Inst& inst) {
+    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
+    if (IsTextureTypeRescalable(tex_info.type)) {
+        IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+        const IR::U1 scaled{ir.IsImageScaled(ir.Imm32(tex_info.descriptor_index))};
+        ScaleIntegerComposite(ir, inst, scaled, 1);
+    }
+}
+
+// Applies the rescaling patch, if any, that matches the opcode of `inst`:
+//  - GetAttribute of PositionX/PositionY in fragment programs, unless flagged 0xDEADBEEF
+//  - ImageQueryDimensions
+//  - ImageFetch and ImageRead, with a fragment-specific variant of each
+void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
+    const bool is_fragment_shader{program.stage == Stage::Fragment};
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::GetAttribute: {
+        const IR::Attribute attr{inst.Arg(0).Attribute()};
+        const bool is_position_xy{attr == IR::Attribute::PositionX ||
+                                  attr == IR::Attribute::PositionY};
+        // 0xDEADBEEF is the flag value VisitMark writes on reads it wants left alone.
+        if (is_position_xy && is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) {
+            PatchFragCoord(block, inst);
+        }
+        break;
+    }
+    case IR::Opcode::ImageQueryDimensions:
+        PatchImageQueryDimensions(block, inst);
+        break;
+    case IR::Opcode::ImageFetch:
+        if (!is_fragment_shader) {
+            PatchImageFetch(block, inst);
+        } else {
+            SubScaleImageFetch(block, inst);
+        }
+        break;
+    case IR::Opcode::ImageRead:
+        if (!is_fragment_shader) {
+            PatchImageRead(block, inst);
+        } else {
+            SubScaleImageRead(block, inst);
+        }
+        break;
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void RescalingPass(IR::Program& program) {
+    if (program.stage == Stage::Fragment) {
+        // Fragment programs get a marking sweep over every instruction first.
+        for (IR::Block* const marked_block : program.post_order_blocks) {
+            for (IR::Inst& candidate : marked_block->Instructions()) {
+                VisitMark(candidate);
+            }
+        }
+    }
+    for (IR::Block* const current : program.post_order_blocks) {
+        for (IR::Inst& instruction : current->Instructions()) {
+            Visit(program, *current, instruction);
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 4ef4dbd40..9f375c30e 100755
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -172,6 +172,7 @@ struct Info {
     bool uses_global_memory{};
     bool uses_atomic_image_u32{};
     bool uses_shadow_lod{};
+    bool uses_rescaling_uniform{};
 
     IR::Type used_constant_buffer_types{};
     IR::Type used_storage_buffer_types{};
@@ -190,4 +191,13 @@ struct Info {
     ImageDescriptors image_descriptors;
 };
 
+template <typename Descriptors>
+u32 NumDescriptors(const Descriptors& descriptors) {
+    u32 total{}; // Running sum of the `count` member of every descriptor in the range
+    for (const auto& descriptor : descriptors) {
+        total += descriptor.count;
+    }
+    return total;
+}
+
 } // namespace Shader
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 269db21a5..4c4273342 100755
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -130,6 +130,8 @@ add_library(video_core STATIC
     renderer_vulkan/vk_descriptor_pool.h
     renderer_vulkan/vk_fence_manager.cpp
     renderer_vulkan/vk_fence_manager.h
+    renderer_vulkan/vk_fsr.cpp
+    renderer_vulkan/vk_fsr.h
     renderer_vulkan/vk_graphics_pipeline.cpp
     renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d350c9b36..43bed63ac 100755
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -853,12 +853,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     }
     if constexpr (USE_MEMORY_MAPS) {
         auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+        runtime.PreCopyBarrier();
         for (auto& [copy, buffer_id] : downloads) {
             // Have in mind the staging buffer offset for the copy
             copy.dst_offset += download_staging.offset;
             const std::array copies{copy};
-            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
+            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
         }
+        runtime.PostCopyBarrier();
         runtime.Finish();
         for (const auto& [copy, buffer_id] : downloads) {
             const Buffer& buffer = slot_buffers[buffer_id];
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index f0d545f90..d63ad5a35 100755
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -29,6 +29,8 @@ enum : u8 {
     ColorBuffer6,
     ColorBuffer7,
     ZetaBuffer,
+    RescaleViewports,
+    RescaleScissors,
 
     VertexBuffers,
     VertexBuffer0,
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 20d748c12..d779a967a 100755
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -1,3 +1,11 @@
+set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
+
+set(GLSL_INCLUDES
+    fidelityfx_fsr.comp
+    ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h
+    ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h
+)
+
 set(SHADER_FILES
     astc_decoder.comp
     block_linear_unswizzle_2d.comp
@@ -5,14 +13,25 @@ set(SHADER_FILES
     convert_depth_to_float.frag
     convert_float_to_depth.frag
     full_screen_triangle.vert
+    fxaa.frag
+    fxaa.vert
     opengl_copy_bc4.comp
     opengl_present.frag
     opengl_present.vert
+    opengl_present_scaleforce.frag
     pitch_unswizzle.comp
+    present_bicubic.frag
+    present_gaussian.frag
     vulkan_blit_color_float.frag
     vulkan_blit_depth_stencil.frag
+    vulkan_fidelityfx_fsr_easu_fp16.comp
+    vulkan_fidelityfx_fsr_easu_fp32.comp
+    vulkan_fidelityfx_fsr_rcas_fp16.comp
+    vulkan_fidelityfx_fsr_rcas_fp32.comp
     vulkan_present.frag
     vulkan_present.vert
+    vulkan_present_scaleforce_fp16.frag
+    vulkan_present_scaleforce_fp32.frag
     vulkan_quad_indexed.comp
     vulkan_uint8.comp
 )
@@ -76,7 +95,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
             OUTPUT
                 ${SPIRV_HEADER_FILE}
             COMMAND
-                ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
+                ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
             MAIN_DEPENDENCY
                 ${SOURCE_FILE}
         )
@@ -84,9 +103,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
     endif()
 endforeach()
 
+set(SHADER_SOURCES ${SHADER_FILES})
+list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
+
 add_custom_target(host_shaders
     DEPENDS
         ${SHADER_HEADERS}
     SOURCES
-        ${SHADER_FILES}
+        ${SHADER_SOURCES}
 )
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp
new file mode 100755
index 000000000..6b97f789d
--- /dev/null
+++ b/src/video_core/host_shaders/fidelityfx_fsr.comp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+//!#version 460 core
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+#extension GL_GOOGLE_include_directive : enable
+#extension GL_EXT_shader_explicit_arithmetic_types : require
+
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+layout( push_constant ) uniform constants {
+    uvec4 Const0;
+    uvec4 Const1;
+    uvec4 Const2;
+    uvec4 Const3;
+};
+
+layout(set=0,binding=0) uniform sampler2D InputTexture;
+layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
+
+#define A_GPU 1
+#define A_GLSL 1
+
+#ifndef YUZU_USE_FP16
+    #include "ffx_a.h"
+    // 32-bit float path: sampling callbacks, presumably the ones ffx_fsr1.h looks up by name.
+    #if USE_EASU
+        #define FSR_EASU_F 1
+        AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
+        AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
+        AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
+    #endif
+    #if USE_RCAS
+        #define FSR_RCAS_F 1
+        AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
+        void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} // colour passes unchanged
+    #endif
+#else
+    #define A_HALF
+    #include "ffx_a.h"
+    // 16-bit float path: the same callbacks, with the fetched texels converted to AH4.
+    #if USE_EASU
+        #define FSR_EASU_H 1
+        AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
+        AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
+        AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
+    #endif
+    #if USE_RCAS
+        #define FSR_RCAS_H 1
+        AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
+        void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} // colour passes unchanged
+    #endif
+#endif
+
+#include "ffx_fsr1.h"
+
+void CurrFilter(AU2 pos) { // Stores output pixel `pos` using the compile-time selected filter
+#if USE_BILINEAR
+    AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
+    imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
+#endif
+#if USE_EASU
+    #ifndef YUZU_USE_FP16
+        AF3 c; // filtered RGB; alpha is stored as 1
+        FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
+        imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+    #else
+        AH3 c; // half-precision variant of the same path
+        FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
+        imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+    #endif
+#endif
+#if USE_RCAS
+    #ifndef YUZU_USE_FP16
+        AF3 c; // filtered RGB; alpha is stored as 1
+        FsrRcasF(c.r, c.g, c.b, pos, Const0);
+        imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+    #else
+        AH3 c; // half-precision variant of the same path
+        FsrRcasH(c.r, c.g, c.b, pos, Const0);
+        imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+    #endif
+#endif
+}
+
+layout(local_size_x=64) in; // 64 invocations per workgroup
+void main() {
+    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+    AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
+    CurrFilter(gxy); // each workgroup owns a 16x16 tile; every invocation filters four pixels
+    gxy.x += 8u; // pixel 8 columns to the right
+    CurrFilter(gxy);
+    gxy.y += 8u; // pixel 8 rows down from the previous one
+    CurrFilter(gxy);
+    gxy.x -= 8u; // back to the first column, still 8 rows down
+    CurrFilter(gxy);
+}
diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag
new file mode 100755
index 000000000..02f4068d1
--- /dev/null
+++ b/src/video_core/host_shaders/fxaa.frag
@@ -0,0 +1,76 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Source code is adapted from
+// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/
+
+#version 460
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+layout (location = 0) in vec4 posPos;
+
+layout (location = 0) out vec4 frag_color;
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
+
+const float FXAA_SPAN_MAX = 8.0;
+const float FXAA_REDUCE_MUL = 1.0 / 8.0;
+const float FXAA_REDUCE_MIN = 1.0 / 128.0;
+
+#define FxaaTexLod0(t, p) textureLod(t, p, 0.0)
+#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o)
+
+vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
+    // posPos.xy addresses the centre sample; posPos.zw addresses the NW sample of a 2x2 quad.
+    vec3 rgbNW = FxaaTexLod0(tex, posPos.zw).xyz;
+    vec3 rgbNE = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz;
+    vec3 rgbSW = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz;
+    vec3 rgbSE = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz;
+    vec3 rgbM  = FxaaTexLod0(tex, posPos.xy).xyz;
+    // Luma of every sample, using the weights (0.299, 0.587, 0.114).
+    vec3 luma = vec3(0.299, 0.587, 0.114);
+    float lumaNW = dot(rgbNW, luma);
+    float lumaNE = dot(rgbNE, luma);
+    float lumaSW = dot(rgbSW, luma);
+    float lumaSE = dot(rgbSE, luma);
+    float lumaM  = dot(rgbM,  luma);
+    // Luma range of the five samples; used at the end to accept or reject the wider blur.
+    float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
+    float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
+    // Direction built from the luma differences between the top/bottom and left/right pairs.
+    vec2 dir;
+    dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
+    dir.y =  ((lumaNW + lumaSW) - (lumaNE + lumaSE));
+    // Rescale dir by its smaller component (plus a floor), clamp it to FXAA_SPAN_MAX texels, go to UV.
+    float dirReduce = max(
+        (lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL),
+        FXAA_REDUCE_MIN);
+    float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce);
+    dir = min(vec2( FXAA_SPAN_MAX,  FXAA_SPAN_MAX),
+          max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX),
+          dir * rcpDirMin)) / textureSize(tex, 0);
+    // rgbA: two taps at -1/6 and +1/6 of dir. rgbB: rgbA averaged with taps at -1/2 and +1/2.
+    vec3 rgbA = (1.0 / 2.0) * (
+        FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz +
+        FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz);
+    vec3 rgbB = rgbA * (1.0 / 2.0) + (1.0 / 4.0) * (
+        FxaaTexLod0(tex, posPos.xy + dir * (0.0 / 3.0 - 0.5)).xyz +
+        FxaaTexLod0(tex, posPos.xy + dir * (3.0 / 3.0 - 0.5)).xyz);
+    float lumaB = dot(rgbB, luma);
+    if((lumaB < lumaMin) || (lumaB > lumaMax)) return rgbA; // wide blur left the local luma range
+    return rgbB;
+}
+
+void main() {
+  frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0); // filtered RGB, alpha = 1
+}
diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert
new file mode 100755
index 000000000..ac20c04e9
--- /dev/null
+++ b/src/video_core/host_shaders/fxaa.vert
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460
+
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
+const vec2 vertices[4] =
+    vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));
+
+layout (location = 0) out vec4 posPos;
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 0
+#define VERTEX_ID gl_VertexIndex
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+#define VERTEX_ID gl_VertexID
+
+#endif
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
+
+const float FXAA_SUBPIX_SHIFT = 0;
+
+void main() {
+    const vec2 corner = vertices[VERTEX_ID];
+    gl_Position = vec4(corner, 0.0, 1.0);
+    // xy: coordinate in [0, 1]; zw: the same, moved back by (0.5 + FXAA_SUBPIX_SHIFT) texels.
+    const vec2 uv = (corner + 1.0) / 2.0;
+    posPos = vec4(uv, uv - (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0));
+}
diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag
new file mode 100755
index 000000000..71ff9e1e3
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag
@@ -0,0 +1,130 @@
+// MIT License
+//
+// Copyright (c) 2020 BreadFish64
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
+
+//! #version 460
+
+#extension GL_ARB_separate_shader_objects : enable
+
+#ifdef YUZU_USE_FP16
+
+#extension GL_AMD_gpu_shader_half_float : enable
+#extension GL_NV_gpu_shader5 : enable
+
+#define lfloat float16_t
+#define lvec2 f16vec2
+#define lvec3 f16vec3
+#define lvec4 f16vec4
+
+#else
+
+#define lfloat float
+#define lvec2 vec2
+#define lvec3 vec3
+#define lvec4 vec4
+
+#endif
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+layout (location = 0) in vec2 tex_coord;
+
+layout (location = 0) out vec4 frag_color;
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
+
+const bool ignore_alpha = true;
+
+// Distance between two colours, measured on their difference expressed as Y, Cb and Cr.
+lfloat ColorDist1(lvec4 a, lvec4 b) {
+    // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
+    const lvec3 luma_weights = lvec3(0.2627, 0.6780, 0.0593);
+    const lfloat cb_scale = lfloat(0.5) / (lfloat(1.0) - luma_weights.b);
+    const lfloat cr_scale = lfloat(0.5) / (lfloat(1.0) - luma_weights.r);
+    lvec4 delta = a - b;
+    lfloat luma = dot(delta.rgb, luma_weights);
+    lfloat chroma_b = cb_scale * (delta.b - luma);
+    lfloat chroma_r = cr_scale * (delta.r - luma);
+    lfloat dist = length(lvec3(luma, chroma_b, chroma_r));
+    if (ignore_alpha) {
+        return dist;
+    }
+    return sqrt(a.a * b.a * dist * dist + delta.a * delta.a);
+}
+
+// Distance from `ref` to each of A, B, C and D, packed in that order as x, y, z and w.
+lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
+    lfloat to_a = ColorDist1(ref, A);
+    lfloat to_b = ColorDist1(ref, B);
+    lfloat to_c = ColorDist1(ref, C);
+    lfloat to_d = ColorDist1(ref, D);
+    return lvec4(to_a, to_b, to_c, to_d);
+}
+
+vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { // Samples tex at a colour-guided offset
+    lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
+    lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
+    lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
+    lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
+    lvec4 cc = lvec4(texture(tex, tex_coord));
+    lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
+    lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
+    lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
+    lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
+
+    lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); // distances to the top row and right texel
+    lvec4 offset_br = ColorDist(cc, br, bc, bl, cl); // distances to the bottom row and left texel
+
+    // Calculate how different cc is from the texels around it
+    const lfloat plus_weight = lfloat(1.5);
+    const lfloat cross_weight = lfloat(1.5);
+    lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight));
+
+    if (total_dist == lfloat(0.0)) {
+        return cc; // every neighbour matches the centre, so there is nothing to shift
+    } else {
+        // Add together all the distances with direction taken into account
+        lvec4 tmp = offset_tl - offset_br;
+        lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight;
+
+        // When the image has thin points, they tend to split apart.
+        // This is because the texels all around are different and total_offset reaches into clear areas.
+        // This works pretty well to keep the offset in bounds for these cases.
+        lfloat clamp_val = length(total_offset) / total_dist;
+        vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);
+
+        return texture(tex, tex_coord - final_offset);
+    }
+}
+
+void main() {
+    frag_color = Scaleforce(input_texture, tex_coord); // output the Scaleforce result as-is
+}
diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag
new file mode 100755
index 000000000..902b70c2b
--- /dev/null
+++ b/src/video_core/host_shaders/present_bicubic.frag
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+
+layout (location = 0) in vec2 frag_tex_coord;
+
+layout (location = 0) out vec4 color;
+
+layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
+
+vec4 cubic(float v) { // Four cubic weights for the fractional position v; they sum to 1
+    vec4 shifted = vec4(1.0, 2.0, 3.0, 4.0) - v;
+    vec4 cubed = shifted * shifted * shifted;
+    float w0 = cubed.x;
+    float w1 = cubed.y - 4.0 * cubed.x;
+    float w2 = cubed.z - 4.0 * cubed.y + 6.0 * cubed.x;
+    float w3 = 6.0 - w0 - w1 - w2;
+    return vec4(w0, w1, w2, w3) * (1.0 / 6.0);
+}
+
+vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
+    // Filters with four texture fetches whose positions and blend come from cubic() weights.
+    vec2 texSize = textureSize(textureSampler, 0);
+    vec2 texelSize = 1.0 / texSize;
+
+    // Work in texel units: split the position into a whole texel and a fractional part.
+    texCoords = texCoords * texSize - 0.5;
+    vec2 fraction = fract(texCoords);
+    texCoords -= fraction;
+
+    // Weights per axis, then the summed weight of each tap pair (x pairs in .xy, y in .zw).
+    vec4 weightsX = cubic(fraction.x);
+    vec4 weightsY = cubic(fraction.y);
+    vec4 pairSum = vec4(weightsX.xz + weightsX.yw, weightsY.xz + weightsY.yw);
+
+    // Fetch positions in texel units, converted back to normalized coordinates.
+    vec4 origin = texCoords.xxyy + vec2(-0.5, +1.5).xyxy;
+    vec4 fetchPos = (origin + vec4(weightsX.yw, weightsY.yw) / pairSum) * texelSize.xxyy;
+
+    vec4 tap0 = texture(textureSampler, fetchPos.xz);
+    vec4 tap1 = texture(textureSampler, fetchPos.yz);
+    vec4 tap2 = texture(textureSampler, fetchPos.xw);
+    vec4 tap3 = texture(textureSampler, fetchPos.yw);
+
+    // Blend the fetches by the relative weight of each pair.
+    float blendX = pairSum.x / (pairSum.x + pairSum.y);
+    float blendY = pairSum.z / (pairSum.z + pairSum.w);
+
+    return mix(mix(tap3, tap2, blendX), mix(tap1, tap0, blendX), blendY);
+}
+
+void main() {
+    color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); // alpha forced to 1
+}
diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag
new file mode 100755
index 000000000..72a300dac
--- /dev/null
+++ b/src/video_core/host_shaders/present_gaussian.frag
@@ -0,0 +1,70 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Code adapted from the following sources:
+// - https://learnopengl.com/Advanced-Lighting/Bloom
+// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
+
+#version 460 core
+
+#ifdef VULKAN
+
+#define BINDING_COLOR_TEXTURE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#define BINDING_COLOR_TEXTURE 0
+
+#endif
+
+layout(location = 0) in vec2 frag_tex_coord;
+
+layout(location = 0) out vec4 color;
+
+layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
+
+const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308);
+const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703);
+
+vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) {
+    vec4 sum = vec4(0.0f);
+    for (int tap = 1; tap < 3; tap++) { // tap 0, the centre texel, is not sampled here
+        sum += texture(textureSampler, coord + (vec2(0.0, offset[tap]) * norm)) * weight[tap];
+        sum += texture(textureSampler, coord - (vec2(0.0, offset[tap]) * norm)) * weight[tap];
+    }
+    return sum;
+}
+
+vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) {
+    vec4 sum = vec4(0.0f);
+    for (int tap = 1; tap < 3; tap++) { // tap 0, the centre texel, is not sampled here
+        sum += texture(textureSampler, coord + (vec2(offset[tap], 0.0) * norm)) * weight[tap];
+        sum += texture(textureSampler, coord - (vec2(offset[tap], 0.0) * norm)) * weight[tap];
+    }
+    return sum;
+}
+
+// Sums the weighted taps on both sides of coord along the (1, 1) direction scaled by norm.
+vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
+    vec4 sum = vec4(0.0f);
+    for (int tap = 1; tap < 3; tap++) {
+        vec2 delta = vec2(offset[tap], offset[tap]) * norm;
+        sum += texture(textureSampler, coord + delta) * weight[tap];
+        sum += texture(textureSampler, coord - delta) * weight[tap];
+    }
+    return sum;
+}
+
+void main() {
+    vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; // centre tap
+    vec2 tex_offset = 1.0f / textureSize(color_texture, 0); // one texel in normalized units
+    // Average the horizontal, vertical and both diagonal blurs, then add the centre tap.
+    // TODO(Blinkhawk): This code can be optimized through shader group instructions.
+    vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
+    vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
+    vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
+    vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
+    vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
+    color = vec4(combination + base, 1.0f);
+}
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
new file mode 100755
index 000000000..1c96a7905
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
@@ -0,0 +1,11 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+#define USE_EASU 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
new file mode 100755
index 000000000..f4daff739
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_EASU 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
new file mode 100755
index 000000000..6b6796dd1
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
@@ -0,0 +1,11 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+#define USE_RCAS 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
new file mode 100755
index 000000000..f785eebf3
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_RCAS 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
new file mode 100755
index 000000000..924c03060
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
@@ -0,0 +1,7 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+
+#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
new file mode 100755
index 000000000..a594b83ca
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
@@ -0,0 +1,5 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : enable
+
+#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 187a28e4d..d4dd10bb6 100755
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <span>
 
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
@@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
             .padding = 0,
         };
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
-        glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
-                                        reinterpret_cast<const GLuint*>(&ssbo));
+        glProgramLocalParametersI4uivNV(
+            PROGRAM_LUT[stage],
+            Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
+            reinterpret_cast<const GLuint*>(&ssbo));
     }
 }
 
@@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
             .padding = 0,
         };
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
-        glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
-                                        reinterpret_cast<const GLuint*>(&ssbo));
+        glProgramLocalParametersI4uivNV(
+            GL_COMPUTE_PROGRAM_NV,
+            Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
+            reinterpret_cast<const GLuint*>(&ssbo));
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index aa1cc592f..5c1f21c65 100755
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -19,15 +19,6 @@ using VideoCommon::ImageId;
 constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 16;
 
-template <typename Range>
-u32 AccumulateCount(const Range& range) {
-    u32 num{};
-    for (const auto& desc : range) {
-        num += desc.count;
-    }
-    return num;
-}
-
 size_t ComputePipelineKey::Hash() const noexcept {
     return static_cast<size_t>(
         Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
@@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
     std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
                 uniform_buffer_sizes.begin());
 
-    num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
-    num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+    num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
+    num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
 
-    const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
+    const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)};
     ASSERT(num_textures <= MAX_TEXTURES);
 
-    const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
+    const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)};
     ASSERT(num_images <= MAX_IMAGES);
 
     const bool is_glasm{assembly_program.handle != 0};
-    const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+    const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)};
     use_storage_buffers =
         !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
     writes_global_memory = !use_storage_buffers &&
@@ -88,8 +79,7 @@ void ComputePipeline::Configure() {
     }
     texture_cache.SynchronizeComputeDescriptors();
 
-    std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
-    boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+    boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
     std::array<GLuint, MAX_TEXTURES> samplers;
     std::array<GLuint, MAX_TEXTURES> textures;
     std::array<GLuint, MAX_IMAGES> images;
@@ -119,33 +109,39 @@ void ComputePipeline::Configure() {
         }
         return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
     }};
-    const auto add_image{[&](const auto& desc) {
+    const auto add_image{[&](const auto& desc, bool blacklist) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({
+                .index = handle.first,
+                .blacklist = blacklist,
+                .id = {},
+            });
         }
     }};
     for (const auto& desc : info.texture_buffer_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({handle.first});
             samplers[sampler_binding++] = 0;
         }
     }
-    std::ranges::for_each(info.image_buffer_descriptors, add_image);
+    for (const auto& desc : info.image_buffer_descriptors) {
+        add_image(desc, false);
+    }
     for (const auto& desc : info.texture_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({handle.first});
 
             Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
             samplers[sampler_binding++] = sampler->Handle();
         }
     }
-    std::ranges::for_each(info.image_descriptors, add_image);
-
-    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
-    texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+    for (const auto& desc : info.image_descriptors) {
+        add_image(desc, desc.is_written);
+    }
+    texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
 
     if (assembly_program.handle != 0) {
         program_manager.BindComputeAssemblyProgram(assembly_program.handle);
@@ -161,7 +157,7 @@ void ComputePipeline::Configure() {
             if constexpr (is_image) {
                 is_written = desc.is_written;
             }
-            ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
+            ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
             buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
                                                   image_view.BufferSize(), image_view.format,
                                                   is_written, is_image);
@@ -177,23 +173,45 @@ void ComputePipeline::Configure() {
     buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
     buffer_cache.BindHostComputeBuffers();
 
-    const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
+    const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
+                                                num_image_buffers};
     texture_binding += num_texture_buffers;
     image_binding += num_image_buffers;
 
+    u32 texture_scaling_mask{};
     for (const auto& desc : info.texture_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
-            ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
-            textures[texture_binding++] = image_view.Handle(desc.type);
+            ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
+            textures[texture_binding] = image_view.Handle(desc.type);
+            if (texture_cache.IsRescaling(image_view)) {
+                texture_scaling_mask |= 1u << texture_binding;
+            }
+            ++texture_binding;
         }
     }
+    u32 image_scaling_mask{};
     for (const auto& desc : info.image_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
-            ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+            ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
             if (desc.is_written) {
                 texture_cache.MarkModification(image_view.image_id);
             }
-            images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+            images[image_binding] = image_view.StorageView(desc.type, desc.format);
+            if (texture_cache.IsRescaling(image_view)) {
+                image_scaling_mask |= 1u << image_binding;
+            }
+            ++image_binding;
+        }
+    }
+    if (info.uses_rescaling_uniform) {
+        const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
+        const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
+        if (assembly_program.handle != 0) {
+            glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask,
+                                         float_image_scaling_mask, 0.0f, 0.0f);
+        } else {
+            glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask,
+                               float_image_scaling_mask, 0.0f, 0.0f);
         }
     }
     if (texture_binding != 0) {
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index bccb37a58..f8495896c 100755
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -15,7 +15,7 @@
 #include "video_core/renderer_opengl/gl_shader_util.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/shader_notify.h"
-#include "video_core/texture_cache/texture_cache_base.h"
+#include "video_core/texture_cache/texture_cache.h"
 
 #if defined(_MSC_VER) && defined(NDEBUG)
 #define LAMBDA_FORCEINLINE [[msvc::forceinline]]
@@ -27,6 +27,7 @@ namespace OpenGL {
 namespace {
 using Shader::ImageBufferDescriptor;
 using Shader::ImageDescriptor;
+using Shader::NumDescriptors;
 using Shader::TextureBufferDescriptor;
 using Shader::TextureDescriptor;
 using Tegra::Texture::TexturePair;
@@ -35,15 +36,6 @@ using VideoCommon::ImageId;
 constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 8;
 
-template <typename Range>
-u32 AccumulateCount(const Range& range) {
-    u32 num{};
-    for (const auto& desc : range) {
-        num += desc.count;
-    }
-    return num;
-}
-
 GLenum Stage(size_t stage_index) {
     switch (stage_index) {
     case 0:
@@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline(
             base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
             base_storage_bindings[stage + 1] = base_storage_bindings[stage];
 
-            base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
-            base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
+            base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors);
+            base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors);
         }
         enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
         std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
 
-        const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
+        const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)};
         num_texture_buffers[stage] += num_tex_buffer_bindings;
         num_textures += num_tex_buffer_bindings;
 
-        const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
+        const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)};
         num_image_buffers[stage] += num_img_buffers_bindings;
         num_images += num_img_buffers_bindings;
 
-        num_textures += AccumulateCount(info.texture_descriptors);
-        num_images += AccumulateCount(info.image_descriptors);
-        num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+        num_textures += NumDescriptors(info.texture_descriptors);
+        num_images += NumDescriptors(info.image_descriptors);
+        num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors);
 
         writes_global_memory |= std::ranges::any_of(
             info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
@@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline(
 
 template <typename Spec>
 void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
-    std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
-    std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+    std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
     std::array<GLuint, MAX_TEXTURES> samplers;
-    size_t image_view_index{};
+    size_t views_index{};
     GLsizei sampler_binding{};
 
     texture_cache.SynchronizeGraphicsDescriptors();
@@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
             }
             return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
         }};
-        const auto add_image{[&](const auto& desc) {
+        const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
             for (u32 index = 0; index < desc.count; ++index) {
                 const auto handle{read_handle(desc, index)};
-                image_view_indices[image_view_index++] = handle.first;
+                views[views_index++] = {
+                    .index = handle.first,
+                    .blacklist = blacklist,
+                    .id = {},
+                };
             }
         }};
         if constexpr (Spec::has_texture_buffers) {
             for (const auto& desc : info.texture_buffer_descriptors) {
                 for (u32 index = 0; index < desc.count; ++index) {
                     const auto handle{read_handle(desc, index)};
-                    image_view_indices[image_view_index++] = handle.first;
+                    views[views_index++] = {handle.first};
                     samplers[sampler_binding++] = 0;
                 }
             }
         }
         if constexpr (Spec::has_image_buffers) {
             for (const auto& desc : info.image_buffer_descriptors) {
-                add_image(desc);
+                add_image(desc, false);
             }
         }
         for (const auto& desc : info.texture_descriptors) {
             for (u32 index = 0; index < desc.count; ++index) {
                 const auto handle{read_handle(desc, index)};
-                image_view_indices[image_view_index++] = handle.first;
+                views[views_index++] = {handle.first};
 
                 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
                 samplers[sampler_binding++] = sampler->Handle();
@@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
         }
         if constexpr (Spec::has_images) {
             for (const auto& desc : info.image_descriptors) {
-                add_image(desc);
+                add_image(desc, desc.is_written);
             }
         }
     }};
@@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
     if constexpr (Spec::enabled_stages[4]) {
         config_stage(4);
     }
-    const std::span indices_span(image_view_indices.data(), image_view_index);
-    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+    texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index));
 
     texture_cache.UpdateRenderTargets(false);
     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
 
-    ImageId* texture_buffer_index{image_view_ids.data()};
+    VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
     const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
         size_t index{};
         const auto add_buffer{[&](const auto& desc) {
@@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
                 if constexpr (is_image) {
                     is_written = desc.is_written;
                 }
-                ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+                ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
                 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
                                                        image_view.BufferSize(), image_view.format,
                                                        is_written, is_image);
                 ++index;
-                ++texture_buffer_index;
+                ++texture_buffer_it;
             }
         }};
         const Shader::Info& info{stage_infos[stage]};
@@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
                 add_buffer(desc);
             }
         }
-        for (const auto& desc : info.texture_descriptors) {
-            texture_buffer_index += desc.count;
-        }
+        texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
         if constexpr (Spec::has_images) {
-            for (const auto& desc : info.image_descriptors) {
-                texture_buffer_index += desc.count;
-            }
+            texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
         }
     }};
     if constexpr (Spec::enabled_stages[0]) {
@@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
     if (!is_built.load(std::memory_order::relaxed)) {
         WaitForBuild();
     }
-    if (assembly_programs[0].handle != 0) {
+    const bool use_assembly{assembly_programs[0].handle != 0};
+    if (use_assembly) {
         program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
     } else {
         program_manager.BindSourcePrograms(source_programs);
     }
-    const ImageId* views_it{image_view_ids.data()};
+    const VideoCommon::ImageViewInOut* views_it{views.data()};
     GLsizei texture_binding = 0;
     GLsizei image_binding = 0;
     std::array<GLuint, MAX_TEXTURES> textures;
@@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
         views_it += num_texture_buffers[stage];
         views_it += num_image_buffers[stage];
 
+        u32 texture_scaling_mask{};
+        u32 image_scaling_mask{};
+        u32 stage_texture_binding{};
+        u32 stage_image_binding{};
+
         const auto& info{stage_infos[stage]};
         for (const auto& desc : info.texture_descriptors) {
             for (u32 index = 0; index < desc.count; ++index) {
-                ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
-                textures[texture_binding++] = image_view.Handle(desc.type);
+                ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
+                textures[texture_binding] = image_view.Handle(desc.type);
+                if (texture_cache.IsRescaling(image_view)) {
+                    texture_scaling_mask |= 1u << stage_texture_binding;
+                }
+                ++texture_binding;
+                ++stage_texture_binding;
             }
         }
         for (const auto& desc : info.image_descriptors) {
             for (u32 index = 0; index < desc.count; ++index) {
-                ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+                ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
                 if (desc.is_written) {
                     texture_cache.MarkModification(image_view.image_id);
                 }
-                images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+                images[image_binding] = image_view.StorageView(desc.type, desc.format);
+                if (texture_cache.IsRescaling(image_view)) {
+                    image_scaling_mask |= 1u << stage_image_binding;
+                }
+                ++image_binding;
+                ++stage_image_binding;
+            }
+        }
+        if (info.uses_rescaling_uniform) {
+            const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
+            const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
+            const bool is_rescaling{texture_cache.IsRescaling()};
+            const f32 config_down_factor{Settings::values.resolution_info.down_factor};
+            const f32 down_factor{is_rescaling ? config_down_factor : 1.0f};
+            if (use_assembly) {
+                glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask,
+                                             float_image_scaling_mask, down_factor, 0.0f);
+            } else {
+                glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask,
+                                   float_image_scaling_mask, down_factor, 0.0f);
             }
         }
     }};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b909c387e..696173acc 100755
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -186,6 +186,10 @@ void RasterizerOpenGL::Clear() {
     SyncRasterizeEnable();
     SyncStencilTestState();
 
+    std::scoped_lock lock{texture_cache.mutex};
+    texture_cache.UpdateRenderTargets(true);
+    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+    SyncViewport();
     if (regs.clear_flags.scissor) {
         SyncScissorTest();
     } else {
@@ -194,10 +198,6 @@ void RasterizerOpenGL::Clear() {
     }
     UNIMPLEMENTED_IF(regs.clear_flags.viewport);
 
-    std::scoped_lock lock{texture_cache.mutex};
-    texture_cache.UpdateRenderTargets(true);
-    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
-
     if (use_color) {
         glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
     }
@@ -216,8 +216,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
-    SyncState();
-
     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
     if (!pipeline) {
         return;
@@ -225,6 +223,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
     pipeline->Configure(is_indexed);
 
+    SyncState();
+
     const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
     BeginTransformFeedback(pipeline, primitive_mode);
 
@@ -535,7 +535,8 @@ void RasterizerOpenGL::SyncViewport() {
     auto& flags = maxwell3d.dirty.flags;
     const auto& regs = maxwell3d.regs;
 
-    const bool dirty_viewport = flags[Dirty::Viewports];
+    const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports];
+    const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
     const bool dirty_clip_control = flags[Dirty::ClipControl];
 
     if (dirty_clip_control || flags[Dirty::FrontFace]) {
@@ -555,8 +556,7 @@ void RasterizerOpenGL::SyncViewport() {
         }
         glFrontFace(mode);
     }
-
-    if (dirty_viewport || flags[Dirty::ClipControl]) {
+    if (dirty_viewport || dirty_clip_control) {
         flags[Dirty::ClipControl] = false;
 
         bool flip_y = false;
@@ -572,37 +572,58 @@ void RasterizerOpenGL::SyncViewport() {
         state_tracker.ClipControl(origin, depth);
         state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
     }
+    const bool is_rescaling{texture_cache.IsRescaling()};
+    const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
+    const auto conv = [scale](float value) -> GLfloat {
+        float new_value = value * scale;
+        if (scale < 1.0f) {
+            const bool sign = std::signbit(value);
+            new_value = std::round(std::abs(new_value));
+            new_value = sign ? -new_value : new_value;
+        }
+        return static_cast<GLfloat>(new_value);
+    };
 
     if (dirty_viewport) {
         flags[Dirty::Viewports] = false;
 
-        const bool force = flags[Dirty::ViewportTransform];
+        const bool force = flags[Dirty::ViewportTransform] || rescale_viewports;
         flags[Dirty::ViewportTransform] = false;
+        flags[VideoCommon::Dirty::RescaleViewports] = false;
 
-        for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
-            if (!force && !flags[Dirty::Viewport0 + i]) {
+        for (size_t index = 0; index < Maxwell::NumViewports; ++index) {
+            if (!force && !flags[Dirty::Viewport0 + index]) {
                 continue;
             }
-            flags[Dirty::Viewport0 + i] = false;
+            flags[Dirty::Viewport0 + index] = false;
 
-            const auto& src = regs.viewport_transform[i];
-            const Common::Rectangle<f32> rect{src.GetRect()};
-            glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
-                               rect.GetHeight());
+            const auto& src = regs.viewport_transform[index];
+            GLfloat x = conv(src.translate_x - src.scale_x);
+            GLfloat y = conv(src.translate_y - src.scale_y);
+            GLfloat width = conv(src.scale_x * 2.0f);
+            GLfloat height = conv(src.scale_y * 2.0f);
+
+            if (height < 0) {
+                y += height;
+                height = -height;
+            }
+            glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
+                               height != 0.0f ? height : 1.0f);
 
             const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
             const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
             const GLdouble far_depth = src.translate_z + src.scale_z;
             if (device.HasDepthBufferFloat()) {
-                glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
+                glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth);
             } else {
-                glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+                glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth);
             }
 
             if (!GLAD_GL_NV_viewport_swizzle) {
                 continue;
             }
-            glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
+            glViewportSwizzleNV(static_cast<GLuint>(index),
+                                MaxwellToGL::ViewportSwizzle(src.swizzle.x),
                                 MaxwellToGL::ViewportSwizzle(src.swizzle.y),
                                 MaxwellToGL::ViewportSwizzle(src.swizzle.z),
                                 MaxwellToGL::ViewportSwizzle(src.swizzle.w));
@@ -905,14 +926,34 @@ void RasterizerOpenGL::SyncLogicOpState() {
 
 void RasterizerOpenGL::SyncScissorTest() {
     auto& flags = maxwell3d.dirty.flags;
-    if (!flags[Dirty::Scissors]) {
+    if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) {
         return;
     }
     flags[Dirty::Scissors] = false;
 
+    const bool force = flags[VideoCommon::Dirty::RescaleScissors];
+    flags[VideoCommon::Dirty::RescaleScissors] = false;
+
     const auto& regs = maxwell3d.regs;
+
+    const auto& resolution = Settings::values.resolution_info;
+    const bool is_rescaling{texture_cache.IsRescaling()};
+    const u32 up_scale = is_rescaling ? resolution.up_scale : 1U;
+    const u32 down_shift = is_rescaling ? resolution.down_shift : 0U;
+    const auto scale_up = [up_scale, down_shift](u32 value) -> u32 {
+        if (value == 0) {
+            return 0U;
+        }
+        const u32 upset = value * up_scale;
+        u32 acumm{};
+        if ((up_scale >> down_shift) == 0) {
+            acumm = upset % 2;
+        }
+        const u32 converted_value = upset >> down_shift;
+        return std::max<u32>(converted_value + acumm, 1U);
+    };
     for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
-        if (!flags[Dirty::Scissor0 + index]) {
+        if (!force && !flags[Dirty::Scissor0 + index]) {
             continue;
         }
         flags[Dirty::Scissor0 + index] = false;
@@ -920,8 +961,8 @@ void RasterizerOpenGL::SyncScissorTest() {
         const auto& src = regs.scissor_test[index];
         if (src.enable) {
             glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
-            glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
-                             src.max_x - src.min_x, src.max_y - src.min_y);
+            glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y),
+                             scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y));
         } else {
             glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
         }
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 8695c29e3..70947838c 100755
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -166,7 +166,7 @@ void OGLFramebuffer::Create() {
         return;
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenFramebuffers(1, &handle);
+    glCreateFramebuffers(1, &handle);
 }
 
 void OGLFramebuffer::Release() {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 02682bd76..42ef67628 100755
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
             // Normal path
             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
 
-            for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
-                total_storage_buffers += desc.count;
-            }
+            total_storage_buffers +=
+                Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors);
         } else {
             // VertexB path when VertexA is present.
             auto& program_va{programs[0]};
             auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
-            for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
-                total_storage_buffers += desc.count;
-            }
+            total_storage_buffers +=
+                Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);
             programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
         }
     }
@@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
 
-    u32 num_storage_buffers{};
-    for (const auto& desc : program.info.storage_buffers_descriptors) {
-        num_storage_buffers += desc.count;
-    }
+    const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
     Shader::RuntimeInfo info;
     info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 8c3ca3d82..6841b5450 100755
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -316,6 +316,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
     }
 }
 
+OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) {
+    // Creates a texture for ImageTarget(info) and allocates it with the matching glTextureStorage*
+    // call. The level count is capped at std::bit_width(info.size.width).
+    const GLenum gl_target = ImageTarget(info);
+    const GLsizei w = info.size.width;
+    const GLsizei h = info.size.height;
+    const GLsizei d = info.size.depth;
+    const GLsizei layers = info.resources.layers;
+    const GLsizei samples = info.num_samples;
+    const int host_level_limit = std::bit_width(info.size.width);
+    const GLsizei levels = std::min(info.resources.levels, host_level_limit);
+
+    OGLTexture image;
+    if (gl_target != GL_TEXTURE_BUFFER) {
+        image.Create(gl_target);
+    }
+    switch (gl_target) {
+    case GL_TEXTURE_1D_ARRAY:
+        glTextureStorage2D(image.handle, levels, gl_internal_format, w, layers);
+        break;
+    case GL_TEXTURE_2D_ARRAY:
+        glTextureStorage3D(image.handle, levels, gl_internal_format, w, h, layers);
+        break;
+    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
+        // TODO: Where should 'fixedsamplelocations' come from?
+        const auto [log2_x, log2_y] = SamplesLog2(info.num_samples);
+        glTextureStorage3DMultisample(image.handle, samples, gl_internal_format, w >> log2_x,
+                                      h >> log2_y, layers, GL_FALSE);
+        break;
+    }
+    case GL_TEXTURE_RECTANGLE:
+        glTextureStorage2D(image.handle, levels, gl_internal_format, w, h);
+        break;
+    case GL_TEXTURE_3D:
+        glTextureStorage3D(image.handle, levels, gl_internal_format, w, h, d);
+        break;
+    case GL_TEXTURE_BUFFER:
+        UNREACHABLE();
+        break;
+    default:
+        UNREACHABLE_MSG("Invalid target=0x{:x}", gl_target);
+        break;
+    }
+    return image;
+}
+
 [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
     switch (format) {
     case PixelFormat::B5G6R5_UNORM:
@@ -359,7 +405,8 @@ ImageBufferMap::~ImageBufferMap() {
 
 TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
                                          StateTracker& state_tracker_)
-    : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
+    : device{device_}, state_tracker{state_tracker_},
+      util_shaders(program_manager), resolution{Settings::values.resolution_info} {
     static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
     for (size_t i = 0; i < TARGETS.size(); ++i) {
         const GLenum target = TARGETS[i];
@@ -426,6 +473,17 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
     set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
     set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
     set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
+
+    if (resolution.active) {
+        for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) {
+            rescale_draw_fbos[i].Create();
+            rescale_read_fbos[i].Create();
+
+            // Make sure the framebuffer is created without DSA
+            glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_draw_fbos[i].handle);
+            glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_read_fbos[i].handle);
+        }
+    }
 }
 
 TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -605,13 +663,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
     return found;
 }
 
-Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
+Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
              VAddr cpu_addr_)
-    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
-    if (CanBeAccelerated(runtime, info)) {
+    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
+    if (CanBeAccelerated(*runtime, info)) {
         flags |= ImageFlagBits::AcceleratedUpload;
     }
-    if (IsConverted(runtime.device, info.format, info.type)) {
+    if (IsConverted(runtime->device, info.format, info.type)) {
         flags |= ImageFlagBits::Converted;
         gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
         gl_format = GL_RGBA;
@@ -622,58 +680,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
         gl_format = tuple.format;
         gl_type = tuple.type;
     }
-    const GLenum target = ImageTarget(info);
-    const GLsizei width = info.size.width;
-    const GLsizei height = info.size.height;
-    const GLsizei depth = info.size.depth;
-    const int max_host_mip_levels = std::bit_width(info.size.width);
-    const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
-    const GLsizei num_layers = info.resources.layers;
-    const GLsizei num_samples = info.num_samples;
-
-    GLuint handle = 0;
-    if (target != GL_TEXTURE_BUFFER) {
-        texture.Create(target);
-        handle = texture.handle;
-    }
-    switch (target) {
-    case GL_TEXTURE_1D_ARRAY:
-        glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
-        break;
-    case GL_TEXTURE_2D_ARRAY:
-        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
-        break;
-    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
-        // TODO: Where should 'fixedsamplelocations' come from?
-        const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
-        glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
-                                      height >> samples_y, num_layers, GL_FALSE);
-        break;
-    }
-    case GL_TEXTURE_RECTANGLE:
-        glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
-        break;
-    case GL_TEXTURE_3D:
-        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
-        break;
-    case GL_TEXTURE_BUFFER:
-        UNREACHABLE();
-        break;
-    default:
-        UNREACHABLE_MSG("Invalid target=0x{:x}", target);
-        break;
-    }
-    if (runtime.device.HasDebuggingToolAttached()) {
+    texture = MakeImage(info, gl_internal_format);
+    current_texture = texture.handle;
+    if (runtime->device.HasDebuggingToolAttached()) {
         const std::string name = VideoCommon::Name(*this);
-        glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
-                      static_cast<GLsizei>(name.size()), name.data());
+        glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE,
+                      texture.handle, static_cast<GLsizei>(name.size()), name.data());
     }
 }
 
+Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
+
 Image::~Image() = default;
 
 void Image::UploadMemory(const ImageBufferMap& map,
                          std::span<const VideoCommon::BufferImageCopy> copies) {
+    const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+    if (is_rescaled) {
+        ScaleDown(true);
+    }
     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
     glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
 
@@ -693,12 +718,18 @@ void Image::UploadMemory(const ImageBufferMap& map,
         }
         CopyBufferToImage(copy, map.offset);
     }
+    if (is_rescaled) {
+        ScaleUp();
+    }
 }
 
 void Image::DownloadMemory(ImageBufferMap& map,
                            std::span<const VideoCommon::BufferImageCopy> copies) {
+    const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+    if (is_rescaled) {
+        ScaleDown();
+    }
     glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
-
     glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
     glPixelStorei(GL_PACK_ALIGNMENT, 1);
 
@@ -716,6 +747,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
         }
         CopyImageToBuffer(copy, map.offset);
     }
+    if (is_rescaled) {
+        ScaleUp(true);
+    }
 }
 
 GLuint Image::StorageHandle() noexcept {
@@ -741,11 +775,11 @@ GLuint Image::StorageHandle() noexcept {
             return store_view.handle;
         }
         store_view.Create();
-        glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
+        glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0,
                       info.resources.levels, 0, info.resources.layers);
         return store_view.handle;
     default:
-        return texture.handle;
+        return current_texture;
     }
 }
 
@@ -849,6 +883,140 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
     }
 }
 
+void Image::Scale(bool up_scale) {
+    const auto format_type = GetFormatType(info.format);
+    const GLenum attachment = [format_type] {
+        switch (format_type) {
+        case SurfaceType::ColorTexture:
+            return GL_COLOR_ATTACHMENT0;
+        case SurfaceType::Depth:
+            return GL_DEPTH_ATTACHMENT;
+        case SurfaceType::DepthStencil:
+            return GL_DEPTH_STENCIL_ATTACHMENT;
+        default:
+            UNREACHABLE();
+            return GL_COLOR_ATTACHMENT0;
+        }
+    }();
+    const GLenum mask = [format_type] {
+        switch (format_type) {
+        case SurfaceType::ColorTexture:
+            return GL_COLOR_BUFFER_BIT;
+        case SurfaceType::Depth:
+            return GL_DEPTH_BUFFER_BIT;
+        case SurfaceType::DepthStencil:
+            return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT;
+        default:
+            UNREACHABLE();
+            return GL_COLOR_BUFFER_BIT;
+        }
+    }();
+    const size_t fbo_index = [format_type] {
+        switch (format_type) {
+        case SurfaceType::ColorTexture:
+            return 0;
+        case SurfaceType::Depth:
+            return 1;
+        case SurfaceType::DepthStencil:
+            return 2;
+        default:
+            UNREACHABLE();
+            return 0;
+        }
+    }();
+    const bool is_2d = info.type == ImageType::e2D;
+    const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0};
+    // Integer formats must use NEAREST filter
+    const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)};
+    const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST;
+
+    const auto& resolution = runtime->resolution;
+    const u32 scaled_width = resolution.ScaleUp(info.size.width);
+    const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+    const u32 original_width = info.size.width;
+    const u32 original_height = info.size.height;
+
+    if (!upscaled_backup.handle) {
+        auto dst_info = info;
+        dst_info.size.width = scaled_width;
+        dst_info.size.height = scaled_height;
+        upscaled_backup = MakeImage(dst_info, gl_internal_format);
+    }
+    const u32 src_width = up_scale ? original_width : scaled_width;
+    const u32 src_height = up_scale ? original_height : scaled_height;
+    const u32 dst_width = up_scale ? scaled_width : original_width;
+    const u32 dst_height = up_scale ? scaled_height : original_height;
+    const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle;
+    const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle;
+
+    // TODO (ameerj): Investigate other GL states that affect blitting.
+    glDisablei(GL_SCISSOR_TEST, 0);
+    glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width),
+                       static_cast<GLfloat>(dst_height));
+
+    const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle;
+    const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
+    for (s32 layer = 0; layer < info.resources.layers; ++layer) {
+        for (s32 level = 0; level < info.resources.levels; ++level) {
+            const u32 src_level_width = std::max(1u, src_width >> level);
+            const u32 src_level_height = std::max(1u, src_height >> level);
+            const u32 dst_level_width = std::max(1u, dst_width >> level);
+            const u32 dst_level_height = std::max(1u, dst_height >> level);
+
+            glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer);
+            glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer);
+
+            glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
+                                   0, dst_level_width, dst_level_height, mask, filter);
+        }
+    }
+    current_texture = dst_handle;
+    auto& state_tracker = runtime->GetStateTracker();
+    state_tracker.NotifyViewport0();
+    state_tracker.NotifyScissor0();
+}
+
+bool Image::ScaleUp(bool ignore) {
+    if (True(flags & ImageFlagBits::Rescaled)) {
+        return false; // Already flagged as rescaled, nothing to do
+    }
+    if (gl_format == 0 && gl_type == 0) {
+        // Compressed textures (gl_format and gl_type both unset) are never rescaled
+        return false;
+    }
+    if (info.type == ImageType::Linear) {
+        UNREACHABLE();
+        return false;
+    }
+    flags |= ImageFlagBits::Rescaled; // Set even when the scaling setting is inactive
+    if (!runtime->resolution.active) {
+        return false;
+    }
+    has_scaled = true;
+    if (ignore) {
+        current_texture = upscaled_backup.handle; // Swap handles only, skip the blit
+        return true;
+    }
+    Scale(true); // Blit texture into upscaled_backup and make it the current handle
+    return true;
+}
+
+bool Image::ScaleDown(bool ignore) {
+    if (False(flags & ImageFlagBits::Rescaled)) {
+        return false; // Not flagged as rescaled, nothing to do
+    }
+    flags &= ~ImageFlagBits::Rescaled; // Cleared even when the scaling setting is inactive
+    if (!runtime->resolution.active) {
+        return false;
+    }
+    if (ignore) {
+        current_texture = texture.handle; // Swap handles only, skip the blit
+        return true;
+    }
+    Scale(false); // Blit upscaled_backup into texture and make it the current handle
+    return true;
+}
+
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
                      ImageId image_id_, Image& image)
     : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
@@ -862,7 +1030,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
     flat_range = info.range;
     set_object_label = device.HasDebuggingToolAttached();
     is_render_target = info.IsRenderTarget();
-    original_texture = image.texture.handle;
+    original_texture = image.Handle();
     num_samples = image.info.num_samples;
     if (!is_render_target) {
         swizzle[0] = info.x_source;
@@ -950,7 +1118,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                      const VideoCommon::ImageViewInfo& view_info)
     : VideoCommon::ImageViewBase{info, view_info} {}
 
-ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
     : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
 
 GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
@@ -1116,25 +1284,24 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
 void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image,
                           std::span<const VideoCommon::ImageCopy> copies) {
     static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
-    const u32 requested_pbo_size =
-        std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes);
-
-    if (bgr_pbo_size < requested_pbo_size) {
-        bgr_pbo.Create();
-        bgr_pbo_size = requested_pbo_size;
-        glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
-    }
+    const u32 img_bpp = BytesPerBlock(src_image.info.format);
     for (const ImageCopy& copy : copies) {
         ASSERT(copy.src_offset == zero_offset);
         ASSERT(copy.dst_offset == zero_offset);
-
+        const u32 num_src_layers = static_cast<u32>(copy.src_subresource.num_layers);
+        const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp;
+        if (bgr_pbo_size < copy_size) {
+            bgr_pbo.Create();
+            bgr_pbo_size = copy_size;
+            glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
+        }
         // Copy from source to PBO
         glPixelStorei(GL_PACK_ALIGNMENT, 1);
         glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
         glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle);
         glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
-                             copy.src_subresource.num_layers, src_image.GlFormat(),
-                             src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr);
+                             num_src_layers, src_image.GlFormat(), src_image.GlType(),
+                             static_cast<GLsizei>(bgr_pbo_size), nullptr);
 
         // Copy from PBO to destination in desired GL format
         glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1ca2c90be..c51a7428d 100755
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -15,6 +15,10 @@
 #include "video_core/texture_cache/image_view_base.h"
 #include "video_core/texture_cache/texture_cache_base.h"
 
+namespace Settings {
+struct ResolutionScalingInfo;
+}
+
 namespace OpenGL {
 
 class Device;
@@ -80,7 +84,7 @@ public:
 
     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
-    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
         UNIMPLEMENTED();
     }
 
@@ -110,6 +114,12 @@ public:
 
     bool HasNativeASTC() const noexcept;
 
+    void TickFrame() {}
+
+    StateTracker& GetStateTracker() {
+        return state_tracker;
+    }
+
 private:
     struct StagingBuffers {
         explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
@@ -149,6 +159,10 @@ private:
     OGLTextureView null_image_view_cube;
 
     std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
+
+    std::array<OGLFramebuffer, 3> rescale_draw_fbos;
+    std::array<OGLFramebuffer, 3> rescale_read_fbos;
+    const Settings::ResolutionScalingInfo& resolution;
 };
 
 class Image : public VideoCommon::ImageBase {
@@ -157,6 +171,7 @@ class Image : public VideoCommon::ImageBase {
 public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);
+    explicit Image(const VideoCommon::NullImageParams&);
 
     ~Image();
 
@@ -174,7 +189,7 @@ public:
     GLuint StorageHandle() noexcept;
 
     GLuint Handle() const noexcept {
-        return texture.handle;
+        return current_texture;
     }
 
     GLuint GlFormat() const noexcept {
@@ -185,16 +200,25 @@ public:
         return gl_type;
     }
 
+    bool ScaleUp(bool ignore = false);
+
+    bool ScaleDown(bool ignore = false);
+
 private:
     void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
 
     void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
 
+    void Scale(bool up_scale);
+
     OGLTexture texture;
+    OGLTexture upscaled_backup;
     OGLTextureView store_view;
     GLenum gl_internal_format = GL_NONE;
     GLenum gl_format = GL_NONE;
     GLenum gl_type = GL_NONE;
+    TextureCacheRuntime* runtime{};
+    GLuint current_texture{};
 };
 
 class ImageView : public VideoCommon::ImageViewBase {
@@ -206,7 +230,7 @@ public:
                        const VideoCommon::ImageViewInfo&, GPUVAddr);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                        const VideoCommon::ImageViewInfo& view_info);
-    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
 
     [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
                                      Shader::ImageFormat image_format);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 7d7cba69c..28daacd82 100755
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,8 +21,13 @@
 #include "core/memory.h"
 #include "core/perf_stats.h"
 #include "core/telemetry_session.h"
+#include "video_core/host_shaders/fxaa_frag.h"
+#include "video_core/host_shaders/fxaa_vert.h"
 #include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
 #include "video_core/host_shaders/opengl_present_vert.h"
+#include "video_core/host_shaders/present_bicubic_frag.h"
+#include "video_core/host_shaders/present_gaussian_frag.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_shader_util.h"
@@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     framebuffer_crop_rect = framebuffer.crop_rect;
 
     const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
-    if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+    screen_info.was_accelerated =
+        rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride);
+    if (screen_info.was_accelerated) {
         return;
     }
 
@@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
 
 void RendererOpenGL::InitOpenGLObjects() {
     // Create shader programs
+    fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
+    fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
     present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
-    present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
+    present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
+    present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER);
+    present_gaussian_fragment =
+        CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
+    present_scaleforce_fragment =
+        CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
+                      GL_FRAGMENT_SHADER);
 
     // Generate presentation sampler
     present_sampler.Create();
     glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+    present_sampler_nn.Create();
+    glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 
     // Generate VBO handle for drawing
     vertex_buffer.Create();
@@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() {
 
     // Clear screen to black
     LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+
+    fxaa_framebuffer.Create();
 }
 
 void RendererOpenGL::AddTelemetryFields() {
@@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
     texture.resource.Release();
     texture.resource.Create(GL_TEXTURE_2D);
     glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
+    fxaa_texture.Release();
+    fxaa_texture.Create(GL_TEXTURE_2D);
+    glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F,
+                       Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
+                       Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
+    glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle,
+                              0);
 }
 
 void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
+    // TODO: Signal state tracker about these changes
+    state_tracker.NotifyScreenDrawVertexArray();
+    state_tracker.NotifyPolygonModes();
+    state_tracker.NotifyViewport0();
+    state_tracker.NotifyScissor0();
+    state_tracker.NotifyColorMask(0);
+    state_tracker.NotifyBlend0();
+    state_tracker.NotifyFramebuffer();
+    state_tracker.NotifyFrontFace();
+    state_tracker.NotifyCullTest();
+    state_tracker.NotifyDepthTest();
+    state_tracker.NotifyStencilTest();
+    state_tracker.NotifyPolygonOffset();
+    state_tracker.NotifyRasterizeEnable();
+    state_tracker.NotifyFramebufferSRGB();
+    state_tracker.NotifyLogicOp();
+    state_tracker.NotifyClipControl();
+    state_tracker.NotifyAlphaTest();
+
+    state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
+
     // Update background color before drawing
     glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
                  Settings::values.bg_green.GetValue() / 255.0f,
                  Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
 
+    glEnable(GL_CULL_FACE);
+    glDisable(GL_COLOR_LOGIC_OP);
+    glDisable(GL_DEPTH_TEST);
+    glDisable(GL_STENCIL_TEST);
+    glDisable(GL_POLYGON_OFFSET_FILL);
+    glDisable(GL_RASTERIZER_DISCARD);
+    glDisable(GL_ALPHA_TEST);
+    glDisablei(GL_BLEND, 0);
+    glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+    glCullFace(GL_BACK);
+    glFrontFace(GL_CW);
+    glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+
+    glBindTextureUnit(0, screen_info.display_texture);
+
+    if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) {
+        program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle);
+
+        glEnablei(GL_SCISSOR_TEST, 0);
+        auto viewport_width = screen_info.texture.width;
+        auto scissor_width = framebuffer_crop_rect.GetWidth();
+        if (scissor_width <= 0) {
+            scissor_width = viewport_width;
+        }
+        auto viewport_height = screen_info.texture.height;
+        auto scissor_height = framebuffer_crop_rect.GetHeight();
+        if (scissor_height <= 0) {
+            scissor_height = viewport_height;
+        }
+        if (screen_info.was_accelerated) {
+            viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width);
+            scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width);
+            viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height);
+            scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height);
+        }
+        glScissorIndexed(0, 0, 0, scissor_width, scissor_height);
+        glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width),
+                           static_cast<GLfloat>(viewport_height));
+        glDepthRangeIndexed(0, 0.0, 0.0);
+
+        glBindSampler(0, present_sampler.handle);
+        GLint old_read_fb;
+        GLint old_draw_fb;
+        glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
+        glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle);
+
+        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+
+        glBindTextureUnit(0, fxaa_texture.handle);
+    }
+
     // Set projection matrix
     const std::array ortho_matrix =
         MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
-    program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
+
+    GLuint fragment_handle;
+    const auto filter = Settings::values.scaling_filter.GetValue();
+    switch (filter) {
+    case Settings::ScalingFilter::NearestNeighbor:
+        fragment_handle = present_bilinear_fragment.handle;
+        break;
+    case Settings::ScalingFilter::Bilinear:
+        fragment_handle = present_bilinear_fragment.handle;
+        break;
+    case Settings::ScalingFilter::Bicubic:
+        fragment_handle = present_bicubic_fragment.handle;
+        break;
+    case Settings::ScalingFilter::Gaussian:
+        fragment_handle = present_gaussian_fragment.handle;
+        break;
+    case Settings::ScalingFilter::ScaleForce:
+        fragment_handle = present_scaleforce_fragment.handle;
+        break;
+    case Settings::ScalingFilter::Fsr:
+        LOG_WARNING(
+            Render_OpenGL,
+            "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce");
+        fragment_handle = present_scaleforce_fragment.handle;
+        break;
+    default:
+        fragment_handle = present_bilinear_fragment.handle;
+        break;
+    }
+    program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle);
     glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
                                 ortho_matrix.data());
 
@@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
         scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
                   static_cast<f32>(screen_info.texture.height);
     }
+    if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
+        !screen_info.was_accelerated) {
+        scale_u /= Settings::values.resolution_info.up_factor;
+        scale_v /= Settings::values.resolution_info.up_factor;
+    }
 
     const auto& screen = layout.screen;
     const std::array vertices = {
@@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     };
     glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
 
-    // TODO: Signal state tracker about these changes
-    state_tracker.NotifyScreenDrawVertexArray();
-    state_tracker.NotifyPolygonModes();
-    state_tracker.NotifyViewport0();
-    state_tracker.NotifyScissor0();
-    state_tracker.NotifyColorMask(0);
-    state_tracker.NotifyBlend0();
-    state_tracker.NotifyFramebuffer();
-    state_tracker.NotifyFrontFace();
-    state_tracker.NotifyCullTest();
-    state_tracker.NotifyDepthTest();
-    state_tracker.NotifyStencilTest();
-    state_tracker.NotifyPolygonOffset();
-    state_tracker.NotifyRasterizeEnable();
-    state_tracker.NotifyFramebufferSRGB();
-    state_tracker.NotifyLogicOp();
-    state_tracker.NotifyClipControl();
-    state_tracker.NotifyAlphaTest();
-
-    state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
-    glEnable(GL_CULL_FACE);
     if (screen_info.display_srgb) {
         glEnable(GL_FRAMEBUFFER_SRGB);
     } else {
         glDisable(GL_FRAMEBUFFER_SRGB);
     }
-    glDisable(GL_COLOR_LOGIC_OP);
-    glDisable(GL_DEPTH_TEST);
-    glDisable(GL_STENCIL_TEST);
-    glDisable(GL_POLYGON_OFFSET_FILL);
-    glDisable(GL_RASTERIZER_DISCARD);
-    glDisable(GL_ALPHA_TEST);
-    glDisablei(GL_BLEND, 0);
     glDisablei(GL_SCISSOR_TEST, 0);
-    glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
-    glCullFace(GL_BACK);
-    glFrontFace(GL_CW);
-    glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
     glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
                        static_cast<GLfloat>(layout.height));
-    glDepthRangeIndexed(0, 0.0, 0.0);
 
     glEnableVertexAttribArray(PositionLocation);
     glEnableVertexAttribArray(TexCoordLocation);
@@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
         glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
     }
 
-    glBindTextureUnit(0, screen_info.display_texture);
-    glBindSampler(0, present_sampler.handle);
+    if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
+        glBindSampler(0, present_sampler.handle);
+    } else {
+        glBindSampler(0, present_sampler_nn.handle);
+    }
 
     glClear(GL_COLOR_BUFFER_BIT);
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index d455f572f..cda333cad 100755
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -50,6 +50,7 @@ struct TextureInfo {
 /// Structure used for storing information about the display target for the Switch screen
 struct ScreenInfo {
     GLuint display_texture{};
+    bool was_accelerated = false;
     bool display_srgb{};
     const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
     TextureInfo texture;
@@ -109,9 +110,15 @@ private:
 
     // OpenGL object IDs
     OGLSampler present_sampler;
+    OGLSampler present_sampler_nn;
     OGLBuffer vertex_buffer;
+    OGLProgram fxaa_vertex;
+    OGLProgram fxaa_fragment;
     OGLProgram present_vertex;
-    OGLProgram present_fragment;
+    OGLProgram present_bilinear_fragment;
+    OGLProgram present_bicubic_fragment;
+    OGLProgram present_gaussian_fragment;
+    OGLProgram present_scaleforce_fragment;
     OGLFramebuffer screenshot_framebuffer;
 
     // GPU address of the vertex buffer
@@ -119,6 +126,8 @@ private:
 
     /// Display information for Switch screen
     ScreenInfo screen_info;
+    OGLTexture fxaa_texture;
+    OGLFramebuffer fxaa_framebuffer;
 
     /// OpenGL framebuffer data
     std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 6c1b2f063..b3884a4f5 100755
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
 
 BlitImageHelper::~BlitImageHelper() = default;
 
-void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
                                 const Region2D& dst_region, const Region2D& src_region,
                                 Tegra::Engines::Fermi2D::Filter filter,
                                 Tegra::Engines::Fermi2D::Operation operation) {
@@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
         .operation = operation,
     };
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
-    const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
     const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
-    const VkPipeline pipeline = FindOrEmplacePipeline(key);
+    const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
                       src_view](vk::CommandBuffer cmdbuf) {
@@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
                                        Tegra::Engines::Fermi2D::Operation operation) {
     ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
     ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
-
+    const BlitImagePipelineKey key{
+        .renderpass = dst_framebuffer->RenderPass(),
+        .operation = operation,
+    };
     const VkPipelineLayout layout = *two_textures_pipeline_layout;
     const VkSampler sampler = *nearest_sampler;
-    const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
+    const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
                       src_stencil_view, this](vk::CommandBuffer cmdbuf) {
@@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
 }
 
 void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
-                                      const ImageView& src_image_view) {
+                                      const ImageView& src_image_view, u32 up_scale,
+                                      u32 down_shift) {
     ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
-    Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
+    Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
 }
 
 void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
-                                      const ImageView& src_image_view) {
+                                      const ImageView& src_image_view, u32 up_scale,
+                                      u32 down_shift) {
     ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
-    Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
+    Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
 }
 
 void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
-                                      const ImageView& src_image_view) {
+                                      const ImageView& src_image_view, u32 up_scale,
+                                      u32 down_shift) {
     ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
-    Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
+    Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
 }
 
 void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
-                                      const ImageView& src_image_view) {
+                                      const ImageView& src_image_view, u32 up_scale,
+                                      u32 down_shift) {
     ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
-    Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
+    Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
 }
 
 void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
-                              const ImageView& src_image_view) {
+                              const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
     const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
     const VkSampler sampler = *nearest_sampler;
     const VkExtent2D extent{
-        .width = src_image_view.size.width,
-        .height = src_image_view.size.height,
+        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
+        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
     };
     scheduler.RequestRenderpass(dst_framebuffer);
-    scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
+                      this](vk::CommandBuffer cmdbuf) {
         const VkOffset2D offset{
             .x = 0,
             .y = 0,
@@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
     scheduler.InvalidateState();
 }
 
-VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
+VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
     const auto it = std::ranges::find(blit_color_keys, key);
     if (it != blit_color_keys.end()) {
         return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
@@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke
     return *blit_color_pipelines.back();
 }
 
-VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
-    if (blit_depth_stencil_pipeline) {
-        return *blit_depth_stencil_pipeline;
+VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) {
+    const auto it = std::ranges::find(blit_depth_stencil_keys, key);
+    if (it != blit_depth_stencil_keys.end()) {
+        return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)];
     }
+    blit_depth_stencil_keys.push_back(key);
     const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
-    blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
+    blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
@@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
         .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
         .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
         .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
+        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
         .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
         .layout = *two_textures_pipeline_layout,
-        .renderPass = renderpass,
+        .renderPass = key.renderpass,
         .subpass = 0,
         .basePipelineHandle = VK_NULL_HANDLE,
         .basePipelineIndex = 0,
-    });
-    return *blit_depth_stencil_pipeline;
+    }));
+    return *blit_depth_stencil_pipelines.back();
 }
 
 void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 33ee095c1..d77f76678 100755
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -34,7 +34,7 @@ public:
                              StateTracker& state_tracker, DescriptorPool& descriptor_pool);
     ~BlitImageHelper();
 
-    void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+    void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
                    const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
                    Tegra::Engines::Fermi2D::Operation operation);
@@ -44,21 +44,25 @@ public:
                           const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
                           Tegra::Engines::Fermi2D::Operation operation);
 
-    void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+    void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+                         u32 up_scale, u32 down_shift);
 
-    void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+    void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+                         u32 up_scale, u32 down_shift);
 
-    void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+    void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+                         u32 up_scale, u32 down_shift);
 
-    void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+    void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+                         u32 up_scale, u32 down_shift);
 
 private:
     void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
-                 const ImageView& src_image_view);
+                 const ImageView& src_image_view, u32 up_scale, u32 down_shift);
 
-    [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
+    [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
 
-    [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
+    [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
 
     void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
 
@@ -84,7 +88,8 @@ private:
 
     std::vector<BlitImagePipelineKey> blit_color_keys;
     std::vector<vk::Pipeline> blit_color_pipelines;
-    vk::Pipeline blit_depth_stencil_pipeline;
+    std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
+    std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
     vk::Pipeline convert_d32_to_r32_pipeline;
     vk::Pipeline convert_r32_to_d32_pipeline;
     vk::Pipeline convert_d16_to_r16_pipeline;
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 4847db6b6..11c160570 100755
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -10,6 +10,7 @@
 
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -20,6 +21,8 @@
 
 namespace Vulkan {
 
+using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
+
 class DescriptorLayoutBuilder {
 public:
     DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
@@ -68,18 +71,28 @@ public:
     }
 
     vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
+        using Shader::Backend::SPIRV::RescalingLayout;
+        const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
+        const VkPushConstantRange range{
+            .stageFlags = static_cast<VkShaderStageFlags>(
+                is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
+            .offset = 0,
+            .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
+        };
         return device->GetLogical().CreatePipelineLayout({
             .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
             .pNext = nullptr,
             .flags = 0,
             .setLayoutCount = descriptor_set_layout ? 1U : 0U,
             .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
-            .pushConstantRangeCount = 0,
-            .pPushConstantRanges = nullptr,
+            .pushConstantRangeCount = 1,
+            .pPushConstantRanges = &range,
         });
     }
 
     void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+        is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
+
         Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
         Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
         Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
@@ -115,6 +128,7 @@ private:
     }
 
     const Device* device{};
+    bool is_compute{};
     boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
     boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
     u32 binding{};
@@ -122,31 +136,68 @@ private:
     size_t offset{};
 };
 
-inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
-                                 const ImageId*& image_view_ids, TextureCache& texture_cache,
-                                 VKUpdateDescriptorQueue& update_descriptor_queue) {
-    for (const auto& desc : info.texture_buffer_descriptors) {
-        image_view_ids += desc.count;
+/// Collects one "is rescaled" bit per pushed texture and per pushed image.
+/// Texture bits occupy the leading words of the array; image bits the words after them.
+class RescalingPushConstant {
+public:
+    explicit RescalingPushConstant() noexcept {}
+
+    /// Stores the flag of the next texture in the next unused bit of the texture words.
+    void PushTexture(bool is_rescaled) noexcept {
+        // NOTE(review): nothing bounds the index; callers must not push more flags than fit.
+        words[texture_index / 32u] |= (is_rescaled ? 1u : 0u) << (texture_index % 32u);
+        ++texture_index;
     }
-    for (const auto& desc : info.image_buffer_descriptors) {
-        image_view_ids += desc.count;
+
+    /// Stores the flag of the next image in the next unused bit of the image words. Like
+    /// PushTexture, this performs no bounds checking.
+    void PushImage(bool is_rescaled) noexcept {
+        // Image flags start right after the words reserved for textures.
+        const size_t word = Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS + image_index / 32u;
+        words[word] |= (is_rescaled ? 1u : 0u) << (image_index % 32u);
+        ++image_index;
     }
+
+    /// Returns every word accumulated so far, texture words first.
+    const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
+        return words;
+    }
+
+private:
+    // Counters rather than pointers into `words`: copies must not alias the source's storage.
+    std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
+    u32 texture_index{}; ///< Number of texture flags pushed so far
+    u32 image_index{};   ///< Number of image flags pushed so far
+};
+
+/// Queues one descriptor per texture and image of `info`, consuming `views` and `samplers`
+/// in order and pushing each view's rescaling state; buffer view slots are only skipped.
+inline void PushImageDescriptors(TextureCache& texture_cache,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue,
+                                 const Shader::Info& info, RescalingPushConstant& rescaling,
+                                 const VkSampler*& samplers,
+                                 const VideoCommon::ImageViewInOut*& views) {
+    views += Shader::NumDescriptors(info.texture_buffer_descriptors);
+    views += Shader::NumDescriptors(info.image_buffer_descriptors);
     for (const auto& desc : info.texture_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
+            // Exactly one sampler and one view are consumed per texture.
             const VkSampler sampler{*(samplers++)};
-            ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+            ImageView& image_view{texture_cache.GetImageView((views++)->id)};
             const VkImageView vk_image_view{image_view.Handle(desc.type)};
             update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+            rescaling.PushTexture(texture_cache.IsRescaling(image_view));
         }
     }
     for (const auto& desc : info.image_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
-            ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+            ImageView& image_view{texture_cache.GetImageView((views++)->id)};
             if (desc.is_written) {
                 texture_cache.MarkModification(image_view.image_id);
             }
             const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
             update_descriptor_queue.AddImage(vk_image_view);
+            rescaling.PushImage(texture_cache.IsRescaling(image_view));
         }
     }
 }
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 888bc7392..1e447e621 100755
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -12,14 +12,22 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "common/settings.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"
+#include "video_core/host_shaders/fxaa_frag_spv.h"
+#include "video_core/host_shaders/fxaa_vert_spv.h"
+#include "video_core/host_shaders/present_bicubic_frag_spv.h"
+#include "video_core/host_shaders/present_gaussian_frag_spv.h"
 #include "video_core/host_shaders/vulkan_present_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
 #include "video_core/host_shaders/vulkan_present_vert_spv.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_fsr.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
     scheduler.Wait(resource_ticks[image_index]);
     resource_ticks[image_index] = scheduler.CurrentTick();
 
-    UpdateDescriptorSet(image_index,
-                        use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
+    VkImageView source_image_view =
+        use_accelerated ? screen_info.image_view : *raw_image_views[image_index];
 
     BufferData data;
     SetUniformData(data, layout);
@@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
                                    read_barrier);
             cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
+                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                                   0, write_barrier);
         });
     }
+
+    const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
+    if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) {
+        UpdateAADescriptorSet(image_index, source_image_view, false);
+        const u32 up_scale = Settings::values.resolution_info.up_scale;
+        const u32 down_shift = Settings::values.resolution_info.down_shift;
+        VkExtent2D size{
+            .width = (up_scale * framebuffer.width) >> down_shift,
+            .height = (up_scale * framebuffer.height) >> down_shift,
+        };
+        scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
+            const VkImageMemoryBarrier base_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = 0,
+                .dstAccessMask = 0,
+                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = {},
+                .subresourceRange =
+                    {
+                        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                        .baseMipLevel = 0,
+                        .levelCount = 1,
+                        .baseArrayLayer = 0,
+                        .layerCount = 1,
+                    },
+            };
+
+            {
+                VkImageMemoryBarrier fsr_write_barrier = base_barrier;
+                fsr_write_barrier.image = *aa_image;
+                fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier);
+            }
+
+            const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
+            const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
+            const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
+            const VkClearValue clear_color{
+                .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
+            };
+            const VkRenderPassBeginInfo renderpass_bi{
+                .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                .pNext = nullptr,
+                .renderPass = *aa_renderpass,
+                .framebuffer = *aa_framebuffer,
+                .renderArea =
+                    {
+                        .offset = {0, 0},
+                        .extent = size,
+                    },
+                .clearValueCount = 1,
+                .pClearValues = &clear_color,
+            };
+            const VkViewport viewport{
+                .x = 0.0f,
+                .y = 0.0f,
+                .width = static_cast<float>(size.width),
+                .height = static_cast<float>(size.height),
+                .minDepth = 0.0f,
+                .maxDepth = 1.0f,
+            };
+            const VkRect2D scissor{
+                .offset = {0, 0},
+                .extent = size,
+            };
+            cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
+            switch (anti_alias_pass) {
+            case Settings::AntiAliasing::Fxaa:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
+                break;
+            default:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
+                break;
+            }
+            cmdbuf.SetViewport(0, viewport);
+            cmdbuf.SetScissor(0, scissor);
+
+            cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
+                                      aa_descriptor_sets[image_index], {});
+            cmdbuf.Draw(4, 1, 0, 0);
+            cmdbuf.EndRenderPass();
+
+            {
+                VkImageMemoryBarrier blit_read_barrier = base_barrier;
+                blit_read_barrier.image = *aa_image;
+                blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+                blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
+            }
+        });
+        source_image_view = *aa_image_view;
+    }
+
+    if (fsr) {
+        auto crop_rect = framebuffer.crop_rect;
+        if (crop_rect.GetWidth() == 0) {
+            crop_rect.right = framebuffer.width;
+        }
+        if (crop_rect.GetHeight() == 0) {
+            crop_rect.bottom = framebuffer.height;
+        }
+        crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
+        VkExtent2D fsr_input_size{
+            .width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
+            .height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
+        };
+        VkImageView fsr_image_view =
+            fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
+        UpdateDescriptorSet(image_index, fsr_image_view, true);
+    } else {
+        const bool is_nn =
+            Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
+        UpdateDescriptorSet(image_index, source_image_view, is_nn);
+    }
+
     scheduler.Record(
         [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
             const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
@@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
                 .offset = {0, 0},
                 .extent = size,
             };
+            const auto filter = Settings::values.scaling_filter.GetValue();
             cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
-            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+            switch (filter) {
+            case Settings::ScalingFilter::NearestNeighbor:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
+                break;
+            case Settings::ScalingFilter::Bilinear:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
+                break;
+            case Settings::ScalingFilter::Bicubic:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline);
+                break;
+            case Settings::ScalingFilter::Gaussian:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline);
+                break;
+            case Settings::ScalingFilter::ScaleForce:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline);
+                break;
+            default:
+                cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
+                break;
+            }
             cmdbuf.SetViewport(0, viewport);
             cmdbuf.SetScissor(0, scissor);
 
@@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb
 }
 
 vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
+    return CreateFramebuffer(image_view, extent, renderpass);
+}
+
+vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
+                                                vk::RenderPass& rd) {
     return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .renderPass = *renderpass,
+        .renderPass = *rd,
         .attachmentCount = 1,
         .pAttachments = &image_view,
         .width = extent.width,
@@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() {
     CreateRenderPass();
     CreateFramebuffers();
     CreateGraphicsPipeline();
+    fsr.reset();
+    if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+        CreateFSR();
+    }
 }
 
 void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
+    if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+        if (!fsr) {
+            CreateFSR();
+        }
+    } else {
+        fsr.reset();
+    }
+
     if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
         return;
     }
@@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
 
 void VKBlitScreen::CreateShaders() {
     vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
-    fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
+    fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV);
+    fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV);
+    bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
+    bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
+    gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
+    // ScaleForce ships in two precisions; use the FP16 build only where the device supports it.
+    scaleforce_fragment_shader =
+        device.IsFloat16Supported()
+            ? BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV)
+            : BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
 }
 
 void VKBlitScreen::CreateSemaphores() {
@@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() {
         },
     }};
 
+    const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{
+        {
+            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = static_cast<u32>(image_count * 2),
+        },
+    }};
+
     const VkDescriptorPoolCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
         .pNext = nullptr,
@@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() {
         .pPoolSizes = pool_sizes.data(),
     };
     descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
+
+    const VkDescriptorPoolCreateInfo ci_aa{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+        .maxSets = static_cast<u32>(image_count),
+        .poolSizeCount = static_cast<u32>(pool_sizes_aa.size()),
+        .pPoolSizes = pool_sizes_aa.data(),
+    };
+    aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa);
 }
 
 void VKBlitScreen::CreateRenderPass() {
+    renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
+}
+
+vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
     const VkAttachmentDescription color_attachment{
         .flags = 0,
-        .format = swapchain.GetImageViewFormat(),
+        .format = format,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
         .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
         .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
         .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
         .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
-        .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+        .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
     };
 
     const VkAttachmentReference color_attachment_ref{
@@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() {
         .pDependencies = &dependency,
     };
 
-    renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
+    return device.GetLogical().CreateRenderPass(renderpass_ci);
 }
 
 void VKBlitScreen::CreateDescriptorSetLayout() {
@@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
         },
     }};
 
+    const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{
+        {
+            .binding = 0,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+    }};
+
     const VkDescriptorSetLayoutCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .pNext = nullptr,
@@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
         .pBindings = layout_bindings.data(),
     };
 
+    const VkDescriptorSetLayoutCreateInfo ci_aa{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = static_cast<u32>(layout_bindings_aa.size()),
+        .pBindings = layout_bindings_aa.data(),
+    };
+
     descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
+    aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa);
 }
 
 void VKBlitScreen::CreateDescriptorSets() {
     const std::vector layouts(image_count, *descriptor_set_layout);
+    const std::vector layouts_aa(image_count, *aa_descriptor_set_layout);
 
     const VkDescriptorSetAllocateInfo ai{
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
@@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() {
         .pSetLayouts = layouts.data(),
     };
 
+    const VkDescriptorSetAllocateInfo ai_aa{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .descriptorPool = *aa_descriptor_pool,
+        .descriptorSetCount = static_cast<u32>(image_count),
+        .pSetLayouts = layouts_aa.data(),
+    };
+
     descriptor_sets = descriptor_pool.Allocate(ai);
+    aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa);
 }
 
 void VKBlitScreen::CreatePipelineLayout() {
@@ -464,11 +700,21 @@ void VKBlitScreen::CreatePipelineLayout() {
         .pushConstantRangeCount = 0,
         .pPushConstantRanges = nullptr,
     };
+    const VkPipelineLayoutCreateInfo ci_aa{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .setLayoutCount = 1,
+        .pSetLayouts = aa_descriptor_set_layout.address(),
+        .pushConstantRangeCount = 0,
+        .pPushConstantRanges = nullptr,
+    };
     pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
+    aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa);
 }
 
 void VKBlitScreen::CreateGraphicsPipeline() {
-    const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
+    const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{
         {
             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
             .pNext = nullptr,
@@ -483,7 +729,70 @@ void VKBlitScreen::CreateGraphicsPipeline() {
             .pNext = nullptr,
             .flags = 0,
             .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-            .module = *fragment_shader,
+            .module = *bilinear_fragment_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+    }};
+
+    const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = *vertex_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = *bicubic_fragment_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+    }};
+
+    const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = *vertex_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = *gaussian_fragment_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+    }};
+
+    const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = *vertex_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = *scaleforce_fragment_shader,
             .pName = "main",
             .pSpecializationInfo = nullptr,
         },
@@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() {
         .pDynamicStates = dynamic_states.data(),
     };
 
-    const VkGraphicsPipelineCreateInfo pipeline_ci{
+    const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .stageCount = static_cast<u32>(shader_stages.size()),
-        .pStages = shader_stages.data(),
+        .stageCount = static_cast<u32>(bilinear_shader_stages.size()),
+        .pStages = bilinear_shader_stages.data(),
         .pVertexInputState = &vertex_input_ci,
         .pInputAssemblyState = &input_assembly_ci,
         .pTessellationState = nullptr,
@@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() {
         .basePipelineIndex = 0,
     };
 
-    pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
+    const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(bicubic_shader_stages.size()),
+        .pStages = bicubic_shader_stages.data(),
+        .pVertexInputState = &vertex_input_ci,
+        .pInputAssemblyState = &input_assembly_ci,
+        .pTessellationState = nullptr,
+        .pViewportState = &viewport_state_ci,
+        .pRasterizationState = &rasterization_ci,
+        .pMultisampleState = &multisampling_ci,
+        .pDepthStencilState = nullptr,
+        .pColorBlendState = &color_blend_ci,
+        .pDynamicState = &dynamic_state_ci,
+        .layout = *pipeline_layout,
+        .renderPass = *renderpass,
+        .subpass = 0,
+        .basePipelineHandle = 0,
+        .basePipelineIndex = 0,
+    };
+
+    const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(gaussian_shader_stages.size()),
+        .pStages = gaussian_shader_stages.data(),
+        .pVertexInputState = &vertex_input_ci,
+        .pInputAssemblyState = &input_assembly_ci,
+        .pTessellationState = nullptr,
+        .pViewportState = &viewport_state_ci,
+        .pRasterizationState = &rasterization_ci,
+        .pMultisampleState = &multisampling_ci,
+        .pDepthStencilState = nullptr,
+        .pColorBlendState = &color_blend_ci,
+        .pDynamicState = &dynamic_state_ci,
+        .layout = *pipeline_layout,
+        .renderPass = *renderpass,
+        .subpass = 0,
+        .basePipelineHandle = 0,
+        .basePipelineIndex = 0,
+    };
+
+    const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(scaleforce_shader_stages.size()),
+        .pStages = scaleforce_shader_stages.data(),
+        .pVertexInputState = &vertex_input_ci,
+        .pInputAssemblyState = &input_assembly_ci,
+        .pTessellationState = nullptr,
+        .pViewportState = &viewport_state_ci,
+        .pRasterizationState = &rasterization_ci,
+        .pMultisampleState = &multisampling_ci,
+        .pDepthStencilState = nullptr,
+        .pColorBlendState = &color_blend_ci,
+        .pDynamicState = &dynamic_state_ci,
+        .layout = *pipeline_layout,
+        .renderPass = *renderpass,
+        .subpass = 0,
+        .basePipelineHandle = 0,
+        .basePipelineIndex = 0,
+    };
+
+    bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci);
+    bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci);
+    gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci);
+    scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci);
 }
 
 void VKBlitScreen::CreateSampler() {
@@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() {
         .pNext = nullptr,
         .flags = 0,
         .magFilter = VK_FILTER_LINEAR,
+        .minFilter = VK_FILTER_LINEAR,
+        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+        .mipLodBias = 0.0f,
+        .anisotropyEnable = VK_FALSE,
+        .maxAnisotropy = 0.0f,
+        .compareEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_NEVER,
+        .minLod = 0.0f,
+        .maxLod = 0.0f,
+        .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
+        .unnormalizedCoordinates = VK_FALSE,
+    };
+
+    const VkSamplerCreateInfo ci_nn{
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .magFilter = VK_FILTER_NEAREST,
         .minFilter = VK_FILTER_NEAREST,
-        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
         .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
         .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
@@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() {
     };
 
     sampler = device.GetLogical().CreateSampler(ci);
+    nn_sampler = device.GetLogical().CreateSampler(ci_nn);
 }
 
 void VKBlitScreen::CreateFramebuffers() {
@@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() {
 
     for (std::size_t i = 0; i < image_count; ++i) {
         const VkImageView image_view{swapchain.GetImageViewIndex(i)};
-        framebuffers[i] = CreateFramebuffer(image_view, size);
+        framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
     }
 }
 
@@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() {
     }
     raw_images.clear();
     raw_buffer_commits.clear();
+
+    aa_image_view.reset();
+    aa_image.reset();
+    aa_commit = MemoryCommit{};
+
     buffer.reset();
     buffer_commit = MemoryCommit{};
 }
@@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
     raw_image_views.resize(image_count);
     raw_buffer_commits.resize(image_count);
 
-    for (size_t i = 0; i < image_count; ++i) {
-        raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+    const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
+                                  u32 down_shift = 0) {
+        u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
+                                               : VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+        return device.GetLogical().CreateImage(VkImageCreateInfo{
             .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
             .pNext = nullptr,
             .flags = 0,
@@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
             .format = GetFormat(framebuffer),
             .extent =
                 {
-                    .width = framebuffer.width,
-                    .height = framebuffer.height,
+                    .width = (up_scale * framebuffer.width) >> down_shift,
+                    .height = (up_scale * framebuffer.height) >> down_shift,
                     .depth = 1,
                 },
             .mipLevels = 1,
             .arrayLayers = 1,
             .samples = VK_SAMPLE_COUNT_1_BIT,
-            .tiling = VK_IMAGE_TILING_LINEAR,
-            .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+            .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
+            .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages,
             .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
             .queueFamilyIndexCount = 0,
             .pQueueFamilyIndices = nullptr,
             .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
         });
-        raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
-        raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
+    };
+    const auto create_commit = [&](vk::Image& image) {
+        return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
+    };
+    const auto create_image_view = [&](vk::Image& image) {
+        return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .pNext = nullptr,
             .flags = 0,
-            .image = *raw_images[i],
+            .image = *image,
             .viewType = VK_IMAGE_VIEW_TYPE_2D,
             .format = GetFormat(framebuffer),
             .components =
@@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
                     .layerCount = 1,
                 },
         });
+    };
+
+    for (size_t i = 0; i < image_count; ++i) {
+        raw_images[i] = create_image();
+        raw_buffer_commits[i] = create_commit(raw_images[i]);
+        raw_image_views[i] = create_image_view(raw_images[i]);
     }
+
+    // AA Resources
+    const u32 up_scale = Settings::values.resolution_info.up_scale;
+    const u32 down_shift = Settings::values.resolution_info.down_shift;
+    aa_image = create_image(true, up_scale, down_shift);
+    aa_commit = create_commit(aa_image);
+    aa_image_view = create_image_view(aa_image);
+    VkExtent2D size{
+        .width = (up_scale * framebuffer.width) >> down_shift,
+        .height = (up_scale * framebuffer.height) >> down_shift,
+    };
+    if (aa_renderpass) {
+        aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
+        return;
+    }
+    aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
+    aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
+
+    const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = *fxaa_vertex_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+        {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = *fxaa_fragment_shader,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+    }};
+
+    const auto vertex_binding_description = ScreenRectVertex::GetDescription();
+    const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
+
+    const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .vertexBindingDescriptionCount = 1,
+        .pVertexBindingDescriptions = &vertex_binding_description,
+        .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()},
+        .pVertexAttributeDescriptions = vertex_attrs_description.data(),
+    };
+
+    const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+        .primitiveRestartEnable = VK_FALSE,
+    };
+
+    const VkPipelineViewportStateCreateInfo viewport_state_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .viewportCount = 1,
+        .pViewports = nullptr,
+        .scissorCount = 1,
+        .pScissors = nullptr,
+    };
+
+    const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .depthClampEnable = VK_FALSE,
+        .rasterizerDiscardEnable = VK_FALSE,
+        .polygonMode = VK_POLYGON_MODE_FILL,
+        .cullMode = VK_CULL_MODE_NONE,
+        .frontFace = VK_FRONT_FACE_CLOCKWISE,
+        .depthBiasEnable = VK_FALSE,
+        .depthBiasConstantFactor = 0.0f,
+        .depthBiasClamp = 0.0f,
+        .depthBiasSlopeFactor = 0.0f,
+        .lineWidth = 1.0f,
+    };
+
+    const VkPipelineMultisampleStateCreateInfo multisampling_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+        .sampleShadingEnable = VK_FALSE,
+        .minSampleShading = 0.0f,
+        .pSampleMask = nullptr,
+        .alphaToCoverageEnable = VK_FALSE,
+        .alphaToOneEnable = VK_FALSE,
+    };
+
+    const VkPipelineColorBlendAttachmentState color_blend_attachment{
+        .blendEnable = VK_FALSE,
+        .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+        .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+        .colorBlendOp = VK_BLEND_OP_ADD,
+        .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+        .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+        .alphaBlendOp = VK_BLEND_OP_ADD,
+        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+    };
+
+    const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .logicOpEnable = VK_FALSE,
+        .logicOp = VK_LOGIC_OP_COPY,
+        .attachmentCount = 1,
+        .pAttachments = &color_blend_attachment,
+        .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+    };
+
+    static constexpr std::array dynamic_states{
+        VK_DYNAMIC_STATE_VIEWPORT,
+        VK_DYNAMIC_STATE_SCISSOR,
+    };
+    const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+        .pDynamicStates = dynamic_states.data(),
+    };
+
+    const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(fxaa_shader_stages.size()),
+        .pStages = fxaa_shader_stages.data(),
+        .pVertexInputState = &vertex_input_ci,
+        .pInputAssemblyState = &input_assembly_ci,
+        .pTessellationState = nullptr,
+        .pViewportState = &viewport_state_ci,
+        .pRasterizationState = &rasterization_ci,
+        .pMultisampleState = &multisampling_ci,
+        .pDepthStencilState = nullptr,
+        .pColorBlendState = &color_blend_ci,
+        .pDynamicState = &dynamic_state_ci,
+        .layout = *aa_pipeline_layout,
+        .renderPass = *aa_renderpass,
+        .subpass = 0,
+        .basePipelineHandle = 0,
+        .basePipelineIndex = 0,
+    };
+
+    // AA
+    aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
 }
 
-void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
+// Rewrites the AA descriptor set that belongs to image_index.
+// Both combined image sampler bindings of that set (0 and 1) are pointed at image_view,
+// described to Vulkan as being in VK_IMAGE_LAYOUT_GENERAL.
+// nn selects nn_sampler for the writes; otherwise the regular sampler is used.
+void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
+                                         bool nn) const {
+    const VkSampler chosen_sampler = nn ? *nn_sampler : *sampler;
+    const VkDescriptorSet target_set = aa_descriptor_sets[image_index];
+
+    const VkDescriptorImageInfo source_info{
+        .sampler = chosen_sampler,
+        .imageView = image_view,
+        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+    };
+
+    // The two writes differ only in their destination binding, so a single local
+    // factory produces both of them.
+    const auto make_write = [&source_info, target_set](u32 binding) {
+        return VkWriteDescriptorSet{
+            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+            .pNext = nullptr,
+            .dstSet = target_set,
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .pImageInfo = &source_info,
+            .pBufferInfo = nullptr,
+            .pTexelBufferView = nullptr,
+        };
+    };
+    const std::array writes{make_write(0), make_write(1)};
+
+    // Only writes are submitted; the list of descriptor copies is left empty.
+    device.GetLogical().UpdateDescriptorSets(writes, {});
+}
+
+void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
+                                       bool nn) const {
     const VkDescriptorBufferInfo buffer_info{
         .buffer = *buffer,
         .offset = offsetof(BufferData, uniform),
@@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
     };
 
     const VkDescriptorImageInfo image_info{
-        .sampler = *sampler,
+        .sampler = nn ? *nn_sampler : *sampler,
         .imageView = image_view,
         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
     };
@@ -798,17 +1411,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
     UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
     UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
 
-    // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
-    // (e.g. handheld mode) on a 1920x1080 framebuffer.
     f32 scale_u = 1.0f;
     f32 scale_v = 1.0f;
-    if (framebuffer_crop_rect.GetWidth() > 0) {
-        scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
-                  static_cast<f32>(screen_info.width);
-    }
-    if (framebuffer_crop_rect.GetHeight() > 0) {
-        scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
-                  static_cast<f32>(screen_info.height);
+    // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
+    // (e.g. handheld mode) on a 1920x1080 framebuffer.
+    if (!fsr) {
+        if (framebuffer_crop_rect.GetWidth() > 0) {
+            scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+                      static_cast<f32>(screen_info.width);
+        }
+        if (framebuffer_crop_rect.GetHeight() > 0) {
+            scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+                      static_cast<f32>(screen_info.height);
+        }
     }
 
     const auto& screen = layout.screen;
@@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
     data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
 }
 
+// Builds the FSR object and stores it in fsr, sized to the width and height of the screen
+// rectangle in the render window's current framebuffer layout.
+void VKBlitScreen::CreateFSR() {
+    const auto& screen_rect = render_window.GetFramebufferLayout().screen;
+    const VkExtent2D output_extent{.width = screen_rect.GetWidth(),
+                                   .height = screen_rect.GetHeight()};
+    fsr = std::make_unique<FSR>(device, memory_allocator, image_count, output_extent);
+}
+
 u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
     return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
 }
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 430bcfbca..bbca71af3 100755
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -34,6 +34,7 @@ namespace Vulkan {
 struct ScreenInfo;
 
 class Device;
+class FSR;
 class RasterizerVulkan;
 class VKScheduler;
 class VKSwapchain;
@@ -66,6 +67,9 @@ public:
     [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
                                                     VkExtent2D extent);
 
+    [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
+                                                    VkExtent2D extent, vk::RenderPass& rd);
+
 private:
     struct BufferData;
 
@@ -74,6 +78,7 @@ private:
     void CreateSemaphores();
     void CreateDescriptorPool();
     void CreateRenderPass();
+    vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
     void CreateDescriptorSetLayout();
     void CreateDescriptorSets();
     void CreatePipelineLayout();
@@ -88,11 +93,14 @@ private:
     void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
     void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
 
-    void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
+    void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
+    void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
     void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
     void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
                        const Layout::FramebufferLayout layout) const;
 
+    void CreateFSR();
+
     u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
                           std::size_t image_index) const;
@@ -107,14 +115,24 @@ private:
     const VKScreenInfo& screen_info;
 
     vk::ShaderModule vertex_shader;
-    vk::ShaderModule fragment_shader;
+    vk::ShaderModule fxaa_vertex_shader;
+    vk::ShaderModule fxaa_fragment_shader;
+    vk::ShaderModule bilinear_fragment_shader;
+    vk::ShaderModule bicubic_fragment_shader;
+    vk::ShaderModule gaussian_fragment_shader;
+    vk::ShaderModule scaleforce_fragment_shader;
     vk::DescriptorPool descriptor_pool;
     vk::DescriptorSetLayout descriptor_set_layout;
     vk::PipelineLayout pipeline_layout;
-    vk::Pipeline pipeline;
+    vk::Pipeline nearest_neightbor_pipeline;
+    vk::Pipeline bilinear_pipeline;
+    vk::Pipeline bicubic_pipeline;
+    vk::Pipeline gaussian_pipeline;
+    vk::Pipeline scaleforce_pipeline;
     vk::RenderPass renderpass;
     std::vector<vk::Framebuffer> framebuffers;
     vk::DescriptorSets descriptor_sets;
+    vk::Sampler nn_sampler;
     vk::Sampler sampler;
 
     vk::Buffer buffer;
@@ -126,8 +144,22 @@ private:
     std::vector<vk::Image> raw_images;
     std::vector<vk::ImageView> raw_image_views;
     std::vector<MemoryCommit> raw_buffer_commits;
+
+    vk::DescriptorPool aa_descriptor_pool;
+    vk::DescriptorSetLayout aa_descriptor_set_layout;
+    vk::PipelineLayout aa_pipeline_layout;
+    vk::Pipeline aa_pipeline;
+    vk::RenderPass aa_renderpass;
+    vk::Framebuffer aa_framebuffer;
+    vk::DescriptorSets aa_descriptor_sets;
+    vk::Image aa_image;
+    vk::ImageView aa_image_view;
+    MemoryCommit aa_commit;
+
     u32 raw_width = 0;
     u32 raw_height = 0;
+
+    std::unique_ptr<FSR> fsr;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8ac58bc2f..5ffd93499 100755
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() {
 }
 
 void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies) {
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
     static constexpr VkMemoryBarrier READ_BARRIER{
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .pNext = nullptr,
@@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
     boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
     std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
+        if (barrier) {
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
+        }
+        cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+        if (barrier) {
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+        }
+    });
+}
+
+void BufferCacheRuntime::PreCopyBarrier() { // Records a lone ALL_COMMANDS -> TRANSFER barrier.
+    static constexpr VkMemoryBarrier READ_BARRIER{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, // writes made before the barrier
+        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+    };
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([](vk::CommandBuffer cmdbuf) { // captureless: READ_BARRIER is static
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, READ_BARRIER);
-        cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+    });
+}
+
+void BufferCacheRuntime::PostCopyBarrier() {
+    static constexpr VkMemoryBarrier WRITE_BARRIER{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+    };
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([](vk::CommandBuffer cmdbuf) {
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, WRITE_BARRIER);
     });
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index c27402ff0..1ee0d8420 100755
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -69,8 +69,12 @@ public:
 
     [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
 
+    void PreCopyBarrier();
+
     void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+    void PostCopyBarrier();
 
     void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
 
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 44faf626a..de36bcdb7 100755
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -22,6 +22,7 @@
 namespace Vulkan {
 
 using Shader::ImageBufferDescriptor;
+using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
 using Tegra::Texture::TexturePair;
 
 ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
@@ -108,8 +109,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
     texture_cache.SynchronizeComputeDescriptors();
 
     static constexpr size_t max_elements = 64;
-    std::array<ImageId, max_elements> image_view_ids;
-    boost::container::static_vector<u32, max_elements> image_view_indices;
+    boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
     boost::container::static_vector<VkSampler, max_elements> samplers;
 
     const auto& qmd{kepler_compute.launch_description};
@@ -134,30 +134,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
         }
         return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
     }};
-    const auto add_image{[&](const auto& desc) {
+    const auto add_image{[&](const auto& desc, bool blacklist) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({
+                .index = handle.first,
+                .blacklist = blacklist,
+                .id = {},
+            });
         }
     }};
-    std::ranges::for_each(info.texture_buffer_descriptors, add_image);
-    std::ranges::for_each(info.image_buffer_descriptors, add_image);
+    for (const auto& desc : info.texture_buffer_descriptors) {
+        add_image(desc, false);
+    }
+    for (const auto& desc : info.image_buffer_descriptors) {
+        add_image(desc, false);
+    }
     for (const auto& desc : info.texture_descriptors) {
         for (u32 index = 0; index < desc.count; ++index) {
             const auto handle{read_handle(desc, index)};
-            image_view_indices.push_back(handle.first);
+            views.push_back({handle.first});
 
             Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
             samplers.push_back(sampler->Handle());
         }
     }
-    std::ranges::for_each(info.image_descriptors, add_image);
-
-    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
-    texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+    for (const auto& desc : info.image_descriptors) {
+        add_image(desc, desc.is_written);
+    }
+    texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
 
     buffer_cache.UnbindComputeTextureBuffers();
-    ImageId* texture_buffer_ids{image_view_ids.data()};
     size_t index{};
     const auto add_buffer{[&](const auto& desc) {
         constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
@@ -166,11 +173,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
             if constexpr (is_image) {
                 is_written = desc.is_written;
             }
-            ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
+            ImageView& image_view = texture_cache.GetImageView(views[index].id);
             buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
                                                   image_view.BufferSize(), image_view.format,
                                                   is_written, is_image);
-            ++texture_buffer_ids;
             ++index;
         }
     }};
@@ -180,9 +186,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
     buffer_cache.UpdateComputeBuffers();
     buffer_cache.BindHostComputeBuffers();
 
+    RescalingPushConstant rescaling;
     const VkSampler* samplers_it{samplers.data()};
-    const ImageId* views_it{image_view_ids.data()};
-    PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
+    const VideoCommon::ImageViewInOut* views_it{views.data()};
+    PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
+                         views_it);
 
     if (!is_built.load(std::memory_order::relaxed)) {
         // Wait for the pipeline to be built
@@ -192,11 +200,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
         });
     }
     const void* const descriptor_data{update_descriptor_queue.UpdateData()};
-    scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
+    const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
+    scheduler.Record([this, descriptor_data, is_rescaling,
+                      rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
         if (!descriptor_set_layout) {
             return;
         }
+        if (is_rescaling) {
+            cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+                                 RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
+                                 rescaling_data.data());
+        }
         const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
         const vk::Device& dev{device.GetLogical()};
         dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
new file mode 100755
index 000000000..73629d229
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -0,0 +1,553 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cmath>
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
+#include "video_core/renderer_vulkan/vk_fsr.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+namespace {
+// Reimplementations of the constant generating functions in ffx_fsr1.h
+// GCC generated a lot of warnings when using the official header.
+u32 AU1_AH1_AF1(f32 f) { // Maps the bits of f to a 16-bit value through two 512-entry tables
+    static constexpr u32 base[512]{ // Base result per index (presumably f32 -> f16; TODO confirm)
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
+        0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
+        0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
+        0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
+        0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
+        0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
+        0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
+        0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+    };
+    static constexpr s8 shift[512]{ // Right shift applied to the low 23 bits of f, per index
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
+        0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
+        0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18,
+    };
+    const u32 u = Common::BitCast<u32>(f); // Raw bit pattern of the input
+    const u32 i = u >> 23; // The top 9 bits of the pattern select the table entry
+    return base[i] + ((u & 0x7fffff) >> shift[i]); // Base plus the shifted low 23 bits
+}
+
+u32 AU1_AH2_AF2(f32 a[2]) {
+    return (AU1_AH1_AF1(a[1]) << 16) + AU1_AH1_AF1(a[0]); // a[1] from bit 16 up, a[0] below it
+}
+
+void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
+                f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
+                f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { // Fills all 16 output words
+    con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); // X ratio
+    con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); // Y ratio
+    con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
+    con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
+    con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); // 1 / input width
+    con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); // 1 / input height
+    con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); // con1[2] .. con3[1] hold fixed
+    con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); // multiples of those reciprocals
+    con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX);
+    con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
+    con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+    con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
+    con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX);
+    con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY);
+    con3[2] = con3[3] = 0; // The last two words are zeroed
+}
+
+void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
+                      f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
+                      f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
+                      f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
+    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
+               inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
+    const f32 x = 0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + inputOffsetInPixelsX;
+    const f32 y = 0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + inputOffsetInPixelsY;
+    con0[2] = Common::BitCast<u32>(x); // Overwrites the offset-free value stored by FsrEasuCon
+    con0[3] = Common::BitCast<u32>(y); // Same for the Y component
+}
+
+void FsrRcasCon(u32* con, f32 sharpness) {
+    // The argument is an exponent: the value stored in the constants is 2^(-sharpness).
+    const f32 linear = std::exp2f(-sharpness);
+    f32 pair[2]{linear, linear};
+    con[0] = Common::BitCast<u32>(linear);
+    con[1] = AU1_AH2_AF2(pair);
+    con[2] = con[3] = 0;
+}
+} // Anonymous namespace
+
+FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
+         VkExtent2D output_size_)
+    : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_},
+      output_size{output_size_} {
+    // Builds every Vulkan object up front; later steps consume handles made by earlier ones.
+    CreateImages();
+    CreateSampler(); // The descriptor set layout references this sampler
+    CreateShaders();
+    CreateDescriptorPool();
+    CreateDescriptorSetLayout();
+    CreateDescriptorSets(); // Needs both the pool and the set layout
+    CreatePipelineLayout(); // Needs the set layout
+    CreatePipeline(); // Needs the shader modules and the pipeline layout
+}
+
+VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
+                      VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
+    // Records the EASU pass followed by the RCAS pass and returns the view of the RCAS output.
+    UpdateDescriptorSet(image_index, image_view);
+
+    scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
+        const VkImageMemoryBarrier base_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = 0,
+            .dstAccessMask = 0,
+            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = {},
+            .subresourceRange =
+                {
+                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                    .baseMipLevel = 0,
+                    .levelCount = 1,
+                    .baseArrayLayer = 0,
+                    .layerCount = 1,
+                },
+        };
+
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
+        // Pass 1 (EASU): constants come from the crop rectangle, input extent and output size.
+        std::array<u32, 4 * 4> push_constants;
+        FsrEasuConOffset(
+            push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
+            push_constants.data() + 12,
+
+            static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
+            static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
+            static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
+            static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
+        // Move the EASU target from UNDEFINED to GENERAL before the pass writes it.
+        {
+            VkImageMemoryBarrier fsr_write_barrier = base_barrier;
+            fsr_write_barrier.image = *images[image_index];
+            fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+            // NOTE(review): both access masks stay 0 here although the pass writes this image.
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier);
+        }
+
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+                                  descriptor_sets[image_index * 2], {});
+        cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
+                        Common::DivCeil(output_size.height, 16u), 1);
+
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
+        // Pass 2 (RCAS): only the first four words are rewritten; the rest keep the EASU values.
+        FsrRcasCon(push_constants.data(), 0.25f);
+        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
+        // Order the EASU writes before the RCAS reads and move the RCAS target to GENERAL.
+        {
+            std::array<VkImageMemoryBarrier, 2> barriers;
+            auto& fsr_read_barrier = barriers[0];
+            auto& blit_write_barrier = barriers[1];
+
+            fsr_read_barrier = base_barrier;
+            fsr_read_barrier.image = *images[image_index];
+            fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+            fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+            blit_write_barrier = base_barrier;
+            blit_write_barrier.image = *images[image_count + image_index];
+            blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+            blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers);
+        }
+
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+                                  descriptor_sets[image_index * 2 + 1], {});
+        cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
+                        Common::DivCeil(output_size.height, 16u), 1);
+        // Order the RCAS writes before later fragment shader reads of the result.
+        {
+            std::array<VkImageMemoryBarrier, 1> barriers;
+            auto& blit_read_barrier = barriers[0];
+
+            blit_read_barrier = base_barrier;
+            blit_read_barrier.image = *images[image_count + image_index];
+            blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+            blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers);
+        }
+    });
+
+    return *image_views[image_count + image_index];
+}
+
+void FSR::CreateDescriptorPool() {
+    // Every descriptor set created from this pool holds one combined image sampler and one
+    // storage image, and image_count * 2 sets are allocated in total, so both per-type
+    // capacities equal the number of sets.
+    const u32 set_count = static_cast<u32>(image_count * 2);
+    const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
+        {.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = set_count},
+        {.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .descriptorCount = set_count},
+    }};
+
+    // The pool is created with the flag that permits freeing individual descriptor sets.
+    const VkDescriptorPoolCreateInfo pool_ci{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+        .maxSets = set_count,
+        .poolSizeCount = static_cast<u32>(pool_sizes.size()),
+        .pPoolSizes = pool_sizes.data(),
+    };
+
+    descriptor_pool = device.GetLogical().CreateDescriptorPool(pool_ci);
+}
+
+void FSR::CreateDescriptorSetLayout() {
+    const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
+        {
+            .binding = 0, // Pass input, read through the immutable sampler below
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = sampler.address(),
+        },
+        {
+            .binding = 1, // Pass output, written as a storage image
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr, // Vulkan ignores this field for non-sampler types
+        },
+    }};
+    // A single layout is shared by the descriptor sets of both passes.
+    const VkDescriptorSetLayoutCreateInfo ci{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = static_cast<u32>(layout_bindings.size()),
+        .pBindings = layout_bindings.data(),
+    };
+
+    descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
+}
+
+void FSR::CreateDescriptorSets() {
+    // Two sets per image index, every one of them using the single descriptor set layout.
+    const u32 set_count = static_cast<u32>(image_count * 2);
+    const std::vector layouts(set_count, *descriptor_set_layout);
+
+    const VkDescriptorSetAllocateInfo allocate_info{
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .descriptorPool = *descriptor_pool,
+        .descriptorSetCount = set_count,
+        .pSetLayouts = layouts.data(),
+    };
+    descriptor_sets = descriptor_pool.Allocate(allocate_info);
+}
+
+void FSR::CreateImages() {
+    // Entries [0, image_count) receive the EASU output, the following image_count the RCAS output.
+    const size_t total_images = image_count * 2;
+    images.resize(total_images);
+    image_views.resize(total_images);
+    buffer_commits.resize(total_images);
+    for (size_t index = 0; index < total_images; ++index) {
+        // 16-bit float RGBA image of the output size, usable as storage, sampled and copy source.
+        const VkImageCreateInfo image_ci{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .imageType = VK_IMAGE_TYPE_2D,
+            .format = VK_FORMAT_R16G16B16A16_SFLOAT,
+            .extent = {.width = output_size.width, .height = output_size.height, .depth = 1},
+            .mipLevels = 1,
+            .arrayLayers = 1,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .tiling = VK_IMAGE_TILING_OPTIMAL,
+            .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
+                     VK_IMAGE_USAGE_SAMPLED_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+            .queueFamilyIndexCount = 0,
+            .pQueueFamilyIndices = nullptr,
+            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        };
+        images[index] = device.GetLogical().CreateImage(image_ci);
+        buffer_commits[index] = memory_allocator.Commit(images[index], MemoryUsage::DeviceLocal);
+        // Identity-swizzled 2D color view over the single mip level and array layer.
+        const VkImageViewCreateInfo view_ci{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .image = *images[index],
+            .viewType = VK_IMAGE_VIEW_TYPE_2D,
+            .format = VK_FORMAT_R16G16B16A16_SFLOAT,
+            .components =
+                {
+                    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+                },
+            .subresourceRange =
+                {
+                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                    .baseMipLevel = 0,
+                    .levelCount = 1,
+                    .baseArrayLayer = 0,
+                    .layerCount = 1,
+                },
+        };
+        image_views[index] = device.GetLogical().CreateImageView(view_ci);
+    }
+}
+
+void FSR::CreatePipelineLayout() {
+    // One compute-stage push constant range sized for the 16 words that Draw pushes per pass.
+    const VkPushConstantRange push_constant_range{
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .offset = 0,
+        .size = sizeof(std::array<u32, 4 * 4>),
+    };
+    const VkPipelineLayoutCreateInfo layout_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .setLayoutCount = 1,
+        .pSetLayouts = descriptor_set_layout.address(),
+        .pushConstantRangeCount = 1,
+        .pPushConstantRanges = &push_constant_range,
+    };
+    pipeline_layout = device.GetLogical().CreatePipelineLayout(layout_ci);
+}
+
+void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
+    // Each image index owns two consecutive descriptor sets: the first is bound for the EASU
+    // dispatch and the second for the RCAS dispatch.
+    const VkDescriptorSet easu_set = descriptor_sets[image_index * 2];
+    const VkDescriptorSet rcas_set = descriptor_sets[image_index * 2 + 1];
+
+    const VkDescriptorImageInfo source_info{
+        .sampler = VK_NULL_HANDLE,
+        .imageView = image_view,
+        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+    };
+    const VkDescriptorImageInfo easu_target_info{
+        .sampler = VK_NULL_HANDLE,
+        .imageView = *image_views[image_index],
+        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+    };
+    const VkDescriptorImageInfo rcas_target_info{
+        .sampler = VK_NULL_HANDLE,
+        .imageView = *image_views[image_count + image_index],
+        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+    };
+
+    // Describes a one-element image descriptor write into `binding` of `set`.
+    const auto make_write = [](VkDescriptorSet set, u32 binding, VkDescriptorType type,
+                               const VkDescriptorImageInfo& info) {
+        return VkWriteDescriptorSet{
+            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+            .pNext = nullptr,
+            .dstSet = set,
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = type,
+            .pImageInfo = &info,
+            .pBufferInfo = nullptr,
+            .pTexelBufferView = nullptr,
+        };
+    };
+
+    // EASU set: samples the caller's image and stores into the first FSR image.
+    device.GetLogical().UpdateDescriptorSets(
+        std::array{
+            make_write(easu_set, 0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, source_info),
+            make_write(easu_set, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, easu_target_info),
+        },
+        {});
+
+    // RCAS set: samples the first FSR image and stores into the second one.
+    device.GetLogical().UpdateDescriptorSets(
+        std::array{
+            make_write(rcas_set, 0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, easu_target_info),
+            make_write(rcas_set, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, rcas_target_info),
+        },
+        {});
+}
+
+void FSR::CreateSampler() {
+    // Linear filtering with clamp-to-edge addressing, no anisotropy and no depth comparison.
+    const VkSamplerCreateInfo sampler_ci{
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .magFilter = VK_FILTER_LINEAR,
+        .minFilter = VK_FILTER_LINEAR,
+        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .mipLodBias = 0.0f,
+        .anisotropyEnable = VK_FALSE,
+        .maxAnisotropy = 0.0f,
+        .compareEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_NEVER,
+        .minLod = 0.0f,
+        .maxLod = 0.0f,
+        .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
+        .unnormalizedCoordinates = VK_FALSE,
+    };
+    sampler = device.GetLogical().CreateSampler(sampler_ci);
+}
+
+void FSR::CreateShaders() {
+    // Use the FP16 builds of both passes when the device reports float16 support,
+    // otherwise fall back to the FP32 builds.
+    const bool use_fp16 = device.IsFloat16Supported();
+    easu_shader = use_fp16 ? BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV)
+                           : BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
+    rcas_shader = use_fp16 ? BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV)
+                           : BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
+}
+
+void FSR::CreatePipeline() {
+    // Both passes are compute pipelines with entry point "main" on the shared pipeline layout;
+    // they differ only in the shader module. EASU is created first, then RCAS.
+    const VkComputePipelineCreateInfo easu_ci{
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stage =
+            {
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                .module = *easu_shader,
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            },
+        .layout = *pipeline_layout,
+        .basePipelineHandle = VK_NULL_HANDLE,
+        .basePipelineIndex = 0,
+    };
+    easu_pipeline = device.GetLogical().CreateComputePipeline(easu_ci);
+
+    // Same description with the RCAS shader module.
+    const VkComputePipelineCreateInfo rcas_ci{
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stage =
+            {
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                .module = *rcas_shader,
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            },
+        .layout = *pipeline_layout,
+        .basePipelineHandle = VK_NULL_HANDLE,
+        .basePipelineIndex = 0,
+    };
+    rcas_pipeline = device.GetLogical().CreateComputePipeline(rcas_ci);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
new file mode 100755
index 000000000..6bbec3d36
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/math_util.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class VKScheduler;
+
+class FSR { // Holds the Vulkan objects behind the EASU and RCAS compute pipelines
+public: // NOTE(review): Draw presumably returns the view of the upscaled image - confirm
+    explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
+                 VkExtent2D output_size);
+    VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
+                     VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
+
+private:
+    void CreateDescriptorPool();
+    void CreateDescriptorSetLayout();
+    void CreateDescriptorSets();
+    void CreateImages();
+    void CreateSampler();
+    void CreateShaders();
+    void CreatePipeline(); // creates easu_pipeline and rcas_pipeline from the shader modules
+    void CreatePipelineLayout();
+
+    void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
+
+    const Device& device; // its logical device creates the compute pipelines
+    MemoryAllocator& memory_allocator;
+    size_t image_count; // NOTE(review): presumably the number of frames in flight - confirm
+    VkExtent2D output_size;
+
+    vk::DescriptorPool descriptor_pool;
+    vk::DescriptorSetLayout descriptor_set_layout;
+    vk::DescriptorSets descriptor_sets;
+    vk::PipelineLayout pipeline_layout; // layout shared by both compute pipelines
+    vk::ShaderModule easu_shader; // compute shader module run by easu_pipeline
+    vk::ShaderModule rcas_shader; // compute shader module run by rcas_pipeline
+    vk::Pipeline easu_pipeline; // compute pipeline entering easu_shader at "main"
+    vk::Pipeline rcas_pipeline; // compute pipeline entering rcas_shader at "main"
+    vk::Sampler sampler;
+    std::vector<vk::Image> images;
+    std::vector<vk::ImageView> image_views;
+    std::vector<MemoryCommit> buffer_commits; // NOTE(review): presumably backs `images` - confirm
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 8634c3316..616a7b457 100755
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -32,6 +32,8 @@ namespace {
 using boost::container::small_vector;
 using boost::container::static_vector;
 using Shader::ImageBufferDescriptor;
+using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET;
+using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
 using Tegra::Texture::TexturePair;
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::PixelFormatFromDepthFormat;
@@ -235,6 +237,7 @@ GraphicsPipeline::GraphicsPipeline(
         stage_infos[stage] = *info;
         enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
         std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
+        num_textures += Shader::NumDescriptors(info->texture_descriptors);
     }
     auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
         DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
@@ -277,11 +280,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
 
 template <typename Spec>
 void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
-    std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
-    std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
+    std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
     std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
     size_t sampler_index{};
-    size_t image_index{};
+    size_t view_index{};
 
     texture_cache.SynchronizeGraphicsDescriptors();
 
@@ -322,26 +324,30 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
             }
             return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
         }};
-        const auto add_image{[&](const auto& desc) {
+        const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
             for (u32 index = 0; index < desc.count; ++index) {
                 const auto handle{read_handle(desc, index)};
-                image_view_indices[image_index++] = handle.first;
+                views[view_index++] = {
+                    .index = handle.first,
+                    .blacklist = blacklist,
+                    .id = {},
+                };
             }
         }};
         if constexpr (Spec::has_texture_buffers) {
             for (const auto& desc : info.texture_buffer_descriptors) {
-                add_image(desc);
+                add_image(desc, false);
             }
         }
         if constexpr (Spec::has_image_buffers) {
             for (const auto& desc : info.image_buffer_descriptors) {
-                add_image(desc);
+                add_image(desc, false);
             }
         }
         for (const auto& desc : info.texture_descriptors) {
             for (u32 index = 0; index < desc.count; ++index) {
                 const auto handle{read_handle(desc, index)};
-                image_view_indices[image_index++] = handle.first;
+                views[view_index++] = {handle.first};
 
                 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
                 samplers[sampler_index++] = sampler->Handle();
@@ -349,7 +355,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
         }
         if constexpr (Spec::has_images) {
             for (const auto& desc : info.image_descriptors) {
-                add_image(desc);
+                add_image(desc, desc.is_written);
             }
         }
     }};
@@ -368,10 +374,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
     if constexpr (Spec::enabled_stages[4]) {
         config_stage(4);
     }
-    const std::span indices_span(image_view_indices.data(), image_index);
-    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+    texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index));
 
-    ImageId* texture_buffer_index{image_view_ids.data()};
+    VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
     const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
         size_t index{};
         const auto add_buffer{[&](const auto& desc) {
@@ -381,12 +386,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
                 if constexpr (is_image) {
                     is_written = desc.is_written;
                 }
-                ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+                ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
                 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
                                                        image_view.BufferSize(), image_view.format,
                                                        is_written, is_image);
                 ++index;
-                ++texture_buffer_index;
+                ++texture_buffer_it;
             }
         }};
         buffer_cache.UnbindGraphicsTextureBuffers(stage);
@@ -402,13 +407,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
                 add_buffer(desc);
             }
         }
-        for (const auto& desc : info.texture_descriptors) {
-            texture_buffer_index += desc.count;
-        }
+        texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
         if constexpr (Spec::has_images) {
-            for (const auto& desc : info.image_descriptors) {
-                texture_buffer_index += desc.count;
-            }
+            texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
         }
     }};
     if constexpr (Spec::enabled_stages[0]) {
@@ -432,12 +433,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
 
     update_descriptor_queue.Acquire();
 
+    RescalingPushConstant rescaling;
     const VkSampler* samplers_it{samplers.data()};
-    const ImageId* views_it{image_view_ids.data()};
+    const VideoCommon::ImageViewInOut* views_it{views.data()};
     const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
         buffer_cache.BindHostStageBuffers(stage);
-        PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
-                             update_descriptor_queue);
+        PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
+                             samplers_it, views_it);
     }};
     if constexpr (Spec::enabled_stages[0]) {
         prepare_stage(0);
@@ -454,10 +456,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
     if constexpr (Spec::enabled_stages[4]) {
         prepare_stage(4);
     }
-    ConfigureDraw();
+    ConfigureDraw(rescaling);
 }
 
-void GraphicsPipeline::ConfigureDraw() {
+void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
     texture_cache.UpdateRenderTargets(false);
     scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
 
@@ -468,12 +470,25 @@ void GraphicsPipeline::ConfigureDraw() {
             build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
         });
     }
+    const bool is_rescaling{texture_cache.IsRescaling()};
+    const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
     const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
     const void* const descriptor_data{update_descriptor_queue.UpdateData()};
-    scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
+                      is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) {
         if (bind_pipeline) {
             cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
         }
+        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+                             RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
+                             rescaling_data.data());
+        if (update_rescaling) {
+            const f32 config_down_factor{Settings::values.resolution_info.down_factor};
+            const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
+            cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
+                                 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor),
+                                 &scale_down_factor);
+        }
         if (!descriptor_set_layout) {
             return;
         }
@@ -826,18 +841,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
 void GraphicsPipeline::Validate() {
     size_t num_images{};
     for (const auto& info : stage_infos) {
-        for (const auto& desc : info.texture_buffer_descriptors) {
-            num_images += desc.count;
-        }
-        for (const auto& desc : info.image_buffer_descriptors) {
-            num_images += desc.count;
-        }
-        for (const auto& desc : info.texture_descriptors) {
-            num_images += desc.count;
-        }
-        for (const auto& desc : info.image_descriptors) {
-            num_images += desc.count;
-        }
+        num_images += Shader::NumDescriptors(info.texture_buffer_descriptors);
+        num_images += Shader::NumDescriptors(info.image_buffer_descriptors);
+        num_images += Shader::NumDescriptors(info.texture_descriptors);
+        num_images += Shader::NumDescriptors(info.image_descriptors);
     }
     ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
 }
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 1c780e944..a0c1d8f07 100755
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -62,6 +62,7 @@ namespace Vulkan {
 class Device;
 class PipelineStatistics;
 class RenderPassCache;
+class RescalingPushConstant;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
@@ -113,7 +114,7 @@ private:
     template <typename Spec>
     void ConfigureImpl(bool is_indexed);
 
-    void ConfigureDraw();
+    void ConfigureDraw(const RescalingPushConstant& rescaling);
 
     void MakePipeline(VkRenderPass render_pass);
 
@@ -138,6 +139,7 @@ private:
     std::array<Shader::Info, NUM_STAGES> stage_infos;
     std::array<u32, 5> enabled_uniform_buffer_masks{};
     VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
+    u32 num_textures{};
 
     vk::DescriptorSetLayout descriptor_set_layout;
     DescriptorAllocator descriptor_allocator;
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 0886b7da8..9be9c9bed 100755
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -70,7 +70,9 @@ public:
             return;
         }
         // If none of the above is hit, fallback to a regular wait
-        semaphore.Wait(tick);
+        while (!semaphore.Wait(tick)) {
+        }
+        Refresh();
     }
 
 private:
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 30b47a7a0..fd334a146 100755
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -58,18 +58,28 @@ struct DrawParams {
     bool is_indexed;
 };
 
-VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
+VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
     const auto& src = regs.viewport_transform[index];
-    const float width = src.scale_x * 2.0f;
-    float y = src.translate_y - src.scale_y;
-    float height = src.scale_y * 2.0f;
+    const auto conv = [scale](float value) {
+        float new_value = value * scale;
+        if (scale < 1.0f) {
+            const bool sign = std::signbit(value);
+            new_value = std::round(std::abs(new_value));
+            new_value = sign ? -new_value : new_value;
+        }
+        return new_value;
+    };
+    const float x = conv(src.translate_x - src.scale_x);
+    const float width = conv(src.scale_x * 2.0f);
+    float y = conv(src.translate_y - src.scale_y);
+    float height = conv(src.scale_y * 2.0f);
     if (regs.screen_y_control.y_negate) {
         y += height;
         height = -height;
     }
     const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
     VkViewport viewport{
-        .x = src.translate_x - src.scale_x,
+        .x = x,
         .y = y,
         .width = width != 0.0f ? width : 1.0f,
         .height = height != 0.0f ? height : 1.0f,
@@ -83,14 +93,27 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
     return viewport;
 }
 
-VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
+VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) {
     const auto& src = regs.scissor_test[index];
     VkRect2D scissor;
+    const auto scale_up = [&](s32 value) -> s32 {
+        if (value == 0) {
+            return 0U;
+        }
+        const s32 upset = value * up_scale;
+        s32 acumm = 0;
+        if ((up_scale >> down_shift) == 0) {
+            acumm = upset % 2;
+        }
+        const s32 converted_value = (value * up_scale) >> down_shift;
+        return value < 0 ? std::min<s32>(converted_value - acumm, -1)
+                         : std::max<s32>(converted_value + acumm, 1);
+    };
     if (src.enable) {
-        scissor.offset.x = static_cast<s32>(src.min_x);
-        scissor.offset.y = static_cast<s32>(src.min_y);
-        scissor.extent.width = src.max_x - src.min_x;
-        scissor.extent.height = src.max_y - src.min_y;
+        scissor.offset.x = scale_up(static_cast<s32>(src.min_x));
+        scissor.offset.y = scale_up(static_cast<s32>(src.min_y));
+        scissor.extent.width = scale_up(src.max_x - src.min_x);
+        scissor.extent.height = scale_up(src.max_y - src.min_y);
     } else {
         scissor.offset.x = 0;
         scissor.offset.y = 0;
@@ -199,7 +222,7 @@ void RasterizerVulkan::Clear() {
 
     query_cache.UpdateCounters();
 
-    const auto& regs = maxwell3d.regs;
+    auto& regs = maxwell3d.regs;
     const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
                            regs.clear_buffers.A;
     const bool use_depth = regs.clear_buffers.Z;
@@ -214,8 +237,16 @@ void RasterizerVulkan::Clear() {
     const VkExtent2D render_area = framebuffer->RenderArea();
     scheduler.RequestRenderpass(framebuffer);
 
+    u32 up_scale = 1;
+    u32 down_shift = 0;
+    if (texture_cache.IsRescaling()) {
+        up_scale = Settings::values.resolution_info.up_scale;
+        down_shift = Settings::values.resolution_info.down_shift;
+    }
+    UpdateViewportsState(regs);
+
     VkClearRect clear_rect{
-        .rect = GetScissorState(regs, 0),
+        .rect = GetScissorState(regs, 0, up_scale, down_shift),
         .baseArrayLayer = regs.clear_buffers.layer,
         .layerCount = 1,
     };
@@ -230,7 +261,38 @@ void RasterizerVulkan::Clear() {
     const u32 color_attachment = regs.clear_buffers.RT;
     if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
         VkClearValue clear_value;
-        std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
+        bool is_integer = false;
+        bool is_signed = false;
+        size_t int_size = 8;
+        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) {
+            const auto& this_rt = regs.rt[i];
+            if (this_rt.Address() == 0) {
+                continue;
+            }
+            if (this_rt.format == Tegra::RenderTargetFormat::NONE) {
+                continue;
+            }
+            const auto format =
+                VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format);
+            is_integer = IsPixelFormatInteger(format);
+            is_signed = IsPixelFormatSignedInteger(format);
+            int_size = PixelComponentSizeBitsInteger(format);
+            break;
+        }
+        if (!is_integer) {
+            std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
+        } else if (!is_signed) {
+            for (size_t i = 0; i < 4; i++) {
+                clear_value.color.uint32[i] = static_cast<u32>(
+                    static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
+            }
+        } else {
+            for (size_t i = 0; i < 4; i++) {
+                clear_value.color.int32[i] =
+                    static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
+                                     (regs.clear_color[i] - 0.5f));
+            }
+        }
 
         scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
             const VkClearAttachment attachment{
@@ -595,15 +657,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
     if (!state_tracker.TouchViewports()) {
         return;
     }
+    const bool is_rescaling{texture_cache.IsRescaling()};
+    const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
     const std::array viewports{
-        GetViewportState(device, regs, 0),  GetViewportState(device, regs, 1),
-        GetViewportState(device, regs, 2),  GetViewportState(device, regs, 3),
-        GetViewportState(device, regs, 4),  GetViewportState(device, regs, 5),
-        GetViewportState(device, regs, 6),  GetViewportState(device, regs, 7),
-        GetViewportState(device, regs, 8),  GetViewportState(device, regs, 9),
-        GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
-        GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
-        GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
+        GetViewportState(device, regs, 0, scale),  GetViewportState(device, regs, 1, scale),
+        GetViewportState(device, regs, 2, scale),  GetViewportState(device, regs, 3, scale),
+        GetViewportState(device, regs, 4, scale),  GetViewportState(device, regs, 5, scale),
+        GetViewportState(device, regs, 6, scale),  GetViewportState(device, regs, 7, scale),
+        GetViewportState(device, regs, 8, scale),  GetViewportState(device, regs, 9, scale),
+        GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale),
+        GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
+        GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
     };
     scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
 }
@@ -612,13 +676,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
     if (!state_tracker.TouchScissors()) {
         return;
     }
+    u32 up_scale = 1;
+    u32 down_shift = 0;
+    if (texture_cache.IsRescaling()) {
+        up_scale = Settings::values.resolution_info.up_scale;
+        down_shift = Settings::values.resolution_info.down_shift;
+    }
     const std::array scissors{
-        GetScissorState(regs, 0),  GetScissorState(regs, 1),  GetScissorState(regs, 2),
-        GetScissorState(regs, 3),  GetScissorState(regs, 4),  GetScissorState(regs, 5),
-        GetScissorState(regs, 6),  GetScissorState(regs, 7),  GetScissorState(regs, 8),
-        GetScissorState(regs, 9),  GetScissorState(regs, 10), GetScissorState(regs, 11),
-        GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
-        GetScissorState(regs, 15),
+        GetScissorState(regs, 0, up_scale, down_shift),
+        GetScissorState(regs, 1, up_scale, down_shift),
+        GetScissorState(regs, 2, up_scale, down_shift),
+        GetScissorState(regs, 3, up_scale, down_shift),
+        GetScissorState(regs, 4, up_scale, down_shift),
+        GetScissorState(regs, 5, up_scale, down_shift),
+        GetScissorState(regs, 6, up_scale, down_shift),
+        GetScissorState(regs, 7, up_scale, down_shift),
+        GetScissorState(regs, 8, up_scale, down_shift),
+        GetScissorState(regs, 9, up_scale, down_shift),
+        GetScissorState(regs, 10, up_scale, down_shift),
+        GetScissorState(regs, 11, up_scale, down_shift),
+        GetScissorState(regs, 12, up_scale, down_shift),
+        GetScissorState(regs, 13, up_scale, down_shift),
+        GetScissorState(regs, 14, up_scale, down_shift),
+        GetScissorState(regs, 15, up_scale, down_shift),
     };
     scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
 }
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 0c11c814f..3bfdf41ba 100755
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
     return true;
 }
 
+bool VKScheduler::UpdateRescaling(bool is_rescaling) {
+    // A cached value only counts if it was recorded after the last state invalidation
+    // (InvalidateState clears rescaling_defined); otherwise the caller must update.
+    const bool changed{!state.rescaling_defined || state.is_rescaling != is_rescaling};
+    state.rescaling_defined = true;
+    state.is_rescaling = is_rescaling;
+    return changed;
+}
+
 void VKScheduler::WorkerThread(std::stop_token stop_token) {
     Common::SetCurrentThreadName("yuzu:VulkanWorker");
     do {
@@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() {
 
 void VKScheduler::InvalidateState() {
     state.graphics_pipeline = nullptr;
+    state.rescaling_defined = false; // makes the next UpdateRescaling() report a change
     state_tracker.InvalidateCommandBufferState();
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 85fc1712f..1b06c9296 100755
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -56,6 +56,9 @@ public:
     /// Update the pipeline to the current execution context.
     bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
 
+    /// Update the rescaling state. Returns true if the state has to be updated.
+    bool UpdateRescaling(bool is_rescaling);
+
     /// Invalidates current command buffer state except for render passes
     void InvalidateState();
 
@@ -185,6 +188,8 @@ private:
         VkFramebuffer framebuffer = nullptr;
         VkExtent2D render_area = {0, 0};
         GraphicsPipeline* graphics_pipeline = nullptr;
+        bool is_rescaling = false;
+        bool rescaling_defined = false;
     };
 
     void WorkerThread(std::stop_token stop_token);
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 2f2d6b31f..40a149832 100755
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -71,11 +71,15 @@ public:
     }
 
     bool TouchViewports() {
-        return Exchange(Dirty::Viewports, false);
+        const bool dirty_viewports = Exchange(Dirty::Viewports, false);
+        const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
+        return dirty_viewports || rescale_viewports; // both Exchange calls have already run
     }
 
     bool TouchScissors() {
-        return Exchange(Dirty::Scissors, false);
+        const bool dirty_scissors = Exchange(Dirty::Scissors, false);
+        const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
+        return dirty_scissors || rescale_scissors; // both Exchange calls have already run
     }
 
     bool TouchDepthBias() {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 06c5fb867..1c0741250 100755
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -32,10 +32,12 @@ using Tegra::Engines::Fermi2D;
 using Tegra::Texture::SwizzleSource;
 using Tegra::Texture::TextureMipmapFilter;
 using VideoCommon::BufferImageCopy;
+using VideoCommon::ImageFlagBits;
 using VideoCommon::ImageInfo;
 using VideoCommon::ImageType;
 using VideoCommon::SubresourceRange;
 using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatInteger;
 
 namespace {
 constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
@@ -588,8 +590,158 @@ struct RangedBarrierRange {
     UNREACHABLE_MSG("Invalid image format={}", format);
     return VK_FORMAT_R32_UINT;
 }
+
+/// Records a blit of every mip level and layer between an image and its rescaled counterpart.
+/// src_image is transitioned from VK_IMAGE_LAYOUT_GENERAL, dst_image's previous contents are
+/// discarded, and both images are left in VK_IMAGE_LAYOUT_GENERAL afterwards.
+/// @param info Native (unscaled) image description: type, size, format, levels and layers.
+/// @param aspect_mask Aspects to blit; anything other than pure color forces nearest filtering.
+/// @param resolution Scaling settings; sizes are converted with resolution.ScaleUp().
+/// @param up_scaling True blits native size -> scaled size; false blits scaled -> native size.
+void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
+               VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
+               bool up_scaling = true) {
+    const bool is_2d = info.type == ImageType::e2D;
+    const auto resources = info.resources;
+    const VkExtent2D extent{
+        .width = info.size.width,
+        .height = info.size.height,
+    };
+    // Depth and integer formats must use NEAREST filter for blits.
+    const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
+    const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
+    const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
+                      vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
+        // Only 2D images are rescaled vertically; other types keep their native height.
+        const u32 scaled_width = resolution.ScaleUp(extent.width);
+        const u32 scaled_height = is_2d ? resolution.ScaleUp(extent.height) : extent.height;
+        const VkOffset2D src_size{
+            .x = static_cast<s32>(up_scaling ? extent.width : scaled_width),
+            .y = static_cast<s32>(up_scaling ? extent.height : scaled_height),
+        };
+        const VkOffset2D dst_size{
+            .x = static_cast<s32>(up_scaling ? scaled_width : extent.width),
+            .y = static_cast<s32>(up_scaling ? scaled_height : extent.height),
+        };
+        boost::container::small_vector<VkImageBlit, 4> regions;
+        regions.reserve(resources.levels);
+        for (s32 level = 0; level < resources.levels; level++) {
+            regions.push_back({
+                .srcSubresource{
+                    .aspectMask = aspect_mask,
+                    .mipLevel = static_cast<u32>(level),
+                    .baseArrayLayer = 0,
+                    .layerCount = static_cast<u32>(resources.layers),
+                },
+                .srcOffsets{
+                    {.x = 0, .y = 0, .z = 0},
+                    {
+                        .x = std::max(1, src_size.x >> level),
+                        .y = std::max(1, src_size.y >> level),
+                        .z = 1,
+                    },
+                },
+                .dstSubresource{
+                    .aspectMask = aspect_mask,
+                    .mipLevel = static_cast<u32>(level),
+                    .baseArrayLayer = 0,
+                    .layerCount = static_cast<u32>(resources.layers),
+                },
+                .dstOffsets{
+                    {.x = 0, .y = 0, .z = 0},
+                    {
+                        .x = std::max(1, dst_size.x >> level),
+                        .y = std::max(1, dst_size.y >> level),
+                        .z = 1,
+                    },
+                },
+            });
+        }
+        const VkImageSubresourceRange subresource_range{
+            .aspectMask = aspect_mask,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        };
+        const std::array read_barriers{
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange = subresource_range,
+            },
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange = subresource_range,
+            },
+        };
+        const std::array write_barriers{
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = 0, // src_image was only read by the blit
+                .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange = subresource_range,
+            },
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange = subresource_range,
+            },
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, nullptr, nullptr, read_barriers);
+        cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, nullptr, nullptr, write_barriers);
+    });
+}
 } // Anonymous namespace
 
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+                                         MemoryAllocator& memory_allocator_,
+                                         StagingBufferPool& staging_buffer_pool_,
+                                         BlitImageHelper& blit_image_helper_,
+                                         ASTCDecoderPass& astc_decoder_pass_,
+                                         RenderPassCache& render_pass_cache_)
+    : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
+      staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
+      astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_},
+      resolution{Settings::values.resolution_info} {} // from global settings, not a parameter
+
 void TextureCacheRuntime::Finish() {
     scheduler.Finish();
 }
@@ -614,8 +766,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
         return;
     }
     if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
-        blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
-                                    operation);
+        blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D),
+                                    dst_region, src_region, filter, operation);
         return;
     }
     if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
@@ -719,26 +871,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
     });
 }
 
-void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view,
+                                       bool rescaled) {
+    const u32 up_scale = rescaled ? resolution.up_scale : 1;
+    const u32 down_shift = rescaled ? resolution.down_shift : 0;
     switch (dst_view.format) {
     case PixelFormat::R16_UNORM:
         if (src_view.format == PixelFormat::D16_UNORM) {
-            return blit_image_helper.ConvertD16ToR16(dst, src_view);
+            return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
         }
         break;
     case PixelFormat::R32_FLOAT:
         if (src_view.format == PixelFormat::D32_FLOAT) {
-            return blit_image_helper.ConvertD32ToR32(dst, src_view);
+            return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
         }
         break;
     case PixelFormat::D16_UNORM:
         if (src_view.format == PixelFormat::R16_UNORM) {
-            return blit_image_helper.ConvertR16ToD16(dst, src_view);
+            return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
         }
         break;
     case PixelFormat::D32_FLOAT:
         if (src_view.format == PixelFormat::R32_FLOAT) {
-            return blit_image_helper.ConvertR32ToD32(dst, src_view);
+            return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
         }
         break;
     default:
@@ -840,36 +995,39 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
     return device.GetDeviceLocalMemory();
 }
 
-Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
+void TextureCacheRuntime::TickFrame() {} // no per-frame work is performed by this runtime
+
+Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
              VAddr cpu_addr_)
-    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
-      image(MakeImage(runtime.device, info)),
-      commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
+    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
+      runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
+      commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
       aspect_mask(ImageAspectMask(info.format)) {
-    if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
+    if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
         if (Settings::values.accelerate_astc.GetValue()) {
             flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
         } else {
             flags |= VideoCommon::ImageFlagBits::Converted;
         }
     }
-    if (runtime.device.HasDebuggingToolAttached()) {
-        image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
+    if (runtime->device.HasDebuggingToolAttached()) {
+        original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
     }
     static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
         .pNext = nullptr,
         .usage = VK_IMAGE_USAGE_STORAGE_BIT,
     };
-    if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
-        const auto& device = runtime.device.GetLogical();
+    current_image = *original_image;
+    if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
+        const auto& device = runtime->device.GetLogical();
         storage_image_views.reserve(info.resources.levels);
         for (s32 level = 0; level < info.resources.levels; ++level) {
             storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
                 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                 .pNext = &storage_image_view_usage_create_info,
                 .flags = 0,
-                .image = *image,
+                .image = *original_image,
                 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
                 .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
                 .components{
@@ -890,26 +1048,39 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
     }
 }
 
+Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
+
 Image::~Image() = default;
 
 void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
     // TODO: Move this to another API
+    const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); // sampled before toggling
+    if (is_rescaled) {
+        ScaleDown(true); // ignore=true: select original_image, skip the down-blit
+    }
     scheduler->RequestOutsideRenderPassOperationContext();
     std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
     const VkBuffer src_buffer = map.buffer;
-    const VkImage vk_image = *image;
+    const VkImage vk_image = *original_image; // the copy always targets the unscaled image
     const VkImageAspectFlags vk_aspect_mask = aspect_mask;
     const bool is_initialized = std::exchange(initialized, true);
     scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
                        vk_copies](vk::CommandBuffer cmdbuf) {
         CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
     });
+    if (is_rescaled) {
+        ScaleUp(); // ignore=false: re-blit the uploaded data into the scaled image
+    }
 }
 
 void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+    const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
+    if (is_rescaled) {
+        ScaleDown();
+    }
     std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
     scheduler->RequestOutsideRenderPassOperationContext();
-    scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
+    scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask,
                        vk_copies](vk::CommandBuffer cmdbuf) {
         const VkImageMemoryBarrier read_barrier{
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -959,6 +1130,146 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, memory_write_barrier, nullptr, image_write_barrier);
     });
+    if (is_rescaled) {
+        ScaleUp(true);
+    }
+}
+
+bool Image::ScaleUp(bool ignore) { // ignore=true: switch images but skip the content blit
+    if (True(flags & ImageFlagBits::Rescaled)) {
+        return false; // already marked as rescaled
+    }
+    ASSERT(info.type != ImageType::Linear);
+    flags |= ImageFlagBits::Rescaled; // set even if scaling turns out to be inactive below
+    const auto& resolution = runtime->resolution;
+    if (!resolution.active) {
+        return false;
+    }
+    has_scaled = true;
+    const auto& device = runtime->device;
+    if (!scaled_image) { // first scale-up: create the scaled image and commit its memory
+        const bool is_2d = info.type == ImageType::e2D;
+        const u32 scaled_width = resolution.ScaleUp(info.size.width);
+        const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+        auto scaled_info = info;
+        scaled_info.size.width = scaled_width;
+        scaled_info.size.height = scaled_height;
+        scaled_image = MakeImage(device, scaled_info);
+        auto& allocator = runtime->memory_allocator;
+        scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
+        ignore = false; // a just-created image always goes through the blit below
+    }
+    current_image = *scaled_image; // Handle() now returns the scaled image
+    if (ignore) {
+        return true;
+    }
+    // Copy original -> scaled: direct blit when the format supports it, otherwise the helper.
+    if (aspect_mask == 0) {
+        aspect_mask = ImageAspectMask(info.format);
+    }
+    static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+    const PixelFormat format = StorageFormat(info.format);
+    const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+    const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+    if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
+        BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
+    } else {
+        return BlitScaleHelper(true); // fall back to the framebuffer-based helper
+    }
+    return true;
+}
+
+bool Image::ScaleDown(bool ignore) { // ignore=true: switch images but skip the content blit
+    if (False(flags & ImageFlagBits::Rescaled)) {
+        return false; // not currently marked as rescaled
+    }
+    ASSERT(info.type != ImageType::Linear);
+    flags &= ~ImageFlagBits::Rescaled; // cleared even if scaling is inactive below
+    const auto& resolution = runtime->resolution;
+    if (!resolution.active) {
+        return false;
+    }
+    current_image = *original_image; // Handle() now returns the unscaled image
+    if (ignore) {
+        return true;
+    }
+    if (aspect_mask == 0) {
+        aspect_mask = ImageAspectMask(info.format);
+    }
+    static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+    const PixelFormat format = StorageFormat(info.format);
+    const auto& device = runtime->device;
+    const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+    const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+    if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
+        BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
+    } else {
+        return BlitScaleHelper(false); // fall back to the framebuffer-based helper
+    }
+    return true;
+}
+
+bool Image::BlitScaleHelper(bool scale_up) { // fallback used when the format lacks blit support
+    using namespace VideoCommon;
+    static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
+    const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
+    const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; // else point filter
+    const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear
+                                       : Tegra::Engines::Fermi2D::Filter::Point;
+    // Width is always scaled; height is scaled only for 2D images.
+    const bool is_2d = info.type == ImageType::e2D;
+    const auto& resolution = runtime->resolution;
+    const u32 scaled_width = resolution.ScaleUp(info.size.width);
+    const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+    std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view;
+    std::unique_ptr<Framebuffer>& blit_framebuffer =
+        scale_up ? scale_framebuffer : normal_framebuffer;
+    if (!blit_view) { // views and framebuffers are created once per direction and then reused
+        const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
+        blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
+    }
+    // Source is the unscaled extent when scaling up and the scaled extent when scaling down.
+    const u32 src_width = scale_up ? info.size.width : scaled_width;
+    const u32 src_height = scale_up ? info.size.height : scaled_height;
+    const u32 dst_width = scale_up ? scaled_width : info.size.width;
+    const u32 dst_height = scale_up ? scaled_height : info.size.height;
+    const Region2D src_region{
+        .start = {0, 0},
+        .end = {static_cast<s32>(src_width), static_cast<s32>(src_height)},
+    };
+    const Region2D dst_region{
+        .start = {0, 0},
+        .end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)},
+    };
+    const VkExtent2D extent{ // larger of the scaled and unscaled size on each axis
+        .width = std::max(scaled_width, info.size.width),
+        .height = std::max(scaled_height, info.size.height), // was compared against the width
+    };
+
+    auto* view_ptr = blit_view.get();
+    if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
+        if (!blit_framebuffer) {
+            blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+        }
+        const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);
+
+        runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
+                                             src_region, operation, BLIT_OPERATION);
+    } else if (!runtime->device.IsBlitDepthStencilSupported() &&
+               aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+        if (!blit_framebuffer) {
+            blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
+        }
+        runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
+                                                    blit_view->StencilView(), dst_region,
+                                                    src_region, operation, BLIT_OPERATION);
+    } else {
+        // TODO: Use helper blits where applicable
+        flags &= ~ImageFlagBits::Rescaled; // report the image as unscaled when no path applies
+        LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format);
+        return false;
+    }
+    return true;
 }
 
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
@@ -1052,7 +1363,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
     : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
       buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
 
-ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
     : VideoCommon::ImageViewBase{params} {}
 
 VkImageView ImageView::DepthView() {
@@ -1162,7 +1473,27 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
 }
 
 Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
-                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
+                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key)
+    : render_area{VkExtent2D{ // initialised here because CreateFramebuffer() reads it
+          .width = key.size.width,
+          .height = key.size.height,
+      }} {
+    CreateFramebuffer(runtime, color_buffers, depth_buffer);
+    if (runtime.device.HasDebuggingToolAttached()) {
+        framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); // name derived from key
+    }
+}
+
+Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
+                         ImageView* depth_buffer, VkExtent2D extent)
+    : render_area{extent} { // must be set before CreateFramebuffer(), which reads it
+    std::array<ImageView*, NUM_RT> color_buffers{color_buffer}; // remaining slots are nullptr
+    CreateFramebuffer(runtime, color_buffers, depth_buffer);
+}
+
+void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
+                                    std::span<ImageView*, NUM_RT> color_buffers,
+                                    ImageView* depth_buffer) {
     std::vector<VkImageView> attachments;
     RenderPassKey renderpass_key{};
     s32 num_layers = 1;
@@ -1200,10 +1531,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
 
     renderpass = runtime.render_pass_cache.Get(renderpass_key);
 
-    render_area = VkExtent2D{
-        .width = key.size.width,
-        .height = key.size.height,
-    };
     num_color_buffers = static_cast<u32>(num_colors);
     framebuffer = runtime.device.GetLogical().CreateFramebuffer({
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
@@ -1212,13 +1539,10 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
         .renderPass = renderpass,
         .attachmentCount = static_cast<u32>(attachments.size()),
         .pAttachments = attachments.data(),
-        .width = key.size.width,
-        .height = key.size.height,
+        .width = render_area.width,
+        .height = render_area.height,
         .layers = static_cast<u32>(std::max(num_layers, 1)),
     });
-    if (runtime.device.HasDebuggingToolAttached()) {
-        framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
-    }
 }
 
 void TextureCacheRuntime::AccelerateImageUpload(
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index b09c468e4..9d149d306 100755
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -13,6 +13,10 @@
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+namespace Settings {
+struct ResolutionScalingInfo;
+}
+
 namespace Vulkan {
 
 using VideoCommon::ImageId;
@@ -31,14 +35,14 @@ class RenderPassCache;
 class StagingBufferPool;
 class VKScheduler;
 
-struct TextureCacheRuntime {
-    const Device& device;
-    VKScheduler& scheduler;
-    MemoryAllocator& memory_allocator;
-    StagingBufferPool& staging_buffer_pool;
-    BlitImageHelper& blit_image_helper;
-    ASTCDecoderPass& astc_decoder_pass;
-    RenderPassCache& render_pass_cache;
+class TextureCacheRuntime {
+public:
+    explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
+                                 MemoryAllocator& memory_allocator_,
+                                 StagingBufferPool& staging_buffer_pool_,
+                                 BlitImageHelper& blit_image_helper_,
+                                 ASTCDecoderPass& astc_decoder_pass_,
+                                 RenderPassCache& render_pass_cache_);
 
     void Finish();
 
@@ -46,6 +50,10 @@ struct TextureCacheRuntime {
 
     StagingBufferRef DownloadStagingBuffer(size_t size);
 
+    void TickFrame();
+
+    u64 GetDeviceLocalMemory() const;
+
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                    const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
@@ -53,7 +61,7 @@ struct TextureCacheRuntime {
 
     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
-    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
+    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
 
     bool CanAccelerateImageUpload(Image&) const noexcept {
         return false;
@@ -74,13 +82,21 @@ struct TextureCacheRuntime {
         return true;
     }
 
-    u64 GetDeviceLocalMemory() const;
+    const Device& device;
+    VKScheduler& scheduler;
+    MemoryAllocator& memory_allocator;
+    StagingBufferPool& staging_buffer_pool;
+    BlitImageHelper& blit_image_helper;
+    ASTCDecoderPass& astc_decoder_pass;
+    RenderPassCache& render_pass_cache;
+    const Settings::ResolutionScalingInfo& resolution;
 };
 
 class Image : public VideoCommon::ImageBase {
 public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);
+    explicit Image(const VideoCommon::NullImageParams&);
 
     ~Image();
 
@@ -97,7 +113,7 @@ public:
                         std::span<const VideoCommon::BufferImageCopy> copies);
 
     [[nodiscard]] VkImage Handle() const noexcept {
-        return *image;
+        return current_image;
     }
 
     [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
@@ -113,14 +129,30 @@ public:
         return std::exchange(initialized, true);
     }
 
+    bool ScaleUp(bool ignore = false);
+
+    bool ScaleDown(bool ignore = false);
+
 private:
-    VKScheduler* scheduler;
-    vk::Image image;
+    bool BlitScaleHelper(bool scale_up);
+
+    VKScheduler* scheduler{};
+    TextureCacheRuntime* runtime{};
+
+    vk::Image original_image;
     MemoryCommit commit;
-    vk::ImageView image_view;
     std::vector<vk::ImageView> storage_image_views;
     VkImageAspectFlags aspect_mask = 0;
     bool initialized = false;
+    vk::Image scaled_image{};
+    MemoryCommit scaled_commit{};
+    VkImage current_image{};
+
+    std::unique_ptr<Framebuffer> scale_framebuffer;
+    std::unique_ptr<ImageView> scale_view;
+
+    std::unique_ptr<Framebuffer> normal_framebuffer;
+    std::unique_ptr<ImageView> normal_view;
 };
 
 class ImageView : public VideoCommon::ImageViewBase {
@@ -128,7 +160,7 @@ public:
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
                        const VideoCommon::ImageViewInfo&, GPUVAddr);
-    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
 
     [[nodiscard]] VkImageView DepthView();
 
@@ -197,9 +229,15 @@ private:
 
 class Framebuffer {
 public:
-    explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+    explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
                          ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
 
+    explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
+                         ImageView* depth_buffer, VkExtent2D extent);
+
+    void CreateFramebuffer(TextureCacheRuntime& runtime,
+                           std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer);
+
     [[nodiscard]] VkFramebuffer Handle() const noexcept {
         return *framebuffer;
     }
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index eb1746265..58d262446 100755
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -279,6 +279,80 @@ bool IsPixelFormatSRGB(PixelFormat format) {
     }
 }
 
+bool IsPixelFormatInteger(PixelFormat format) { // true for every *_SINT / *_UINT format below
+    switch (format) {
+    case PixelFormat::R8_SINT: // 8-bit components
+    case PixelFormat::R8_UINT:
+    case PixelFormat::R8G8_SINT:
+    case PixelFormat::R8G8_UINT:
+    case PixelFormat::A8B8G8R8_SINT:
+    case PixelFormat::A8B8G8R8_UINT:
+    case PixelFormat::A2B10G10R10_UINT: // packed 2/10/10/10
+    case PixelFormat::R16_SINT: // 16-bit components
+    case PixelFormat::R16_UINT:
+    case PixelFormat::R16G16_SINT:
+    case PixelFormat::R16G16_UINT:
+    case PixelFormat::R16G16B16A16_SINT:
+    case PixelFormat::R16G16B16A16_UINT:
+    case PixelFormat::R32_SINT: // 32-bit components
+    case PixelFormat::R32_UINT:
+    case PixelFormat::R32G32_SINT:
+    case PixelFormat::R32G32_UINT:
+    case PixelFormat::R32G32B32A32_SINT:
+    case PixelFormat::R32G32B32A32_UINT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool IsPixelFormatSignedInteger(PixelFormat format) { // true only for the *_SINT formats below
+    switch (format) {
+    case PixelFormat::R8_SINT: // 8-bit components
+    case PixelFormat::R8G8_SINT:
+    case PixelFormat::A8B8G8R8_SINT:
+    case PixelFormat::R16_SINT: // 16-bit components
+    case PixelFormat::R16G16_SINT:
+    case PixelFormat::R16G16B16A16_SINT:
+    case PixelFormat::R32_SINT: // 32-bit components
+    case PixelFormat::R32G32_SINT:
+    case PixelFormat::R32G32B32A32_SINT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+size_t PixelComponentSizeBitsInteger(PixelFormat format) { // per-component width in bits
+    switch (format) {
+    case PixelFormat::R8_SINT:
+    case PixelFormat::R8_UINT:
+    case PixelFormat::R8G8_SINT:
+    case PixelFormat::R8G8_UINT:
+    case PixelFormat::A8B8G8R8_SINT:
+    case PixelFormat::A8B8G8R8_UINT:
+        return 8;
+    case PixelFormat::A2B10G10R10_UINT:
+        return 10; // a single value is returned for this packed format
+    case PixelFormat::R16_SINT:
+    case PixelFormat::R16_UINT:
+    case PixelFormat::R16G16_SINT:
+    case PixelFormat::R16G16_UINT:
+    case PixelFormat::R16G16B16A16_SINT:
+    case PixelFormat::R16G16B16A16_UINT:
+        return 16;
+    case PixelFormat::R32_SINT:
+    case PixelFormat::R32_UINT:
+    case PixelFormat::R32G32_SINT:
+    case PixelFormat::R32G32_UINT:
+    case PixelFormat::R32G32B32A32_SINT:
+    case PixelFormat::R32G32B32A32_UINT:
+        return 32;
+    default:
+        return 0; // every format not listed above, i.e. those IsPixelFormatInteger rejects
+    }
+}
+
 std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
     return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
 }
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 1503db81f..2ce7c7d33 100755
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -460,6 +460,12 @@ bool IsPixelFormatASTC(PixelFormat format);
 
 bool IsPixelFormatSRGB(PixelFormat format);
 
+bool IsPixelFormatInteger(PixelFormat format);
+
+bool IsPixelFormatSignedInteger(PixelFormat format);
+
+size_t PixelComponentSizeBitsInteger(PixelFormat format);
+
 std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
 
 u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 6052d148a..3db2fdf34 100755
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -60,15 +60,17 @@ namespace {
 ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
     : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
       unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
-      converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
-      cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
-      mip_level_offsets{CalculateMipLevelOffsets(info)} {
+      converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
+      has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
+      cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
     if (info.type == ImageType::e3D) {
         slice_offsets = CalculateSliceOffsets(info);
         slice_subresources = CalculateSliceSubresources(info);
     }
 }
 
+ImageBase::ImageBase(const NullImageParams&) {} // members keep their in-class defaults
+
 ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
     : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
 
@@ -254,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
     }
     lhs.aliased_images.push_back(std::move(lhs_alias));
     rhs.aliased_images.push_back(std::move(rhs_alias));
+    lhs.flags &= ~ImageFlagBits::IsRescalable;
+    rhs.flags &= ~ImageFlagBits::IsRescalable;
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 0c17a791b..c960d807c 100755
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -33,6 +33,12 @@ enum class ImageFlagBits : u32 {
                           ///< garbage collection priority
     Alias = 1 << 11,      ///< This image has aliases and has priority on garbage
                           ///< collection
+
+    // Rescaler
+    Rescaled = 1 << 12,
+    CheckingRescalable = 1 << 13,
+    IsRescalable = 1 << 14,
+    Blacklisted = 1 << 15,
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 
@@ -43,8 +49,11 @@ struct AliasedImage {
     ImageId id;
 };
 
+struct NullImageParams {};
+
 struct ImageBase {
     explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
+    explicit ImageBase(const NullImageParams&);
 
     [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
 
@@ -68,11 +77,18 @@ struct ImageBase {
     void CheckBadOverlapState();
     void CheckAliasState();
 
+    bool HasScaled() const noexcept { // const: read-only query, usable on const images
+        return has_scaled; // set to true by the Vulkan backend's Image::ScaleUp
+    }
+
     ImageInfo info;
 
     u32 guest_size_bytes = 0;
     u32 unswizzled_size_bytes = 0;
     u32 converted_size_bytes = 0;
+    u32 scale_rating = 0;
+    u64 scale_tick = 0;
+    bool has_scaled = false;
     ImageFlagBits flags = ImageFlagBits::CpuModified;
 
     GPUVAddr gpu_addr = 0;
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index 64fd7010a..d8e414247 100755
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -31,6 +31,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
             .depth = config.block_depth,
         };
     }
+    rescaleable = false;
     tile_width_spacing = config.tile_width_spacing;
     if (config.texture_type != TextureType::Texture2D &&
         config.texture_type != TextureType::Texture2DNoMipmap) {
@@ -41,6 +42,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
         ASSERT(config.BaseLayer() == 0);
         type = ImageType::e1D;
         size.width = config.Width();
+        resources.layers = 1;
         break;
     case TextureType::Texture1DArray:
         UNIMPLEMENTED_IF(config.BaseLayer() != 0);
@@ -52,12 +54,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
     case TextureType::Texture2DNoMipmap:
         ASSERT(config.Depth() == 1);
         type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
+        rescaleable = !config.IsPitchLinear();
         size.width = config.Width();
         size.height = config.Height();
         resources.layers = config.BaseLayer() + 1;
         break;
     case TextureType::Texture2DArray:
         type = ImageType::e2D;
+        rescaleable = true;
         size.width = config.Width();
         size.height = config.Height();
         resources.layers = config.BaseLayer() + config.Depth();
@@ -82,10 +86,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
         size.width = config.Width();
         size.height = config.Height();
         size.depth = config.Depth();
+        resources.layers = 1;
         break;
     case TextureType::Texture1DBuffer:
         type = ImageType::Buffer;
         size.width = config.Width();
+        resources.layers = 1;
         break;
     default:
         UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
@@ -95,12 +101,15 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
         // FIXME: Call this without passing *this
         layer_stride = CalculateLayerStride(*this);
         maybe_unaligned_layer_stride = CalculateLayerSize(*this);
+        rescaleable &= (block.depth == 0) && resources.levels == 1;
+        downscaleable = size.height > 512;
     }
 }
 
 ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
     const auto& rt = regs.rt[index];
     format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
+    rescaleable = false;
     if (rt.tile_mode.is_pitch_linear) {
         ASSERT(rt.tile_mode.is_3d == 0);
         type = ImageType::Linear;
@@ -126,6 +135,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index)
         type = ImageType::e3D;
         size.depth = rt.depth;
     } else {
+        rescaleable = block.depth == 0 && size.height > 256;
+        downscaleable = size.height > 512;
         type = ImageType::e2D;
         resources.layers = rt.depth;
     }
@@ -135,6 +146,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
     format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
     size.width = regs.zeta_width;
     size.height = regs.zeta_height;
+    rescaleable = false;
     resources.levels = 1;
     layer_stride = regs.zeta.layer_stride * 4;
     maybe_unaligned_layer_stride = layer_stride;
@@ -153,6 +165,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
         type = ImageType::e3D;
         size.depth = regs.zeta_depth;
     } else {
+        rescaleable = block.depth == 0 && size.height > 256;
+        downscaleable = size.height > 512;
         type = ImageType::e2D;
         resources.layers = regs.zeta_depth;
     }
@@ -161,6 +175,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
 ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
     UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
     format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
+    rescaleable = false;
     if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
         type = ImageType::Linear;
         size = Extent3D{
@@ -171,6 +186,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
         pitch = config.pitch;
     } else {
         type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
+
         block = Extent3D{
             .width = config.block_width,
             .height = config.block_height,
@@ -183,6 +199,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
             .height = config.height,
             .depth = 1,
         };
+        rescaleable = block.depth == 0 && size.height > 256;
+        downscaleable = size.height > 512;
     }
 }
 
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
index 5049fc36e..5932dcaba 100755
--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry;
 using VideoCore::Surface::PixelFormat;
 
 struct ImageInfo {
-    explicit ImageInfo() = default;
+    ImageInfo() = default;
     explicit ImageInfo(const TICEntry& config) noexcept;
     explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
     explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
@@ -33,6 +33,8 @@ struct ImageInfo {
     u32 maybe_unaligned_layer_stride = 0;
     u32 num_samples = 1;
     u32 tile_width_spacing = 0;
+    bool rescaleable = false;
+    bool downscaleable = false;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 450becbeb..c7b4fc231 100755
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -37,14 +37,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
 }
 
 ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
-    : format{info.format}, type{ImageViewType::Buffer}, size{
-                                                            .width = info.size.width,
-                                                            .height = 1,
-                                                            .depth = 1,
-                                                        } {
+    : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer},
+      size{
+          .width = info.size.width,
+          .height = 1,
+          .depth = 1,
+      } {
     ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
 }
 
-ImageViewBase::ImageViewBase(const NullImageParams&) {}
+ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {}
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 903f715c5..9c24c5359 100755
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat;
 struct ImageViewInfo;
 struct ImageInfo;
 
-struct NullImageParams {};
+struct NullImageViewParams {};
 
 enum class ImageViewFlagBits : u16 {
     PreemtiveDownload = 1 << 0,
@@ -28,7 +28,7 @@ struct ImageViewBase {
     explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
                            ImageId image_id);
     explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
-    explicit ImageViewBase(const NullImageParams&);
+    explicit ImageViewBase(const NullImageViewParams&);
 
     [[nodiscard]] bool IsBuffer() const noexcept {
         return type == ImageViewType::Buffer;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f70c1f764..26ab857c9 100755
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
 #include <unordered_set>
 
 #include "common/alignment.h"
+#include "common/settings.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/texture_cache/image_view_base.h"
@@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
 
     // Make sure the first index is reserved for the null resources
     // This way the null resource becomes a compile time constant
-    void(slot_image_views.insert(runtime, NullImageParams{}));
+    void(slot_images.insert(NullImageParams{}));
+    void(slot_image_views.insert(runtime, NullImageViewParams{}));
     void(slot_samplers.insert(runtime, sampler_descriptor));
 
     if constexpr (HAS_DEVICE_MEMORY_INFO) {
         const auto device_memory = runtime.GetDeviceLocalMemory();
-        const u64 possible_expected_memory = (device_memory * 3) / 10;
-        const u64 possible_critical_memory = (device_memory * 6) / 10;
+        const u64 possible_expected_memory = (device_memory * 4) / 10;
+        const u64 possible_critical_memory = (device_memory * 7) / 10;
         expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
         critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
         minimum_memory = 0;
     } else {
-        // on OGL we can be more conservatives as the driver takes care.
+        // On OpenGL we can be more conservative as the driver takes care.
         expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
         critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
-        minimum_memory = expected_memory;
+        minimum_memory = 0;
     }
 }
 
@@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() {
     const bool high_priority_mode = total_used_memory >= expected_memory;
     const bool aggressive_mode = total_used_memory >= critical_memory;
     const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
-    size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
+    size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10);
     const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
         if (num_iterations == 0) {
             return true;
@@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() {
             UntrackImage(image, image_id);
         }
         UnregisterImage(image_id);
-        DeleteImage(image_id);
+        DeleteImage(image_id, image.scale_tick > frame_tick + 5);
         return false;
     };
     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
@@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() {
     sentenced_images.Tick();
     sentenced_framebuffers.Tick();
     sentenced_image_view.Tick();
+    runtime.TickFrame();
     ++frame_tick;
 }
 
@@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 }
 
 template <class P>
-void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
-                                             std::span<ImageViewId> image_view_ids) {
-    FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+template <bool has_blacklists>
+void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
+    FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
 }
 
 template <class P>
-void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
-                                            std::span<ImageViewId> image_view_ids) {
-    FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
+    FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
 }
 
 template <class P>
@@ -202,24 +204,109 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
         PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
         return;
     }
-    flags[Dirty::RenderTargets] = false;
 
-    // Render target control is used on all render targets, so force look ups when this one is up
-    const bool force = flags[Dirty::RenderTargetControl];
-    flags[Dirty::RenderTargetControl] = false;
+    u32 scale_rating = 0;
+    bool rescaled = false;
+    std::array<ImageId, NUM_RT> tmp_color_images{};
+    ImageId tmp_depth_image{};
+    do {
+        flags[Dirty::RenderTargets] = false;
+
+        has_deleted_images = false;
+        // Render target control is used on all render targets, so force lookups when this flag is
+        // dirty
+        const bool force = flags[Dirty::RenderTargetControl];
+        flags[Dirty::RenderTargetControl] = false;
+
+        scale_rating = 0;
+        bool any_rescaled = false;
+        bool can_rescale = true;
+        const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) {
+            if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) {
+                const auto& view = slot_image_views[view_id];
+                const auto image_id = view.image_id;
+                id_save = image_id;
+                auto& image = slot_images[image_id];
+                can_rescale &= ImageCanRescale(image);
+                any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) ||
+                                GetFormatType(image.info.format) != SurfaceType::ColorTexture;
+                scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick
+                                                               ? image.scale_rating + 1U
+                                                               : image.scale_rating);
+            } else {
+                id_save = CORRUPT_ID;
+            }
+        };
+        for (size_t index = 0; index < NUM_RT; ++index) {
+            ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+            if (flags[Dirty::ColorBuffer0 + index] || force) {
+                flags[Dirty::ColorBuffer0 + index] = false;
+                BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+            }
+            check_rescale(color_buffer_id, tmp_color_images[index]);
+        }
+        if (flags[Dirty::ZetaBuffer] || force) {
+            flags[Dirty::ZetaBuffer] = false;
+            BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+        }
+        check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
+
+        if (can_rescale) {
+            rescaled = any_rescaled || scale_rating >= 2;
+            const auto scale_up = [this](ImageId image_id) {
+                if (image_id != CORRUPT_ID) {
+                    Image& image = slot_images[image_id];
+                    ScaleUp(image);
+                }
+            };
+            if (rescaled) {
+                for (size_t index = 0; index < NUM_RT; ++index) {
+                    scale_up(tmp_color_images[index]);
+                }
+                scale_up(tmp_depth_image);
+                scale_rating = 2;
+            }
+        } else {
+            rescaled = false;
+            const auto scale_down = [this](ImageId image_id) {
+                if (image_id != CORRUPT_ID) {
+                    Image& image = slot_images[image_id];
+                    ScaleDown(image);
+                }
+            };
+            for (size_t index = 0; index < NUM_RT; ++index) {
+                scale_down(tmp_color_images[index]);
+            }
+            scale_down(tmp_depth_image);
+            scale_rating = 1;
+        }
+    } while (has_deleted_images);
+    // Rescale End
+
+    const auto set_rating = [this, scale_rating](ImageId image_id) {
+        if (image_id != CORRUPT_ID) {
+            Image& image = slot_images[image_id];
+            image.scale_rating = scale_rating;
+            if (image.scale_tick <= frame_tick) {
+                image.scale_tick = frame_tick + 1;
+            }
+        }
+    };
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        set_rating(tmp_color_images[index]);
+    }
+    set_rating(tmp_depth_image);
+
+    if (is_rescaling != rescaled) {
+        flags[Dirty::RescaleViewports] = true;
+        flags[Dirty::RescaleScissors] = true;
+        is_rescaling = rescaled;
+    }
 
     for (size_t index = 0; index < NUM_RT; ++index) {
         ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
-        if (flags[Dirty::ColorBuffer0 + index] || force) {
-            flags[Dirty::ColorBuffer0 + index] = false;
-            BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
-        }
         PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
     }
-    if (flags[Dirty::ZetaBuffer] || force) {
-        flags[Dirty::ZetaBuffer] = false;
-        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
-    }
     const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
 
     PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
@@ -227,9 +314,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     for (size_t index = 0; index < NUM_RT; ++index) {
         render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
     }
+    u32 up_scale = 1;
+    u32 down_shift = 0;
+    if (is_rescaling) {
+        up_scale = Settings::values.resolution_info.up_scale;
+        down_shift = Settings::values.resolution_info.down_shift;
+    }
     render_targets.size = Extent2D{
-        maxwell3d.regs.render_area.width,
-        maxwell3d.regs.render_area.height,
+        (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
+        (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
     };
 
     flags[Dirty::DepthBiasGlobal] = true;
@@ -241,17 +334,29 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
 }
 
 template <class P>
+template <bool has_blacklists>
 void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
                                      std::span<ImageViewId> cached_image_view_ids,
-                                     std::span<const u32> indices,
-                                     std::span<ImageViewId> image_view_ids) {
-    ASSERT(indices.size() <= image_view_ids.size());
+                                     std::span<ImageViewInOut> views) {
+    bool has_blacklisted;
     do {
         has_deleted_images = false;
-        std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
-            return VisitImageView(table, cached_image_view_ids, index);
-        });
-    } while (has_deleted_images);
+        if constexpr (has_blacklists) {
+            has_blacklisted = false;
+        }
+        for (ImageViewInOut& view : views) {
+            view.id = VisitImageView(table, cached_image_view_ids, view.index);
+            if constexpr (has_blacklists) {
+                if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
+                    const ImageViewBase& image_view{slot_image_views[view.id]};
+                    auto& image = slot_images[image_view.image_id];
+                    image.flags |= ImageFlagBits::Blacklisted;
+                    has_blacklisted |= ScaleDown(image);
+                    image.scale_rating = 0;
+                }
+            }
+        }
+    } while (has_deleted_images || (has_blacklists && has_blacklisted));
 }
 
 template <class P>
@@ -369,8 +474,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
     PrepareImage(src_id, false, false);
     PrepareImage(dst_id, true, false);
 
-    ImageBase& dst_image = slot_images[dst_id];
-    const ImageBase& src_image = slot_images[src_id];
+    Image& dst_image = slot_images[dst_id];
+    Image& src_image = slot_images[src_id];
+    bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+    bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+
+    const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1;
+    if (is_src_rescaled != is_dst_rescaled) {
+        if (ImageCanRescale(src_image)) {
+            ScaleUp(src_image);
+            is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+            if (is_resolve) {
+                dst_image.info.rescaleable = true;
+                for (const auto& alias : dst_image.aliased_images) {
+                    Image& other_image = slot_images[alias.id];
+                    other_image.info.rescaleable = true;
+                }
+            }
+        }
+        if (ImageCanRescale(dst_image)) {
+            ScaleUp(dst_image);
+            is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+        }
+    }
+    if (is_resolve && (is_src_rescaled != is_dst_rescaled)) {
+        // A resolve requires both images to be the same dimensions. Resize down if needed.
+        ScaleDown(src_image);
+        ScaleDown(dst_image);
+        is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+        is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+    }
+    const auto& resolution = Settings::values.resolution_info;
+    const auto scale_region = [&](Region2D& region) {
+        region.start.x = resolution.ScaleUp(region.start.x);
+        region.start.y = resolution.ScaleUp(region.start.y);
+        region.end.x = resolution.ScaleUp(region.end.x);
+        region.end.y = resolution.ScaleUp(region.end.y);
+    };
 
     // TODO: Deduplicate
     const std::optional src_base = src_image.TryFindBase(src.Address());
@@ -378,20 +518,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
     const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
     const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
     const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
-    const Region2D src_region{
+    Region2D src_region{
         Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
         Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
     };
+    if (is_src_rescaled) {
+        scale_region(src_region);
+    }
 
     const std::optional dst_base = dst_image.TryFindBase(dst.Address());
     const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
     const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
     const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
     const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
-    const Region2D dst_region{
+    Region2D dst_region{
         Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
         Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
     };
+    if (is_dst_rescaled) {
+        scale_region(dst_region);
+    }
 
     // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
     Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -486,6 +632,20 @@ void TextureCache<P>::PopAsyncFlushes() {
     committed_downloads.pop();
 }
 
+template <class P>
+bool TextureCache<P>::IsRescaling() const noexcept {
+    return is_rescaling;
+}
+
+template <class P>
+bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept {
+    if (image_view.type == ImageViewType::Buffer) {
+        return false;
+    }
+    const ImageBase& image = slot_images[image_view.image_id];
+    return True(image.flags & ImageFlagBits::Rescaled);
+}
+
 template <class P>
 bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
     bool is_modified = false;
@@ -623,6 +783,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
     return image_id;
 }
 
+template <class P>
+bool TextureCache<P>::ImageCanRescale(ImageBase& image) {
+    if (!image.info.rescaleable) {
+        return false;
+    }
+    if (Settings::values.resolution_info.downscale && !image.info.downscaleable) {
+        return false;
+    }
+    if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) {
+        return true;
+    }
+    if (True(image.flags & ImageFlagBits::IsRescalable)) {
+        return true;
+    }
+    image.flags |= ImageFlagBits::CheckingRescalable;
+    for (const auto& alias : image.aliased_images) {
+        Image& other_image = slot_images[alias.id];
+        if (!ImageCanRescale(other_image)) {
+            image.flags &= ~ImageFlagBits::CheckingRescalable;
+            return false;
+        }
+    }
+    image.flags &= ~ImageFlagBits::CheckingRescalable;
+    image.flags |= ImageFlagBits::IsRescalable;
+    return true;
+}
+
+template <class P>
+void TextureCache<P>::InvalidateScale(Image& image) {
+    if (image.scale_tick <= frame_tick) {
+        image.scale_tick = frame_tick + 1;
+    }
+    const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
+    auto& dirty = maxwell3d.dirty.flags;
+    dirty[Dirty::RenderTargets] = true;
+    dirty[Dirty::ZetaBuffer] = true;
+    for (size_t rt = 0; rt < NUM_RT; ++rt) {
+        dirty[Dirty::ColorBuffer0 + rt] = true;
+    }
+    for (const ImageViewId image_view_id : image_view_ids) {
+        std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
+        if (render_targets.depth_buffer_id == image_view_id) {
+            render_targets.depth_buffer_id = ImageViewId{};
+        }
+    }
+    RemoveImageViewReferences(image_view_ids);
+    RemoveFramebuffers(image_view_ids);
+    for (const ImageViewId image_view_id : image_view_ids) {
+        sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+        slot_image_views.erase(image_view_id);
+    }
+    image.image_view_ids.clear();
+    image.image_view_infos.clear();
+    if constexpr (ENABLE_VALIDATION) {
+        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+    }
+    graphics_image_table.Invalidate();
+    compute_image_table.Invalidate();
+    has_deleted_images = true;
+}
+
+template <class P>
+u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
+    const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale *
+                                          Settings::values.resolution_info.up_scale);
+    const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift +
+                                            Settings::values.resolution_info.down_shift);
+    const u64 image_size_bytes =
+        static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes));
+    const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift;
+    const u64 fitted_size = Common::AlignUp(tentative_size, 1024);
+    return fitted_size;
+}
+
+template <class P>
+bool TextureCache<P>::ScaleUp(Image& image) {
+    const bool has_copy = image.HasScaled();
+    const bool rescaled = image.ScaleUp();
+    if (!rescaled) {
+        return false;
+    }
+    if (!has_copy) {
+        total_used_memory += GetScaledImageSizeBytes(image);
+    }
+    InvalidateScale(image);
+    return true;
+}
+
+template <class P>
+bool TextureCache<P>::ScaleDown(Image& image) {
+    const bool rescaled = image.ScaleDown();
+    if (!rescaled) {
+        return false;
+    }
+    InvalidateScale(image);
+    return true;
+}
+
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
@@ -660,12 +919,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     std::vector<ImageId> right_aliased_ids;
     std::unordered_set<ImageId> ignore_textures;
     std::vector<ImageId> bad_overlap_ids;
+    std::vector<ImageId> all_siblings;
+    const bool this_is_linear = info.type == ImageType::Linear;
     const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
         if (True(overlap.flags & ImageFlagBits::Remapped)) {
             ignore_textures.insert(overlap_id);
             return;
         }
-        if (info.type == ImageType::Linear) {
+        const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
+        if (this_is_linear != overlap_is_linear) {
+            return;
+        }
+        if (this_is_linear && overlap_is_linear) {
             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
                 // Alias linear images with the same pitch
                 left_aliased_ids.push_back(overlap_id);
@@ -681,6 +946,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             cpu_addr = solution->cpu_addr;
             new_info.resources = solution->resources;
             overlap_ids.push_back(overlap_id);
+            all_siblings.push_back(overlap_id);
             return;
         }
         static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
@@ -688,10 +954,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
             left_aliased_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::Alias;
+            all_siblings.push_back(overlap_id);
         } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
                                  broken_views, native_bgr)) {
             right_aliased_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::Alias;
+            all_siblings.push_back(overlap_id);
         } else {
             bad_overlap_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::BadOverlap;
@@ -709,6 +977,37 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         }
     };
     ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+
+    bool can_rescale = info.rescaleable;
+    bool any_rescaled = false;
+    bool any_blacklisted = false;
+    for (const ImageId sibling_id : all_siblings) {
+        if (!can_rescale) {
+            break;
+        }
+        Image& sibling = slot_images[sibling_id];
+        can_rescale &= ImageCanRescale(sibling);
+        any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
+        any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted);
+    }
+
+    can_rescale &= any_rescaled;
+
+    if (can_rescale) {
+        for (const ImageId sibling_id : all_siblings) {
+            Image& sibling = slot_images[sibling_id];
+            ScaleUp(sibling);
+        }
+    } else {
+        for (const ImageId sibling_id : all_siblings) {
+            Image& sibling = slot_images[sibling_id];
+            ScaleDown(sibling);
+            if (any_blacklisted) {
+                sibling.flags |= ImageFlagBits::Blacklisted;
+            }
+        }
+    }
+
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
@@ -731,14 +1030,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     // TODO: Only upload what we need
     RefreshContents(new_image, new_image_id);
 
+    if (can_rescale) {
+        ScaleUp(new_image);
+    } else {
+        ScaleDown(new_image);
+    }
+
     for (const ImageId overlap_id : overlap_ids) {
         Image& overlap = slot_images[overlap_id];
         if (overlap.info.num_samples != new_image.info.num_samples) {
             LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
         } else {
+            const auto& resolution = Settings::values.resolution_info;
             const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
-            const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
-            runtime.CopyImage(new_image, overlap, copies);
+            const u32 up_scale = can_rescale ? resolution.up_scale : 1;
+            const u32 down_shift = can_rescale ? resolution.down_shift : 0;
+            auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
+            runtime.CopyImage(new_image, overlap, std::move(copies));
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
             UntrackImage(overlap, overlap_id);
@@ -1083,13 +1391,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
                "Trying to unregister an already registered image");
     image.flags &= ~ImageFlagBits::Registered;
     image.flags &= ~ImageFlagBits::BadOverlap;
-    u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
-    if ((IsPixelFormatASTC(image.info.format) &&
-         True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
-        True(image.flags & ImageFlagBits::Converted)) {
-        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
-    }
-    total_used_memory -= Common::AlignUp(tentative_size, 1024);
     lru_cache.Free(image.lru_index);
     const auto& clear_page_table =
         [this, image_id](
@@ -1213,8 +1514,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
 }
 
 template <class P>
-void TextureCache<P>::DeleteImage(ImageId image_id) {
+void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     ImageBase& image = slot_images[image_id];
+    if (image.HasScaled()) {
+        total_used_memory -= GetScaledImageSizeBytes(image);
+    }
+    u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+    if ((IsPixelFormatASTC(image.info.format) &&
+         True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+        True(image.flags & ImageFlagBits::Converted)) {
+        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+    }
+    total_used_memory -= Common::AlignUp(tentative_size, 1024);
     const GPUVAddr gpu_addr = image.gpu_addr;
     const auto alloc_it = image_allocs_table.find(gpu_addr);
     if (alloc_it == image_allocs_table.end()) {
@@ -1269,10 +1580,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
                    num_removed_overlaps);
     }
     for (const ImageViewId image_view_id : image_view_ids) {
-        sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+        if (!immediate_delete) {
+            sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+        }
         slot_image_views.erase(image_view_id);
     }
-    sentenced_images.Push(std::move(slot_images[image_id]));
+    if (!immediate_delete) {
+        sentenced_images.Push(std::move(slot_images[image_id]));
+    }
     slot_images.erase(image_id);
 
     alloc_images.erase(alloc_image_it);
@@ -1322,26 +1637,61 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
 template <class P>
 void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
     boost::container::small_vector<const AliasedImage*, 1> aliased_images;
-    ImageBase& image = slot_images[image_id];
+    Image& image = slot_images[image_id];
+    // Start from this image's own flags; newer aliases are OR'ed in by the loop below.
+    bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
+    bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted);
     u64 most_recent_tick = image.modification_tick;
     for (const AliasedImage& aliased : image.aliased_images) {
         ImageBase& aliased_image = slot_images[aliased.id];
         if (image.modification_tick < aliased_image.modification_tick) {
             most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
             aliased_images.push_back(&aliased);
+            any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
+            any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted);
         }
     }
     if (aliased_images.empty()) {
         return;
     }
+    // When any image involved is rescaled, bring this one to a matching state first:
+    // scale it up if it can be rescaled, otherwise scale it down.
+    const bool can_rescale = ImageCanRescale(image);
+    if (any_rescaled) {
+        if (can_rescale) {
+            ScaleUp(image);
+        } else {
+            ScaleDown(image);
+            if (any_blacklisted) {
+                image.flags |= ImageFlagBits::Blacklisted;
+            }
+        }
+    }
     image.modification_tick = most_recent_tick;
     std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
         const ImageBase& lhs_image = slot_images[lhs->id];
         const ImageBase& rhs_image = slot_images[rhs->id];
         return lhs_image.modification_tick < rhs_image.modification_tick;
     });
+    const auto& resolution = Settings::values.resolution_info;
     for (const AliasedImage* const aliased : aliased_images) {
-        CopyImage(image_id, aliased->id, aliased->copies);
+        if (!resolution.active || !any_rescaled) {
+            CopyImage(image_id, aliased->id, aliased->copies);
+            continue;
+        }
+        Image& aliased_image = slot_images[aliased->id];
+        if (!can_rescale) {
+            ScaleDown(aliased_image);
+            if (any_blacklisted) {
+                aliased_image.flags |= ImageFlagBits::Blacklisted;
+            }
+            CopyImage(image_id, aliased->id, aliased->copies);
+            continue;
+        }
+        ScaleUp(aliased_image);
+        // Hand over the unscaled copies: CopyImage scales offsets and extents itself when
+        // the source image is flagged Rescaled, so pre-scaling here would apply it twice.
+        CopyImage(image_id, aliased->id, aliased->copies);
     }
 }
 
@@ -1377,9 +1734,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
 }
 
 template <class P>
-void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
+void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
     Image& dst = slot_images[dst_id];
     Image& src = slot_images[src_id];
+    const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
+    if (is_rescaled) {
+        ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
+        const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D};
+        const auto& resolution = Settings::values.resolution_info;
+        for (auto& copy : copies) {
+            copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x);
+            copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x);
+            copy.extent.width = resolution.ScaleUp(copy.extent.width);
+            if (both_2d) {
+                copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y);
+                copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y);
+                copy.extent.height = resolution.ScaleUp(copy.extent.height);
+            }
+        }
+    }
     const auto dst_format_type = GetFormatType(dst.info.format);
     const auto src_format_type = GetFormatType(src.info.format);
     if (src_format_type == dst_format_type) {
@@ -1424,7 +1797,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const
         };
         UNIMPLEMENTED_IF(copy.extent != expected_size);
 
-        runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
+        runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled);
     }
 }
 
@@ -1433,8 +1806,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
     if (*old_id == new_id) {
         return;
     }
-    if (*old_id) {
-        const ImageViewBase& old_view = slot_image_views[*old_id];
+    if (new_id) {
+        const ImageViewBase& old_view = slot_image_views[new_id];
         if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
             uncommitted_downloads.push_back(old_view.image_id);
         }
@@ -1447,10 +1820,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
     ImageId image_id, const ImageViewInfo& view_info) {
     const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
     const ImageBase& image = slot_images[image_id];
+    const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled);
     const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
     const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
     const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
-    const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+    Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+    if (is_rescaled) {
+        const auto& resolution = Settings::values.resolution_info;
+        extent.width = resolution.ScaleUp(extent.width);
+        if (image.info.type == ImageType::e2D) {
+            extent.height = resolution.ScaleUp(extent.height);
+        }
+    }
     const u32 num_samples = image.info.num_samples;
     const auto [samples_x, samples_y] = SamplesLog2(num_samples);
     const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 2d1893c1c..eea589269 100755
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -21,6 +21,7 @@
 #include "video_core/texture_cache/descriptor_table.h"
 #include "video_core/texture_cache/image_base.h"
 #include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
 #include "video_core/texture_cache/image_view_info.h"
 #include "video_core/texture_cache/render_targets.h"
 #include "video_core/texture_cache/slot_vector.h"
@@ -39,6 +40,12 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
 using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 using namespace Common::Literals;
 
+struct ImageViewInOut {
+    u32 index{}; ///< Input: descriptor index to resolve (takes over from the old `indices` span)
+    bool blacklist{}; ///< NOTE(review): presumably excludes the image from rescaling - confirm
+    ImageViewId id{}; ///< Output: resolved view id (takes over from the old `image_view_ids` span)
+};
+
 template <class P>
 class TextureCache {
     /// Address shift for caching images into a hash table
@@ -53,11 +60,6 @@ class TextureCache {
     /// True when the API can provide info about the memory of the device.
     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
 
-    /// Image view ID for null descriptors
-    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
-    /// Sampler ID for bugged sampler ids
-    static constexpr SamplerId NULL_SAMPLER_ID{0};
-
     static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
     static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
 
@@ -99,11 +101,11 @@ public:
     void MarkModification(ImageId id) noexcept;
 
     /// Fill image_view_ids with the graphics images in indices
-    void FillGraphicsImageViews(std::span<const u32> indices,
-                                std::span<ImageViewId> image_view_ids);
+    template <bool has_blacklists>
+    void FillGraphicsImageViews(std::span<ImageViewInOut> views);
 
     /// Fill image_view_ids with the compute images in indices
-    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
+    void FillComputeImageViews(std::span<ImageViewInOut> views);
 
     /// Get the sampler from the graphics descriptor table in the specified index
     Sampler* GetGraphicsSampler(u32 index);
@@ -160,6 +162,10 @@ public:
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
 
+    [[nodiscard]] bool IsRescaling() const noexcept;
+
+    [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
+
     std::mutex mutex;
 
 private:
@@ -198,9 +204,10 @@ private:
     void RunGarbageCollector();
 
     /// Fills image_view_ids in the image views in indices
+    template <bool has_blacklists>
     void FillImageViews(DescriptorTable<TICEntry>& table,
-                        std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
-                        std::span<ImageViewId> image_view_ids);
+                        std::span<ImageViewId> cached_image_view_ids,
+                        std::span<ImageViewInOut> views);
 
     /// Find or create an image view in the guest descriptor table
     ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
@@ -285,7 +292,7 @@ private:
     void UntrackImage(ImageBase& image, ImageId image_id);
 
     /// Delete image from the cache
-    void DeleteImage(ImageId image);
+    void DeleteImage(ImageId image, bool immediate_delete = false);
 
     /// Remove image views references from the cache
     void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
@@ -306,7 +313,7 @@ private:
     void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
 
     /// Execute copies from one image to the other, even if they are incompatible
-    void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
+    void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
 
     /// Bind an image view as render target, downloading resources preemtively if needed
     void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
@@ -318,6 +325,12 @@ private:
     /// Returns true if the current clear parameters clear the whole image of a given image view
     [[nodiscard]] bool IsFullClear(ImageViewId id);
 
+    bool ImageCanRescale(ImageBase& image);
+    void InvalidateScale(Image& image);
+    bool ScaleUp(Image& image);
+    bool ScaleDown(Image& image);
+    u64 GetScaledImageSizeBytes(ImageBase& image);
+
     Runtime& runtime;
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::Engines::Maxwell3D& maxwell3d;
@@ -349,6 +362,7 @@ private:
     VAddr virtual_invalid_space{};
 
     bool has_deleted_images = false;
+    bool is_rescaling = false;
     u64 total_used_memory = 0;
     u64 minimum_memory;
     u64 expected_memory;
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 47a11cb2f..5c274abdf 100755
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -22,6 +22,13 @@ using ImageAllocId = SlotId;
 using SamplerId = SlotId;
 using FramebufferId = SlotId;
 
+/// Fake image ID for null image views
+constexpr ImageId NULL_IMAGE_ID{0};
+/// Image view ID for null descriptors
+constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
+/// Sampler ID for bugged sampler ids
+constexpr SamplerId NULL_SAMPLER_ID{0};
+
 enum class ImageType : u32 {
     e1D,
     e2D,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 59cf2f561..ddc9fb13a 100755
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -723,7 +723,7 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
 }
 
 std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
-                                             SubresourceBase base) {
+                                             SubresourceBase base, u32 up_scale, u32 down_shift) {
     ASSERT(dst.resources.levels >= src.resources.levels);
     ASSERT(dst.num_samples == src.num_samples);
 
@@ -732,7 +732,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
         ASSERT(src.type == ImageType::e3D);
         ASSERT(src.resources.levels == 1);
     }
-
+    const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
     std::vector<ImageCopy> copies;
     copies.reserve(src.resources.levels);
     for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -762,6 +762,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
         if (is_dst_3d) {
             copy.extent.depth = src.size.depth;
         }
+        copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1);
+        if (both_2d) {
+            copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1);
+        }
     }
     return copies;
 }
@@ -1153,10 +1157,10 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase*
     if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
         dst_info.format = dst->info.format;
     }
-    if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+    if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
         dst_info.format = src->info.format;
     }
-    if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+    if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
         src_info.format = dst->info.format;
     }
 }
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 766502908..7af52de2e 100755
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -55,7 +55,8 @@ struct OverlapResult {
 
 [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
                                                            const ImageInfo& src,
-                                                           SubresourceBase base);
+                                                           SubresourceBase base, u32 up_scale = 1,
+                                                           u32 down_shift = 0);
 
 [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
 
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index cae543a51..e852c817e 100755
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -37,6 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
 namespace VideoCore {
 
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+    Settings::UpdateRescalingInfo();
+
     const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
     const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
     const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
@@ -53,11 +55,10 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
     }
 }
 
-u16 GetResolutionScaleFactor(const RendererBase& renderer) {
-    return static_cast<u16>(
-        Settings::values.resolution_factor.GetValue() != 0
-            ? Settings::values.resolution_factor.GetValue()
-            : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
+float GetResolutionScaleFactor(const RendererBase& renderer) {
+    const auto& resolution = Settings::values.resolution_info;
+    return resolution.active ? resolution.up_factor
+                             : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio();
 }
 
 } // namespace VideoCore
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index f5c27125d..f86877e86 100755
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -25,6 +25,6 @@ class RendererBase;
 /// Creates an emulated GPU instance using the given system context.
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
 
-u16 GetResolutionScaleFactor(const RendererBase& renderer);
+float GetResolutionScaleFactor(const RendererBase& renderer);
 
 } // namespace VideoCore
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 2d5daf6cd..10653ac6b 100755
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -40,6 +40,10 @@ public:
     VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                                 FormatType format_type) const;
 
+    /// Returns true if a format is supported.
+    bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+                           FormatType format_type) const;
+
     /// Reports a device loss.
     void ReportLoss() const;
 
@@ -370,10 +374,6 @@ private:
     /// Returns true if the device natively supports blitting depth stencil images.
     bool TestDepthStencilBlits() const;
 
-    /// Returns true if a format is supported.
-    bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
-                           FormatType format_type) const;
-
     VkInstance instance;                                         ///< Vulkan instance.
     vk::DeviceDispatch dld;                                      ///< Device function pointers.
     vk::PhysicalDevice physical;                                 ///< Physical device.
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 40fd47406..590c7e6b4 100755
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -629,11 +629,9 @@ void GRenderWindow::ReleaseRenderTarget() {
     main_context.reset();
 }
 
-void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) {
-    VideoCore::RendererBase& renderer = system.Renderer();
-    if (res_scale == 0) {
-        res_scale = VideoCore::GetResolutionScaleFactor(renderer);
-    }
+void GRenderWindow::CaptureScreenshot(const QString& screenshot_path) {
+    auto& renderer = system.Renderer();
+    const f32 res_scale = VideoCore::GetResolutionScaleFactor(renderer);
 
     const Layout::FramebufferLayout layout{Layout::FrameLayoutFromResolutionScale(res_scale)};
     screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32);
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index e6a0666e9..40fd4a9d6 100755
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -178,7 +178,7 @@ public:
 
     bool IsLoadingComplete() const;
 
-    void CaptureScreenshot(u32 res_scale, const QString& screenshot_path);
+    void CaptureScreenshot(const QString& screenshot_path);
 
     std::pair<u32, u32> ScaleTouch(const QPointF& pos) const;
 
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index c152e4ea1..b32e0190f 100755
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -825,6 +825,9 @@ void Config::ReadRendererValues() {
     ReadGlobalSetting(Settings::values.vulkan_device);
     ReadGlobalSetting(Settings::values.fullscreen_mode);
     ReadGlobalSetting(Settings::values.aspect_ratio);
+    ReadGlobalSetting(Settings::values.resolution_setup);
+    ReadGlobalSetting(Settings::values.scaling_filter);
+    ReadGlobalSetting(Settings::values.anti_aliasing);
     ReadGlobalSetting(Settings::values.max_anisotropy);
     ReadGlobalSetting(Settings::values.use_speed_limit);
     ReadGlobalSetting(Settings::values.speed_limit);
@@ -1366,6 +1369,18 @@ void Config::SaveRendererValues() {
                  static_cast<u32>(Settings::values.fullscreen_mode.GetDefault()),
                  Settings::values.fullscreen_mode.UsingGlobal());
     WriteGlobalSetting(Settings::values.aspect_ratio);
+    WriteSetting(QString::fromStdString(Settings::values.resolution_setup.GetLabel()),
+                 static_cast<u32>(Settings::values.resolution_setup.GetValue(global)),
+                 static_cast<u32>(Settings::values.resolution_setup.GetDefault()),
+                 Settings::values.resolution_setup.UsingGlobal());
+    WriteSetting(QString::fromStdString(Settings::values.scaling_filter.GetLabel()),
+                 static_cast<u32>(Settings::values.scaling_filter.GetValue(global)),
+                 static_cast<u32>(Settings::values.scaling_filter.GetDefault()),
+                 Settings::values.scaling_filter.UsingGlobal());
+    WriteSetting(QString::fromStdString(Settings::values.anti_aliasing.GetLabel()),
+                 static_cast<u32>(Settings::values.anti_aliasing.GetValue(global)),
+                 static_cast<u32>(Settings::values.anti_aliasing.GetDefault()),
+                 Settings::values.anti_aliasing.UsingGlobal());
     WriteGlobalSetting(Settings::values.max_anisotropy);
     WriteGlobalSetting(Settings::values.use_speed_limit);
     WriteGlobalSetting(Settings::values.speed_limit);
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index a7f4a6720..d673c1cdc 100755
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -189,5 +189,8 @@ Q_DECLARE_METATYPE(Settings::CPUAccuracy);
 Q_DECLARE_METATYPE(Settings::GPUAccuracy);
 Q_DECLARE_METATYPE(Settings::FullscreenMode);
 Q_DECLARE_METATYPE(Settings::NvdecEmulation);
+Q_DECLARE_METATYPE(Settings::ResolutionSetup);
+Q_DECLARE_METATYPE(Settings::ScalingFilter);
+Q_DECLARE_METATYPE(Settings::AntiAliasing);
 Q_DECLARE_METATYPE(Settings::RendererBackend);
 Q_DECLARE_METATYPE(Settings::ShaderBackend);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 8e20cc6f3..59f975a6e 100755
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -89,6 +89,7 @@ void ConfigureGraphics::SetConfiguration() {
     ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setEnabled(runtime_lock);
     ui->nvdec_emulation_widget->setEnabled(runtime_lock);
+    ui->resolution_combobox->setEnabled(runtime_lock);
     ui->accelerate_astc->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
     ui->use_asynchronous_gpu_emulation->setChecked(
@@ -102,6 +103,12 @@ void ConfigureGraphics::SetConfiguration() {
         ui->nvdec_emulation->setCurrentIndex(
             static_cast<int>(Settings::values.nvdec_emulation.GetValue()));
         ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
+        ui->resolution_combobox->setCurrentIndex(
+            static_cast<int>(Settings::values.resolution_setup.GetValue()));
+        ui->scaling_filter_combobox->setCurrentIndex(
+            static_cast<int>(Settings::values.scaling_filter.GetValue()));
+        ui->anti_aliasing_combobox->setCurrentIndex(
+            static_cast<int>(Settings::values.anti_aliasing.GetValue()));
     } else {
         ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
         ConfigurationShared::SetHighlight(ui->api_widget,
@@ -122,6 +129,21 @@ void ConfigureGraphics::SetConfiguration() {
         ConfigurationShared::SetHighlight(ui->ar_label,
                                           !Settings::values.aspect_ratio.UsingGlobal());
 
+        ConfigurationShared::SetPerGameSetting(ui->resolution_combobox,
+                                               &Settings::values.resolution_setup);
+        ConfigurationShared::SetHighlight(ui->resolution_label,
+                                          !Settings::values.resolution_setup.UsingGlobal());
+
+        ConfigurationShared::SetPerGameSetting(ui->scaling_filter_combobox,
+                                               &Settings::values.scaling_filter);
+        ConfigurationShared::SetHighlight(ui->scaling_filter_label,
+                                          !Settings::values.scaling_filter.UsingGlobal());
+
+        ConfigurationShared::SetPerGameSetting(ui->anti_aliasing_combobox,
+                                               &Settings::values.anti_aliasing);
+        ConfigurationShared::SetHighlight(ui->anti_aliasing_label,
+                                          !Settings::values.anti_aliasing.UsingGlobal());
+
         ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1);
         ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal());
         ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal());
@@ -133,11 +155,22 @@ void ConfigureGraphics::SetConfiguration() {
 }
 
 void ConfigureGraphics::ApplyConfiguration() {
+    const auto resolution_setup = static_cast<Settings::ResolutionSetup>(
+        ui->resolution_combobox->currentIndex() -
+        ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET));
+
+    const auto scaling_filter = static_cast<Settings::ScalingFilter>(
+        ui->scaling_filter_combobox->currentIndex() -
+        ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET));
+
+    const auto anti_aliasing = static_cast<Settings::AntiAliasing>(
+        ui->anti_aliasing_combobox->currentIndex() -
+        ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET));
+
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode,
                                              ui->fullscreen_mode_combobox);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio,
                                              ui->aspect_ratio_combobox);
-
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_disk_shader_cache,
                                              ui->use_disk_shader_cache, use_disk_shader_cache);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
@@ -165,7 +198,34 @@ void ConfigureGraphics::ApplyConfiguration() {
             Settings::values.bg_green.SetValue(static_cast<u8>(bg_color.green()));
             Settings::values.bg_blue.SetValue(static_cast<u8>(bg_color.blue()));
         }
+        if (Settings::values.resolution_setup.UsingGlobal()) {
+            Settings::values.resolution_setup.SetValue(resolution_setup);
+        }
+        if (Settings::values.scaling_filter.UsingGlobal()) {
+            Settings::values.scaling_filter.SetValue(scaling_filter);
+        }
+        if (Settings::values.anti_aliasing.UsingGlobal()) {
+            Settings::values.anti_aliasing.SetValue(anti_aliasing);
+        }
     } else {
+        if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+            Settings::values.resolution_setup.SetGlobal(true);
+        } else {
+            Settings::values.resolution_setup.SetGlobal(false);
+            Settings::values.resolution_setup.SetValue(resolution_setup);
+        }
+        if (ui->scaling_filter_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+            Settings::values.scaling_filter.SetGlobal(true);
+        } else {
+            Settings::values.scaling_filter.SetGlobal(false);
+            Settings::values.scaling_filter.SetValue(scaling_filter);
+        }
+        if (ui->anti_aliasing_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+            Settings::values.anti_aliasing.SetGlobal(true);
+        } else {
+            Settings::values.anti_aliasing.SetGlobal(false);
+            Settings::values.anti_aliasing.SetValue(anti_aliasing);
+        }
         if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
             Settings::values.renderer_backend.SetGlobal(true);
             Settings::values.shader_backend.SetGlobal(true);
@@ -312,6 +372,9 @@ void ConfigureGraphics::SetupPerGameUI() {
         ui->device->setEnabled(Settings::values.renderer_backend.UsingGlobal());
         ui->fullscreen_mode_combobox->setEnabled(Settings::values.fullscreen_mode.UsingGlobal());
         ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
+        ui->resolution_combobox->setEnabled(Settings::values.resolution_setup.UsingGlobal());
+        ui->scaling_filter_combobox->setEnabled(Settings::values.scaling_filter.UsingGlobal());
+        ui->anti_aliasing_combobox->setEnabled(Settings::values.anti_aliasing.UsingGlobal());
         ui->use_asynchronous_gpu_emulation->setEnabled(
             Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
         ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal());
@@ -340,6 +403,15 @@ void ConfigureGraphics::SetupPerGameUI() {
     ConfigurationShared::SetColoredComboBox(
         ui->fullscreen_mode_combobox, ui->fullscreen_mode_label,
         static_cast<int>(Settings::values.fullscreen_mode.GetValue(true)));
+    ConfigurationShared::SetColoredComboBox(
+        ui->resolution_combobox, ui->resolution_label,
+        static_cast<int>(Settings::values.resolution_setup.GetValue(true)));
+    ConfigurationShared::SetColoredComboBox(
+        ui->scaling_filter_combobox, ui->scaling_filter_label,
+        static_cast<int>(Settings::values.scaling_filter.GetValue(true)));
+    ConfigurationShared::SetColoredComboBox(
+        ui->anti_aliasing_combobox, ui->anti_aliasing_label,
+        static_cast<int>(Settings::values.anti_aliasing.GetValue(true)));
     ConfigurationShared::InsertGlobalItem(
         ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
     ConfigurationShared::InsertGlobalItem(
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index beae74344..660b68c1c 100755
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -309,6 +309,173 @@
           </layout>
          </widget>
         </item>
+        <item>
+         <widget class="QWidget" name="resolution_layout" native="true">
+          <layout class="QHBoxLayout" name="horizontalLayout_5">
+           <property name="leftMargin">
+            <number>0</number>
+           </property>
+           <property name="topMargin">
+            <number>0</number>
+           </property>
+           <property name="rightMargin">
+            <number>0</number>
+           </property>
+           <property name="bottomMargin">
+            <number>0</number>
+           </property>
+           <item>
+            <widget class="QLabel" name="resolution_label">
+             <property name="text">
+              <string>Resolution:</string>
+             </property>
+            </widget>
+           </item>
+           <item>
+            <widget class="QComboBox" name="resolution_combobox">
+             <item>
+              <property name="text">
+               <string>0.5X  (360p/540p) [EXPERIMENTAL]</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>0.75X (540p/810p) [EXPERIMENTAL]</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>1X (720p/1080p)</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>2X (1440p/2160p)</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>3X (2160p/3240p)</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>4X (2880p/4320p)</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>5X (3600p/5400p)</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>6X (4320p/6480p)</string>
+              </property>
+             </item>
+            </widget>
+           </item>
+          </layout>
+         </widget>
+        </item>
+        <item>
+         <widget class="QWidget" name="scaling_filter_layout" native="true">
+          <layout class="QHBoxLayout" name="horizontalLayout_6">
+           <property name="leftMargin">
+            <number>0</number>
+           </property>
+           <property name="topMargin">
+            <number>0</number>
+           </property>
+           <property name="rightMargin">
+            <number>0</number>
+           </property>
+           <property name="bottomMargin">
+            <number>0</number>
+           </property>
+           <item>
+            <widget class="QLabel" name="scaling_filter_label">
+             <property name="text">
+              <string>Window Adapting Filter:</string>
+             </property>
+            </widget>
+           </item>
+           <item>
+            <widget class="QComboBox" name="scaling_filter_combobox">
+             <item>
+              <property name="text">
+               <string>Nearest Neighbor</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>Bilinear</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>Bicubic</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>Gaussian</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>ScaleForce</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>AMD's FidelityFX™️ Super Resolution [Vulkan Only]</string>
+              </property>
+             </item>
+            </widget>
+           </item>
+          </layout>
+         </widget>
+        </item>
+        <item>
+         <widget class="QWidget" name="anti_aliasing_layout" native="true">
+          <layout class="QHBoxLayout" name="horizontalLayout_7">
+           <property name="leftMargin">
+            <number>0</number>
+           </property>
+           <property name="topMargin">
+            <number>0</number>
+           </property>
+           <property name="rightMargin">
+            <number>0</number>
+           </property>
+           <property name="bottomMargin">
+            <number>0</number>
+           </property>
+           <item>
+            <widget class="QLabel" name="anti_aliasing_label">
+             <property name="text">
+              <string>Anti-Aliasing Method:</string>
+             </property>
+            </widget>
+           </item>
+           <item>
+            <widget class="QComboBox" name="anti_aliasing_combobox">
+             <item>
+              <property name="text">
+               <string>None</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>FXAA</string>
+              </property>
+             </item>
+            </widget>
+           </item>
+          </layout>
+         </widget>
+        </item>
         <item>
          <widget class="QWidget" name="bg_layout" native="true">
           <property name="sizePolicy">
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 33110685a..a8b254199 100755
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -163,7 +163,7 @@ void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
 }
 
 void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
-    const auto wheel_position = ev->position().toPoint();
+    const auto wheel_position = ev->pos();
     MicroProfileMousePosition(wheel_position.x() / x_scale, wheel_position.y() / y_scale,
                               ev->angleDelta().y() / 120);
     ev->accept();
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 6bd0f9ee9..2af95dbe5 100755
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -159,7 +159,7 @@ GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} {
  * @return true if the haystack contains all words of userinput
  */
 static bool ContainsAllWords(const QString& haystack, const QString& userinput) {
-    const QStringList userinput_split = userinput.split(QLatin1Char{' '}, Qt::SkipEmptyParts);
+    const QStringList userinput_split = userinput.split(QLatin1Char{' '}, QString::SkipEmptyParts);
 
     return std::all_of(userinput_split.begin(), userinput_split.end(),
                        [&haystack](const QString& s) { return haystack.contains(s); });
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 4ef0317bc..5e6e04bc4 100755
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -735,6 +735,8 @@ void GMainWindow::InitializeWidgets() {
 
     shader_building_label = new QLabel();
     shader_building_label->setToolTip(tr("The amount of shaders currently being built"));
+    res_scale_label = new QLabel();
+    res_scale_label->setToolTip(tr("The current selected resolution scaling multiplier."));
     emu_speed_label = new QLabel();
     emu_speed_label->setToolTip(
         tr("Current emulation speed. Values higher or lower than 100% "
@@ -747,8 +749,8 @@ void GMainWindow::InitializeWidgets() {
         tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For "
            "full-speed emulation this should be at most 16.67 ms."));
 
-    for (auto& label :
-         {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) {
+    for (auto& label : {shader_building_label, res_scale_label, emu_speed_label, game_fps_label,
+                        emu_frametime_label}) {
         label->setVisible(false);
         label->setFrameStyle(QFrame::NoFrame);
         label->setContentsMargins(4, 0, 4, 0);
@@ -760,6 +762,55 @@ void GMainWindow::InitializeWidgets() {
     tas_label->setFocusPolicy(Qt::NoFocus);
     statusBar()->insertPermanentWidget(0, tas_label);
 
+    // Setup AA button
+    aa_status_button = new QPushButton();
+    aa_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
+    aa_status_button->setFocusPolicy(Qt::NoFocus);
+    connect(aa_status_button, &QPushButton::clicked, [&] {
+        auto aa_mode = Settings::values.anti_aliasing.GetValue();
+        if (aa_mode == Settings::AntiAliasing::LastAA) {
+            aa_mode = Settings::AntiAliasing::None;
+        } else {
+            aa_mode = static_cast<Settings::AntiAliasing>(static_cast<u32>(aa_mode) + 1);
+        }
+        Settings::values.anti_aliasing.SetValue(aa_mode);
+        aa_status_button->setChecked(true);
+        UpdateAAText();
+    });
+    UpdateAAText();
+    aa_status_button->setCheckable(true);
+    aa_status_button->setChecked(true);
+    statusBar()->insertPermanentWidget(0, aa_status_button);
+
+    // Setup Filter button
+    filter_status_button = new QPushButton();
+    filter_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
+    filter_status_button->setFocusPolicy(Qt::NoFocus);
+    connect(filter_status_button, &QPushButton::clicked, [&] {
+        auto filter = Settings::values.scaling_filter.GetValue();
+        if (filter == Settings::ScalingFilter::LastFilter) {
+            filter = Settings::ScalingFilter::NearestNeighbor;
+        } else {
+            filter = static_cast<Settings::ScalingFilter>(static_cast<u32>(filter) + 1);
+        }
+        if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL &&
+            filter == Settings::ScalingFilter::Fsr) {
+            filter = Settings::ScalingFilter::NearestNeighbor;
+        }
+        Settings::values.scaling_filter.SetValue(filter);
+        filter_status_button->setChecked(true);
+        UpdateFilterText();
+    });
+    auto filter = Settings::values.scaling_filter.GetValue();
+    if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL &&
+        filter == Settings::ScalingFilter::Fsr) {
+        Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor);
+    }
+    UpdateFilterText();
+    filter_status_button->setCheckable(true);
+    filter_status_button->setChecked(true);
+    statusBar()->insertPermanentWidget(0, filter_status_button);
+
     // Setup Dock button
     dock_status_button = new QPushButton();
     dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
@@ -830,6 +881,11 @@ void GMainWindow::InitializeWidgets() {
             Settings::values.renderer_backend.SetValue(Settings::RendererBackend::Vulkan);
         } else {
             Settings::values.renderer_backend.SetValue(Settings::RendererBackend::OpenGL);
+            const auto filter = Settings::values.scaling_filter.GetValue();
+            if (filter == Settings::ScalingFilter::Fsr) {
+                Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor);
+                UpdateFilterText();
+            }
         }
 
         system->ApplySettings();
@@ -1545,6 +1601,7 @@ void GMainWindow::ShutdownGame() {
     // Disable status bar updates
     status_bar_update_timer.stop();
     shader_building_label->setVisible(false);
+    res_scale_label->setVisible(false);
     emu_speed_label->setVisible(false);
     game_fps_label->setVisible(false);
     emu_frametime_label->setVisible(false);
@@ -2907,8 +2964,7 @@ void GMainWindow::OnCaptureScreenshot() {
         }
     }
 #endif
-    render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(),
-                                     filename);
+    render_window->CaptureScreenshot(filename);
 }
 
 // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant
@@ -2999,6 +3055,11 @@ void GMainWindow::UpdateStatusBar() {
         shader_building_label->setVisible(false);
     }
 
+    const auto res_info = Settings::values.resolution_info;
+    const auto res_scale = res_info.up_factor;
+    res_scale_label->setText(
+        tr("Scale: %1x", "%1 is the resolution scaling factor").arg(res_scale));
+
     if (Settings::values.use_speed_limit.GetValue()) {
         emu_speed_label->setText(tr("Speed: %1% / %2%")
                                      .arg(results.emulation_speed * 100.0, 0, 'f', 0)
@@ -3014,6 +3075,7 @@ void GMainWindow::UpdateStatusBar() {
     }
     emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2));
 
+    res_scale_label->setVisible(true);
     emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue());
     game_fps_label->setVisible(true);
     emu_frametime_label->setVisible(true);
@@ -3043,11 +3105,55 @@ void GMainWindow::UpdateGPUAccuracyButton() {
     }
 }
 
+void GMainWindow::UpdateFilterText() {
+    // Resolve the caption for the active scaling filter; unknown values read as bilinear.
+    QString caption;
+    switch (Settings::values.scaling_filter.GetValue()) {
+    case Settings::ScalingFilter::NearestNeighbor:
+        caption = tr("NEAREST");
+        break;
+    case Settings::ScalingFilter::Bicubic:
+        caption = tr("BICUBIC");
+        break;
+    case Settings::ScalingFilter::Gaussian:
+        caption = tr("GAUSSIAN");
+        break;
+    case Settings::ScalingFilter::ScaleForce:
+        caption = tr("SCALEFORCE");
+        break;
+    case Settings::ScalingFilter::Fsr:
+        caption = tr("AMD'S FIDELITYFX SR");
+        break;
+    case Settings::ScalingFilter::Bilinear:
+    default:
+        caption = tr("BILINEAR");
+        break;
+    }
+    filter_status_button->setText(caption);
+}
+
+void GMainWindow::UpdateAAText() {
+    // "NO AA" only when anti-aliasing is explicitly off; any other mode reads as FXAA.
+    QString caption;
+    switch (Settings::values.anti_aliasing.GetValue()) {
+    case Settings::AntiAliasing::None:
+        caption = tr("NO AA");
+        break;
+    case Settings::AntiAliasing::Fxaa:
+    default:
+        caption = tr("FXAA");
+        break;
+    }
+    aa_status_button->setText(caption);
+}
+
 void GMainWindow::UpdateStatusButtons() {
     dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
     renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() ==
                                        Settings::RendererBackend::Vulkan);
     UpdateGPUAccuracyButton();
+    UpdateFilterText(); // Re-sync the filter button caption with the scaling_filter setting
+    UpdateAAText();     // Re-sync the AA button caption with the anti_aliasing setting
 }
 
 void GMainWindow::UpdateUISettings() {
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index aed15a0a0..351500743 100755
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -302,6 +302,8 @@ private:
     void MigrateConfigFiles();
     void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {},
                            std::string_view gpu_vendor = {});
+    void UpdateFilterText();
+    void UpdateAAText();
     void UpdateStatusBar();
     void UpdateGPUAccuracyButton();
     void UpdateStatusButtons();
@@ -328,6 +330,7 @@ private:
     // Status bar elements
     QLabel* message_label = nullptr;
     QLabel* shader_building_label = nullptr;
+    QLabel* res_scale_label = nullptr;
     QLabel* emu_speed_label = nullptr;
     QLabel* game_fps_label = nullptr;
     QLabel* emu_frametime_label = nullptr;
@@ -335,6 +338,8 @@ private:
     QPushButton* gpu_accuracy_button = nullptr;
     QPushButton* renderer_status_button = nullptr;
     QPushButton* dock_status_button = nullptr;
+    QPushButton* filter_status_button = nullptr;
+    QPushButton* aa_status_button = nullptr;
     QTimer status_bar_update_timer;
 
     std::unique_ptr<Config> config;
diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h
index cac19452f..936914ef3 100755
--- a/src/yuzu/uisettings.h
+++ b/src/yuzu/uisettings.h
@@ -68,7 +68,6 @@ struct Values {
     Settings::BasicSetting<bool> enable_discord_presence{true, "enable_discord_presence"};
 
     Settings::BasicSetting<bool> enable_screenshot_save_as{true, "enable_screenshot_save_as"};
-    Settings::BasicSetting<u16> screenshot_resolution_factor{0, "screenshot_resolution_factor"};
 
     QString roms_path;
     QString symbols_path;
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 4bac2ca8a..ac6e60674 100755
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -451,6 +451,9 @@ void Config::ReadValues() {
     ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks);
     ReadSetting("Renderer", Settings::values.vulkan_device);
 
+    ReadSetting("Renderer", Settings::values.resolution_setup);
+    ReadSetting("Renderer", Settings::values.scaling_filter);
+    ReadSetting("Renderer", Settings::values.anti_aliasing);
     ReadSetting("Renderer", Settings::values.fullscreen_mode);
     ReadSetting("Renderer", Settings::values.aspect_ratio);
     ReadSetting("Renderer", Settings::values.max_anisotropy);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 5208cb5e4..120f1311e 100755
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -236,6 +236,29 @@ disable_shader_loop_safety_checks =
 # Which Vulkan physical device to use (defaults to 0)
 vulkan_device =
 
+# 0: 0.5x (360p/540p) [EXPERIMENTAL]
+# 1: 0.75x (540p/810p) [EXPERIMENTAL]
+# 2 (default): 1x (720p/1080p)
+# 3: 2x (1440p/2160p)
+# 4: 3x (2160p/3240p)
+# 5: 4x (2880p/4320p)
+# 6: 5x (3600p/5400p)
+# 7: 6x (4320p/6480p)
+resolution_setup =
+
+# Pixel filter to use when up- or down-sampling rendered frames.
+# 0: Nearest Neighbor
+# 1 (default): Bilinear
+# 2: Bicubic
+# 3: Gaussian
+# 4: ScaleForce
+# 5: AMD FidelityFX™️ Super Resolution [Vulkan Only]
+scaling_filter =
+
+# Anti-Aliasing (AA)
+# 0 (default): None, 1: FXAA
+anti_aliasing =
+
 # Whether to use fullscreen or borderless window mode
 # 0 (Windows default): Borderless window, 1 (All other default): Exclusive fullscreen
 fullscreen_mode =