From 8611727ffc4f7e115e5da77a2ede894cca561261 Mon Sep 17 00:00:00 2001 From: papadanku Date: Mon, 2 Sep 2024 02:42:22 -0700 Subject: [PATCH] cImageSharpen: Implement FSR's Robust Contrast Adaptive Sharpening (RCAS) --- shaders/cImageSharpen.fx | 50 +------- shaders/shared/fidelityfx/cCas.fxh | 188 ++++++++++++++--------------- 2 files changed, 95 insertions(+), 143 deletions(-) diff --git a/shaders/cImageSharpen.fx b/shaders/cImageSharpen.fx index f4d6a0a..ae9c681 100644 --- a/shaders/cImageSharpen.fx +++ b/shaders/cImageSharpen.fx @@ -1,59 +1,17 @@ -/* - Bilinear modification of AMD's CAS algorithm. - - Source: https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/main/sdk/include/FidelityFX/gpu/cas/ffx_cas.h - - This file is part of the FidelityFX SDK. - - Copyright (C) 2024 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files(the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and /or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
-*/ - uniform int _RenderMode < ui_label = "Render Mode"; ui_type = "combo"; ui_items = "Image\0Mask\0"; > = 0; -uniform int _Detection < - ui_category = "Sharpening"; - ui_label = "Detection Mode"; - ui_type = "combo"; - ui_items = "Multi-Channel\0Single-Channel (Average)\0Single-Channel (Max)\0"; -> = 0; - -uniform int _Kernel < - ui_category = "Sharpening"; - ui_label = "Kernel Shape"; - ui_type = "combo"; - ui_items = "CAS: Box\0CAS: Diamond\0CShade: Bilinear Diamond\0"; -> = 1; - -uniform float _Contrast < +uniform float _Sharpening < ui_category = "Sharpening"; ui_label = "Contrast"; ui_type = "slider"; ui_min = 0.0; ui_max = 1.0; -> = 0.0; +> = 1.0; #include "shared/fidelityfx/cCas.fxh" @@ -69,9 +27,7 @@ float4 PS_CasFilterNoScaling(CShade_VS2PS_Quad Input): SV_TARGET0 OutputMask, Input.Tex0, fwidth(Input.Tex0.xy), - _Detection, - _Kernel, - _Contrast + _Sharpening ); if (_RenderMode == 1) diff --git a/shaders/shared/fidelityfx/cCas.fxh b/shaders/shared/fidelityfx/cCas.fxh index 942c20f..1c82bbb 100644 --- a/shaders/shared/fidelityfx/cCas.fxh +++ b/shaders/shared/fidelityfx/cCas.fxh @@ -28,6 +28,44 @@ THE SOFTWARE. */ +/* + https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/main/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h + + FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING + + CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. + RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. + RCAS also has a built in process to limit sharpening of what it detects as possible noise. + RCAS sharper does not support scaling, as it should be applied after EASU scaling. + Pass EASU output straight into RCAS, no color conversions necessary. + + RCAS is based on the following logic. + RCAS uses a 5 tap filter in a cross pattern (same as CAS), + w n + w 1 w for taps w m e + w s + + Where 'w' is the negative lobe weight. 
+ output = (w*(n+e+w+s)+m)/(4*w+1) + + RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, + 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) + 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) + + Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. + This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. + So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps. + As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. + This stabilizes RCAS. + + RCAS does a simple highpass which is normalized against the local contrast then shaped, + 0.25 + 0.25 -1 0.25 + 0.25 + This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. +*/ +#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0)) + #if !defined(INCLIDE_FFX_CAS) #define INCLIDE_FFX_CAS @@ -36,105 +74,63 @@ inout float4 FilterMask, in float2 Tex, in float2 Delta, - in int Detection, - in int Kernel, - in float Contrast + in float Sharpening ) { - /* - Load a collection of samples in a 3x3 neighorhood, where e is the current pixel. 
- 5 3 6 | 3 | 1 3 - 1 0 2 | 1 0 2 | 0 - 7 4 8 | 4 | 2 4 - */ - - // Select kernel sample - float4 TexArray[3]; - float4 Sample[9]; - switch (Kernel) - { - case 0: - TexArray[0] = Tex.xyxy + (Delta.xyxy * float4(-1.0, 0.0, 1.0, 0.0)); - TexArray[1] = Tex.xyxy + (Delta.xyxy * float4(0.0, -1.0, 0.0, 1.0)); - TexArray[2] = Tex.xyxy + (Delta.xyxy * float4(-1.0, -1.0, 1.0, 1.0)); - Sample[0] = tex2D(CShade_SampleColorTex, Tex); - Sample[1] = tex2D(CShade_SampleColorTex, TexArray[0].xy); - Sample[2] = tex2D(CShade_SampleColorTex, TexArray[0].zw); - Sample[3] = tex2D(CShade_SampleColorTex, TexArray[1].xy); - Sample[4] = tex2D(CShade_SampleColorTex, TexArray[1].zw); - Sample[5] = tex2D(CShade_SampleColorTex, TexArray[2].xw); - Sample[6] = tex2D(CShade_SampleColorTex, TexArray[2].zw); - Sample[7] = tex2D(CShade_SampleColorTex, TexArray[2].xy); - Sample[8] = tex2D(CShade_SampleColorTex, TexArray[2].zy); - break; - case 1: - TexArray[0] = Tex.xyxy + (Delta.xyxy * float4(-1.0, 0.0, 1.0, 0.0)); - TexArray[1] = Tex.xyxy + (Delta.xyxy * float4(0.0, -1.0, 0.0, 1.0)); - Sample[0] = tex2D(CShade_SampleColorTex, Tex); - Sample[1] = tex2D(CShade_SampleColorTex, TexArray[0].xy); - Sample[2] = tex2D(CShade_SampleColorTex, TexArray[0].zw); - Sample[3] = tex2D(CShade_SampleColorTex, TexArray[1].xy); - Sample[4] = tex2D(CShade_SampleColorTex, TexArray[1].zw); - break; - case 2: - TexArray[0] = Tex.xyxy + (Delta.xyxy * float4(-0.5, -0.5, 0.5, 0.5)); - Sample[0] = tex2D(CShade_SampleColorTex, Tex); - Sample[1] = tex2D(CShade_SampleColorTex, TexArray[0].xw); - Sample[2] = tex2D(CShade_SampleColorTex, TexArray[0].zw); - Sample[3] = tex2D(CShade_SampleColorTex, TexArray[0].xy); - Sample[4] = tex2D(CShade_SampleColorTex, TexArray[0].zy); - break; - default: - break; - } - - // Get polar min/max - float4 MinRGB = min(Sample[0], min(min(Sample[1], Sample[2]), min(Sample[3], Sample[4]))); - float4 MaxRGB = max(Sample[0], max(max(Sample[1], Sample[2]), max(Sample[3], Sample[4]))); - - if (Kernel == 
0) - { - MinRGB = min(MinRGB, min(min(Sample[5], Sample[6]), min(Sample[7], Sample[8]))); - MaxRGB = max(MaxRGB, max(max(Sample[5], Sample[6]), max(Sample[7], Sample[8]))); - } - - // Get needed reciprocal - float4 ReciprocalMaxRGB = 1.0 / MaxRGB; - - // Amplify - float4 AmplifyRGB = saturate(min(MinRGB, 2.0 - MaxRGB) * ReciprocalMaxRGB); - - // Optional grayscale - switch (Detection) - { - case 1: - AmplifyRGB = CColor_GetLuma(AmplifyRGB.rgb, 0); - break; - case 2: - AmplifyRGB = CColor_GetLuma(AmplifyRGB.rgb, 3); - break; - } - - // Shaping amount of sharpening. - AmplifyRGB *= rsqrt(AmplifyRGB); - - /* Filter shape. - w | w | w w - w 1 w | w 1 w | 1 - w | w | w w - */ - float4 Peak = -(1.0 / lerp(8.0, 5.0, Contrast)); - float4 Weight = AmplifyRGB * Peak; - float4 ReciprocalWeight = 1.0 / (1.0 + (4.0 * Weight)); - + float4 TexArray[2]; + TexArray[0] = Tex.xyxy + (Delta.xyxy * float4(-1.0, 0.0, 1.0, 0.0)); + TexArray[1] = Tex.xyxy + (Delta.xyxy * float4(0.0, -1.0, 0.0, 1.0)); + + float4 Sample[5]; + Sample[0] = tex2D(CShade_SampleColorTex, Tex); + Sample[1] = tex2D(CShade_SampleColorTex, TexArray[0].xy); + Sample[2] = tex2D(CShade_SampleColorTex, TexArray[0].zw); + Sample[3] = tex2D(CShade_SampleColorTex, TexArray[1].xy); + Sample[4] = tex2D(CShade_SampleColorTex, TexArray[1].zw); + + // Luma times 2. 
+ float Luma[5]; + Luma[0] = dot(Sample[0].rgb, float3(0.5, 1.0, 0.5)); // FSR's doubled luma is (0.5 * R) + G + (0.5 * B): green carries the full weight. + Luma[1] = dot(Sample[1].rgb, float3(0.5, 1.0, 0.5)); + Luma[2] = dot(Sample[2].rgb, float3(0.5, 1.0, 0.5)); + Luma[3] = dot(Sample[3].rgb, float3(0.5, 1.0, 0.5)); + Luma[4] = dot(Sample[4].rgb, float3(0.5, 1.0, 0.5)); + + // Noise detection using a normalized local contrast filter. The luma range is clamped away from zero so a flat neighborhood yields 0 rather than 0/0. + float Noise = ((Luma[1] + Luma[2] + Luma[3] + Luma[4]) * 0.25) - Luma[0]; + float MaxLuma = max(Luma[0], max(max(Luma[1], Luma[2]), max(Luma[3], Luma[4]))); + float MinLuma = min(Luma[0], min(min(Luma[1], Luma[2]), min(Luma[3], Luma[4]))); + float RangeLuma = MaxLuma - MinLuma; + Noise = saturate(abs(Noise) / max(RangeLuma, 1e-6)); + Noise = (-0.5 * Noise) + 1.0; + + // Min and max of ring (the four cross taps, center excluded). + float4 MaxRGB = max(max(Sample[1], Sample[2]), max(Sample[3], Sample[4])); + float4 MinRGB = min(min(Sample[1], Sample[2]), min(Sample[3], Sample[4])); + + // Immediate constants for peak range. + float2 PeakC = float2(1.0, -1.0 * 4.0); + + // Limiters, these need to be high precision RCPs. They solve for the lobe weight at which the output would clip at 0 (HitMin) or 1 (HitMax). + float4 HitMinRGB = MinRGB / (4.0 * MaxRGB); + float4 HitMaxRGB = (PeakC.x - MaxRGB) / ((4.0 * MinRGB) + PeakC.y); + float4 LobeRGB = max(-HitMinRGB, HitMaxRGB); + float MaxLobe = max(max(LobeRGB.r, LobeRGB.g), LobeRGB.b); + + Sharpening = 1.0 - Sharpening; + float4 Lobe = max(-FSR_RCAS_LIMIT, min(MaxLobe, 0.0)) * exp2(-Sharpening); // Keep the scale as a float: an int() cast truncates exp2(-Sharpening) to 0 for every slider value below 1.0, disabling the filter. + + // Apply noise removal + Lobe *= Noise; + + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. 
+ float4 RcpL = 1.0 / ((4.0 * Lobe) + 1.0); FilterShape = Sample[0]; - FilterShape += Sample[1] * Weight; - FilterShape += Sample[2] * Weight; - FilterShape += Sample[3] * Weight; - FilterShape += Sample[4] * Weight; - FilterShape = saturate(FilterShape * ReciprocalWeight); - - FilterMask = AmplifyRGB; + FilterShape += (Lobe * Sample[1]); + FilterShape += (Lobe * Sample[2]); + FilterShape += (Lobe * Sample[3]); + FilterShape += (Lobe * Sample[4]); + FilterShape *= RcpL; } #endif \ No newline at end of file