Skip to content

Commit

Permalink
ssgi updates
Browse files: browse the repository at this point in the history
  • Loading branch information
turanszkij committed Apr 2, 2024
1 parent 4cfc23e commit af983fa
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 456 deletions.
28 changes: 16 additions & 12 deletions WickedEngine/shaders/ssgiCS.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RWTexture2D<float4> output_diffuse : register(u0);

#ifdef WIDE
static const uint THREADCOUNT = 16;
static const int TILE_BORDER = 18;
static const int TILE_BORDER = 16;
#else
static const uint THREADCOUNT = 8;
static const int TILE_BORDER = 4;
Expand All @@ -25,7 +25,7 @@ groupshared uint group_valid;

inline uint coord_to_cache(int2 coord)
{
return flatten2D(clamp(TILE_BORDER + coord, 0, TILE_SIZE - 1), TILE_SIZE);
return flatten2D(clamp(coord, 0, TILE_SIZE - 1), TILE_SIZE);
}

static const float depthRejection = 8;
Expand All @@ -34,11 +34,10 @@ static const float depthRejection_rcp = rcp(depthRejection);
float3 compute_diffuse(
float3 origin_position,
float3 origin_normal,
int2 GTid,
int2 offset
int2 originLoc, // coord in cache
int2 sampleLoc // coord in cache
)
{
const int2 sampleLoc = GTid + offset;
const uint t = coord_to_cache(sampleLoc);
uint c = cache_rgb[t];
if(c == 0)
Expand All @@ -56,7 +55,7 @@ float3 compute_diffuse(
const float sample_z = sample_position.z;

// DDA occlusion:
const int2 start = GTid;
const int2 start = originLoc;
const int2 goal = sampleLoc;

const int dx = int(goal.x) - int(start.x);
Expand Down Expand Up @@ -86,7 +85,8 @@ float3 compute_diffuse(
const float sz = cache_z[tt];
if(sz < z - 0.1)
{
return occlusion * Unpack_R11G11B10_FLOAT(cache_rgb[tt]);
c = cache_rgb[tt];
break;
}
}
}
Expand Down Expand Up @@ -127,7 +127,8 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint2 GTid :
if (group_valid == 0)
return; // if no valid color was cached, whole group can exit early

const uint t = coord_to_cache(GTid.xy);
const int2 originLoc = GTid.xy + TILE_BORDER;
const uint t = coord_to_cache(originLoc);
float3 P;
P.z = cache_z[t];

Expand All @@ -137,23 +138,26 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint2 GTid :

P.xy = unpack_half2(cache_xy[t]);

const uint2 pixel = DTid.xy;
const float3 N = mul((float3x3)GetCamera().view, decode_oct(input_normal[interleaved_pixel].rg));

float3 diffuse = 0;
float sum = 0;
const int range = int(postprocess.params0.x);
const float spread = postprocess.params0.y + dither(pixel);
const float spread = postprocess.params0.y /*+ dither(DTid.xy)*/;
const float rangespread_rcp2 = postprocess.params0.z;


const int2 pixel_base = Gid.xy * THREADCOUNT + GTid;
for(int x = -range; x <= range; ++x)
{
for(int y = -range; y <= range; ++y)
{
const int2 pixel = pixel_base + int2(x, y);
if(any(pixel < 0) || any(pixel >= postprocess.resolution))
continue; // to not lose energy when sampling outside of textures, we skip those offsets
const float2 foffset = float2(x, y) * spread;
const int2 offset = round(foffset);
const float weight = saturate(1 - abs(foffset.x) * abs(foffset.y) * rangespread_rcp2);
diffuse += compute_diffuse(P, N, GTid, offset) * weight;
diffuse += compute_diffuse(P, N, originLoc, originLoc + offset) * weight;
sum += weight;
}
}
Expand Down
36 changes: 18 additions & 18 deletions WickedEngine/shaders/ssgi_deinterleaveCS.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ RWTexture2DArray<float3> atlas4x_color : register(u5);
RWTexture2DArray<float3> atlas8x_color : register(u6);
RWTexture2DArray<float3> atlas16x_color : register(u7);
RWTexture2D<float> regular2x_depth : register(u8);
RWTexture2D<float2> regular2x_normal : register(u9);
RWTexture2D<float> regular4x_depth : register(u10);
RWTexture2D<float2> regular4x_normal : register(u11);
RWTexture2D<float> regular8x_depth : register(u12);
RWTexture2D<float2> regular8x_normal : register(u13);
RWTexture2D<float> regular16x_depth : register(u14);
RWTexture2D<float> regular4x_depth : register(u9);
RWTexture2D<float> regular8x_depth : register(u10);
RWTexture2D<float> regular16x_depth : register(u11);
RWTexture2D<float2> regular2x_normal : register(u12);
RWTexture2D<float2> regular4x_normal : register(u13);
RWTexture2D<float2> regular8x_normal : register(u14);
RWTexture2D<float2> regular16x_normal : register(u15);

groupshared float shared_depths[256];
groupshared float2 shared_normals[256];
groupshared float3 shared_colors[256];
groupshared uint shared_normals[256];
groupshared uint shared_colors[256];

[numthreads(8, 8, 1)]
void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
Expand All @@ -38,10 +38,10 @@ void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex, uint3 GTid :
shared_depths[destIdx + 128] = texture_depth[min(startST | uint2(0, 8), dim - 1)];
shared_depths[destIdx + 136] = texture_depth[min(startST | uint2(8, 8), dim - 1)];

shared_normals[destIdx + 0] = texture_normal[min(startST | uint2(0, 0), dim - 1)];
shared_normals[destIdx + 8] = texture_normal[min(startST | uint2(8, 0), dim - 1)];
shared_normals[destIdx + 128] = texture_normal[min(startST | uint2(0, 8), dim - 1)];
shared_normals[destIdx + 136] = texture_normal[min(startST | uint2(8, 8), dim - 1)];
shared_normals[destIdx + 0] = pack_half2(texture_normal[min(startST | uint2(0, 0), dim - 1)]);
shared_normals[destIdx + 8] = pack_half2(texture_normal[min(startST | uint2(8, 0), dim - 1)]);
shared_normals[destIdx + 128] = pack_half2(texture_normal[min(startST | uint2(0, 8), dim - 1)]);
shared_normals[destIdx + 136] = pack_half2(texture_normal[min(startST | uint2(8, 8), dim - 1)]);

const float2 uv0 = float2(startST | uint2(0, 0)) * dim_rcp;
const float2 uv1 = float2(startST | uint2(8, 0)) * dim_rcp;
Expand All @@ -55,18 +55,18 @@ void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex, uint3 GTid :
const float2 prevUV1 = uv1 + velocity1;
const float2 prevUV2 = uv2 + velocity2;
const float2 prevUV3 = uv3 + velocity3;
shared_colors[destIdx + 0] = texture_input.SampleLevel(sampler_linear_clamp, prevUV0, 0);
shared_colors[destIdx + 8] = texture_input.SampleLevel(sampler_linear_clamp, prevUV1, 0);
shared_colors[destIdx + 128] = texture_input.SampleLevel(sampler_linear_clamp, prevUV2, 0);
shared_colors[destIdx + 136] = texture_input.SampleLevel(sampler_linear_clamp, prevUV3, 0);
shared_colors[destIdx + 0] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV0, 0));
shared_colors[destIdx + 8] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV1, 0));
shared_colors[destIdx + 128] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV2, 0));
shared_colors[destIdx + 136] = Pack_R11G11B10_FLOAT(texture_input.SampleLevel(sampler_linear_clamp, prevUV3, 0));

GroupMemoryBarrierWithGroupSync();

uint ldsIndex = (GTid.x << 1) | (GTid.y << 5);

float depth = shared_depths[ldsIndex];
float2 normal = shared_normals[ldsIndex];
float3 color = shared_colors[ldsIndex];
float2 normal = unpack_half2(shared_normals[ldsIndex]);
float3 color = Unpack_R11G11B10_FLOAT(shared_colors[ldsIndex]);

color = color - 0.2; // cut out pixels that shouldn't act as lights
color *= 0.9; // accumulation energy loss
Expand Down
4 changes: 2 additions & 2 deletions WickedEngine/shaders/ssgi_upsampleCS.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ void main(uint2 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex)
const int range = int(postprocess.params0.x);
const float spread = postprocess.params0.y;
#else
const int range = 1;
const float spread = 8;
const int range = 2;
const float spread = 6;
#endif
for(int x = -range; x <= range; ++x)
{
Expand Down
Loading

0 comments on commit af983fa

Please sign in to comment.