From f11486081c0802731d81309f84a687da29c59993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tur=C3=A1nszki=20J=C3=A1nos?= Date: Tue, 10 Dec 2024 09:59:54 +0100 Subject: [PATCH] lightmap leaking fixes, optimizations (#986) --- WickedEngine/offlineshadercompiler.cpp | 1 + WickedEngine/shaders/ShaderInterop_Renderer.h | 18 +- WickedEngine/shaders/ShaderInterop_Weather.h | 25 +- WickedEngine/shaders/Shaders_SOURCE.vcxitems | 4 + .../shaders/Shaders_SOURCE.vcxitems.filters | 3 + WickedEngine/shaders/fogHF.hlsli | 8 +- WickedEngine/shaders/fontPS.hlsl | 2 +- WickedEngine/shaders/globals.hlsli | 41 +- .../shaders/hairparticle_simulateCS.hlsl | 4 +- WickedEngine/shaders/lightingHF.hlsli | 22 +- WickedEngine/shaders/lightmap_expandCS.hlsl | 58 +++ WickedEngine/shaders/objectHF.hlsli | 22 +- .../shaders/objectHF_mesh_shading.hlsli | 2 +- WickedEngine/shaders/paintdecalVS.hlsl | 6 +- WickedEngine/shaders/renderlightmapVS.hlsl | 5 +- WickedEngine/shaders/rtdiffuseCS.hlsl | 2 +- WickedEngine/shaders/shadingHF.hlsli | 8 +- WickedEngine/shaders/shadowHF.hlsli | 46 +- WickedEngine/shaders/shadowPS_alphatest.hlsl | 2 +- .../shaders/shadowPS_transparent.hlsl | 8 +- WickedEngine/shaders/shadowPS_water.hlsl | 5 +- WickedEngine/shaders/skinningCS.hlsl | 12 +- WickedEngine/shaders/skyAtmosphere.hlsli | 100 ++-- .../skyAtmosphere_cameraVolumeLutCS.hlsl | 4 +- ...mosphere_multiScatteredLuminanceLutCS.hlsl | 4 +- .../skyAtmosphere_skyLuminanceLutCS.hlsl | 4 +- .../shaders/skyAtmosphere_skyViewLutCS.hlsl | 4 +- .../skyAtmosphere_transmittanceLutCS.hlsl | 4 +- WickedEngine/shaders/surfaceHF.hlsli | 28 +- .../shaders/volumetricCloud_upsamplePS.hlsl | 2 +- .../volumetricLight_DirectionalPS.hlsl | 2 +- .../shaders/volumetricLight_PointPS.hlsl | 2 +- .../shaders/volumetricLight_SpotPS.hlsl | 2 +- WickedEngine/shaders/wetmap_updateCS.hlsl | 8 +- WickedEngine/wiEnums.h | 2 + WickedEngine/wiRenderPath3D.cpp | 18 +- WickedEngine/wiRenderer.cpp | 42 +- WickedEngine/wiScene.cpp | 478 +++++++++--------- 
WickedEngine/wiScene_Components.cpp | 14 +- WickedEngine/wiScene_Components.h | 1 + WickedEngine/wiVersion.cpp | 2 +- 41 files changed, 587 insertions(+), 438 deletions(-) create mode 100644 WickedEngine/shaders/lightmap_expandCS.hlsl diff --git a/WickedEngine/offlineshadercompiler.cpp b/WickedEngine/offlineshadercompiler.cpp index 7cd350e69d..0cc3d5c665 100644 --- a/WickedEngine/offlineshadercompiler.cpp +++ b/WickedEngine/offlineshadercompiler.cpp @@ -232,6 +232,7 @@ wi::vector shaders = { {"causticsCS", wi::graphics::ShaderStage::CS }, {"depth_reprojectCS", wi::graphics::ShaderStage::CS }, {"depth_pyramidCS", wi::graphics::ShaderStage::CS }, + {"lightmap_expandCS", wi::graphics::ShaderStage::CS }, {"emittedparticlePS_soft", wi::graphics::ShaderStage::PS }, diff --git a/WickedEngine/shaders/ShaderInterop_Renderer.h b/WickedEngine/shaders/ShaderInterop_Renderer.h index 3edebea890..6330816e65 100644 --- a/WickedEngine/shaders/ShaderInterop_Renderer.h +++ b/WickedEngine/shaders/ShaderInterop_Renderer.h @@ -649,7 +649,7 @@ struct alignas(16) ShaderMeshInstance uint layerMask; uint geometryOffset; // offset of all geometries for currently active LOD - uint2 emissive; + uint2 emissive; // packed half4 uint color; uint geometryCount; // number of all geometries in currently active LOD @@ -664,12 +664,11 @@ struct alignas(16) ShaderMeshInstance int vb_ao; int vb_wetmap; int lightmap; - uint alphaTest_size; + uint alphaTest_size; // packed half2 - uint2 rimHighlight; - uint2 padding; + uint2 rimHighlight; // packed half4 + uint2 quaternion; // packed half4 - float4 quaternion; ShaderTransform transform; ShaderTransform transformPrev; ShaderTransform transformRaw; // without quantization remapping applied @@ -693,7 +692,11 @@ struct alignas(16) ShaderMeshInstance vb_ao = -1; vb_wetmap = -1; alphaTest_size = 0; - quaternion = float4(0, 0, 0, 1); +#ifdef __cplusplus + quaternion = wi::math::pack_half4(float4(0, 0, 0, 1)); +#else + quaternion = pack_half4(float4(0, 0, 0, 1)); 
+#endif // __cplusplus rimHighlight = uint2(0, 0); transform.init(); transformPrev.init(); @@ -715,6 +718,7 @@ struct alignas(16) ShaderMeshInstance inline half GetAlphaTest() { return unpack_half2(alphaTest_size).x; } inline half GetSize() { return unpack_half2(alphaTest_size).y; } inline half4 GetRimHighlight() { return unpack_half4(rimHighlight); } + inline half4 GetQuaternion() { return unpack_half4(quaternion); } #endif // __cplusplus }; struct ShaderMeshInstancePointer @@ -1146,7 +1150,7 @@ struct alignas(16) FrameCB float cloudShadowFarPlaneKm; int texture_volumetricclouds_shadow_index; - float gi_boost; + uint giboost_packed; // force fp16 load uint entity_culling_count; float blue_noise_phase; diff --git a/WickedEngine/shaders/ShaderInterop_Weather.h b/WickedEngine/shaders/ShaderInterop_Weather.h index ef09501d89..53e04ae733 100644 --- a/WickedEngine/shaders/ShaderInterop_Weather.h +++ b/WickedEngine/shaders/ShaderInterop_Weather.h @@ -355,29 +355,28 @@ struct alignas(16) ShaderOcean struct alignas(16) ShaderWeather { - float3 sun_color; - float stars; // number of stars (0: disable stars, >0: increase number of stars) + uint2 sun_direction; // packed half3 + uint2 sun_color; // packed half3 - float3 sun_direction; + uint2 ambient; // packed half3 uint most_important_light_index; + float stars; // number of stars (0: disable stars, >0: increase number of stars) - float3 horizon; - float sky_exposure; - - float3 zenith; - float sky_rotation_sin; + uint2 horizon; // packed half3 + uint2 zenith; // packed half3 - float3 ambient; - float sky_rotation_cos; + float4 stars_rotation; // quaternion - float4x4 stars_rotation; + float3 padding_stars; + float sky_rotation_sin; + float sky_rotation_cos; + float sky_exposure; float rain_amount; + float rain_length; float rain_speed; float rain_scale; - - float3 padding_rain; float rain_splash_scale; float4 rain_color; diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems 
b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 5ec08ff4e3..67a91108d3 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -434,6 +434,10 @@ Pixel + + Compute + 4.0 + Compute 4.0 diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 2d6a314a88..cdcd8826b2 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -1181,6 +1181,9 @@ PS + + CS + diff --git a/WickedEngine/shaders/fogHF.hlsli b/WickedEngine/shaders/fogHF.hlsli index eec3d029bb..990d4a0c25 100644 --- a/WickedEngine/shaders/fogHF.hlsli +++ b/WickedEngine/shaders/fogHF.hlsli @@ -19,7 +19,7 @@ inline half GetFogAmount(float distance, float3 O, float3 V) if (GetFrame().options & OPTION_BIT_HEIGHT_FOG) { - float fogFalloffScale = 1.0 / max(0.01, fog.height_end - fog.height_start); + float fogFalloffScale = rcp(max(0.01, fog.height_end - fog.height_start)); // solve for x, e^(-h * x) = 0.001 // x = 6.907755 * h^-1 @@ -71,7 +71,7 @@ inline half4 GetFog(float distance, float3 O, float3 V) // Sample inscattering color: { - const float3 L = GetSunDirection(); + const half3 L = GetSunDirection(); half3 inscatteringColor = GetSunColor(); @@ -79,11 +79,11 @@ inline half4 GetFog(float distance, float3 O, float3 V) if (GetFrame().options & OPTION_BIT_REALISTIC_SKY) { // 0 for position since fog is centered around world center - inscatteringColor *= GetAtmosphericLightTransmittance(GetWeather().atmosphere, float3(0.0, 0.0, 0.0), L, texture_transmittancelut); + inscatteringColor *= GetAtmosphericLightTransmittance(GetWeather().atmosphere, 0, L, texture_transmittancelut); } // Apply phase function solely for directionality: - const float cosTheta = dot(-V, L); + const half cosTheta = dot(-V, L); inscatteringColor *= HgPhase(FOG_INSCATTERING_PHASE_G, cosTheta); // Apply uniform phase since this medium is constant: diff 
--git a/WickedEngine/shaders/fontPS.hlsl b/WickedEngine/shaders/fontPS.hlsl index 1f749d20ec..48b470e68f 100644 --- a/WickedEngine/shaders/fontPS.hlsl +++ b/WickedEngine/shaders/fontPS.hlsl @@ -8,7 +8,7 @@ struct VertextoPixel float2 bary : TEXCOORD1; }; -half4 main(VertextoPixel input) : SV_TARGET +float4 main(VertextoPixel input) : SV_TARGET { Texture2D tex = bindless_textures_half4[font.texture_index]; half value = tex.SampleLevel(sampler_linear_clamp, input.uv, 0).r; diff --git a/WickedEngine/shaders/globals.hlsli b/WickedEngine/shaders/globals.hlsli index 3dd9b80979..9e9af7dd58 100644 --- a/WickedEngine/shaders/globals.hlsli +++ b/WickedEngine/shaders/globals.hlsli @@ -185,6 +185,10 @@ T inverse_lerp(T value1, T value2, T pos) "SRV(t0, space = 22, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "SRV(t0, space = 23, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "SRV(t0, space = 24, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 25, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 26, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 27, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 28, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "UAV(u0, space = 100, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "UAV(u0, space = 101, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "UAV(u0, space = 102, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ @@ -248,6 +252,10 @@ static const BindlessResource> bindless_buffers_float; static const BindlessResource> bindless_buffers_float2; 
static const BindlessResource> bindless_buffers_float3; static const BindlessResource> bindless_buffers_float4; +static const BindlessResource> bindless_buffers_half; +static const BindlessResource> bindless_buffers_half2; +static const BindlessResource> bindless_buffers_half3; +static const BindlessResource> bindless_buffers_half4; static const BindlessResource bindless_textures2DArray; static const BindlessResource> bindless_textures2DArray_half4; static const BindlessResource bindless_cubemaps; @@ -300,6 +308,10 @@ static const uint DESCRIPTOR_SET_BINDLESS_ACCELERATION_STRUCTURE = 7; [[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_float2[]; [[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_float3[]; [[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_float4[]; +[[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_half[]; +[[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_half2[]; +[[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_half3[]; +[[vk::binding(0, DESCRIPTOR_SET_BINDLESS_UNIFORM_TEXEL_BUFFER)]] Buffer bindless_buffers_half4[]; [[vk::binding(0, DESCRIPTOR_SET_BINDLESS_SAMPLER)]] SamplerState bindless_samplers[]; [[vk::binding(0, DESCRIPTOR_SET_BINDLESS_SAMPLED_IMAGE)]] Texture2D bindless_textures[]; [[vk::binding(0, DESCRIPTOR_SET_BINDLESS_SAMPLED_IMAGE)]] Texture2DArray bindless_textures2DArray[]; @@ -362,6 +374,10 @@ Texture2D bindless_textures_float2[] : register(space21); Texture2D bindless_textures_uint[] : register(space22); Texture2D bindless_textures_uint4[] : register(space23); Texture2D bindless_textures_half4[] : register(space24); +Buffer bindless_buffers_half[] : register(space25); +Buffer bindless_buffers_half2[] : register(space26); +Buffer bindless_buffers_half3[] : register(space27); +Buffer 
bindless_buffers_half4[] : register(space28); RWTexture2D bindless_rwtextures[] : register(space100); RWByteAddressBuffer bindless_rwbuffers[] : register(space101); @@ -618,15 +634,15 @@ struct PrimitiveID #define texture_random64x64 bindless_textures[GetFrame().texture_random64x64_index] #define texture_bluenoise bindless_textures[GetFrame().texture_bluenoise_index] -#define texture_sheenlut bindless_textures[GetFrame().texture_sheenlut_index] -#define texture_skyviewlut bindless_textures[GetFrame().texture_skyviewlut_index] -#define texture_transmittancelut bindless_textures[GetFrame().texture_transmittancelut_index] -#define texture_multiscatteringlut bindless_textures[GetFrame().texture_multiscatteringlut_index] -#define texture_skyluminancelut bindless_textures[GetFrame().texture_skyluminancelut_index] -#define texture_cameravolumelut bindless_textures3D[GetFrame().texture_cameravolumelut_index] +#define texture_sheenlut bindless_textures_half4[GetFrame().texture_sheenlut_index] +#define texture_skyviewlut bindless_textures_half4[GetFrame().texture_skyviewlut_index] +#define texture_transmittancelut bindless_textures_half4[GetFrame().texture_transmittancelut_index] +#define texture_multiscatteringlut bindless_textures_half4[GetFrame().texture_multiscatteringlut_index] +#define texture_skyluminancelut bindless_textures_half4[GetFrame().texture_skyluminancelut_index] +#define texture_cameravolumelut bindless_textures3D_half4[GetFrame().texture_cameravolumelut_index] #define texture_wind bindless_textures3D[GetFrame().texture_wind_index] #define texture_wind_prev bindless_textures3D[GetFrame().texture_wind_prev_index] -#define texture_caustics bindless_textures[GetFrame().texture_caustics_index] +#define texture_caustics bindless_textures_half4[GetFrame().texture_caustics_index] #define scene_acceleration_structure bindless_accelerationstructures[GetScene().TLAS] #define texture_depth bindless_textures_float[GetCamera().texture_depth_index] @@ -806,11 +822,11 @@ 
inline half3 clipspace_to_uv(in half3 clipspace) return clipspace * half3(0.5, -0.5, 0.5) + 0.5; } -inline half3 GetSunColor() { return GetWeather().sun_color; } // sun color with intensity applied -inline float3 GetSunDirection() { return GetWeather().sun_direction; } -inline half3 GetHorizonColor() { return GetWeather().horizon.rgb; } -inline half3 GetZenithColor() { return GetWeather().zenith.rgb; } -inline half3 GetAmbientColor() { return GetWeather().ambient.rgb; } +inline half3 GetSunColor() { return unpack_half3(GetWeather().sun_color); } // sun color with intensity applied +inline half3 GetSunDirection() { return unpack_half3(GetWeather().sun_direction); } +inline half3 GetHorizonColor() { return unpack_half3(GetWeather().horizon); } +inline half3 GetZenithColor() { return unpack_half3(GetWeather().zenith); } +inline half3 GetAmbientColor() { return unpack_half3(GetWeather().ambient); } inline uint2 GetInternalResolution() { return GetCamera().internal_resolution; } inline float GetDeltaTime() { return GetFrame().delta_time; } inline float GetTime() { return GetFrame().time; } @@ -818,6 +834,7 @@ inline float GetTimePrev() { return GetFrame().time_previous; } inline float GetFrameCount() { return GetFrame().frame_count; } inline min16uint2 GetTemporalAASampleRotation() { return uint2(GetFrame().temporalaa_samplerotation & 0xFF, (GetFrame().temporalaa_samplerotation >> 8u) & 0xFF); } inline bool IsStaticSky() { return GetScene().globalenvmap >= 0; } +inline half GetGIBoost() { return unpack_half2(GetFrame().giboost_packed).x; } // Mie scaterring approximated with Henyey-Greenstein phase function. 
// https://www.alexandre-pestana.com/volumetric-lights/ diff --git a/WickedEngine/shaders/hairparticle_simulateCS.hlsl b/WickedEngine/shaders/hairparticle_simulateCS.hlsl index 5b2479524f..fd09df25fc 100644 --- a/WickedEngine/shaders/hairparticle_simulateCS.hlsl +++ b/WickedEngine/shaders/hairparticle_simulateCS.hlsl @@ -11,8 +11,8 @@ static const float3 HAIRPATCH[] = { Buffer meshIndexBuffer : register(t0); Buffer meshVertexBuffer_POS : register(t1); -Buffer meshVertexBuffer_NOR : register(t2); -Buffer meshVertexBuffer_length : register(t3); +Buffer meshVertexBuffer_NOR : register(t2); +Buffer meshVertexBuffer_length : register(t3); RWStructuredBuffer simulationBuffer : register(u0); RWBuffer vertexBuffer_POS : register(u1); diff --git a/WickedEngine/shaders/lightingHF.hlsli b/WickedEngine/shaders/lightingHF.hlsli index d822c1616f..ae67412a48 100644 --- a/WickedEngine/shaders/lightingHF.hlsli +++ b/WickedEngine/shaders/lightingHF.hlsli @@ -40,7 +40,7 @@ struct Lighting inline void ApplyLighting(in Surface surface, in Lighting lighting, inout half4 color) { - half3 diffuse = lighting.direct.diffuse / PI + lighting.indirect.diffuse * (half)GetFrame().gi_boost * (1 - surface.F) * surface.occlusion + surface.ssgi; + half3 diffuse = lighting.direct.diffuse / PI + lighting.indirect.diffuse * GetGIBoost() * (1 - surface.F) * surface.occlusion + surface.ssgi; half3 specular = lighting.direct.specular + lighting.indirect.specular * surface.occlusion; // reminder: cannot apply surface.F for whole indirect specular, because multiple layers have separate fresnels (sheen, clearcoat) color.rgb = lerp(surface.albedo * diffuse, surface.refraction.rgb, surface.refraction.a); color.rgb += specular; @@ -412,19 +412,20 @@ inline half3 EnvironmentReflection_Global(in Surface surface) uint2 dim; uint mipcount; cubemap.GetDimensions(0, dim.x, dim.y, mipcount); + half mipcount16f = half(mipcount); - half MIP = surface.roughness * mipcount; + half MIP = surface.roughness * mipcount16f; 
envColor = cubemap.SampleLevel(sampler_linear_clamp, surface.R, MIP).rgb * surface.F; #ifdef SHEEN envColor *= surface.sheen.albedoScaling; - MIP = surface.sheen.roughness * mipcount; + MIP = surface.sheen.roughness * mipcount16f; envColor += cubemap.SampleLevel(sampler_linear_clamp, surface.R, MIP).rgb * surface.sheen.color * surface.sheen.DFG; #endif // SHEEN #ifdef CLEARCOAT envColor *= 1 - surface.clearcoat.F; - MIP = surface.clearcoat.roughness * mipcount; + MIP = surface.clearcoat.roughness * mipcount16f; envColor += cubemap.SampleLevel(sampler_linear_clamp, surface.clearcoat.R, MIP).rgb * surface.clearcoat.F; #endif // CLEARCOAT @@ -455,15 +456,16 @@ inline half4 EnvironmentReflection_Local(in TextureCube cubemap, in Surfa uint2 dim; uint mipcount; cubemap.GetDimensions(0, dim.x, dim.y, mipcount); + half mipcount16f = half(mipcount); // Sample cubemap texture: - half MIP = surface.roughness * mipcount; - half3 envColor = (half3)cubemap.SampleLevel(sampler_linear_clamp, R_parallaxCorrected, MIP).rgb * surface.F; + half MIP = surface.roughness * mipcount16f; + half3 envColor = cubemap.SampleLevel(sampler_linear_clamp, R_parallaxCorrected, MIP).rgb * surface.F; #ifdef SHEEN envColor *= surface.sheen.albedoScaling; - MIP = surface.sheen.roughness * mipcount; - envColor += (half3)cubemap.SampleLevel(sampler_linear_clamp, R_parallaxCorrected, MIP).rgb * surface.sheen.color * surface.sheen.DFG; + MIP = surface.sheen.roughness * mipcount16f; + envColor += cubemap.SampleLevel(sampler_linear_clamp, R_parallaxCorrected, MIP).rgb * surface.sheen.color * surface.sheen.DFG; #endif // SHEEN #ifdef CLEARCOAT @@ -475,8 +477,8 @@ inline half4 EnvironmentReflection_Local(in TextureCube cubemap, in Surfa R_parallaxCorrected = surface.P - probe.position + surface.clearcoat.R * Distance; envColor *= 1 - surface.clearcoat.F; - MIP = surface.clearcoat.roughness * mipcount; - envColor += (half3)cubemap.SampleLevel(sampler_linear_clamp, R_parallaxCorrected, MIP).rgb * 
surface.clearcoat.F; + MIP = surface.clearcoat.roughness * mipcount16f; + envColor += cubemap.SampleLevel(sampler_linear_clamp, R_parallaxCorrected, MIP).rgb * surface.clearcoat.F; #endif // CLEARCOAT // blend out if close to any cube edge: diff --git a/WickedEngine/shaders/lightmap_expandCS.hlsl b/WickedEngine/shaders/lightmap_expandCS.hlsl new file mode 100644 index 0000000000..d6da907ed2 --- /dev/null +++ b/WickedEngine/shaders/lightmap_expandCS.hlsl @@ -0,0 +1,58 @@ +#include "globals.hlsli" +#include "ShaderInterop_Postprocess.h" + +Texture2D lightmap_input : register(t0); + +RWTexture2D lightmap_output : register(u0); + +static const int TILE_BORDER = 4; +static const uint TILE_SIZE = POSTPROCESS_BLOCKSIZE + TILE_BORDER * 2; +groupshared uint2 tile_cache[TILE_SIZE*TILE_SIZE]; + +[numthreads(POSTPROCESS_BLOCKSIZE, POSTPROCESS_BLOCKSIZE, 1)] +void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex) +{ + const int2 tile_upperleft = Gid.xy * POSTPROCESS_BLOCKSIZE - TILE_BORDER; + for (uint t = groupIndex; t < TILE_SIZE * TILE_SIZE; t += POSTPROCESS_BLOCKSIZE * POSTPROCESS_BLOCKSIZE) + { + const uint2 pixel = tile_upperleft + unflatten2D(t, TILE_SIZE); + tile_cache[t] = pack_half4(lightmap_input[pixel]); + } + GroupMemoryBarrierWithGroupSync(); + + float4 color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER, TILE_SIZE)]); + + if (color.a < 1) + { + // spin outwards from center in spiral pattern and take the first sample which has valid opacity: + int generation = TILE_BORDER; + for (int growth = 0; (growth < generation) && (color.a < 1); ++growth) + { + const int side = 2 * (growth + 1); + int x = -growth - 1; + int y = -growth - 1; + for (int i = 0; (i < side) && (color.a < 1); ++i) + { + color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]); + x++; + } + for (int i = 0; (i < side) && (color.a < 1); ++i) + { + color = 
unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]); + y++; + } + for (int i = 0; (i < side) && (color.a < 1); ++i) + { + color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]); + x--; + } + for (int i = 0; (i < side) && (color.a < 1); ++i) + { + color = unpack_half4(tile_cache[flatten2D(GTid.xy + TILE_BORDER + int2(x, y), TILE_SIZE)]); + y--; + } + } + } + + lightmap_output[DTid.xy] = color; +} diff --git a/WickedEngine/shaders/objectHF.hlsli b/WickedEngine/shaders/objectHF.hlsli index 942e86e436..16d26f706e 100644 --- a/WickedEngine/shaders/objectHF.hlsli +++ b/WickedEngine/shaders/objectHF.hlsli @@ -154,7 +154,7 @@ struct VertexInput [branch] if (GetMesh().vb_atl < 0) return 0; - return (half2)bindless_buffers_float2[GetMesh().vb_atl][vertexID]; + return bindless_buffers_half2[GetMesh().vb_atl][vertexID]; } half4 GetVertexColor() @@ -162,7 +162,7 @@ struct VertexInput [branch] if (GetMesh().vb_col < 0) return 1; - return (half4)bindless_buffers_float4[GetMesh().vb_col][vertexID]; + return bindless_buffers_half4[GetMesh().vb_col][vertexID]; } half3 GetNormal() @@ -170,7 +170,7 @@ struct VertexInput [branch] if (GetMesh().vb_nor < 0) return 0; - return (half3)bindless_buffers_float4[GetMesh().vb_nor][vertexID].xyz; + return bindless_buffers_half4[GetMesh().vb_nor][vertexID].xyz; } half4 GetTangent() @@ -178,7 +178,7 @@ struct VertexInput [branch] if (GetMesh().vb_tan < 0) return 0; - return (half4)bindless_buffers_float4[GetMesh().vb_tan][vertexID]; + return bindless_buffers_half4[GetMesh().vb_tan][vertexID]; } ShaderMeshInstance GetInstance() @@ -196,7 +196,7 @@ struct VertexInput [branch] if (GetInstance().vb_ao < 0) return 1; - return (half)bindless_buffers_float[NonUniformResourceIndex(GetInstance().vb_ao)][vertexID]; + return bindless_buffers_half[NonUniformResourceIndex(GetInstance().vb_ao)][vertexID]; } half GetWetmap() @@ -204,12 +204,12 @@ struct VertexInput //[branch] //if 
(GetInstance().vb_wetmap < 0) // return 0; - //return (half)bindless_buffers_float[NonUniformResourceIndex(GetInstance().vb_wetmap)][vertexID]; + //return bindless_buffers_half[NonUniformResourceIndex(GetInstance().vb_wetmap)][vertexID]; // There is something seriously bad with AMD driver's shader compiler as the above commented version works incorrectly and this works correctly but only for wetmap [branch] if (GetInstance().vb_wetmap >= 0) - return (half)bindless_buffers_float[NonUniformResourceIndex(GetInstance().vb_wetmap)][vertexID]; + return bindless_buffers_half[NonUniformResourceIndex(GetInstance().vb_wetmap)][vertexID]; return 0; } }; @@ -250,11 +250,11 @@ struct VertexSurface ao = 1; } - normal = rotate_vector(normal, (half4)input.GetInstance().quaternion); + normal = rotate_vector(normal, input.GetInstance().GetQuaternion()); normal = any(normal) ? normalize(normal) : 0; tangent = input.GetTangent(); - tangent.xyz = rotate_vector(tangent.xyz, (half4)input.GetInstance().quaternion); + tangent.xyz = rotate_vector(tangent.xyz, input.GetInstance().GetQuaternion()); tangent.xyz = any(tangent.xyz) ? normalize(tangent.xyz) : 0; uvsets = input.GetUVSets(); @@ -546,10 +546,6 @@ float4 main(PixelInput input, in bool is_frontface : SV_IsFrontFace) : SV_Target #endif // OBJECTSHADER_USE_COMMON #ifdef OBJECTSHADER_USE_TANGENT - if (is_frontface == false) - { - input.tan = -input.tan; - } surface.T = input.tan; surface.T.w = surface.T.w < 0 ? 
-1 : 1; half3 bitangent = cross(surface.T.xyz, input.nor) * surface.T.w; diff --git a/WickedEngine/shaders/objectHF_mesh_shading.hlsli b/WickedEngine/shaders/objectHF_mesh_shading.hlsli index 15912a2fed..689740cf18 100644 --- a/WickedEngine/shaders/objectHF_mesh_shading.hlsli +++ b/WickedEngine/shaders/objectHF_mesh_shading.hlsli @@ -65,7 +65,7 @@ void main(uint3 Gid : SV_GroupID, uint groupIndex : SV_GroupIndex) if((geometry.flags & SHADERMESH_FLAG_DOUBLE_SIDED) == 0 && geometry.vb_pre < 0) // disable cone culling for double sided and skinned { // Cone culling: - bounds.cone_axis = rotate_vector(bounds.cone_axis, inst.quaternion); + bounds.cone_axis = rotate_vector(bounds.cone_axis, inst.GetQuaternion()); if (camera.IsOrtho()) { visible &= dot(camera.forward, bounds.cone_axis) < bounds.cone_cutoff; diff --git a/WickedEngine/shaders/paintdecalVS.hlsl b/WickedEngine/shaders/paintdecalVS.hlsl index d4aec4ba01..ad71b0eaed 100644 --- a/WickedEngine/shaders/paintdecalVS.hlsl +++ b/WickedEngine/shaders/paintdecalVS.hlsl @@ -21,13 +21,13 @@ Output main(uint vertexID : SV_VertexID) } pos = mul(inst.transform.GetMatrix(), float4(pos, 1)).xyz; - float3 nor = 0; + half3 nor = 0; [branch] if(geometry.vb_nor >= 0) { - nor = bindless_buffers_float4[geometry.vb_nor][vertexID].xyz; + nor = bindless_buffers_half4[geometry.vb_nor][vertexID].xyz; } - nor = rotate_vector(nor, inst.quaternion); + nor = rotate_vector(nor, inst.GetQuaternion()); float2 uv = 0; [branch] diff --git a/WickedEngine/shaders/renderlightmapVS.hlsl b/WickedEngine/shaders/renderlightmapVS.hlsl index da2cf4e58b..ae6cb3ec63 100644 --- a/WickedEngine/shaders/renderlightmapVS.hlsl +++ b/WickedEngine/shaders/renderlightmapVS.hlsl @@ -15,7 +15,7 @@ Output main(uint vertexID : SV_VertexID) { ShaderMeshInstance inst = load_instance(push.instanceIndex); float3 pos = bindless_buffers_float4[push.vb_pos_wind][vertexID].xyz; - float3 nor = bindless_buffers_float4[push.vb_nor][vertexID].xyz; + half3 nor = 
bindless_buffers_half4[push.vb_nor][vertexID].xyz; float2 atl = bindless_buffers_float2[push.vb_atl][vertexID]; Output output; @@ -23,13 +23,12 @@ Output main(uint vertexID : SV_VertexID) output.pos = float4(atl, 0, 1); output.pos.xy = output.pos.xy * 2 - 1; output.pos.y *= -1; - output.pos.xy += xTracePixelOffset; output.uv = atl; output.pos3D = mul(inst.transform.GetMatrix(), float4(pos, 1)).xyz; - output.normal = rotate_vector(nor, inst.quaternion); + output.normal = rotate_vector(nor, inst.GetQuaternion()); return output; } diff --git a/WickedEngine/shaders/rtdiffuseCS.hlsl b/WickedEngine/shaders/rtdiffuseCS.hlsl index c25d77d251..c8708f4ce7 100644 --- a/WickedEngine/shaders/rtdiffuseCS.hlsl +++ b/WickedEngine/shaders/rtdiffuseCS.hlsl @@ -118,7 +118,7 @@ void main(uint2 DTid : SV_DispatchThreadID) } else if (GetFrame().options & OPTION_BIT_SURFELGI_ENABLED && GetCamera().texture_surfelgi_index >= 0 && surfel_cellvalid(surfel_cell(P))) { - payload.data += bindless_textures[GetCamera().texture_surfelgi_index][DTid.xy * 2].rgb * GetFrame().gi_boost; + payload.data += bindless_textures[GetCamera().texture_surfelgi_index][DTid.xy * 2].rgb * GetGIBoost(); } else { diff --git a/WickedEngine/shaders/shadingHF.hlsli b/WickedEngine/shaders/shadingHF.hlsli index 856a40f64b..9c5494996f 100644 --- a/WickedEngine/shaders/shadingHF.hlsli +++ b/WickedEngine/shaders/shadingHF.hlsli @@ -69,7 +69,7 @@ inline void ForwardLighting(inout Surface surface, inout Lighting lighting) [branch] if (is_saturated(uvw)) { - const half4 envmapColor = (half4)EnvironmentReflection_Local(cubemap, surface, probe, probeProjection, clipSpacePos); + const half4 envmapColor = EnvironmentReflection_Local(cubemap, surface, probe, probeProjection, clipSpacePos); // perform manual blending of probes: // NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top envmapAccumulation.rgb = mad(1 - envmapAccumulation.a, envmapColor.a * envmapColor.rgb, envmapAccumulation.rgb); @@ -129,6 
+129,8 @@ inline void ForwardLighting(inout Surface surface, inout Lighting lighting) bucket_bits ^= 1u << bucket_bit_index; ShaderEntity light = load_entity(lights().first_item() + entity_index); + if (light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) + break; // static lights will be skipped here (they are used at lightmap baking) switch (light.GetType()) { @@ -311,7 +313,7 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f [branch] if (is_saturated(uvw)) { - const half4 envmapColor = (half4)EnvironmentReflection_Local(cubemap, surface, probe, probeProjection, clipSpacePos); + const half4 envmapColor = EnvironmentReflection_Local(cubemap, surface, probe, probeProjection, clipSpacePos); // perform manual blending of probes: // NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top envmapAccumulation.rgb = mad(1 - envmapAccumulation.a, envmapColor.a * envmapColor.rgb, envmapAccumulation.rgb); @@ -382,6 +384,8 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f for(uint entity_index = iterator.first_item(); entity_index < iterator.end_item(); ++entity_index) { ShaderEntity light = load_entity(entity_index); + if (light.GetFlags() & ENTITY_FLAG_LIGHT_STATIC) + break; // static lights will be skipped here (they are used at lightmap baking) half shadow_mask = 1; #if defined(SHADOW_MASK_ENABLED) && !defined(TRANSPARENT) diff --git a/WickedEngine/shaders/shadowHF.hlsli b/WickedEngine/shaders/shadowHF.hlsli index 0ce5ee9f0f..bb4eb11933 100644 --- a/WickedEngine/shaders/shadowHF.hlsli +++ b/WickedEngine/shaders/shadowHF.hlsli @@ -20,8 +20,8 @@ static const half soft_shadow_sample_count_rcp = rcp((half)soft_shadow_sample_co inline half3 sample_shadow(float2 uv, float cmp, float4 uv_clamping, half radius, min16uint2 pixel) { - Texture2D texture_shadowatlas = bindless_textures[GetFrame().texture_shadowatlas_index]; - Texture2D texture_shadowatlas_transparent = 
bindless_textures[GetFrame().texture_shadowatlas_transparent_index]; + Texture2D texture_shadowatlas = bindless_textures_half4[GetFrame().texture_shadowatlas_index]; + Texture2D texture_shadowatlas_transparent = bindless_textures_half4[GetFrame().texture_shadowatlas_transparent_index]; half3 shadow = 0; @@ -88,25 +88,25 @@ inline half3 shadow_cube(in ShaderEntity light, in float3 Lunnormalized, min16ui inline half3 sample_shadow(float2 uv, float cmp, min16uint2 pixel) { - Texture2D texture_shadowatlas = bindless_textures[GetFrame().texture_shadowatlas_index]; - half3 shadow = (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp).r; + Texture2D texture_shadowatlas = bindless_textures_half4[GetFrame().texture_shadowatlas_index]; + half3 shadow = texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp).r; #ifndef DISABLE_SOFT_SHADOWMAP // sample along a rectangle pattern around center: - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, -1)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 0)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 1)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, -1)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, 1)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, -1)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 0)).r; - shadow.x += (half)texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 1)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, -1)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 0)).r; + shadow.x += 
texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(-1, 1)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, -1)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(0, 1)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, -1)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 0)).r; + shadow.x += texture_shadowatlas.SampleCmpLevelZero(sampler_cmp_depth, uv, cmp, int2(1, 1)).r; shadow = shadow.xxx / 9.0; #endif // DISABLE_SOFT_SHADOWMAP #ifndef DISABLE_TRANSPARENT_SHADOWMAP - Texture2D texture_shadowatlas_transparent = bindless_textures[GetFrame().texture_shadowatlas_transparent_index]; - half4 transparent_shadow = (half4)texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, uv, 0); + Texture2D texture_shadowatlas_transparent = bindless_textures_half4[GetFrame().texture_shadowatlas_transparent_index]; + half4 transparent_shadow = texture_shadowatlas_transparent.SampleLevel(sampler_linear_clamp, uv, 0); #ifdef TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK if (transparent_shadow.a > cmp) #endif // TRANSPARENT_SHADOWMAP_SECONDARY_DEPTH_CHECK @@ -154,8 +154,8 @@ inline half3 shadow_cube(in ShaderEntity light, in float3 Lunnormalized, in min1 inline half shadow_2D_volumetricclouds(float3 P) { // Project into shadow map space (no need to divide by .w because ortho projection!): - float3 shadow_pos = mul(GetFrame().cloudShadowLightSpaceMatrix, float4(P, 1)).xyz; - float3 shadow_uv = clipspace_to_uv(shadow_pos); + half3 shadow_pos = mul(GetFrame().cloudShadowLightSpaceMatrix, float4(P, 1)).xyz; + half3 shadow_uv = clipspace_to_uv(shadow_pos); [branch] if (shadow_uv.z < 0.5) @@ -166,18 +166,18 @@ inline half shadow_2D_volumetricclouds(float3 P) [branch] if (is_saturated(shadow_uv)) { - float cloudShadowSampleZ = shadow_pos.z; + half cloudShadowSampleZ = shadow_pos.z; - Texture2D 
texture_volumetricclouds_shadow = bindless_textures[GetFrame().texture_volumetricclouds_shadow_index]; - float3 cloudShadowData = texture_volumetricclouds_shadow.SampleLevel(sampler_linear_clamp, shadow_uv.xy, 0.0f).rgb; + Texture2D texture_volumetricclouds_shadow = bindless_textures_half4[GetFrame().texture_volumetricclouds_shadow_index]; + half3 cloudShadowData = texture_volumetricclouds_shadow.SampleLevel(sampler_linear_clamp, shadow_uv.xy, 0.0).rgb; - float sampleDepthKm = saturate(1.0 - cloudShadowSampleZ) * GetFrame().cloudShadowFarPlaneKm; + half sampleDepthKm = saturate(1.0 - cloudShadowSampleZ) * GetFrame().cloudShadowFarPlaneKm; - float opticalDepth = cloudShadowData.g * (max(0.0f, cloudShadowData.r - sampleDepthKm) * SKY_UNIT_TO_M); + half opticalDepth = cloudShadowData.g * (max(0.0, cloudShadowData.r - sampleDepthKm) * SKY_UNIT_TO_M); opticalDepth = min(cloudShadowData.b, opticalDepth); - float transmittance = saturate(exp(-opticalDepth)); - return (half)transmittance; + half transmittance = saturate(exp(-opticalDepth)); + return transmittance; } return 1.0; diff --git a/WickedEngine/shaders/shadowPS_alphatest.hlsl b/WickedEngine/shaders/shadowPS_alphatest.hlsl index 6d55099af0..a5b7e1dd86 100644 --- a/WickedEngine/shaders/shadowPS_alphatest.hlsl +++ b/WickedEngine/shaders/shadowPS_alphatest.hlsl @@ -5,7 +5,7 @@ void main(PixelInput input) { ShaderMaterial material = GetMaterial(); - float alpha = 1; + half alpha = 1; [branch] if (material.textures[BASECOLORMAP].IsValid()) diff --git a/WickedEngine/shaders/shadowPS_transparent.hlsl b/WickedEngine/shaders/shadowPS_transparent.hlsl index 4552c25124..7f01bf03cc 100644 --- a/WickedEngine/shaders/shadowPS_transparent.hlsl +++ b/WickedEngine/shaders/shadowPS_transparent.hlsl @@ -8,7 +8,7 @@ float4 main(PixelInput input) : SV_TARGET ShaderMaterial material = GetMaterial(); float4 uvsets = input.GetUVSets(); - float4 color; + half4 color; [branch] if (material.textures[BASECOLORMAP].IsValid()) { @@ -31,9 +31,9 
@@ float4 main(PixelInput input) : SV_TARGET clip(color.a - material.GetAlphaTest() - meshinstance.GetAlphaTest()); - float opacity = color.a; + half opacity = color.a; - float transmission = lerp(material.GetTransmission(), 1, material.GetCloak()); + half transmission = lerp(material.GetTransmission(), 1, material.GetCloak()); color.rgb = lerp(color.rgb, 1, material.GetCloak()); [branch] @@ -42,7 +42,7 @@ float4 main(PixelInput input) : SV_TARGET [branch] if (material.textures[TRANSMISSIONMAP].IsValid()) { - float transmissionMap = material.textures[TRANSMISSIONMAP].Sample(sampler_objectshader, uvsets).r; + half transmissionMap = material.textures[TRANSMISSIONMAP].Sample(sampler_objectshader, uvsets).r; transmission *= transmissionMap; } opacity *= 1 - transmission; diff --git a/WickedEngine/shaders/shadowPS_water.hlsl b/WickedEngine/shaders/shadowPS_water.hlsl index 8e930c1655..bf2b1edaa2 100644 --- a/WickedEngine/shaders/shadowPS_water.hlsl +++ b/WickedEngine/shaders/shadowPS_water.hlsl @@ -8,9 +8,8 @@ float4 main(PixelInput input) : SV_TARGET ShaderMaterial material = GetMaterial(); float4 uvsets = input.GetUVSets(); - float2 pixel = input.pos.xy; - float4 color; + half4 color; [branch] if (material.textures[BASECOLORMAP].IsValid()) { @@ -22,7 +21,7 @@ float4 main(PixelInput input) : SV_TARGET } color *= input.color; - float opacity = color.a; + half opacity = color.a; color.rgb = 1; // disable water shadow because it has already fog diff --git a/WickedEngine/shaders/skinningCS.hlsl b/WickedEngine/shaders/skinningCS.hlsl index 5f9cd135fd..0b7b0f91ed 100644 --- a/WickedEngine/shaders/skinningCS.hlsl +++ b/WickedEngine/shaders/skinningCS.hlsl @@ -26,6 +26,14 @@ PUSHCONSTANT(push, SkinningPushConstants); "SRV(t0, space = 18, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "SRV(t0, space = 19, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "SRV(t0, space = 20, offset = 0, 
numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 21, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 22, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 23, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 24, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 25, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 26, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 27, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ + "SRV(t0, space = 28, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "UAV(u0, space = 100, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "UAV(u0, space = 101, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ "UAV(u0, space = 102, offset = 0, numDescriptors = unbounded, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE)," \ @@ -70,14 +78,14 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID) [branch] if (push.vb_nor >= 0) { - nor = bindless_buffers_float4[push.vb_nor][vertexID].xyz; + nor = bindless_buffers_half4[push.vb_nor][vertexID].xyz; } half4 tan = 0; [branch] if (push.vb_tan >= 0) { - tan = bindless_buffers_float4[push.vb_tan][vertexID]; + tan = bindless_buffers_half4[push.vb_tan][vertexID]; } float3 pos = pos_wind.xyz; diff --git a/WickedEngine/shaders/skyAtmosphere.hlsli b/WickedEngine/shaders/skyAtmosphere.hlsli index 3f877158a1..785d0b9056 100644 --- a/WickedEngine/shaders/skyAtmosphere.hlsli +++ b/WickedEngine/shaders/skyAtmosphere.hlsli @@ -41,11 +41,11 
@@ static const float3 cameraVolumeLUTRes = float3(32, 32, 32); // We should precompute those terms from resolutions (Or set resolution as #defined constants) float FromUnitToSubUvs(float u, float resolution) { - return (u + 0.5f / resolution) * (resolution / (resolution + 1.0f)); + return (u + 0.5 / resolution) * (resolution / (resolution + 1.0)); } float FromSubUvsToUnit(float u, float resolution) { - return (u - 0.5f / resolution) * (resolution / (resolution - 1.0f)); + return (u - 0.5 / resolution) * (resolution / (resolution - 1.0)); } void UvToLutTransmittanceParams(AtmosphereParameters atmosphere, out float viewHeight, out float viewZenithCosAngle, in float2 uv) @@ -61,14 +61,14 @@ void UvToLutTransmittanceParams(AtmosphereParameters atmosphere, out float viewH float d_min = atmosphere.topRadius - viewHeight; float d_max = rho + H; float d = d_min + x_mu * (d_max - d_min); - viewZenithCosAngle = d == 0.0 ? 1.0f : (H * H - rho * rho - d * d) / (2.0 * viewHeight * d); + viewZenithCosAngle = d == 0.0 ? 
1.0 : (H * H - rho * rho - d * d) / (2.0 * viewHeight * d); viewZenithCosAngle = clamp(viewZenithCosAngle, -1.0, 1.0); } void LutTransmittanceParamsToUv(AtmosphereParameters atmosphere, in float viewHeight, in float viewZenithCosAngle, out float2 uv) { - float H = sqrt(max(0.0f, atmosphere.topRadius * atmosphere.topRadius - atmosphere.bottomRadius * atmosphere.bottomRadius)); - float rho = sqrt(max(0.0f, viewHeight * viewHeight - atmosphere.bottomRadius * atmosphere.bottomRadius)); + float H = sqrt(max(0.0, atmosphere.topRadius * atmosphere.topRadius - atmosphere.bottomRadius * atmosphere.bottomRadius)); + float rho = sqrt(max(0.0, viewHeight * viewHeight - atmosphere.bottomRadius * atmosphere.bottomRadius)); float discriminant = viewHeight * viewHeight * (viewZenithCosAngle * viewZenithCosAngle - 1.0) + atmosphere.topRadius * atmosphere.topRadius; float d = max(0.0, (-viewHeight * viewZenithCosAngle + sqrt(discriminant))); // Distance to atmosphere boundary @@ -94,7 +94,7 @@ void UvToSkyViewLutParams(AtmosphereParameters atmosphere, out float viewZenithC float Beta = acos(CosBeta); float ZenithHorizonAngle = PI - Beta; - if (uv.y < 0.5f) + if (uv.y < 0.5) { float coord = 2.0 * uv.y; coord = 1.0 - coord; @@ -133,7 +133,7 @@ void SkyViewLutParamsToUv(AtmosphereParameters atmosphere, in bool intersectGrou coord = sqrt(abs(coord)); #endif coord = 1.0 - coord; - uv.y = coord * 0.5f; + uv.y = coord * 0.5; } else { @@ -141,11 +141,11 @@ void SkyViewLutParamsToUv(AtmosphereParameters atmosphere, in bool intersectGrou #if NONLINEARSKYVIEWLUT coord = sqrt(abs(coord)); #endif - uv.y = coord * 0.5f + 0.5f; + uv.y = coord * 0.5 + 0.5; } { - float coord = -lightViewCosAngle * 0.5f + 0.5f; + float coord = -lightViewCosAngle * 0.5 + 0.5; coord = sqrt(coord); uv.x = coord; } @@ -210,7 +210,7 @@ MediumSampleRGB SampleMediumRGB(in float3 worldPos, in AtmosphereParameters atmo s.extinctionMie = densityMie * atmosphere.mieExtinction; s.scatteringRay = densityRay * 
atmosphere.rayleighScattering; - s.absorptionRay = 0.0f; + s.absorptionRay = 0.0; s.extinctionRay = s.scatteringRay + s.absorptionRay; s.scatteringOzo = 0.0; @@ -233,19 +233,19 @@ MediumSampleRGB SampleMediumRGB(in float3 worldPos, in AtmosphereParameters atmo -float RayleighPhase(float cosTheta) +half RayleighPhase(half cosTheta) { - float factor = 3.0f / (16.0f * PI); - return factor * (1.0f + cosTheta * cosTheta); + half factor = 3.0 / (16.0 * PI); + return factor * (1.0 + cosTheta * cosTheta); } -float CornetteShanksMiePhaseFunction(float g, float cosTheta) +half CornetteShanksMiePhaseFunction(half g, half cosTheta) { - float k = 3.0 / (8.0 * PI) * (1.0 - g * g) / (2.0 + g * g); + half k = 3.0 / (8.0 * PI) * (1.0 - g * g) / (2.0 + g * g); return k * (1.0 + cosTheta * cosTheta) / pow(abs(1.0 + g * g - 2.0 * g * -cosTheta), 1.5); } -float HgPhase(float g, float cosTheta) +half HgPhase(half g, half cosTheta) { #ifdef USE_CornetteShanks return CornetteShanksMiePhaseFunction(g, cosTheta); @@ -258,14 +258,14 @@ float HgPhase(float g, float cosTheta) #endif } -float DualLobPhase(float g0, float g1, float w, float cosTheta) +half DualLobPhase(half g0, half g1, half w, half cosTheta) { return lerp(HgPhase(g0, cosTheta), HgPhase(g1, cosTheta), w); } -float UniformPhase() +half UniformPhase() { - return 1.0f / (4.0f * PI); + return 1.0 / (4.0 * PI); } @@ -325,8 +325,8 @@ bool MoveToTopAtmosphere(inout float3 worldPosition, in float3 worldDirection, i bool retval = true; if (viewHeight > atmosphereTopRadius) { - float tTop = RaySphereIntersectNearest(worldPosition, worldDirection, float3(0.0f, 0.0f, 0.0f), atmosphereTopRadius); - if (tTop >= 0.0f) + float tTop = RaySphereIntersectNearest(worldPosition, worldDirection, 0, atmosphereTopRadius); + if (tTop >= 0) { float3 upVector = worldPosition / viewHeight; float3 upOffset = upVector * -PLANET_RADIUS_OFFSET; @@ -341,16 +341,16 @@ bool MoveToTopAtmosphere(inout float3 worldPosition, in float3 worldDirection, i return retval; 
// ok to start tracing } -float3 GetMultipleScattering(AtmosphereParameters atmosphere, Texture2D multiScatteringLUTTexture, float2 multiScatteringLUTRes, float3 scattering, float3 extinction, float3 worldPosition, float viewZenithCosAngle) +float3 GetMultipleScattering(AtmosphereParameters atmosphere, Texture2D multiScatteringLUTTexture, float2 multiScatteringLUTRes, float3 scattering, float3 extinction, float3 worldPosition, float viewZenithCosAngle) { - float2 uv = saturate(float2(viewZenithCosAngle * 0.5f + 0.5f, (length(worldPosition) - atmosphere.bottomRadius) / (atmosphere.topRadius - atmosphere.bottomRadius))); + float2 uv = saturate(float2(viewZenithCosAngle * 0.5 + 0.5, (length(worldPosition) - atmosphere.bottomRadius) / (atmosphere.topRadius - atmosphere.bottomRadius))); uv = float2(FromUnitToSubUvs(uv.x, multiScatteringLUTRes.x), FromUnitToSubUvs(uv.y, multiScatteringLUTRes.y)); float3 multiScatteredLuminance = multiScatteringLUTTexture.SampleLevel(sampler_linear_clamp, uv, 0).rgb; return multiScatteredLuminance; } -float3 GetTransmittance(AtmosphereParameters atmosphere, float pHeight, float sunZenithCosAngle, Texture2D transmittanceLutTexture) +float3 GetTransmittance(AtmosphereParameters atmosphere, float pHeight, float sunZenithCosAngle, Texture2D transmittanceLutTexture) { float2 uv; LutTransmittanceParamsToUv(atmosphere, pHeight, sunZenithCosAngle, uv); @@ -359,15 +359,15 @@ float3 GetTransmittance(AtmosphereParameters atmosphere, float pHeight, float su return TransmittanceToSun; } -float3 GetAtmosphereTransmittance(float3 worldPosition, float3 worldDirection, AtmosphereParameters atmosphere, Texture2D transmittanceLutTexture) +half3 GetAtmosphereTransmittance(float3 worldPosition, float3 worldDirection, AtmosphereParameters atmosphere, Texture2D transmittanceLutTexture) { // If the worldDirection is occluded from this virtual planet, then return. 
// We do this due to the low resolution LUT, where the stored zenith to horizon never reaches black, to prevent linear interpolation artefacts. // At the most shadowed point of the LUT, pure black with earth shadow is never reached. - float2 sol = RaySphereIntersect(worldPosition, worldDirection, float3(0.0f, 0.0f, 0.0f), atmosphere.bottomRadius); - if (sol.x > 0.0f || sol.y > 0.0f) + float2 sol = RaySphereIntersect(worldPosition, worldDirection, 0, atmosphere.bottomRadius); + if (sol.x > 0 || sol.y > 0) { - return 0.0f; + return 0; } float pHeight = length(worldPosition); @@ -377,16 +377,16 @@ float3 GetAtmosphereTransmittance(float3 worldPosition, float3 worldDirection, A float2 uv; LutTransmittanceParamsToUv(atmosphere, pHeight, SunZenithCosAngle, uv); - float3 TransmittanceToSun = transmittanceLutTexture.SampleLevel(sampler_linear_clamp, uv, 0).rgb; + half3 TransmittanceToSun = transmittanceLutTexture.SampleLevel(sampler_linear_clamp, uv, 0).rgb; return TransmittanceToSun; } -float3 GetAtmosphericLightTransmittance(AtmosphereParameters atmosphere, float3 worldPosition, float3 worldDirection, Texture2D transmittanceLutTexture) +half3 GetAtmosphericLightTransmittance(AtmosphereParameters atmosphere, float3 worldPosition, half3 worldDirection, Texture2D transmittanceLutTexture) { const float3 planetCenterWorld = atmosphere.planetCenter * SKY_UNIT_TO_M; const float3 planetCenterToWorldPos = (worldPosition - planetCenterWorld) * M_TO_SKY_UNIT; - float3 atmosphereTransmittance = GetAtmosphereTransmittance(planetCenterToWorldPos, worldDirection, atmosphere, transmittanceLutTexture); + half3 atmosphereTransmittance = GetAtmosphereTransmittance(planetCenterToWorldPos, worldDirection, atmosphere, transmittanceLutTexture); return atmosphereTransmittance; } @@ -417,7 +417,7 @@ float3 GetCameraPlanetPos(AtmosphereParameters atmosphere, float3 cameraPosition return (skyWorldCameraOrigin - planetCenterWorld) * M_TO_SKY_UNIT; } -float3 GetSunLuminance(float3 worldPosition, 
float3 worldDirection, float3 sunDirection, float3 sunIlluminance, AtmosphereParameters atmosphere, Texture2D transmittanceLutTexture) +half3 GetSunLuminance(float3 worldPosition, float3 worldDirection, float3 sunDirection, half3 sunIlluminance, AtmosphereParameters atmosphere, Texture2D transmittanceLutTexture) { //float sunApexAngleDegree = 0.545; // Angular diameter of sun to earth from sea level, see https://en.wikipedia.org/wiki/Solid_angle float sunApexAngleDegree = 2.4; // Modified sun size @@ -426,14 +426,14 @@ float3 GetSunLuminance(float3 worldPosition, float3 worldDirection, float3 sunDi float3 retval = 0; - float t = RaySphereIntersectNearest(worldPosition, worldDirection, float3(0.0f, 0.0f, 0.0f), atmosphere.bottomRadius); - if (t < 0.0f) // no intersection + float t = RaySphereIntersectNearest(worldPosition, worldDirection, 0, atmosphere.bottomRadius); + if (t < 0) // no intersection { float VdotL = dot(worldDirection, normalize(sunDirection)); // weird... the sun disc shrinks near the horizon if we don't normalize sun direction if (VdotL > sunCosHalfApexAngle) { // Edge fade - const float halfCosHalfApex = sunCosHalfApexAngle + (1.0f - sunCosHalfApexAngle) * 0.25; // Start fading when at 75% distance from light disk center + const float halfCosHalfApex = sunCosHalfApexAngle + (1.0 - sunCosHalfApexAngle) * 0.25; // Start fading when at 75% distance from light disk center const float weight = 1.0 - saturate((halfCosHalfApex - VdotL) / (halfCosHalfApex - sunCosHalfApexAngle)); retval = weight * sunIlluminance; @@ -441,7 +441,7 @@ float3 GetSunLuminance(float3 worldPosition, float3 worldDirection, float3 sunDi if (GetWeather().stars > 0) { - float3 stars_direction = mul(worldDirection, (float3x3)GetWeather().stars_rotation); + float3 stars_direction = rotate_vector(worldDirection, GetWeather().stars_rotation); float stars_visibility = pow(saturate(1 - sunDirection.y), 2); float stars_density_at_maximum = lerp(22, 8, GetWeather().stars); float 
stars_threshold = lerp(32, stars_density_at_maximum, stars_visibility); // modifies the number of stars that are visible @@ -460,14 +460,14 @@ float3 GetSunLuminance(float3 worldPosition, float3 worldDirection, float3 sunDi float AerialPerspectiveDepthToSlice(float depth) { - return depth * (1.0f / AP_KM_PER_SLICE); + return depth * (1.0 / AP_KM_PER_SLICE); } float AerialPerspectiveSliceToDepth(float slice) { return slice * AP_KM_PER_SLICE; } -float4 GetAerialPerspectiveTransmittance(float2 uv, float3 worldPosition, float3 cameraPosition, Texture3D cameraVolumeLutTexture) +float4 GetAerialPerspectiveTransmittance(float2 uv, float3 worldPosition, float3 cameraPosition, Texture3D cameraVolumeLutTexture) { float tDepth = length((worldPosition * M_TO_SKY_UNIT) - (cameraPosition * M_TO_SKY_UNIT)); float slice = AerialPerspectiveDepthToSlice(tDepth); @@ -508,7 +508,7 @@ struct SingleScatteringResult SingleScatteringResult IntegrateScatteredLuminance( in AtmosphereParameters atmosphere, in float2 pixelPosition, in float3 worldPosition, in float3 worldDirection, in float3 sunDirection, in float3 sunIlluminance, in float tDepth, in float sampleCountIni, in bool variableSampleCount, in bool perPixelNoise, in bool opaque, in bool ground, in bool mieRayPhase, in bool multiScatteringApprox, - in bool volumetricCloudShadow, in bool opaqueShadow, in Texture2D transmittanceLutTexture, in Texture2D multiScatteringLUTTexture, in float opticalDepthScale = 1.0f, in float tMaxMax = 9000000.0f) + in bool volumetricCloudShadow, in bool opaqueShadow, in Texture2D transmittanceLutTexture, in Texture2D multiScatteringLUTTexture, in float opticalDepthScale = 1.0f, in float tMaxMax = 9000000.0f) { SingleScatteringResult result = (SingleScatteringResult) 0; result.L = 0; @@ -523,17 +523,17 @@ SingleScatteringResult IntegrateScatteredLuminance( } // Compute next intersection with atmosphere or ground - float3 earthO = float3(0.0f, 0.0f, 0.0f); + float3 earthO = 0; float tBottom = 
RaySphereIntersectNearest(worldPosition, worldDirection, earthO, atmosphere.bottomRadius); float tTop = RaySphereIntersectNearest(worldPosition, worldDirection, earthO, atmosphere.topRadius); - float tMax = 0.0f; + float tMax = 0; bool proceed = true; - if (tBottom < 0.0f) + if (tBottom < 0.0) { - if (tTop < 0.0f) + if (tTop < 0.0) { - tMax = 0.0f; // No intersection with earth nor atmosphere: stop right away + tMax = 0.0; // No intersection with earth nor atmosphere: stop right away proceed = false; } else @@ -543,7 +543,7 @@ SingleScatteringResult IntegrateScatteredLuminance( } else { - if (tTop > 0.0f) + if (tTop > 0.0) { tMax = min(tTop, tBottom); } @@ -574,7 +574,7 @@ SingleScatteringResult IntegrateScatteredLuminance( float dt = tMax / sampleCount; // Unlike volumetric fog lighting, we only care about the outmost cascade. This improves performance where we can't see the inner cascades anyway - ShaderEntity light = (ShaderEntity) 0; + ShaderEntity light = (ShaderEntity)0; uint furthestCascade = 0; bool validLight = false; @@ -594,13 +594,13 @@ SingleScatteringResult IntegrateScatteredLuminance( float3 globalL = sunIlluminance; // Ray march the atmosphere to integrate optical depth - float3 L = 0.0f; + float3 L = 0.0; float3 throughput = 1.0; float3 opticalDepth = 0.0; - float t = 0.0f; + float t = 0.0; float tPrev = 0.0; - const float sampleSegmentT = 0.3f; - for (float s = 0.0f; s < sampleCount; s += 1.0f) + const float sampleSegmentT = 0.3; + for (float s = 0.0f; s < sampleCount; s += 1.0) { if (variableSampleCount) { diff --git a/WickedEngine/shaders/skyAtmosphere_cameraVolumeLutCS.hlsl b/WickedEngine/shaders/skyAtmosphere_cameraVolumeLutCS.hlsl index 095e0704be..82dd9bb15e 100644 --- a/WickedEngine/shaders/skyAtmosphere_cameraVolumeLutCS.hlsl +++ b/WickedEngine/shaders/skyAtmosphere_cameraVolumeLutCS.hlsl @@ -3,8 +3,8 @@ #include "globals.hlsli" #include "skyAtmosphere.hlsli" -Texture2D transmittanceLUT : register(t0); -Texture2D multiScatteringLUT : 
register(t1); +Texture2D transmittanceLUT : register(t0); +Texture2D multiScatteringLUT : register(t1); RWTexture3D output : register(u0); diff --git a/WickedEngine/shaders/skyAtmosphere_multiScatteredLuminanceLutCS.hlsl b/WickedEngine/shaders/skyAtmosphere_multiScatteredLuminanceLutCS.hlsl index c0e03b5342..8d14be9de0 100644 --- a/WickedEngine/shaders/skyAtmosphere_multiScatteredLuminanceLutCS.hlsl +++ b/WickedEngine/shaders/skyAtmosphere_multiScatteredLuminanceLutCS.hlsl @@ -1,8 +1,8 @@ #include "globals.hlsli" #include "skyAtmosphere.hlsli" -Texture2D transmittanceLUT : register(t0); -Texture2D multiScatteringLUT : register(t1); +Texture2D transmittanceLUT : register(t0); +Texture2D multiScatteringLUT : register(t1); RWTexture2D output : register(u0); diff --git a/WickedEngine/shaders/skyAtmosphere_skyLuminanceLutCS.hlsl b/WickedEngine/shaders/skyAtmosphere_skyLuminanceLutCS.hlsl index f674249b1f..45e2ad9722 100644 --- a/WickedEngine/shaders/skyAtmosphere_skyLuminanceLutCS.hlsl +++ b/WickedEngine/shaders/skyAtmosphere_skyLuminanceLutCS.hlsl @@ -1,8 +1,8 @@ #include "globals.hlsli" #include "skyAtmosphere.hlsli" -Texture2D transmittanceLUT : register(t0); -Texture2D multiScatteringLUT : register(t1); +Texture2D transmittanceLUT : register(t0); +Texture2D multiScatteringLUT : register(t1); RWTexture2D output : register(u0); diff --git a/WickedEngine/shaders/skyAtmosphere_skyViewLutCS.hlsl b/WickedEngine/shaders/skyAtmosphere_skyViewLutCS.hlsl index 7f35b568cc..24e474736c 100644 --- a/WickedEngine/shaders/skyAtmosphere_skyViewLutCS.hlsl +++ b/WickedEngine/shaders/skyAtmosphere_skyViewLutCS.hlsl @@ -1,8 +1,8 @@ #include "globals.hlsli" #include "skyAtmosphere.hlsli" -Texture2D transmittanceLUT : register(t0); -Texture2D multiScatteringLUT : register(t1); +Texture2D transmittanceLUT : register(t0); +Texture2D multiScatteringLUT : register(t1); RWTexture2D output : register(u0); diff --git a/WickedEngine/shaders/skyAtmosphere_transmittanceLutCS.hlsl 
b/WickedEngine/shaders/skyAtmosphere_transmittanceLutCS.hlsl index 7127b73a61..b6432a0ff0 100644 --- a/WickedEngine/shaders/skyAtmosphere_transmittanceLutCS.hlsl +++ b/WickedEngine/shaders/skyAtmosphere_transmittanceLutCS.hlsl @@ -1,8 +1,8 @@ #include "globals.hlsli" #include "skyAtmosphere.hlsli" -Texture2D transmittanceLUT : register(t0); -Texture2D multiScatteringLUT : register(t1); +Texture2D transmittanceLUT : register(t0); +Texture2D multiScatteringLUT : register(t1); RWTexture2D output : register(u0); diff --git a/WickedEngine/shaders/surfaceHF.hlsli b/WickedEngine/shaders/surfaceHF.hlsli index d9ffe167ba..52dcc22e50 100644 --- a/WickedEngine/shaders/surfaceHF.hlsli +++ b/WickedEngine/shaders/surfaceHF.hlsli @@ -264,7 +264,7 @@ struct Surface #ifdef SHEEN // Sheen energy compensation: https://dassaultsystemes-technology.github.io/EnterprisePBRShadingModel/spec-2021x.md.html#figure_energy-compensation-sheen-e - sheen.DFG = texture_sheenlut.SampleLevel(sampler_linear_clamp, float2(NdotV, sheen.roughness), 0).r; + sheen.DFG = texture_sheenlut.SampleLevel(sampler_linear_clamp, half2(NdotV, sheen.roughness), 0).r; sheen.albedoScaling = 1.0 - max3(sheen.color) * sheen.DFG; #endif // SHEEN @@ -375,10 +375,10 @@ struct Surface [branch] if (geometry.vb_nor >= 0) { - Buffer buf = bindless_buffers_float4[NonUniformResourceIndex(geometry.vb_nor)]; - half3 n0 = rotate_vector(buf[i0].xyz, (half4)inst.quaternion); - half3 n1 = rotate_vector(buf[i1].xyz, (half4)inst.quaternion); - half3 n2 = rotate_vector(buf[i2].xyz, (half4)inst.quaternion); + Buffer buf = bindless_buffers_half4[NonUniformResourceIndex(geometry.vb_nor)]; + half3 n0 = rotate_vector(buf[i0].xyz, inst.GetQuaternion()); + half3 n1 = rotate_vector(buf[i1].xyz, inst.GetQuaternion()); + half3 n2 = rotate_vector(buf[i2].xyz, inst.GetQuaternion()); n0 = any(n0) ? normalize(n0) : 0; n1 = any(n1) ? normalize(n1) : 0; n2 = any(n2) ? 
normalize(n2) : 0; @@ -448,21 +448,17 @@ struct Surface [branch] if (geometry.vb_tan >= 0) { - Buffer buf = bindless_buffers_float4[NonUniformResourceIndex(geometry.vb_tan)]; + Buffer buf = bindless_buffers_half4[NonUniformResourceIndex(geometry.vb_tan)]; half4 t0 = buf[i0]; half4 t1 = buf[i1]; half4 t2 = buf[i2]; - t0.xyz = rotate_vector(t0.xyz, (half4)inst.quaternion); - t1.xyz = rotate_vector(t1.xyz, (half4)inst.quaternion); - t2.xyz = rotate_vector(t2.xyz, (half4)inst.quaternion); + t0.xyz = rotate_vector(t0.xyz, inst.GetQuaternion()); + t1.xyz = rotate_vector(t1.xyz, inst.GetQuaternion()); + t2.xyz = rotate_vector(t2.xyz, inst.GetQuaternion()); t0.xyz = any(t0.xyz) ? normalize(t0.xyz) : 0; t1.xyz = any(t1.xyz) ? normalize(t1.xyz) : 0; t2.xyz = any(t2.xyz) ? normalize(t2.xyz) : 0; T = attribute_at_bary(t0, t1, t2, bary); - if (is_backface) - { - T = -T; - } T.w = T.w < 0 ? -1 : 1; half3 bitangent = cross(T.xyz, Nunnormalized) * T.w; TBN = half3x3(T.xyz, bitangent, Nunnormalized); // unnormalized TBN! 
http://www.mikktspace.com/ @@ -576,7 +572,7 @@ struct Surface [branch] if (geometry.vb_col >= 0 && material.IsUsingVertexColors()) { - Buffer buf = bindless_buffers_float4[NonUniformResourceIndex(geometry.vb_col)]; + Buffer buf = bindless_buffers_half4[NonUniformResourceIndex(geometry.vb_col)]; const half4 c0 = buf[i0]; const half4 c1 = buf[i1]; const half4 c2 = buf[i2]; @@ -587,7 +583,7 @@ struct Surface [branch] if (inst.vb_ao >= 0 && material.IsUsingVertexAO()) { - Buffer buf = bindless_buffers_float[NonUniformResourceIndex(inst.vb_ao)]; + Buffer buf = bindless_buffers_half[NonUniformResourceIndex(inst.vb_ao)]; const half ao0 = buf[i0]; const half ao1 = buf[i1]; const half ao2 = buf[i2]; @@ -868,7 +864,7 @@ struct Surface [branch] if (inst.vb_wetmap >= 0) { - Buffer buf = bindless_buffers_float[NonUniformResourceIndex(inst.vb_wetmap)]; + Buffer buf = bindless_buffers_half[NonUniformResourceIndex(inst.vb_wetmap)]; const half wet0 = buf[i0]; const half wet1 = buf[i1]; const half wet2 = buf[i2]; diff --git a/WickedEngine/shaders/volumetricCloud_upsamplePS.hlsl b/WickedEngine/shaders/volumetricCloud_upsamplePS.hlsl index 76ea20d8fa..fdf9e22740 100644 --- a/WickedEngine/shaders/volumetricCloud_upsamplePS.hlsl +++ b/WickedEngine/shaders/volumetricCloud_upsamplePS.hlsl @@ -19,7 +19,7 @@ half Gaussian(half x, half sigma) return exp(-x * x / (2.0 * sigma * sigma)); } -half4 main(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_Target +float4 main(float4 pos : SV_Position, float2 uv : TEXCOORD) : SV_Target { const uint2 pixel = pos.xy; const float depth = texture_depth[pixel]; diff --git a/WickedEngine/shaders/volumetricLight_DirectionalPS.hlsl b/WickedEngine/shaders/volumetricLight_DirectionalPS.hlsl index 03d54c91f8..eed4417424 100644 --- a/WickedEngine/shaders/volumetricLight_DirectionalPS.hlsl +++ b/WickedEngine/shaders/volumetricLight_DirectionalPS.hlsl @@ -5,7 +5,7 @@ #include "fogHF.hlsli" #include "oceanSurfaceHF.hlsli" -half4 main(VertexToPixel input) : 
SV_Target +float4 main(VertexToPixel input) : SV_Target { ShaderEntity light = load_entity(directional_lights().first_item() + (uint)g_xColor.x); diff --git a/WickedEngine/shaders/volumetricLight_PointPS.hlsl b/WickedEngine/shaders/volumetricLight_PointPS.hlsl index c2733925b5..578a20d9d8 100644 --- a/WickedEngine/shaders/volumetricLight_PointPS.hlsl +++ b/WickedEngine/shaders/volumetricLight_PointPS.hlsl @@ -4,7 +4,7 @@ #include "fogHF.hlsli" #include "oceanSurfaceHF.hlsli" -half4 main(VertexToPixel input) : SV_TARGET +float4 main(VertexToPixel input) : SV_TARGET { ShaderEntity light = load_entity(pointlights().first_item() + (uint)g_xColor.x); diff --git a/WickedEngine/shaders/volumetricLight_SpotPS.hlsl b/WickedEngine/shaders/volumetricLight_SpotPS.hlsl index 02e3f1dff5..5bfc60b58d 100644 --- a/WickedEngine/shaders/volumetricLight_SpotPS.hlsl +++ b/WickedEngine/shaders/volumetricLight_SpotPS.hlsl @@ -36,7 +36,7 @@ bool intersectInfiniteCone(float3 p, float3 v, float3 pa, float3 va, float sina2 return true; } -half4 main(VertexToPixel input) : SV_TARGET +float4 main(VertexToPixel input) : SV_TARGET { ShaderEntity light = load_entity(spotlights().first_item() + (uint)g_xColor.x); diff --git a/WickedEngine/shaders/wetmap_updateCS.hlsl b/WickedEngine/shaders/wetmap_updateCS.hlsl index 97faee9ccb..452bc98333 100644 --- a/WickedEngine/shaders/wetmap_updateCS.hlsl +++ b/WickedEngine/shaders/wetmap_updateCS.hlsl @@ -18,11 +18,11 @@ void main(uint DTid : SV_DispatchThreadID) [branch] if(geometry.vb_nor >= 0) { - Buffer vb_nor = bindless_buffers_float4[geometry.vb_nor]; - float3 normal = vb_nor[DTid].xyz; - normal = rotate_vector(normal, meshinstance.quaternion); + Buffer vb_nor = bindless_buffers_half4[geometry.vb_nor]; + half3 normal = vb_nor[DTid].xyz; + normal = rotate_vector(normal, meshinstance.GetQuaternion()); normal = normalize(normal); - drying *= lerp(4, 1, pow(saturate(normal.y), 8)); // modulate drying speed based on surface slope + drying *= lerp(4, 1, 
pow8(saturate(normal.y))); // modulate drying speed based on surface slope } RWBuffer wetmap = bindless_rwbuffers_float[push.wetmap]; diff --git a/WickedEngine/wiEnums.h b/WickedEngine/wiEnums.h index 3a571be899..85eabe4b5f 100644 --- a/WickedEngine/wiEnums.h +++ b/WickedEngine/wiEnums.h @@ -412,6 +412,7 @@ namespace wi::enums CSTYPE_CAUSTICS, CSTYPE_DEPTH_REPROJECT, CSTYPE_DEPTH_PYRAMID, + CSTYPE_LIGHTMAP_EXPAND, ASTYPE_OBJECT, @@ -455,6 +456,7 @@ namespace wi::enums RSTYPE_OCCLUDEE, RSTYPE_VOXELIZE, RSTYPE_SKY, + RSTYPE_LIGHTMAP, RSTYPE_COUNT }; // depth-stencil states diff --git a/WickedEngine/wiRenderPath3D.cpp b/WickedEngine/wiRenderPath3D.cpp index 32f7ac162e..1546bac3ec 100644 --- a/WickedEngine/wiRenderPath3D.cpp +++ b/WickedEngine/wiRenderPath3D.cpp @@ -1136,6 +1136,21 @@ namespace wi ); } + if (scene->weather.IsVolumetricClouds() && !scene->weather.IsVolumetricCloudsReceiveShadow()) + { + // When volumetric cloud DOESN'T receive shadow it can be done async to shadow maps! + wi::renderer::Postprocess_VolumetricClouds( + volumetriccloudResources, + cmd, + *camera, + camera_previous, + camera_reflection, + wi::renderer::GetTemporalAAEnabled() || getFSR2Enabled(), + scene->weather.volumetricCloudsWeatherMapFirst.IsValid() ? &scene->weather.volumetricCloudsWeatherMapFirst.GetTexture() : nullptr, + scene->weather.volumetricCloudsWeatherMapSecond.IsValid() ? &scene->weather.volumetricCloudsWeatherMapSecond.GetTexture() : nullptr + ); + } + }); // Occlusion culling: @@ -1456,8 +1471,9 @@ namespace wi cmd ); } - if (scene->weather.IsVolumetricClouds()) + if (scene->weather.IsVolumetricClouds() && scene->weather.IsVolumetricCloudsReceiveShadow()) { + // When volumetric cloud receives shadow it must be done AFTER shadow maps!
wi::renderer::Postprocess_VolumetricClouds( volumetriccloudResources, cmd, diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 2c44775552..4dc146fc0a 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -1170,6 +1170,7 @@ void LoadShaders() wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_CAUSTICS], "causticsCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_DEPTH_REPROJECT], "depth_reprojectCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_DEPTH_PYRAMID], "depth_pyramidCS.cso"); }); + wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::CS, shaders[CSTYPE_LIGHTMAP_EXPAND], "lightmap_expandCS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::HS, shaders[HSTYPE_OBJECT], "objectHS.cso"); }); wi::jobsystem::Execute(ctx, [](wi::jobsystem::JobArgs args) { LoadShader(ShaderStage::HS, shaders[HSTYPE_OBJECT_PREPASS], "objectHS_prepass.cso"); }); @@ -1440,7 +1441,7 @@ void LoadShaders() PipelineStateDesc desc; desc.vs = &shaders[VSTYPE_RENDERLIGHTMAP]; desc.ps = &shaders[PSTYPE_RENDERLIGHTMAP]; - desc.rs = &rasterizers[RSTYPE_DOUBLESIDED]; + desc.rs = &rasterizers[RSTYPE_LIGHTMAP]; desc.bs = &blendStates[BSTYPE_TRANSPARENT]; desc.dss = &depthStencils[DSSTYPE_DEPTHDISABLED]; @@ -2302,6 +2303,13 @@ void SetUpStates() rasterizers[RSTYPE_VOXELIZE] = rs; + rs = rasterizers[RSTYPE_DOUBLESIDED]; + //if (device->CheckCapability(GraphicsDeviceCapability::CONSERVATIVE_RASTERIZATION)) + //{ + // rs.conservative_rasterization_enable = true; + //} + rasterizers[RSTYPE_LIGHTMAP] = rs; + DepthStencilState dsd; @@ -3914,7 +3922,7 @@ void UpdatePerFrameData( frameCB.vxgi.clipmaps[i].voxelSize = scene.vxgi.clipmaps[i].voxelsize; } - frameCB.gi_boost = GetGIBoost(); + 
frameCB.giboost_packed = wi::math::pack_half2(GetGIBoost(), 0); if (scene.weather.rain_amount > 0) { @@ -10300,6 +10308,8 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd) const ObjectComponent& object = scene.objects[objectIndex]; if (!object.lightmap.IsValid()) continue; + if (!object.lightmap_render.IsValid()) + continue; if (object.IsLightmapRenderRequested()) { @@ -10309,16 +10319,16 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd) assert(!mesh.vertex_atlas.empty()); assert(mesh.vb_atl.IsValid()); - const TextureDesc& desc = object.lightmap.GetDesc(); + const TextureDesc& desc = object.lightmap_render.GetDesc(); if (object.lightmapIterationCount == 0) { - RenderPassImage rp = RenderPassImage::RenderTarget(&object.lightmap, RenderPassImage::LoadOp::CLEAR); + RenderPassImage rp = RenderPassImage::RenderTarget(&object.lightmap_render, RenderPassImage::LoadOp::CLEAR); device->RenderPassBegin(&rp, 1, cmd); } else { - RenderPassImage rp = RenderPassImage::RenderTarget(&object.lightmap, RenderPassImage::LoadOp::LOAD); + RenderPassImage rp = RenderPassImage::RenderTarget(&object.lightmap_render, RenderPassImage::LoadOp::LOAD); device->RenderPassBegin(&rp, 1, cmd); } @@ -10338,16 +10348,11 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd) push.instanceIndex = objectIndex; device->PushConstants(&push, sizeof(push), cmd); - RaytracingCB cb; + RaytracingCB cb = {}; cb.xTraceResolution.x = desc.width; cb.xTraceResolution.y = desc.height; cb.xTraceResolution_rcp.x = 1.0f / cb.xTraceResolution.x; cb.xTraceResolution_rcp.y = 1.0f / cb.xTraceResolution.y; - XMFLOAT4 halton = wi::math::GetHaltonSequence(object.lightmapIterationCount); // for jittering the rasterization (good for eliminating atlas border artifacts) - cb.xTracePixelOffset.x = (halton.x * 2 - 1) * cb.xTraceResolution_rcp.x; - cb.xTracePixelOffset.y = (halton.y * 2 - 1) * cb.xTraceResolution_rcp.y; - cb.xTracePixelOffset.x *= 1.4f; // boost the jitter by a bit - 
cb.xTracePixelOffset.y *= 1.4f; // boost the jitter by a bit cb.xTraceAccumulationFactor = 1.0f / (object.lightmapIterationCount + 1.0f); // accumulation factor (alpha) cb.xTraceUserData.x = raytraceBounceCount; uint8_t instanceInclusionMask = 0xFF; @@ -10369,6 +10374,21 @@ void RefreshLightmaps(const Scene& scene, CommandList cmd) device->RenderPassEnd(cmd); + // Expand opaque areas: + { + device->BindComputeShader(&shaders[CSTYPE_LIGHTMAP_EXPAND], cmd); + + device->BindResource(&object.lightmap_render, 0, cmd); + + device->BindUAV(&object.lightmap, 0, cmd); + + device->Barrier(GPUBarrier::Image(&object.lightmap, object.lightmap.desc.layout, ResourceState::UNORDERED_ACCESS), cmd); + + device->Dispatch((desc.width + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, (desc.height + POSTPROCESS_BLOCKSIZE - 1) / POSTPROCESS_BLOCKSIZE, 1, cmd); + + device->Barrier(GPUBarrier::Image(&object.lightmap, ResourceState::UNORDERED_ACCESS, object.lightmap.desc.layout), cmd); + } + device->EventEnd(cmd); } } diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index 471d8b5582..c58b2c3eb4 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -943,18 +943,18 @@ namespace wi::scene shaderscene.aabb_extents_rcp.y = 1.0f / shaderscene.aabb_extents.y; shaderscene.aabb_extents_rcp.z = 1.0f / shaderscene.aabb_extents.z; - shaderscene.weather.sun_color = weather.sunColor; - shaderscene.weather.sun_direction = weather.sunDirection; + shaderscene.weather.sun_color = wi::math::pack_half3(weather.sunColor); + shaderscene.weather.sun_direction = wi::math::pack_half3(weather.sunDirection); shaderscene.weather.most_important_light_index = weather.most_important_light_index; - shaderscene.weather.ambient = weather.ambient; + shaderscene.weather.ambient = wi::math::pack_half3(weather.ambient); shaderscene.weather.sky_rotation_sin = std::sin(weather.sky_rotation); shaderscene.weather.sky_rotation_cos = std::cos(weather.sky_rotation); shaderscene.weather.fog.start = 
weather.fogStart; shaderscene.weather.fog.density = weather.fogDensity; shaderscene.weather.fog.height_start = weather.fogHeightStart; shaderscene.weather.fog.height_end = weather.fogHeightEnd; - shaderscene.weather.horizon = weather.horizon; - shaderscene.weather.zenith = weather.zenith; + shaderscene.weather.horizon = wi::math::pack_half3(weather.horizon); + shaderscene.weather.zenith = wi::math::pack_half3(weather.zenith); shaderscene.weather.sky_exposure = weather.skyExposure; shaderscene.weather.wind.speed = weather.windSpeed; shaderscene.weather.wind.randomness = weather.windRandomness; @@ -969,7 +969,7 @@ namespace wi::scene shaderscene.weather.ocean.texture_displacementmap = device->GetDescriptorIndex(ocean.getDisplacementMap(), SubresourceType::SRV); shaderscene.weather.ocean.texture_gradientmap = device->GetDescriptorIndex(ocean.getGradientMap(), SubresourceType::SRV); shaderscene.weather.stars = weather.stars; - XMStoreFloat4x4(&shaderscene.weather.stars_rotation, XMMatrixRotationQuaternion(XMLoadFloat4(&weather.stars_rotation_quaternion))); + XMStoreFloat4(&shaderscene.weather.stars_rotation, XMQuaternionNormalize(XMQuaternionInverse(XMLoadFloat4(&weather.stars_rotation_quaternion)))); shaderscene.weather.rain_amount = weather.rain_amount; shaderscene.weather.rain_length = weather.rain_length; shaderscene.weather.rain_speed = weather.rain_speed; @@ -4495,7 +4495,9 @@ namespace wi::scene // Get the quaternion from W because that reflects changes by other components (eg. 
softbody) XMVECTOR S, R, T; XMMatrixDecompose(&S, &R, &T, W); - XMStoreFloat4(&inst.quaternion, R); + XMFLOAT4 quaternionFP32; + XMStoreFloat4(&quaternionFP32, R); + inst.quaternion = wi::math::pack_half4(quaternionFP32); float size = std::max(XMVectorGetX(S), std::max(XMVectorGetY(S), XMVectorGetZ(S))); if (object.lightmap.IsValid()) @@ -4570,11 +4572,15 @@ namespace wi::scene // lightmap things: if (object.IsLightmapRenderRequested() && dt > 0) { - if (!object.lightmap.IsValid()) + if (!object.lightmap_render.IsValid()) { object.lightmapWidth = wi::math::GetNextPowerOfTwo(object.lightmapWidth + 1) / 2; object.lightmapHeight = wi::math::GetNextPowerOfTwo(object.lightmapHeight + 1) / 2; + // align to BC6 block size: + object.lightmapWidth = wi::graphics::AlignTo(object.lightmapWidth, 4u); + object.lightmapHeight = wi::graphics::AlignTo(object.lightmapHeight, 4u); + TextureDesc desc; desc.width = object.lightmapWidth; desc.height = object.lightmapHeight; @@ -4583,11 +4589,22 @@ namespace wi::scene // But the final lightmap will be compressed into an optimal format when the rendering is finished desc.format = Format::R32G32B32A32_FLOAT; - device->CreateTexture(&desc, nullptr, &object.lightmap); - device->SetName(&object.lightmap, "lightmap_renderable"); + device->CreateTexture(&desc, nullptr, &object.lightmap_render); + device->SetName(&object.lightmap_render, "lightmap_render"); object.lightmapIterationCount = 0; // reset accumulation } + if (!object.lightmap.IsValid()) + { + TextureDesc desc; + desc.width = object.lightmapWidth; + desc.height = object.lightmapHeight; + desc.bind_flags = BindFlag::UNORDERED_ACCESS | BindFlag::SHADER_RESOURCE; + desc.format = Format::R16G16B16A16_FLOAT; // denoiser needs at least half precision float + + device->CreateTexture(&desc, nullptr, &object.lightmap); + device->SetName(&object.lightmap, "lightmap"); + } } if (!object.lightmapTextureData.empty() && !object.lightmap.IsValid()) @@ -5402,269 +5419,292 @@ namespace wi::scene 
wi::jobsystem::Dispatch(ctx, (uint32_t)characters.GetCount(), 1, [&](wi::jobsystem::JobArgs args) { CharacterComponent& character = characters[args.jobIndex]; - if (!character.IsActive()) - return; Entity entity = characters.GetEntity(args.jobIndex); - uint32_t layer = 0; - LayerComponent* layercomponent = layers.GetComponent(entity); - if (layercomponent != nullptr) - { - layer = layercomponent->GetLayerMask(); - } - - const float fixed_update_fps = character.fixed_update_fps; - const float timestep = 1.0f / fixed_update_fps; - const float ground_friction = character.ground_friction; - const XMVECTOR wall_friction = XMVectorSet(character.ground_friction, 1, character.ground_friction, 1); - const float water_friction = character.water_friction; - const float slope_threshold = character.slope_threshold; - const float leaning_limit = character.leaning_limit; - const XMVECTOR gravity = XMVectorSet(0, character.gravity * timestep, 0, 0); - const float delta_to_timestep = timestep / dt; - - if (!character.humanoid_checked) + XMMATRIX facing_rot = XMMatrixLookToLH(XMVectorZero(), XMLoadFloat3(&character.facing), up); + if (character.IsActive()) { - // Search for humanoid entity that is either this entity or a child: - character.humanoid_checked = true; - if (humanoids.Contains(entity)) + uint32_t layer = 0; + LayerComponent* layercomponent = layers.GetComponent(entity); + if (layercomponent != nullptr) { - character.humanoidEntity = entity; + layer = layercomponent->GetLayerMask(); } - else + + const float fixed_update_fps = character.fixed_update_fps; + const float timestep = 1.0f / fixed_update_fps; + const float ground_friction = character.ground_friction; + const XMVECTOR wall_friction = XMVectorSet(character.ground_friction, 1, character.ground_friction, 1); + const float water_friction = character.water_friction; + const float slope_threshold = character.slope_threshold; + const float leaning_limit = character.leaning_limit; + const XMVECTOR gravity = 
XMVectorSet(0, character.gravity * timestep, 0, 0); + const float delta_to_timestep = timestep / dt; + + if (!character.humanoid_checked) { - for (size_t i = 0; i < humanoids.GetCount(); ++i) + // Search for humanoid entity that is either this entity or a child: + character.humanoid_checked = true; + if (humanoids.Contains(entity)) { - Entity humanoidEntity = humanoids.GetEntity(i); - if (Entity_IsDescendant(humanoidEntity, entity)) + character.humanoidEntity = entity; + } + else + { + for (size_t i = 0; i < humanoids.GetCount(); ++i) { - character.humanoidEntity = humanoidEntity; + Entity humanoidEntity = humanoids.GetEntity(i); + if (Entity_IsDescendant(humanoidEntity, entity)) + { + character.humanoidEntity = humanoidEntity; + } } } } - } - const HumanoidComponent* humanoid = humanoids.GetComponent(character.humanoidEntity); - if (humanoid != nullptr && humanoid->IsRagdollPhysicsEnabled()) - return; // if ragdoll active, don't update character movement + const HumanoidComponent* humanoid = humanoids.GetComponent(character.humanoidEntity); + if (humanoid != nullptr && humanoid->IsRagdollPhysicsEnabled()) + return; // if ragdoll active, don't update character movement - XMVECTOR velocity = XMLoadFloat3(&character.velocity); - XMVECTOR inertia = XMLoadFloat3(&character.inertia); - XMVECTOR movement = XMLoadFloat3(&character.movement); - XMVECTOR position = XMLoadFloat3(&character.position); - XMVECTOR height = XMVectorSet(0, character.height, 0, 0); + XMVECTOR velocity = XMLoadFloat3(&character.velocity); + XMVECTOR inertia = XMLoadFloat3(&character.inertia); + XMVECTOR movement = XMLoadFloat3(&character.movement); + XMVECTOR position = XMLoadFloat3(&character.position); + XMVECTOR height = XMVectorSet(0, character.height, 0, 0); - XMMATRIX facing_rot = XMMatrixLookToLH(XMVectorZero(), XMLoadFloat3(&character.facing), up); - - // Swimming: - character.swimming = false; - float swim_offset = 0; - if (humanoid != nullptr && 
humanoid->bones[size_t(HumanoidComponent::HumanoidBone::Neck)] != INVALID_ENTITY) - { - Entity neck_entity = humanoid->bones[size_t(HumanoidComponent::HumanoidBone::Neck)]; - TransformComponent* neck_transform = transforms.GetComponent(neck_entity); - if (neck_transform != nullptr) - { - XMFLOAT3 neck_pos = neck_transform->GetPosition(); - neck_pos.y += character.water_vertical_offset; - XMFLOAT3 ocean_pos = GetOceanPosAt(neck_pos); - float water_distance = ocean_pos.y - neck_pos.y; - if (water_distance > 0) - { - // Ocean is above the neck: - character.swimming = true; - swim_offset = water_distance; - } - else + // Swimming: + character.swimming = false; + float swim_offset = 0; + if (humanoid != nullptr && humanoid->bones[size_t(HumanoidComponent::HumanoidBone::Neck)] != INVALID_ENTITY) + { + Entity neck_entity = humanoid->bones[size_t(HumanoidComponent::HumanoidBone::Neck)]; + TransformComponent* neck_transform = transforms.GetComponent(neck_entity); + if (neck_transform != nullptr) { - Ray ray(neck_pos, XMFLOAT3(0, 1, 0), 0, 100); - RayIntersectionResult result = Intersects(ray, FILTER_WATER); - if (result.entity != INVALID_ENTITY) + XMFLOAT3 neck_pos = neck_transform->GetPosition(); + neck_pos.y += character.water_vertical_offset; + XMFLOAT3 ocean_pos = GetOceanPosAt(neck_pos); + float water_distance = ocean_pos.y - neck_pos.y; + if (water_distance > 0) { + // Ocean is above the neck: character.swimming = true; - swim_offset = result.distance; + swim_offset = water_distance; + } + else + { + Ray ray(neck_pos, XMFLOAT3(0, 1, 0), 0, 100); + RayIntersectionResult result = Intersects(ray, FILTER_WATER); + if (result.entity != INVALID_ENTITY) + { + character.swimming = true; + swim_offset = result.distance; + } } } } - } - - character.accumulator += dt; - const bool timestep_occurred = character.accumulator >= timestep; - if (timestep_occurred) - { - character.ground_intersect = false; - character.wall_intersect = false; - } + character.accumulator += dt; - // 
Fixed timestep logic: - int steps = 0; - while (character.accumulator >= timestep && steps <= max_substeps) - { - steps++; - XMStoreFloat3(&character.position_prev, position); - character.accumulator -= timestep; - if (character.swimming) - { - velocity *= water_friction; - } - if (character.velocity.y > character.gravity && !character.swimming) + const bool timestep_occurred = character.accumulator >= timestep; + if (timestep_occurred) { - velocity += gravity; + character.ground_intersect = false; + character.wall_intersect = false; } - velocity += movement; - - position += velocity * timestep; - position += inertia * delta_to_timestep; // inertia is from moving platforms which are delta velocity from previous frame - // Check ground: - Capsule capsule = Capsule(position, position + height, character.width); - CapsuleIntersectionResult result = Intersects(capsule, FILTER_NAVIGATION_MESH | FILTER_COLLIDER, ~layer); - if (result.entity != INVALID_ENTITY) + // Fixed timestep logic: + int steps = 0; + while (character.accumulator >= timestep && steps <= max_substeps) { - XMVECTOR collisionNormal = XMLoadFloat3(&result.normal); - const float slope = XMVectorGetX(XMVector3Dot(collisionNormal, up)); - if (slope > slope_threshold) -{ - character.ground_intersect = true; - velocity *= ground_friction; - position += XMVectorSet(0, result.depth, 0, 0); - - if (std::abs(result.velocity.x) > 0.001f || std::abs(result.velocity.y) > 0.001f || std::abs(result.velocity.z) > 0.001f) - { - inertia = XMLoadFloat3(&result.velocity); - } - else - { - inertia = XMVectorZero(); - } + steps++; + XMStoreFloat3(&character.position_prev, position); + character.accumulator -= timestep; + if (character.swimming) + { + velocity *= water_friction; } - } + if (character.velocity.y > character.gravity && !character.swimming) + { + velocity += gravity; + } + velocity += movement; - // Check wall: - capsule = Capsule(position, position + height, character.width); - result = Intersects(capsule, 
FILTER_NAVIGATION_MESH | FILTER_COLLIDER, ~layer); - if (result.entity != INVALID_ENTITY) - { - XMVECTOR collisionNormal = XMLoadFloat3(&result.normal); - const float slope = XMVectorGetX(XMVector3Dot(collisionNormal, up)); - if (slope <= slope_threshold) + position += velocity * timestep; + position += inertia * delta_to_timestep; // inertia is from moving platforms which are delta velocity from previous frame + + // Check ground: + Capsule capsule = Capsule(position, position + height, character.width); + CapsuleIntersectionResult result = Intersects(capsule, FILTER_NAVIGATION_MESH | FILTER_COLLIDER, ~layer); + if (result.entity != INVALID_ENTITY) { - character.wall_intersect = true; - if (!character.ground_intersect) + XMVECTOR collisionNormal = XMLoadFloat3(&result.normal); + const float slope = XMVectorGetX(XMVector3Dot(collisionNormal, up)); + if (slope > slope_threshold) { - velocity *= wall_friction; + character.ground_intersect = true; + velocity *= ground_friction; + position += XMVectorSet(0, result.depth, 0, 0); + + if (std::abs(result.velocity.x) > 0.001f || std::abs(result.velocity.y) > 0.001f || std::abs(result.velocity.z) > 0.001f) + { + inertia = XMLoadFloat3(&result.velocity); + } + else + { + inertia = XMVectorZero(); + } } - float velocityLen = XMVectorGetX(XMVector3Length(velocity)); - XMVECTOR velocityNormalized = XMVector3Normalize(velocity); - XMVECTOR undesiredMotion = collisionNormal * XMVector3Dot(velocityNormalized, collisionNormal); - XMVECTOR desiredMotion = velocityNormalized - undesiredMotion; - velocity = desiredMotion * velocityLen; - position += collisionNormal * result.depth; - inertia = XMVectorZero(); } - } - // Check character capsules: - if (!character.IsCharacterToCharacterCollisionDisabled()) - { + // Check wall: capsule = Capsule(position, position + height, character.width); - XMFLOAT3 incident_position = XMFLOAT3(0, 0, 0); - XMFLOAT3 incident_normal = XMFLOAT3(0, 0, 0); - float penetration_depth = 0; - for (size_t i = 0; 
i < character_capsules.size(); ++i) + result = Intersects(capsule, FILTER_NAVIGATION_MESH | FILTER_COLLIDER, ~layer); + if (result.entity != INVALID_ENTITY) { - if (i == args.jobIndex) - continue; - if (!characters[i].IsActive()) - continue; - if (characters[i].IsCharacterToCharacterCollisionDisabled()) - continue; - if (capsule.intersects(character_capsules[i], incident_position, incident_normal, penetration_depth)) + XMVECTOR collisionNormal = XMLoadFloat3(&result.normal); + const float slope = XMVectorGetX(XMVector3Dot(collisionNormal, up)); + if (slope <= slope_threshold) { - XMVECTOR collisionNormal = XMLoadFloat3(&incident_normal); + character.wall_intersect = true; + if (!character.ground_intersect) + { + velocity *= wall_friction; + } float velocityLen = XMVectorGetX(XMVector3Length(velocity)); XMVECTOR velocityNormalized = XMVector3Normalize(velocity); XMVECTOR undesiredMotion = collisionNormal * XMVector3Dot(velocityNormalized, collisionNormal); XMVECTOR desiredMotion = velocityNormalized - undesiredMotion; velocity = desiredMotion * velocityLen; - position += collisionNormal * penetration_depth; + position += collisionNormal * result.depth; inertia = XMVectorZero(); - break; } } - } - } - character.accumulator = clamp(character.accumulator, 0.0f, timestep); - character.alpha = character.accumulator / timestep; + // Check character capsules: + if (!character.IsCharacterToCharacterCollisionDisabled()) + { + capsule = Capsule(position, position + height, character.width); + XMFLOAT3 incident_position = XMFLOAT3(0, 0, 0); + XMFLOAT3 incident_normal = XMFLOAT3(0, 0, 0); + float penetration_depth = 0; + for (size_t i = 0; i < character_capsules.size(); ++i) + { + if (i == args.jobIndex) + continue; + if (!characters[i].IsActive()) + continue; + if (characters[i].IsCharacterToCharacterCollisionDisabled()) + continue; + if (capsule.intersects(character_capsules[i], incident_position, incident_normal, penetration_depth)) + { + XMVECTOR collisionNormal = 
XMLoadFloat3(&incident_normal); + float velocityLen = XMVectorGetX(XMVector3Length(velocity)); + XMVECTOR velocityNormalized = XMVector3Normalize(velocity); + XMVECTOR undesiredMotion = collisionNormal * XMVector3Dot(velocityNormalized, collisionNormal); + XMVECTOR desiredMotion = velocityNormalized - undesiredMotion; + velocity = desiredMotion * velocityLen; + position += collisionNormal * penetration_depth; + inertia = XMVectorZero(); + break; + } + } + } - position += XMVectorSet(0, swim_offset, 0, 0); + } + character.accumulator = clamp(character.accumulator, 0.0f, timestep); + character.alpha = character.accumulator / timestep; - // Smooth facing: - character.facing = wi::math::Lerp(character.facing, character.facing_next, dt * 5); - character.facing.y = 0; - XMVECTOR facing_next = XMVector3Normalize(XMLoadFloat3(&character.facing_next)); - XMVECTOR facing = XMVector3Normalize(XMLoadFloat3(&character.facing)); - XMStoreFloat3(&character.facing, facing); + position += XMVectorSet(0, swim_offset, 0, 0); - // Smooth leaning: - XMVECTOR facediff = XMVector3TransformNormal(facing_next - facing, facing_rot); - float velocity_leaning = clamp(XMVectorGetX(facediff * XMVector3Length(XMVectorSetY(velocity, 0))) * 0.08f, -leaning_limit, leaning_limit); - character.leaning_next = lerp(character.leaning_next, velocity_leaning, dt * 5); - character.leaning = lerp(character.leaning, character.leaning_next, dt * 5); + // Smooth facing: + character.facing = wi::math::Lerp(character.facing, character.facing_next, dt * 5); + character.facing.y = 0; + XMVECTOR facing_next = XMVector3Normalize(XMLoadFloat3(&character.facing_next)); + XMVECTOR facing = XMVector3Normalize(XMLoadFloat3(&character.facing)); + XMStoreFloat3(&character.facing, facing); - // Simple animation blending: - for (Entity animEntity : character.animations) - { - AnimationComponent* animation = animations.GetComponent(animEntity); - if (animation == nullptr) - continue; - if (animEntity == 
character.currentAnimation) + // Smooth leaning: + XMVECTOR facediff = XMVector3TransformNormal(facing_next - facing, facing_rot); + float velocity_leaning = clamp(XMVectorGetX(facediff * XMVector3Length(XMVectorSetY(velocity, 0))) * 0.08f, -leaning_limit, leaning_limit); + character.leaning_next = lerp(character.leaning_next, velocity_leaning, dt * 5); + character.leaning = lerp(character.leaning, character.leaning_next, dt * 5); + + // Simple animation blending: + for (Entity animEntity : character.animations) { - if (character.reset_anim) + AnimationComponent* animation = animations.GetComponent(animEntity); + if (animation == nullptr) + continue; + if (animEntity == character.currentAnimation) { - character.reset_anim = false; - animation->timer = animation->start; + if (character.reset_anim) + { + character.reset_anim = false; + animation->timer = animation->start; + } + animation->amount = clamp(animation->amount + dt, 0.0f, character.anim_amount); + animation->Play(); + character.anim_ended = animation->timer >= animation->end; + } + else + { + animation->amount = clamp(animation->amount - dt, 0.0f, 0.1f); + if (animation->amount <= 0) + { + animation->Stop(); + } } - animation->amount = clamp(animation->amount + dt, 0.0f, character.anim_amount); - animation->Play(); - character.anim_ended = animation->timer >= animation->end; } - else + + // Try to put water ripple under character: + float horizontal_velocity_length = XMVectorGetX(XMVector3Length(XMVectorSetY(velocity, 0))); + if (horizontal_velocity_length > 0.01) { - animation->amount = clamp(animation->amount - dt, 0.0f, 0.1f); - if (animation->amount <= 0) + XMFLOAT3 ocean_pos = GetOceanPosAt(character.position); + if (character.position.y < ocean_pos.y) + { + PutWaterRipple(XMFLOAT3(character.position.x, ocean_pos.y, character.position.z)); + } + else { - animation->Stop(); + Capsule capsule = Capsule(position, position + height, character.width); + CapsuleIntersectionResult result = 
Intersects(capsule, FILTER_WATER); + if (result.entity != INVALID_ENTITY) + { + PutWaterRipple(result.position); + } } } - } - // Try to put water ripple under character: - float horizontal_velocity_length = XMVectorGetX(XMVector3Length(XMVectorSetY(velocity, 0))); - if (horizontal_velocity_length > 0.01) - { - XMFLOAT3 ocean_pos = GetOceanPosAt(character.position); - if (character.position.y < ocean_pos.y) + XMStoreFloat3(&character.position, position); + XMStoreFloat3(&character.velocity, velocity); + XMStoreFloat3(&character.inertia, inertia); + character.movement = XMFLOAT3(0, 0, 0); + + if (character.pathfinding_thread == nullptr && character.process_goal) { - PutWaterRipple(XMFLOAT3(character.position.x, ocean_pos.y, character.position.z)); + character.pathfinding_thread = std::make_shared(); } - else + if (character.pathfinding_thread) { - Capsule capsule = Capsule(position, position + height, character.width); - CapsuleIntersectionResult result = Intersects(capsule, FILTER_WATER); - if (result.entity != INVALID_ENTITY) + if (AtomicLoad(&character.pathfinding_thread->process_goal_completed) != 0) + { + AtomicAnd(&character.pathfinding_thread->process_goal_completed, 0); + std::swap(character.pathfinding_thread->pathquery_work, character.pathquery); + } + if (character.process_goal && character.voxelgrid != nullptr && !wi::jobsystem::IsBusy(character.pathfinding_thread->ctx)) { - PutWaterRipple(result.position); + character.process_goal = false; + character.pathfinding_thread->ctx.priority = wi::jobsystem::Priority::Low; + wi::jobsystem::Execute(character.pathfinding_thread->ctx, [&](wi::jobsystem::JobArgs args) { + character.pathfinding_thread->pathquery_work.process(character.position, character.goal, *character.voxelgrid); + AtomicOr(&character.pathfinding_thread->process_goal_completed, 1); + }); } } } - XMStoreFloat3(&character.position, position); - XMStoreFloat3(&character.velocity, velocity); - XMStoreFloat3(&character.inertia, inertia); - 
character.movement = XMFLOAT3(0, 0, 0); - // Apply character transformation on transform component: + // This gets applied even on inactive characters TransformComponent* transform = transforms.GetComponent(entity); if (transform != nullptr) { @@ -5685,28 +5725,6 @@ namespace wi::scene transform->SetDirty(); } - if (character.pathfinding_thread == nullptr && character.process_goal) - { - character.pathfinding_thread = std::make_shared(); - } - if (character.pathfinding_thread) - { - if (AtomicLoad(&character.pathfinding_thread->process_goal_completed) != 0) - { - AtomicAnd(&character.pathfinding_thread->process_goal_completed, 0); - std::swap(character.pathfinding_thread->pathquery_work, character.pathquery); - } - if (character.process_goal && character.voxelgrid != nullptr && !wi::jobsystem::IsBusy(character.pathfinding_thread->ctx)) - { - character.process_goal = false; - character.pathfinding_thread->ctx.priority = wi::jobsystem::Priority::Low; - wi::jobsystem::Execute(character.pathfinding_thread->ctx, [&](wi::jobsystem::JobArgs args) { - character.pathfinding_thread->pathquery_work.process(character.position, character.goal, *character.voxelgrid); - AtomicOr(&character.pathfinding_thread->process_goal_completed, 1); - }); - } - } - }); wi::jobsystem::Wait(ctx); } diff --git a/WickedEngine/wiScene_Components.cpp b/WickedEngine/wiScene_Components.cpp index 2722318174..30a09bbde7 100644 --- a/WickedEngine/wiScene_Components.cpp +++ b/WickedEngine/wiScene_Components.cpp @@ -1927,7 +1927,8 @@ namespace wi::scene void ObjectComponent::ClearLightmap() { - lightmap = Texture(); + lightmap_render = {}; + lightmap = {}; lightmapWidth = 0; lightmapHeight = 0; lightmapIterationCount = 0; @@ -1936,7 +1937,8 @@ namespace wi::scene } void ObjectComponent::SaveLightmap() { - if (lightmap.IsValid() && has_flag(lightmap.desc.bind_flags, BindFlag::RENDER_TARGET)) + lightmap_render = {}; + if (lightmap.IsValid() && has_flag(lightmap.desc.bind_flags, BindFlag::UNORDERED_ACCESS)) 
{ SetLightmapRenderRequest(false); @@ -1969,8 +1971,8 @@ namespace wi::scene // Create a denoising filter oidn::FilterRef filter = device.newFilter("RTLightmap"); - filter.setImage("color", lightmapTextureData_buffer, oidn::Format::Float3, width, height, 0, sizeof(XMFLOAT4)); - filter.setImage("output", texturedata_dst_buffer, oidn::Format::Float3, width, height, 0, sizeof(XMFLOAT4)); + filter.setImage("color", lightmapTextureData_buffer, oidn::Format::Half3, width, height, 0, sizeof(XMHALF4)); + filter.setImage("output", texturedata_dst_buffer, oidn::Format::Half3, width, height, 0, sizeof(XMHALF4)); filter.commit(); // Filter the image @@ -2008,12 +2010,12 @@ namespace wi::scene wi::vector packed_data; packed_data.resize(sizeof(XMFLOAT3PK) * lightmapWidth * lightmapHeight); XMFLOAT3PK* packed_ptr = (XMFLOAT3PK*)packed_data.data(); - XMFLOAT4* raw_ptr = (XMFLOAT4*)lightmapTextureData.data(); + XMHALF4* raw_ptr = (XMHALF4*)lightmapTextureData.data(); uint32_t texelcount = lightmapWidth * lightmapHeight; for (uint32_t i = 0; i < texelcount; ++i) { - XMStoreFloat3PK(packed_ptr + i, XMLoadFloat4(raw_ptr + i)); + XMStoreFloat3PK(packed_ptr + i, XMLoadHalf4(raw_ptr + i)); } lightmapTextureData = std::move(packed_data); diff --git a/WickedEngine/wiScene_Components.h b/WickedEngine/wiScene_Components.h index 325b35b8eb..03f6dbe913 100644 --- a/WickedEngine/wiScene_Components.h +++ b/WickedEngine/wiScene_Components.h @@ -846,6 +846,7 @@ namespace wi::scene // Non-serialized attributes: uint32_t filterMaskDynamic = 0; + wi::graphics::Texture lightmap_render; wi::graphics::Texture lightmap; mutable uint32_t lightmapIterationCount = 0; wi::graphics::GPUBuffer vb_ao; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 0cad0e82df..b5bc998ce4 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, 
alterations, refactors, updates - const int revision = 627; + const int revision = 628; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);