Skip to content

Commit c493712

Browse files
svc-reach-platform-support authored and Evergreen committed
[Port] [6000.3] [UUM-129871] Fix performance regression and insufficient light rendering in FPTL/Cluster shaders
1 parent 92ef7b3 commit c493712

2 files changed

Lines changed: 30 additions & 20 deletions

File tree

Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-clustered.compute

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -61,9 +61,9 @@ RWStructuredBuffer<float> g_logBaseBuffer : register( u3 ); // don't
6161

6262

6363
#define MAX_NR_COARSE_ENTRIES LIGHT_CLUSTER_MAX_COARSE_ENTRIES
64-
#define MAX_NR_VISIBLE_LIGHTS 826 // correspond to m_MaxLightsOnScreen in LightLoop.cs
64+
#define MAX_NR_VISIBLE_LIGHTS 4096 // correspond to m_MaxLightsOnScreen in LightLoop.cs
6565

66-
groupshared unsigned int ldsTilePassList[MAX_NR_VISIBLE_LIGHTS];
66+
groupshared unsigned int ldsTilePassList[MAX_NR_VISIBLE_LIGHTS/32];
6767
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
6868
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
6969
groupshared float4 lightPlanes[4*6]; // Each plane is defined by a float4. 6 planes per light, 4 lights (24 planes)
@@ -248,10 +248,9 @@ void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
248248
const bool lightSortRequired = g_iNrVisibLights > MAX_NR_COARSE_ENTRIES; // Uniform runtime branch
249249
if (lightSortRequired)
250250
{
251-
UNITY_LOOP
252-
for(i=t; i<MAX_NR_VISIBLE_LIGHTS; i+=NR_THREADS)
253-
if(i<MAX_NR_VISIBLE_LIGHTS)
254-
ldsTilePassList[i]=0;
251+
UNITY_UNROLLX((MAX_NR_VISIBLE_LIGHTS/32) / NR_THREADS)
252+
for(i=t; i<MAX_NR_VISIBLE_LIGHTS/32; i+=NR_THREADS)
253+
ldsTilePassList[i]=0;
255254
}
256255

257256
const uint log2TileSize = firstbithigh(TILE_SIZE_CLUSTERED);
@@ -351,9 +350,10 @@ void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
351350
{
352351
unsigned int uInc = 1;
353352
unsigned int uIndex;
353+
unsigned int ul = l;
354354
InterlockedAdd(lightOffs, uInc, uIndex);
355355
if (lightSortRequired)
356-
ldsTilePassList[l] = 1;
356+
InterlockedOr(ldsTilePassList[ul >> 5], 1u << (ul & (32 - 1))); // ldsTilePassList[ul/32] |= 1u << (ul % 32);
357357
else
358358
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
359359
}
@@ -368,10 +368,15 @@ void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
368368
if(t==0)
369369
{
370370
int c=0;
371-
for(int ii=0; ii<g_iNrVisibLights && c < MAX_NR_COARSE_ENTRIES; ii++)
371+
for (int ii = 0; ii < MAX_NR_VISIBLE_LIGHTS/32 && c < MAX_NR_COARSE_ENTRIES; ii++)
372372
{
373-
if(ldsTilePassList[ii] == 1)
374-
coarseList[c++] = ii;
373+
unsigned int mask = ldsTilePassList[ii];
374+
while (mask != 0u && c < MAX_NR_COARSE_ENTRIES)
375+
{
376+
unsigned int l = firstbitlow(mask);
377+
mask &= ~(1u << l);
378+
coarseList[c++] = ii * 32 + l;
379+
}
375380
}
376381
}
377382
}

Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild.compute

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,9 @@ StructuredBuffer<uint> g_vBigTileLightList : register( t4 ); // don't sup
4949
RWStructuredBuffer<uint> g_vLightList : register( u0 ); // don't support RWBuffer yet in unity
5050

5151
#define CATEGORY_LIST_SIZE LIGHTCATEGORY_COUNT
52-
#define MAX_NR_VISIBLE_LIGHTS 826 // correspond to m_MaxLightsOnScreen in LightLoop.cs
52+
#define MAX_NR_VISIBLE_LIGHTS 4096 // correspond to m_MaxLightsOnScreen in LightLoop.cs
5353

54-
groupshared unsigned int ldsTilePassList[MAX_NR_VISIBLE_LIGHTS];
54+
groupshared unsigned int ldsTilePassList[MAX_NR_VISIBLE_LIGHTS/32];
5555
groupshared unsigned int coarseList[LIGHT_LIST_MAX_COARSE_ENTRIES];
5656
groupshared unsigned int prunedList[LIGHT_LIST_MAX_COARSE_ENTRIES]; // temporarily support room for all 64 while in LDS
5757

@@ -160,10 +160,9 @@ void TileLightListGen(uint3 dispatchThreadId : SV_DispatchThreadID, uint threadI
160160
const bool lightSortRequired = g_iNrVisibLights > LIGHT_LIST_MAX_COARSE_ENTRIES; // Uniform runtime branch
161161
if (lightSortRequired)
162162
{
163-
UNITY_LOOP
164-
for(i=t; i<MAX_NR_VISIBLE_LIGHTS; i+=NR_THREADS)
165-
if(i<MAX_NR_VISIBLE_LIGHTS)
166-
ldsTilePassList[i]=0;
163+
UNITY_UNROLLX((MAX_NR_VISIBLE_LIGHTS/32) / NR_THREADS)
164+
for(i=t; i<MAX_NR_VISIBLE_LIGHTS/32; i+=NR_THREADS)
165+
ldsTilePassList[i]=0;
167166
}
168167

169168
uint iWidth = g_viDimensions.x;
@@ -254,9 +253,10 @@ void TileLightListGen(uint3 dispatchThreadId : SV_DispatchThreadID, uint threadI
254253
{
255254
unsigned int uInc = 1;
256255
unsigned int uIndex;
256+
unsigned int ul = l;
257257
InterlockedAdd(lightOffs, uInc, uIndex);
258258
if (lightSortRequired)
259-
ldsTilePassList[l] = 1;
259+
InterlockedOr(ldsTilePassList[ul >> 5], 1u << (ul & (32 - 1))); // ldsTilePassList[ul/32] |= 1u << (ul % 32);
260260
else
261261
if(uIndex<LIGHT_LIST_MAX_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
262262
}
@@ -275,10 +275,15 @@ void TileLightListGen(uint3 dispatchThreadId : SV_DispatchThreadID, uint threadI
275275
if(t==0)
276276
{
277277
int c=0;
278-
for(int ii=0; ii<g_iNrVisibLights && c < LIGHT_LIST_MAX_COARSE_ENTRIES; ii++)
278+
for (int ii = 0; ii < MAX_NR_VISIBLE_LIGHTS/32 && c < LIGHT_LIST_MAX_COARSE_ENTRIES; ii++)
279279
{
280-
if(ldsTilePassList[ii] == 1)
281-
coarseList[c++] = ii;
280+
unsigned int mask = ldsTilePassList[ii];
281+
while (mask != 0u && c < LIGHT_LIST_MAX_COARSE_ENTRIES)
282+
{
283+
unsigned int l = firstbitlow(mask);
284+
mask &= ~(1u << l);
285+
coarseList[c++] = ii * 32 + l;
286+
}
282287
}
283288
}
284289
}

0 commit comments

Comments
 (0)