Incorrect Frustum Culling behavior

26 Views Asked by At

I've written a C# script and a Compute Shader that should cull the instances that are not visible by the camera. The instances are getting culled but with some weird behavior - sometimes not getting culled, flickering etc.

Video of the issue

C# script:

private Camera camera;
 
struct DrawData
{
    public Vector3 position;
    public Quaternion rotation;
    public Vector3 scale;
}
 
[HideInInspector]
public Mesh mesh;
[HideInInspector]
public List<Matrix4x4> matrices;
[HideInInspector]
public Material material;
 
[Range(0, 1000f)]
public float distanceCutoff;
 
private List<DrawData> instances;
 
private ComputeShader cullShader;
private ComputeBuffer drawDataBuffer, argsBuffer, voteBuffer, scanBuffer, groupSumArrayBuffer, scannedGroupSumBuffer, resultBuffer;
 
private int numThreadGroups, numGroupScanThreadGroups;
 
private uint[] args = new uint[5];
private MaterialPropertyBlock mpb;
 
private void Awake()
{
    camera = Camera.main;
    mpb = new MaterialPropertyBlock();
   
    instances = new List<DrawData>();
    LoadInstances();
 
    drawDataBuffer = new ComputeBuffer(instances.Count, Marshal.SizeOf<DrawData>());
    drawDataBuffer.SetData(instances);
   
    numThreadGroups = Mathf.CeilToInt(instances.Count / 128.0f);
    numGroupScanThreadGroups = Mathf.CeilToInt(instances.Count / 1024.0f);
 
    cullShader = Resources.Load<ComputeShader>("ComputeShaders/Cull");
 
    voteBuffer = new ComputeBuffer(instances.Count, 4);
    scanBuffer = new ComputeBuffer(instances.Count, 4);
    groupSumArrayBuffer = new ComputeBuffer(instances.Count, 4);
    scannedGroupSumBuffer = new ComputeBuffer(instances.Count, 4);
    resultBuffer = new ComputeBuffer(instances.Count, Marshal.SizeOf<DrawData>());
   
    argsBuffer = new ComputeBuffer(5, sizeof(uint), ComputeBufferType.IndirectArguments);
    args[0] = mesh.GetIndexCount(0);
    args[1] = (uint)instances.Count;
    args[2] = (uint)mesh.GetIndexStart(0);
    args[3] = (uint)mesh.GetBaseVertex(0);
 
    mpb.SetBuffer("_DrawData", resultBuffer);
}
 
private void CullGrass(Matrix4x4 VP)
{
 
    argsBuffer.SetData(args);      
 
    // Vote
    cullShader.SetMatrix("MATRIX_VP", VP);
    cullShader.SetBuffer(0, "drawDataBuffer", drawDataBuffer);
    cullShader.SetBuffer(0, "voteBuffer", voteBuffer);
    cullShader.SetVector("cameraPosition", camera.transform.position);
    cullShader.SetFloat("_distance", distanceCutoff);
    cullShader.Dispatch(0, numThreadGroups, 1, 1);
   
    // Scan Instances
    cullShader.SetBuffer(1, "voteBuffer", voteBuffer);
    cullShader.SetBuffer(1, "scanBuffer", scanBuffer);
    cullShader.SetBuffer(1, "groupSumArray", groupSumArrayBuffer);
    cullShader.Dispatch(1, numThreadGroups, 1, 1);
   
    // Scan Groups
    cullShader.SetInt("numOfGroups", numThreadGroups);
    cullShader.SetBuffer(2, "groupSumArrayIn", groupSumArrayBuffer);
    cullShader.SetBuffer(2, "groupSumArrayOut", scannedGroupSumBuffer);
    cullShader.Dispatch(2, numGroupScanThreadGroups, 1, 1);
   
    // Compact
    cullShader.SetBuffer(3, "drawDataBuffer", drawDataBuffer);
    cullShader.SetBuffer(3, "voteBuffer", voteBuffer);
    cullShader.SetBuffer(3, "scanBuffer", scanBuffer);
    cullShader.SetBuffer(3, "argsBuffer", argsBuffer);
    cullShader.SetBuffer(3, "resultBuffer", resultBuffer);
    cullShader.SetBuffer(3, "groupSumArray", scannedGroupSumBuffer);
    cullShader.Dispatch(3, numThreadGroups, 1, 1);
}
 
private void Update()
{
    Matrix4x4 P = camera.projectionMatrix;
    Matrix4x4 V = camera.transform.worldToLocalMatrix;
    Matrix4x4 VP = P * V;
   
    CullGrass(VP);
   
    Graphics.DrawMeshInstancedIndirect(mesh, 0, material, new Bounds(Vector3.zero, Vector3.one * 100.0f), argsBuffer, 0, mpb);
}
 
private void LoadInstances()
{
    instances.Clear();
    foreach (var matrix in matrices)
    {
        instances.Add(new DrawData()
        {
            position = GetPositionFromMatrix(matrix),
            rotation = GetRotationFromMatrix(matrix),
            scale = GetScaleFromMatrix(matrix)
        });
    }
 
    Debug.Log($"Initialized {instances.Count} instances of grass.");
}
 
private Vector3 GetPositionFromMatrix(Matrix4x4 matrix)
{
    return matrix.GetColumn(3);
}
 
private Vector3 GetScaleFromMatrix(Matrix4x4 matrix)
{
    return new Vector3(matrix.GetColumn(0).magnitude, matrix.GetColumn(1).magnitude, matrix.GetColumn(2).magnitude);
}
 
private Quaternion GetRotationFromMatrix(Matrix4x4 matrix)
{
    float w = Mathf.Sqrt(1 + matrix.m00 + matrix.m11 + matrix.m22) / 2f;
    float x = (matrix.m21 - matrix.m12) / (w * 4);
    float y = (matrix.m02 - matrix.m20) / (w * 4);
    float z = (matrix.m10 - matrix.m01) / (w * 4);
 
    return new Quaternion(x, y, z, w);
}
 
private void OnDisable()
{
    argsBuffer?.Release();
    drawDataBuffer?.Release();
    voteBuffer?.Release();
    scanBuffer?.Release();
    groupSumArrayBuffer?.Release();
    scannedGroupSumBuffer?.Release();
    resultBuffer?.Release();
}

Compute Shader:

#pragma kernel Vote

#pragma kernel Scan

#pragma kernel ScanGroupSums

#pragma kernel Compact

#pragma kernel ResetArgs



#define NUM_THREAD_GROUPS_X 64



struct DrawData

{

   float3 position;

   float4 rotation;

   float3 scale;

};



RWStructuredBuffer<uint> argsBuffer;

RWStructuredBuffer<DrawData> drawDataBuffer;

RWStructuredBuffer<uint> voteBuffer;

RWStructuredBuffer<uint> scanBuffer;

RWStructuredBuffer<uint> groupSumArray;

RWStructuredBuffer<uint> groupSumArrayIn;

RWStructuredBuffer<uint> groupSumArrayOut;

RWStructuredBuffer<DrawData> resultBuffer;



float4x4 MATRIX_VP;

int numOfGroups;

groupshared uint temp[2 * NUM_THREAD_GROUPS_X];

groupshared uint grouptemp[2 * 1024];

float _distance;

float3 cameraPosition;



[numthreads(128, 1, 1)]

void Vote(uint3 id : SV_DispatchThreadID)

{

   float4 position = float4(drawDataBuffer[id.x].position, 1.0f);



   float4 viewspace = mul(MATRIX_VP, position);



   float3 clipspace = viewspace.xyz;



   clipspace /= -viewspace.w;



   clipspace.x = clipspace.x / 2.0f + 0.5f;

   clipspace.y = clipspace.y / 2.0f + 0.5f;

   clipspace.z = -viewspace.w;



   bool inView = clipspace.x < -0.2f || clipspace.x > 1.2f || clipspace.z <= -0.1f ? 0 : 1;

   bool withinDistance = distance(cameraPosition, position.xyz) < _distance;



   voteBuffer[id.x] = inView * withinDistance;

}



[numthreads(NUM_THREAD_GROUPS_X, 1, 1)]

void Scan(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,

         uint3 groupThreadID : SV_GROUPTHREADID)

{

   int tid = (int)id.x;

   int groupTID = (int)groupThreadID.x;

   int groupID = (int)_groupID.x;



   int offset = 1;

   temp[2 * groupTID] = voteBuffer[2 * tid];

   temp[2 * groupTID + 1] = voteBuffer[2 * tid + 1];

   int d;

   int numElements = 2 * NUM_THREAD_GROUPS_X;



   for (d = numElements >> 1; d > 0; d >>= 1)

   {

       GroupMemoryBarrierWithGroupSync();



       if (groupTID < d)

       {

           int ai = offset * (2 * groupTID + 1) - 1;

           int bi = offset * (2 * groupTID + 2) - 1;

           temp[bi] += temp[ai];

       }



       offset *= 2;

   }



   if (groupTID == 0)

   {

       groupSumArray[_groupID.x] = temp[numElements - 1];

       temp[numElements - 1] = 0;

   }



   for (d = 1; d < numElements; d *= 2)

   {

       offset >>= 1;



       GroupMemoryBarrierWithGroupSync();

       if (groupTID < d)

       {

           int ai = offset * (2 * groupTID + 1) - 1;

           int bi = offset * (2 * groupTID + 2) - 1;

           int t = temp[ai];

           temp[ai] = temp[bi];

           temp[bi] += t;

       }

   }



   GroupMemoryBarrierWithGroupSync();



   scanBuffer[2 * tid] = temp[2 * groupTID];

   scanBuffer[2 * tid + 1] = temp[2 * groupTID + 1];

}



[numthreads(1024, 1, 1)]

void ScanGroupSums(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,

                  uint3 groupThreadID : SV_GROUPTHREADID)

{

   int tid = (int)id.x;

   int groupTID = (int)groupThreadID.x;

   int groupID = (int)_groupID.x;



   int offset = 1;

   grouptemp[2 * groupTID] = groupSumArrayIn[2 * tid];

   grouptemp[2 * groupTID + 1] = groupSumArrayIn[2 * tid + 1];

   int d;



   for (d = numOfGroups >> 1; d > 0; d >>= 1)

   {

       GroupMemoryBarrierWithGroupSync();



       if (groupTID < d)

       {

           int ai = offset * (2 * groupTID + 1) - 1;

           int bi = offset * (2 * groupTID + 2) - 1;

           grouptemp[bi] += grouptemp[ai];

       }



       offset *= 2;

   }



   if (tid == 0)

       grouptemp[numOfGroups - 1] = 0;



   for (d = 1; d < numOfGroups; d *= 2)

   {

       offset >>= 1;



       GroupMemoryBarrierWithGroupSync();

       if (tid < d)

       {

           int ai = offset * (2 * groupTID + 1) - 1;

           int bi = offset * (2 * groupTID + 2) - 1;

           int t = grouptemp[ai];

           grouptemp[ai] = grouptemp[bi];

           grouptemp[bi] += t;

       }

   }



   GroupMemoryBarrierWithGroupSync();



   groupSumArrayOut[2 * tid] = grouptemp[2 * tid];

   groupSumArrayOut[2 * tid + 1] = grouptemp[2 * tid + 1];

}



[numthreads(128, 1, 1)]

void Compact(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,

            uint3 groupThreadID : SV_GROUPTHREADID)

{

   uint tid = id.x;

   uint groupID = _groupID.x;

   uint groupSum = groupID.x > 0 ? groupSumArray[groupID.x] : 0;

   bool inCamera = voteBuffer[id.x];



   if (inCamera == 1)

   {

       InterlockedAdd(argsBuffer[1], 1);

       resultBuffer[scanBuffer[tid] + groupSum] = drawDataBuffer[tid];

   }

}



[numthreads(1, 1, 1)]

void ResetArgs(uint3 id : SV_DISPATCHTHREADID)

{

   argsBuffer[1] = (uint)0;

}

Does anybody know why is it not working?

I tried replacing some lines, moved them somewhere else but it resulted in the same issue - maybe worse.

0

There are 0 best solutions below