Problems Implementing Forward+ Rendering in DX12

Started by
1 comment, last by gamehacker1999 3 years, 9 months ago

I am trying to implement Forward+ rendering in my DX12 rendering engine. For some reason, my point lights are not being culled correctly. I currently have a depth pre-pass, a compute shader pass for light culling, and a final light calculation pass.

Here is my light-culling compute shader:

#include "Lighting.hlsli"

// Light list (t0). main indexes it by light index and reads each entry's
// position, type and range.
StructuredBuffer<Light> lights : register(t0);
// Depth texture (t1). main loads the .r channel of mip 0 at each thread's pixel.
Texture2D depthMap : register(t1);
// Output (u0): per-tile lists of light indices. Each tile owns a run of 1024
// entries starting at tileIndex * 1024.
RWStructuredBuffer<uint> LightIndices : register(u0);

//groupshared variables
// Bit patterns (asuint) of the smallest and largest depth seen in the tile,
// reduced across the group with InterlockedMin / InterlockedMax.
groupshared uint minDepth;
groupshared uint maxDepth;
// Number of lights the group has appended to visibleLightIndices so far.
groupshared uint visibleLightCount;
// Indices (into `lights`) of the lights that passed culling for this tile.
groupshared uint visibleLightIndices[1024];
// NOTE(review): no code in this file reads viewProjection.
groupshared matrix viewProjection;
// Side planes of the tile; filled by thread 0 of the group and passed to
// SphereInsideFrustum.
groupshared Frustum frustum;
// NOTE(review): not referenced by main or by the helpers in this file (the
// planes actually used live in frustum.frustumPlanes).
groupshared float4 frustumPlanes[6];

// Edge length, in pixels, of a screen tile; also the thread-group size in x and y.
#define TILE_SIZE 16

// Constants for the light-culling pass, bound at b0.
cbuffer externalData : register(b0)
{
    // Multiplied with light positions in main to produce the positions that
    // are tested against the tile frustum.
    matrix view;
    // NOTE(review): presumably the projection that inverseProjection inverts — confirm.
    matrix projection;
    // Used by ClipToView, ConvertProjToView and ConvertProjDepthToView to
    // undo the projection.
    matrix inverseProjection;
    // Number of entries of `lights` that main iterates over.
    int lightCount;
};

// Transforms a clip-space position into view space.
float4 ClipToView(float4 clip)
{
    // Undo the projection with the inverse projection matrix from the constant buffer.
    float4 unprojected = mul(clip, inverseProjection);

    // Homogeneous divide, so the returned position has w == 1.
    return unprojected / unprojected.w;
}
 
// Transforms a screen-space position (pixel x/y, depth z, w) into view space.
float4 ScreenToView(float4 screen)
{
    // Pixel coordinates -> [0, 1] texture coordinates. The resolution is
    // hard-coded as 1280x720.
    float2 uv = screen.xy / float2(1280,720);

    // Texture coordinates -> [-1, 1] clip-space x/y; v is flipped first.
    float2 clipXY = float2(uv.x,1.0f - uv.y) * 2.0f - 1.0f;

    // Unproject and apply the homogeneous divide.
    float4 viewPos = mul(float4(clipXY, screen.z, screen.w), inverseProjection);
    return viewPos / viewPos.w;
}


// Returns the normalized cross product of b and c, i.e. the unit normal of the
// plane through the origin that contains both vectors.
float3 CreatePlaneEquation(float3 b, float3 c)
{
    float3 normal = cross(b, c);
    return normalize(normal);
}


// Signed distance from point p to a plane through the origin with normal eqn
// (a true distance only when eqn is unit length).
// The general form is dot(eqn.xyz, p) + eqn.w; the w term is omitted because
// planes built by CreatePlaneEquation pass through the origin.
float GetSignedDistanceFromPlane(float3 p, float3 eqn)
{
    float signedDistance = dot(eqn, p);
    return signedDistance;
}

// Tests a sphere (centre c, radius r) against four side planes that pass
// through the origin. The sphere passes a plane when the signed distance of
// its centre from that plane is less than r. Returns true only when it passes
// all four planes.
bool TestFrustumSides(float3 c, float r, float3 plane0, float3 plane1, float3 plane2, float3 plane3)
{
    bool intersectingOrInside0 = GetSignedDistanceFromPlane(c, plane0) < r;
    bool intersectingOrInside1 = GetSignedDistanceFromPlane(c, plane1) < r;
    bool intersectingOrInside2 = GetSignedDistanceFromPlane(c, plane2) < r;
    bool intersectingOrInside3 = GetSignedDistanceFromPlane(c, plane3) < r;

    // The logical ANDs were HTML-escaped in the original paste; restored to &&.
    return (intersectingOrInside0 && intersectingOrInside1 &&
            intersectingOrInside2 && intersectingOrInside3);
}

// Number of 16-pixel tiles across the 1280-pixel-wide screen.
uint GetNumTilesX()
{
    // (1280 + 15) / 16.0 = 80.9375, truncated to 80 by the cast.
    const float tilesAcross = (1280 + 16 - 1) / (float) 16;
    return (uint) tilesAcross;
}

// Number of 16-pixel tiles down the 720-pixel-high screen.
uint GetNumTilesY()
{
    // (720 + 15) / 16.0 = 45.9375, truncated to 45 by the cast.
    const float tilesDown = (720 + 16 - 1) / (float) 16;
    return (uint) tilesDown;
}

// Maps a post-projection position back into view space: multiply by the
// inverse projection, then divide by the resulting w.
float4 ConvertProjToView(float4 p)
{
    const float4 unprojected = mul(p, inverseProjection);
    return unprojected / unprojected.w;
}

// Maps a post-projection depth value to a view-space depth.
float ConvertProjDepthToView(float z)
{
    // Only the _34 and _44 entries of the inverse projection are used: the
    // result is the reciprocal of z * _34 + _44.
    return 1.f / (z * inverseProjection._34 + inverseProjection._44);
}

// Capacity of one tile's light list. Must equal the size of the groupshared
// visibleLightIndices array and the per-tile stride used to address LightIndices.
static const uint MAX_LIGHTS_PER_TILE = 1024;

// Reserves the next slot in the tile's groupshared light list and stores
// lightIndex there. The counter keeps growing past the capacity, but the store
// is skipped in that case so the array is never written out of bounds.
void AppendVisibleLight(uint lightIndex)
{
    uint slot;
    InterlockedAdd(visibleLightCount, 1, slot);
    if (slot < MAX_LIGHTS_PER_TILE)
    {
        visibleLightIndices[slot] = lightIndex;
    }
}

// Light-culling entry point. One thread group handles one TILE_SIZE x TILE_SIZE
// screen tile, one thread per pixel:
//  1. Thread 0 resets the groupshared state and builds the tile's four side planes.
//  2. All threads reduce the tile's minimum and maximum depth.
//  3. The threads stride over the light list and append the lights that touch the tile.
//  4. Thread 0 copies the tile's list into LightIndices, terminated by 0xffffffff
//     unless the list is completely full.
[numthreads(TILE_SIZE, TILE_SIZE, 1)]
void main(int3 groupID : SV_GroupID, // 3D index of the thread group in the dispatch.
int3 groupThreadID : SV_GroupThreadID, // 3D index of local thread ID in a thread group.
int3 dispatchThreadID : SV_DispatchThreadID, // 3D index of global thread ID in the dispatch.
int groupIndex : SV_GroupIndex)
{
    const int2 pixelCoord = dispatchThreadID.xy;
    const int2 tileID = groupID.xy;
    const int2 tileCount = int2(1280 / TILE_SIZE, 720 / TILE_SIZE);
    const uint tileIndex = tileID.y * tileCount.x + tileID.x;

    // The depth is reduced through integer atomics on its bit pattern; this
    // orders correctly as long as the depth is non-negative (assumed [0, 1]).
    const float depth = depthMap.Load(int3(pixelCoord, 0)).r;
    const uint depthBits = asuint(depth);

    // Only thread 0 touches this state, so the reset and the frustum build
    // share one section and a single barrier.
    if (groupIndex == 0)
    {
        minDepth = 0xffffffff;
        maxDepth = 0;
        visibleLightCount = 0;

        // View space eye position is always at the origin.
        const float3 eyePos = float3(0, 0, 0);

        // Pixel-space corners of THIS tile at depth 1. They must be derived
        // from the group (tile) index: dispatchThreadID is already
        // groupID * TILE_SIZE for thread 0, so scaling it by TILE_SIZE again
        // put the corners at groupID * TILE_SIZE * TILE_SIZE.
        float4 screenSpace[4];
        screenSpace[0] = float4(float2(tileID.x, tileID.y) * TILE_SIZE, 1.0f, 1.0f);         // top left
        screenSpace[1] = float4(float2(tileID.x + 1, tileID.y) * TILE_SIZE, 1.0f, 1.0f);     // top right
        screenSpace[2] = float4(float2(tileID.x, tileID.y + 1) * TILE_SIZE, 1.0f, 1.0f);     // bottom left
        screenSpace[3] = float4(float2(tileID.x + 1, tileID.y + 1) * TILE_SIZE, 1.0f, 1.0f); // bottom right

        float3 viewSpace[4];
        for (int corner = 0; corner < 4; corner++)
        {
            viewSpace[corner] = ScreenToView(screenSpace[corner]).xyz;
        }

        // NOTE(review): SphereInsidePlane rejects spheres in a plane's negative
        // half-space, so these planes need inward-facing normals. Which way
        // they face depends on ComputePlane (not visible here) and on the
        // handedness of the projection — confirm. If lights inside a tile are
        // culled, swap the last two arguments of each call.
        frustum.frustumPlanes[0] = ComputePlane(eyePos, viewSpace[2], viewSpace[0]); // left
        frustum.frustumPlanes[1] = ComputePlane(eyePos, viewSpace[1], viewSpace[3]); // right
        frustum.frustumPlanes[2] = ComputePlane(eyePos, viewSpace[0], viewSpace[1]); // top
        frustum.frustumPlanes[3] = ComputePlane(eyePos, viewSpace[3], viewSpace[2]); // bottom
    }

    GroupMemoryBarrierWithGroupSync();

    // Calculating the min and max depth values for the tile
    InterlockedMin(minDepth, depthBits);
    InterlockedMax(maxDepth, depthBits);

    GroupMemoryBarrierWithGroupSync();

    const float minGroupDepth = asfloat(minDepth);
    const float maxGroupDepth = asfloat(maxDepth);

    // Convert depth values to view space.
    const float minDepthVS = ScreenToView(float4(0, 0, minGroupDepth, 1)).z;
    const float maxDepthVS = ScreenToView(float4(0, 0, maxGroupDepth, 1)).z;
    const float nearClipVS = ScreenToView(float4(0, 0, 0, 1)).z;

    // Clipping plane for minimum depth value
    Plane minPlane = { float3(0, 0, 1), minDepthVS };

    // Each thread tests lights groupIndex, groupIndex + threadCount, ...
    const uint threadCount = TILE_SIZE * TILE_SIZE;
    const uint totalLights = (uint) max(lightCount, 0); // a negative count is treated as 0
    const uint batchCount = (totalLights + threadCount - 1) / threadCount;
    for (uint batch = 0; batch < batchCount; batch++)
    {
        const uint lightIndex = batch * threadCount + groupIndex;
        if (lightIndex >= totalLights)
        {
            break;
        }

        switch (lights[lightIndex].type)
        {
            case LIGHT_TYPE_DIR:
            {
                // Directional lights are never culled.
                AppendVisibleLight(lightIndex);
            }
            break;

            case LIGHT_TYPE_SPOT:
            {
                // NOTE(review): no test is implemented for spot lights, so they
                // are never added to any tile's list (unchanged behaviour).
            }
            break;

            case LIGHT_TYPE_POINT:
            {
                Sphere sphere;
                sphere.c = mul(float4(lights[lightIndex].position, 1.0), view).xyz;
                sphere.r = lights[lightIndex].range;
                // Keep the light when its sphere touches the tile frustum and
                // is not entirely on the negative side of the min-depth plane.
                if (SphereInsideFrustum(sphere, frustum, nearClipVS, maxDepthVS) && !SphereInsidePlane(sphere, minPlane))
                {
                    AppendVisibleLight(lightIndex);
                }
            }
            break;

            case LIGHT_TYPE_AREA_RECT:
            {
                // Add all rect lights for now
                AppendVisibleLight(lightIndex);
            }
            break;

            case LIGHT_TYPE_AREA_DISK:
            {
                // NOTE(review): no test is implemented for disk lights, so they
                // are never added to any tile's list (unchanged behaviour).
            }
            break;
        }
    }

    GroupMemoryBarrierWithGroupSync();

    if (groupIndex == 0)
    {
        const uint listStart = MAX_LIGHTS_PER_TILE * tileIndex;
        // visibleLightCount can exceed the capacity when more lights passed
        // than fit; only the stored entries are copied out.
        const uint listLength = min(visibleLightCount, MAX_LIGHTS_PER_TILE);
        for (uint entry = 0; entry < listLength; entry++)
        {
            LightIndices[listStart + entry] = visibleLightIndices[entry];
        }

        // Terminator (the uint value of -1) that ends the list for readers;
        // omitted when the list fills the tile's whole range.
        if (listLength != MAX_LIGHTS_PER_TILE)
        {
            LightIndices[listStart + listLength] = 0xffffffff;
        }
    }
}
Advertisement

For reference, here is my sphere-to-frustum test:

// Returns true when the whole sphere lies in the negative half-space of the
// plane, i.e. the centre is more than one radius behind it.
bool SphereInsidePlane(Sphere sphere, Plane plane)
{
    float centreOffset = dot(plane.normal, sphere.c) - plane.distance;
    return centreOffset < -sphere.r;
}

// Checks whether a sphere (a light's volume) is at least partially contained
// in the frustum, limited in depth to the range [zNear, zFar].
bool SphereInsideFrustum(Sphere sphere, Frustum frustum, float zNear, float zFar)
{
    // Reject spheres that lie entirely beyond zFar or entirely before zNear.
    bool beyondFar = sphere.c.z - sphere.r > zFar;
    bool beforeNear = sphere.c.z + sphere.r < zNear;
    if (beyondFar || beforeNear)
    {
        return false;
    }

    // Then reject spheres that are fully behind any of the four side planes.
    for (int planeIdx = 0; planeIdx < 4; planeIdx++)
    {
        if (SphereInsidePlane(sphere, frustum.frustumPlanes[planeIdx]))
        {
            return false;
        }
    }

    return true;
}

And here is the part of my pixel shader that does the final lighting:

    uint2 location = uint2(input.position.xy);
    uint2 tileID = location / uint2(TILE_SIZE, TILE_SIZE);
    uint numberOfTilesX = 1280 / TILE_SIZE;
    uint tileIndex = tileID.y * numberOfTilesX + tileID.x;

	uint index = entityIndex.index;

	float4 surfaceColor = material[index+0].Sample(basicSampler,input.uv);

	surfaceColor = pow(abs(surfaceColor), 2.2);

	//getting the normal texture
	float3 normalColor = material[index + 1].Sample(basicSampler, input.uv).xyz;
	float3 unpackedNormal = normalColor * 2.0f - 1.0f;

	//orthonormalizing T, B and N using the Gram-Schmidt process
	float3 N = normalize(input.normal);
	float3 T = input.tangent - dot(input.tangent, N) * N;
	T = normalize(T);
	float3 B = normalize(cross(T,N));

	float3x3 TBN = float3x3(T, B, N); //getting the tbn matrix

	//transforming normal from map to world space
	float3 finalNormal = mul(unpackedNormal, TBN);

	//getting the metalness of the pixel
	float3 metalColor = material[index + 3].Sample(basicSampler, input.uv).xyz;

	float3 f0 = float3(0.04f, 0.04f, 0.04f);
	f0 = lerp(f0, surfaceColor.xyz, metalColor);

	//getting the roughness of pixel
	float roughness = material[index + 2].Sample(basicSampler, input.uv).x;
    float3 diffuseColor = surfaceColor.rgb * (1 - metalColor);


	N = finalNormal;
	N = normalize(N); //normalizing the normal
	float3 V = normalize(cameraPosition - input.worldPosition); //view vector
	float3 R = reflect(-V, N); //reflect R over N
	
	float3 Lo = float3(0.0f, 0.0f, 0.0f);
	
	
    float ndotv = saturate(dot(N, V));
	
    float2 ltcUV = float2(roughness, sqrt(1-ndotv));
    ltcUV = ltcUV * LUT_SCALE + LUT_BIAS;

    float4 t1 = LtcLUT.Sample(basicSampler, ltcUV);
    float4 t2 = LtcLUT2.Sample(basicSampler, ltcUV);
    float4 envBRDF = brdfLUT.Sample(basicSampler, float2(ndotv, roughness));
	
    uint offset = tileIndex * 1024;
	
    bool enableSSS = subsurfaceScattering.enableSSS;
    
        for (uint i = 0; i < 1024 && LightIndices[offset + i] != -1; i++)
        {
            uint lightIndex = LightIndices[offset + i];
		
            switch (lights[lightIndex].type)
            {
                case LIGHT_TYPE_DIR:
                    Lo += DirectLightPBR(lights[lightIndex], N, input.worldPosition, cameraPosition,
			roughness, metalColor.r, surfaceColor.xyz, f0);
                    break;
                case LIGHT_TYPE_SPOT:
                    Lo += SpotLightPBR(lights[lightIndex], N, input.worldPosition, cameraPosition,
			roughness, metalColor.r, surfaceColor.xyz, f0);
                    break;
                case LIGHT_TYPE_POINT:
                    Lo += PointLightPBR(lights[lightIndex], N, input.worldPosition, cameraPosition,
			roughness, metalColor.r, surfaceColor.xyz, f0);
                    break;
                case LIGHT_TYPE_AREA_RECT:
                    Lo += RectAreaLightPBR(lights[lightIndex], N, V, input.worldPosition, cameraPosition, roughness, metalColor.x, surfaceColor.rgb, f0, t1, envBRDF, brdfSampler);
                    break;
                case LIGHT_TYPE_AREA_DISK:
                    Lo += DiskAreaLightPBR(lights[lightIndex], N, V, input.worldPosition, cameraPosition, roughness, metalColor.x, surfaceColor.rgb, f0, t1, envBRDF, brdfSampler);
                    break;
            }
        }
 

This topic is closed to new replies.

Advertisement