Hi Jp,
Bruzer and I have spoken a few times since this topic started originally and he has been great in helping me almost figure this thing out. For his sanity and for the good of the larger audience, it's probably best for us to bring the conversation back to this thread, though, so I'll post below what I've worked out so far with his help. Also, the chapter in GPU Pro 1 by Michal Drobot on Quadtree Displacement Mapping is a big help in understanding this, and is what the author of this article based his ray-tracing steps on. I still have some very major issues in my implementation (screenshots below), so I'm hoping that anyone reading over this may be able to help out and call me out on things I've done in a bone-headed way.
You'll notice in my implementation that some of the method arguments are a little different from what's in the book. For example, I pass the full float3 vectors to intersectDepthPlane and some other methods.
Also, I've done some preliminary testing on doing a small (8 or so iterations) linear ray march before doing the hi-z traversal in order to reduce artifacts of immediate intersections and found that it did help, but due to the current state of my shader, I pulled those back out until the basic stuff was working.
I hope this helps, and again, please call out any blatant errors you see in my current implementation attempt, as they clearly exist.
This is the pixel shader in its current state. Note that I'm still trying to get the ray-tracing through the hi-z buffer working, so for now I'm overwriting the cone-tracing output with the equivalent of a cone angle of 0 (i.e., a perfectly smooth/mirror surface).
#include "HiZSSRConstantBuffer.hlsli"
#include "../../LightingModel/PBL/LightUtils.hlsli"
#include "../../ConstantBuffers/PerFrame.hlsli"
#include "../../ShaderConstants.hlsli"
// Interpolated per-pixel input consumed by the pixel shader entry point (main).
struct VertexOut
{
float4 posH : SV_POSITION; // main() truncates posH.xy to integer texel coordinates for Texture2D.Load
float3 viewRay : VIEWRAY; // main() scales this by the linearized depth to obtain the view-space position
float2 tex : TEXCOORD; // main() uses this as the screen-space xy of the pixel being shaded
};
// Samplers and input textures used by the hi-z trace and the cone trace.
SamplerState sampPointClamp : register(s0); // point sampling, clamped borders
SamplerState sampTrilinearClamp : register(s1); // trilinear sampling, clamped borders
Texture2D hiZBuffer : register(t0); // hi-z buffer - all mip levels
Texture2D visibilityBuffer : register(t1); // visibility buffer - all mip levels
Texture2D colorBuffer : register(t2); // convolved color buffer - all mip levels
Texture2D normalBuffer : register(t3); // normal buffer - from g-buffer
Texture2D specularBuffer : register(t4); // specular buffer - from g-buffer (rgb = ior, a = roughness)
// Mip level at which hiZTrace starts, and the level below which its loop terminates.
static const float HIZ_START_LEVEL = 2.0f;
static const float HIZ_STOP_LEVEL = 2.0f;
// Upper clamp applied when hiZTrace climbs to a coarser level.
// NOTE(review): hiZTrace treats cb_mipCount - 1 as the 0-based root level, so this value is one past it - confirm intended.
static const float HIZ_MAX_LEVEL = float(cb_mipCount);
// Offset added to a cell boundary so the ray lands just inside the neighbouring cell (sign is applied in hiZTrace).
static const float2 HIZ_CROSS_EPSILON = float2(texelWidth, texelHeight); // maybe need to be smaller or larger? this is mip level 0 texel size
// Hard cap on the number of hi-z traversal loop iterations.
static const uint MAX_ITERATIONS = 64u;
// Converts a depth-buffer value into linear depth using the projectionA / projectionB
// constants (declared outside this listing - presumably in an included constant buffer).
float linearizeDepth(float depth)
{
    const float denominator = depth - projectionA;
    return projectionB / denominator;
}
///////////////////////////////////////////////////////////////////////////////////////
// Hi-Z ray tracing methods
///////////////////////////////////////////////////////////////////////////////////////
// Cell count hiZTrace uses for its initial cell lookup and first boundary crossing, before the mip traversal loop.
static const float2 hiZSize = cb_screenSize; // not sure if correct - this is mip level 0 size
// Evaluates the parametric ray o + d * t.
// hiZTrace builds o at z == 0 and d with z == 1, so for that caller t is the depth of the returned point.
float3 intersectDepthPlane(float3 o, float3 d, float t)
{
    const float3 travelled = d * t;
    return o + travelled;
}
// Maps a screen-space position to the index of the cell that contains it on a grid of
// cellCount cells. The result is a whole-number index: crossedCellBoundary compares these
// indices for equality and intersectCellBoundary offsets them by 0 or 1, so the fractional
// part has to be dropped with floor.
float2 getCell(float2 ray, float2 cellCount)
{
    const float2 scaled = ray * cellCount;
    return floor(scaled);
}
// Advances the ray (o, d) to the nearest boundary of the given cell in the ray's direction of travel.
//   cellIndex   - whole-number index of the current cell (see getCell)
//   cellCount   - number of cells across the screen at the current level
//   crossStep   - per-axis 0 or 1 as passed by hiZTrace; selects which edge of the cell is targeted
//   crossOffset - small signed offset that pushes the target just past the edge into the next cell
// Returns the point on the ray at the smaller of the two per-axis ray parameters.
float3 intersectCellBoundary(float3 o, float3 d, float2 cellIndex, float2 cellCount, float2 crossStep, float2 crossOffset)
{
    // boundary position in screen space, nudged into the neighbouring cell
    const float2 boundary = (cellIndex + crossStep) / cellCount + crossOffset;

    // ray parameter at which each axis reaches its boundary
    const float2 tPerAxis = (boundary - o.xy) / d.xy;

    // the boundary that is reached first is the one actually crossed
    return intersectDepthPlane(o, d, min(tPerAxis.x, tPerAxis.y));
}
// Point-samples the red channel of the hi-z buffer at the given screen position and mip level.
// rootLevel is accepted for signature compatibility but is not used.
float getMinimumDepthPlane(float2 ray, float level, float rootLevel)
{
    const float4 hiZTexel = hiZBuffer.SampleLevel(sampPointClamp, ray, level);
    return hiZTexel.r;
}
// Returns the number of cells across the screen at the given mip level: cb_screenSize / 2^level.
// rootLevel is accepted for signature compatibility but is not used.
// NOTE(review): the result is not rounded, so it can be fractional when cb_screenSize is not
// divisible by 2^level - confirm this matches the real mip dimensions.
float2 getCellCount(float level, float rootLevel)
{
    float2 divisor = float2(1.0f, 1.0f);
    if(level != 0.0f)
    {
        const float scale = exp2(level);
        divisor = float2(scale, scale);
    }
    return cb_screenSize / divisor;
}
// True when the two cell indices differ in at least one component.
bool crossedCellBoundary(float2 cellIdxOne, float2 cellIdxTwo)
{
    return any(cellIdxOne != cellIdxTwo);
}
// Marches a screen-space ray through the hi-z buffer.
//   p - start point: xy = screen/texture coordinate, z = depth
//   v - screen-space direction of the ray
// Returns the last ray position reached (xy = screen coordinate, z = depth).
//
// The ray is re-parameterised by depth: the direction is scaled so its z is 1 and the origin
// is moved back to z == 0, so intersectDepthPlane(origin, dir, z) yields the point at depth z.
// With HIZ_START_LEVEL equal to HIZ_STOP_LEVEL the loop ends on the first iteration that does
// not cross a cell boundary, or after MAX_ITERATIONS.
// NOTE(review): v.z == 0 divides by zero here; no guard exists in this function.
float3 hiZTrace(float3 p, float3 v)
{
    // 0-based index of the last mip level
    const float rootLevel = float(cb_mipCount) - 1.0f;

    // per-axis direction of travel (+1 / -1), the epsilon that pushes the ray into the next
    // cell, and the 0/1 step that selects which edge of a cell is being crossed
    const float2 travelSign = float2(v.x >= 0.0f ? 1.0f : -1.0f, v.y >= 0.0f ? 1.0f : -1.0f);
    const float2 crossOffset = travelSign * HIZ_CROSS_EPSILON;
    const float2 crossStep = saturate(travelSign);

    // direction with z == 1, and origin moved back along it to z == 0
    const float3 dir = v / v.z;
    const float3 origin = intersectDepthPlane(p, dir, -p.z);

    // leave the starting cell straight away to avoid an immediate self-intersection
    const float2 startCell = getCell(p.xy, hiZSize);
    float3 ray = intersectCellBoundary(origin, dir, startCell, hiZSize, crossStep, crossOffset);

    float level = HIZ_START_LEVEL;
    for(uint iterations = 0u; level >= HIZ_STOP_LEVEL && iterations < MAX_ITERATIONS; ++iterations)
    {
        // minimum depth stored for the cell the ray is currently in
        const float minZ = getMinimumDepthPlane(ray.xy, level, rootLevel);

        // cell containing the ray at this level
        const float2 cellCount = getCellCount(level, rootLevel);
        const float2 cellBefore = getCell(ray.xy, cellCount);

        // only move forward if the ray is still in front of the minimum depth plane
        float3 candidate = intersectDepthPlane(origin, dir, max(ray.z, minZ));
        const float2 cellAfter = getCell(candidate.xy, cellCount);

        if(crossedCellBoundary(cellBefore, cellAfter))
        {
            // the move left the cell: stop at its boundary instead and climb
            // (a net +1 level after the decrement below) to take a larger step next time
            candidate = intersectCellBoundary(origin, dir, cellBefore, cellCount, crossStep, crossOffset);
            level = min(HIZ_MAX_LEVEL, level + 2.0f);
        }

        ray = candidate;

        // descend one level in the hi-z buffer
        level -= 1.0f;
    }

    return ray;
}
///////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////
// Hi-Z cone tracing methods
///////////////////////////////////////////////////////////////////////////////////////
// Converts a Phong specular power into the cone angle (in radians) of the specular lobe.
// For a Phong lobe the cosine of the angle that encloses a fraction (1 - xi) of the lobe is
// xi^(1 / (specularPower + 1)), so the angle is the arc-cosine of that value. Returning the
// cosine of it (as before) produced a value near cos(1) for mirror-like surfaces, where
// the cone angle must approach zero.
float specularPowerToConeAngle(float specularPower)
{
    // based on phong reflection model
    const float xi = 0.244f;
    float exponent = 1.0f / (specularPower + 1.0f);
    /*
    * may need to try clamping very high exponents to 0.0f, test out on mirror surfaces first to gauge
    * return specularPower >= 8192 ? 0.0f : acos(pow(xi, exponent));
    */
    return acos(pow(xi, exponent));
}
// Base length of an isosceles triangle given its height (adjacentLength) and the angle
// coneTheta between the height and one of the equal sides.
float isoscelesTriangleOpposite(float adjacentLength, float coneTheta)
{
    // tan(theta) = opposite / adjacent gives half of the base; doubling gives the full base
    const float baseOverHeight = 2.0f * tan(coneTheta);
    return baseOverHeight * adjacentLength;
}
// Radius of the circle inscribed in an isosceles triangle:
//   r = a * (sqrt(a^2 + 4 h^2) - a) / (4 h)
// where a is the base and h is the height. The height is clamped away from zero in the
// denominator only, to avoid a division by zero.
float isoscelesTriangleInRadius(float a, float h)
{
    const float root = sqrt(a * a + 4.0f * h * h);
    const float safeHeight = max(h, 0.00001f);
    return (a * (root - a)) / (4.0f * safeHeight);
}
// Placeholder cone sample: fetches colour and visibility at samplePos / mipChannel with the
// trilinear sampler and returns (colour * visibility, visibility).
float4 coneSampleWeightedColor(float2 samplePos, float mipChannel)
{
    const float weight = visibilityBuffer.SampleLevel(sampTrilinearClamp, samplePos, mipChannel).r;
    const float3 radiance = colorBuffer.SampleLevel(sampTrilinearClamp, samplePos, mipChannel).rgb;

    float4 result;
    result.rgb = radiance * weight;
    result.a = weight;
    return result;
}
// Height of the next, smaller triangle along the cone: the current height minus the
// diameter of the inscribed circle.
float isoscelesTriangleNextAdjacent(float adjacentLength, float incircleRadius)
{
    const float incircleDiameter = incircleRadius * 2.0f;
    return adjacentLength - incircleDiameter;
}
///////////////////////////////////////////////////////////////////////////////////////
// Pixel shader entry point for hi-z screen-space reflections.
// Steps:
//   1. reconstruct the view-space position and reflection vector for this pixel
//   2. project the reflection into screen space and trace it through the hi-z buffer
//   3. walk a cone (approximated by an isosceles triangle) back along the traced ray,
//      accumulating pre-convolved colour weighted by visibility
//   4. modulate the result by the Fresnel term
// While the hi-z trace is being debugged, the cone-traced colour from step 3 is overwritten
// by a single point sample at the ray hit (equivalent to a cone angle of 0).
// Returns the reflected colour in rgb and 1 in alpha, or all zeros where no normal was written.
float4 main(VertexOut pIn) : SV_TARGET
{
    /*
    * Ray(t) = O + D> * t
    * D> = V>SS / V>SSz
    * O = PSS + D> * -PSSz
    * V>SS = P'SS - PSS
    * PSS = {texcoord.x, texcoord.y, depth} // screen/texture coordinate and depth
    * PCS = (PVS + reflect(V>VS, N>VS)) * MPROJ
    * P'SS = (PCS / PCSw) * [0.5f, -0.5f] + [0.5f, 0.5f]
    */
    int3 loadIndices = int3(pIn.posH.xy, 0);
    float depth = hiZBuffer.Load(loadIndices).r;
    // PSS
    float3 positionSS = float3(pIn.tex, depth);
    float linearDepth = linearizeDepth(depth);
    // PVS
    float3 positionVS = pIn.viewRay * linearDepth;
    // V>VS - since calculations are in view-space, we can just normalize the position to point at it
    float3 toPositionVS = normalize(positionVS);
    // N>VS
    float3 normalVS = normalBuffer.Load(loadIndices).rgb;
    // skip pixels with no normal written; test the squared length rather than the component
    // sum, because a valid normal such as normalize(1, -1, 0) also sums to zero
    if(dot(normalVS, normalVS) == 0.0f)
    {
        return float4(0.0f, 0.0f, 0.0f, 0.0f);
    }
    float3 reflectVS = reflect(toPositionVS, normalVS);
    float4 positionPrimeSS4 = mul(float4(positionVS + reflectVS, 1.0f), projectionMatrix);
    float3 positionPrimeSS = (positionPrimeSS4.xyz / positionPrimeSS4.w);
    positionPrimeSS.x = positionPrimeSS.x * 0.5f + 0.5f;
    positionPrimeSS.y = positionPrimeSS.y * -0.5f + 0.5f;
    // V>SS - screen space reflection vector
    float3 reflectSS = positionPrimeSS - positionSS;
    // calculate the ray
    float3 raySS = hiZTrace(positionSS, reflectSS);
    // perform cone-tracing steps
    // get specular power from roughness
    float4 specularAll = specularBuffer.Load(loadIndices);
    float specularPower = roughnessToSpecularPower(specularAll.a);
    // convert to cone angle (maximum extent of the specular lobe aperture)
    // NOTE(review): isoscelesTriangleOpposite treats its angle as the half-apex angle of the
    // triangle - confirm whether this value should be halved before use
    float coneTheta = specularPowerToConeAngle(specularPower);
    // P1 = positionSS, P2 = raySS, adjacent length = ||P2 - P1||
    // need to check if this is correct calculation or not
    float2 deltaP = raySS.xy - positionSS.xy;
    float adjacentLength = length(deltaP);
    // need to check if this is correct calculation or not
    float2 adjacentUnit = normalize(deltaP);
    float4 totalColor = float4(0.0f, 0.0f, 0.0f, 0.0f);
    // cone-tracing using an isosceles triangle to approximate a cone in screen space
    for(int i = 0; i < 7; ++i)
    {
        // intersection length is the adjacent side, get the opposite side using trig
        float oppositeLength = isoscelesTriangleOpposite(adjacentLength, coneTheta);
        // calculate in-radius of the isosceles triangle; the formula takes (base, height),
        // i.e. the opposite side first and the adjacent side second
        float incircleSize = isoscelesTriangleInRadius(oppositeLength, adjacentLength);
        // get the sample position in screen space
        float2 samplePos = pIn.tex.xy + adjacentUnit * (adjacentLength - incircleSize);
        // convert the in-radius into screen size then check what power N to raise 2 to reach it - that power N becomes mip level to sample from
        float mipChannel = log2(incircleSize * max(cb_screenSize.x, cb_screenSize.y)); // try this with min instead of max
        /*
        * Read color and accumulate it using trilinear filtering and weight it.
        * Uses pre-convolved image (color buffer), pre-integrated transparency (visibility buffer),
        * and hi-z buffer (hiZBuffer).
        * Checks if cone sphere is below, between, or above the hi-z minimum and maximum and weights
        * it together with transparency (visibility).
        * Visibility is accumulated in the alpha channel. Break if visibility is 100% or greater (>= 1.0f).
        */
        totalColor += coneSampleWeightedColor(samplePos, mipChannel);
        if(totalColor.a >= 1.0f)
        {
            break;
        }
        adjacentLength = isoscelesTriangleNextAdjacent(adjacentLength, incircleSize);
    }
    ////////////
    // fake implementation while testing - overwrites entire cone tracing loop - equivalent of cone angle being 0.0f
    totalColor.rgb = colorBuffer.SampleLevel(sampPointClamp, raySS.xy, 0.0f).rgb;
    // end fake
    ////////////
    float3 toEye = -toPositionVS;
    // test this with saturate instead of abs, too - see which gives best result
    float3 specular = calculateFresnelTerm(specularAll.rgb, abs(dot(normalVS, toEye))) * RB_1DIVPI;
    return float4(totalColor.rgb * specular, 1.0f);
}
Screenshots:
(EDIT: screenshots didn't show up so linking to Dropbox images instead)
https://www.dropbox.com/s/1852z89kuj7hnn4/screenshot_0.png
https://www.dropbox.com/s/rx8w8da2qazg112/screenshot_1.png
https://www.dropbox.com/s/f3z4sxf0cjfz29r/screenshot_2.png
https://www.dropbox.com/s/i8k4nuw25byx4jv/screenshot_3.png