3Dmigoto now open-source...
  84 / 143    
Working screen space reflection shader attached - rename it to .txt. Like many screen space reflections, these do not work correctly in stereo - they are rendered a little above the surface. It would be interesting to study these further, but this may be a little more complicated in this case because the engine is applying a stereo correction and we'd need to check where (could be in the projection matrix. Edit: Confirmed - there is a stereo correction in the projection, view-projection and inverse-projection matrices). I reverted a chunk of code back to the paper's implementation - I wasn't sure if you were experimenting or what there, but I figured it was better to stick to something known to work. diff: [code] --- orig/ShaderFixes/3ce369598f34925e-ps_replace.txt 2017-01-16 02:50:04.000000000 +1100 +++ 3ce369598f34925e-ps_replace.txt 2017-01-17 02:33:30.412258200 +1100 @@ -110,13 +110,13 @@ 0.0f, 0.0f, 0.0f, 1.0f}; - static const float2 cb_depthBufferSize = float2(1920,1080); // dimensions of the z-buffer + static float2 cb_depthBufferSize; // dimensions of the z-buffer static const float cb_zThickness = 1; // thickness to ascribe to each pixel in the depth buffer static const float cb_nearPlaneZ = 0.1; // the camera's near z plane static const float cb_stride = 1; // Step in horizontal or vertical pixels between samples. This is a float // because integer math is slow on GPUs, but should be set to an integer >= 1. - static const float cb_maxSteps = 5; // Maximum number of iterations. Higher gives better images but may be slow. + static float cb_maxSteps; // Maximum number of iterations. Higher gives better images but may be slow. static const float cb_maxDistance = 1000; // Maximum camera-space distance to trace before returning a miss. static const float cb_strideZCutoff = 1; // More distant pixels are smaller in screen space. 
This value tells at what point to // start relaxing the stride to give higher quality reflections for objects far from @@ -134,20 +134,11 @@ -float depthLinear(float depth) -{ - float f=cb_maxDistance; - float n = cb_nearPlaneZ; - float z = (2 * n) / (f + n - depth * (f - n)); - return z ; -} float linearizeDepth(float depth) { - float f=cb_maxDistance; - float n = cb_nearPlaneZ; - float z = (f * n) / (f - depth * (f - n)); - return z ; + float4 tmp = mul(inverseProj, float4(0, 0, depth, 1)); + return mul(projection, tmp / tmp.w).w; } @@ -206,18 +197,16 @@ // Camera space location of the ray hit out float3 hitPoint) { - //csDir.z = csDir.z * 0.5 +0.5; // Clip to the near plane - float rayLength = ((csOrig.z + csDir.z * cb_maxDistance) < cb_nearPlaneZ) ? - (cb_nearPlaneZ - csOrig.z) / csDir.z : cb_maxDistance; + float rayLength = ((csOrig.z + csDir.z * cb_maxDistance) > -cb_nearPlaneZ) ? // Changed < to > and negated near to match the paper + (-cb_nearPlaneZ - csOrig.z) / csDir.z : cb_maxDistance; float3 csEndPoint = csOrig + csDir * rayLength; // Project into homogeneous clip space - float4 H0 = mul(float4(csOrig, 1.0f), viewProjection); - H0.xy *= cb_depthBufferSize; - float4 H1 = mul(float4(csEndPoint, 1.0f), viewProjection); - H1.xy *= cb_depthBufferSize; + float4 H0 = mul(projection, float4(csOrig, 1.0f)); + float4 H1 = mul(projection, float4(csEndPoint, 1.0f)); + float k0 = 1.0f / H0.w; float k1 = 1.0f / H1.w; @@ -228,6 +217,11 @@ // Screen-space endpoints float2 P0 = H0.xy * k0; float2 P1 = H1.xy * k1; + + // Scale to pixels: + P0 = (P0 * float2(0.5, -0.5) + 0.5) * cb_depthBufferSize; + P1 = (P1 * float2(0.5, -0.5) + 0.5) * cb_depthBufferSize; + // If the line is degenerate, make it cover at least one pixel // to avoid handling zero-pixel extent as a special case later @@ -265,47 +259,44 @@ P0 += dP * jitter; Q0 += dQ * jitter; k0 += dk * jitter; - - // Slide P from P0 to P1, (now-homogeneous) Q from Q0 to Q1, k from k0 to k1 - float4 PQk = float4(P0, Q0.z, 
k0); - float4 dPQk = float4(dP, dQ.z, dk); - float3 Q = Q0; - - // Adjust end condition for iteration direction - float end = P1.x * stepDir; - - float stepCount = 0.0f; + + + float prevZMaxEstimate = csOrig.z; - float rayZMin = prevZMaxEstimate; - float rayZMax = prevZMaxEstimate; - float sceneZMax = rayZMax + 100.0f; - for(; - ((PQk.x * stepDir) <= end) && (stepCount < cb_maxSteps) && - !intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax) && - (sceneZMax != 0.0f); - ++stepCount) - { - rayZMin = prevZMaxEstimate; - rayZMax = (dPQk.z * 0.5f + PQk.z) / (dPQk.w * 0.5f + PQk.w); + + // Slide P from P0 to P1, (now-homogeneous) Q from Q0 to Q1, k from k0 to k1 + float3 Q = Q0; float k = k0, stepCount = 0, end = P1.x * stepDir; + for (float2 P = P0; + ((P.x * stepDir) <= end) && (stepCount < cb_maxSteps); + P += dP, Q.z += dQ.z, k += dk, stepCount += 1) { + // Project back from homogeneous to view space + hitPixel = permute ? P.yx : P; + + // The depth range that the ray covers within this loop iteration. + // Assume that the ray is moving in increasing z and swap if backwards. + float rayZMin = prevZMaxEstimate; + // Compute the value at 1/2 pixel into the future + float rayZMax = (dQ.z * 0.5 + Q.z) / (dk * 0.5 + k); + prevZMaxEstimate = rayZMax; if(rayZMin > rayZMax) { swap(rayZMin, rayZMax); } - hitPixel = permute ? 
PQk.yx : PQk.xy; - // You may need hitPixel.y = depthBufferSize.y - hitPixel.y; here if your vertical axis - // is different than ours in screen space - //hitPixel.y = cb_depthBufferSize.y - hitPixel.y; - sceneZMax = linearDepthTexelFetch(int2(hitPixel)); - - PQk += dPQk; + // Camera-space z of the background + float sceneZMax = linearDepthTexelFetch(int2(hitPixel)); + + float sceneZMin = sceneZMax - cb_zThickness; + + if (((rayZMax >= sceneZMin) && (rayZMin <= sceneZMax)) || (sceneZMax == 0)) + break; } // Advance Q based on the number of steps Q.xy += dQ.xy * stepCount; - hitPoint = Q * (1.0f / PQk.w); - return intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax); + hitPoint = Q / k; + return all(abs(hitPixel - (cb_depthBufferSize * 0.5)) <= cb_depthBufferSize * 0.5); } @@ -326,28 +317,25 @@ float4 v8 : TEXCOORD6, linear centroid float4 v9 : TEXCOORD7, out float4 o0 : SV_Target0, - float4 v10 : TEXCOORD8, //viewPosition - float3 v11 : TEXCOORD9, //viewNormal - float3 v12 : TEXCOORD10) //csPos + float4 viewPosition : TEXCOORD8, + float3 normalVS : TEXCOORD9, + float3 csPosition : TEXCOORD10) { float4 r0,r1,r2,r3,r4,r5; uint4 bitmask, uiDest; float4 fDest; + DepthBuffer.GetDimensions(cb_depthBufferSize.x, cb_depthBufferSize.y); + cb_maxSteps = cb_depthBufferSize.y; - float4 viewPosition = v10; - float3 viewNormal = v11; - float3 csPosition = v12; - float2 hitPixel = float2(0.0f, 0.0f); float3 hitPoint = float3(0.0f, 0.0f, 0.0f); int3 loadIndices = int3(v0.xy, 0); - float3 normalVS = viewNormal; float depth = DepthBuffer.Load(loadIndices); - float3 rayOriginVS = csPosition * linearizeDepth(depth); + float3 rayOriginVS = viewPosition * linearizeDepth(depth); @@ -367,7 +355,7 @@ //float jitter = cb_stride > 1.0f ? 
float(int(v0.x + v0.y) & 1) * 0.5f : 0.0f; float jitter = 0; // perform ray tracing - true if hit found, false otherwise - bool intersection = traceScreenSpaceRay(rayOriginVS, rayDirectionVS, jitter, hitPixel, hitPoint); + bool intersection = traceScreenSpaceRay(-rayOriginVS, -rayDirectionVS, jitter, hitPixel, hitPoint); depth = DepthBuffer.Load(int3(hitPixel, 0)); @@ -375,8 +363,7 @@ // move hit pixel from pixel position to UVs //hitPixel *= float2(1920,1080); - o0.xyz = HDRTex.Sample(HDRTex_s, hitPixel); - //o0.xyz = HDRTex.Load(float3(hitPixel, 0)); + o0.xyz = HDRTex.Load(float3(hitPixel, 0)); o0.w = 1; return; [/code]
Working screen space reflection shader attached - rename it to .txt.

Like many screen space reflections, these do not work correctly in stereo - they are rendered a little above the surface. It would be interesting to study these further, but this may be a little more complicated in this case because the engine is applying a stereo correction and we'd need to check where (could be in the projection matrix. Edit: Confirmed - there is a stereo correction in the projection, view-projection and inverse-projection matrices).

I reverted a chunk of code back to the paper's implementation - I wasn't sure if you were experimenting or what there, but I figured it was better to stick to something known to work.

diff:
--- orig/ShaderFixes/3ce369598f34925e-ps_replace.txt	2017-01-16 02:50:04.000000000 +1100
+++ 3ce369598f34925e-ps_replace.txt 2017-01-17 02:33:30.412258200 +1100
@@ -110,13 +110,13 @@
0.0f, 0.0f, 0.0f, 1.0f};


- static const float2 cb_depthBufferSize = float2(1920,1080); // dimensions of the z-buffer
+ static float2 cb_depthBufferSize; // dimensions of the z-buffer
static const float cb_zThickness = 1; // thickness to ascribe to each pixel in the depth buffer
static const float cb_nearPlaneZ = 0.1; // the camera's near z plane

static const float cb_stride = 1; // Step in horizontal or vertical pixels between samples. This is a float
// because integer math is slow on GPUs, but should be set to an integer >= 1.
- static const float cb_maxSteps = 5; // Maximum number of iterations. Higher gives better images but may be slow.
+ static float cb_maxSteps; // Maximum number of iterations. Higher gives better images but may be slow.
static const float cb_maxDistance = 1000; // Maximum camera-space distance to trace before returning a miss.
static const float cb_strideZCutoff = 1; // More distant pixels are smaller in screen space. This value tells at what point to
// start relaxing the stride to give higher quality reflections for objects far from
@@ -134,20 +134,11 @@



-float depthLinear(float depth)
-{
- float f=cb_maxDistance;
- float n = cb_nearPlaneZ;
- float z = (2 * n) / (f + n - depth * (f - n));
- return z ;
-}

float linearizeDepth(float depth)
{
- float f=cb_maxDistance;
- float n = cb_nearPlaneZ;
- float z = (f * n) / (f - depth * (f - n));
- return z ;
+ float4 tmp = mul(inverseProj, float4(0, 0, depth, 1));
+ return mul(projection, tmp / tmp.w).w;
}


@@ -206,18 +197,16 @@
// Camera space location of the ray hit
out float3 hitPoint)
{
- //csDir.z = csDir.z * 0.5 +0.5;
// Clip to the near plane
- float rayLength = ((csOrig.z + csDir.z * cb_maxDistance) < cb_nearPlaneZ) ?
- (cb_nearPlaneZ - csOrig.z) / csDir.z : cb_maxDistance;
+ float rayLength = ((csOrig.z + csDir.z * cb_maxDistance) > -cb_nearPlaneZ) ? // Changed < to > and negated near to match the paper
+ (-cb_nearPlaneZ - csOrig.z) / csDir.z : cb_maxDistance;
float3 csEndPoint = csOrig + csDir * rayLength;


// Project into homogeneous clip space
- float4 H0 = mul(float4(csOrig, 1.0f), viewProjection);
- H0.xy *= cb_depthBufferSize;
- float4 H1 = mul(float4(csEndPoint, 1.0f), viewProjection);
- H1.xy *= cb_depthBufferSize;
+ float4 H0 = mul(projection, float4(csOrig, 1.0f));
+ float4 H1 = mul(projection, float4(csEndPoint, 1.0f));
+
float k0 = 1.0f / H0.w;
float k1 = 1.0f / H1.w;

@@ -228,6 +217,11 @@
// Screen-space endpoints
float2 P0 = H0.xy * k0;
float2 P1 = H1.xy * k1;
+
+ // Scale to pixels:
+ P0 = (P0 * float2(0.5, -0.5) + 0.5) * cb_depthBufferSize;
+ P1 = (P1 * float2(0.5, -0.5) + 0.5) * cb_depthBufferSize;
+

// If the line is degenerate, make it cover at least one pixel
// to avoid handling zero-pixel extent as a special case later
@@ -265,47 +259,44 @@
P0 += dP * jitter;
Q0 += dQ * jitter;
k0 += dk * jitter;
-
- // Slide P from P0 to P1, (now-homogeneous) Q from Q0 to Q1, k from k0 to k1
- float4 PQk = float4(P0, Q0.z, k0);
- float4 dPQk = float4(dP, dQ.z, dk);
- float3 Q = Q0;
-
- // Adjust end condition for iteration direction
- float end = P1.x * stepDir;
-
- float stepCount = 0.0f;
+
+
+
float prevZMaxEstimate = csOrig.z;
- float rayZMin = prevZMaxEstimate;
- float rayZMax = prevZMaxEstimate;
- float sceneZMax = rayZMax + 100.0f;
- for(;
- ((PQk.x * stepDir) <= end) && (stepCount < cb_maxSteps) &&
- !intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax) &&
- (sceneZMax != 0.0f);
- ++stepCount)
- {
- rayZMin = prevZMaxEstimate;
- rayZMax = (dPQk.z * 0.5f + PQk.z) / (dPQk.w * 0.5f + PQk.w);
+
+ // Slide P from P0 to P1, (now-homogeneous) Q from Q0 to Q1, k from k0 to k1
+ float3 Q = Q0; float k = k0, stepCount = 0, end = P1.x * stepDir;
+ for (float2 P = P0;
+ ((P.x * stepDir) <= end) && (stepCount < cb_maxSteps);
+ P += dP, Q.z += dQ.z, k += dk, stepCount += 1) {
+ // Project back from homogeneous to view space
+ hitPixel = permute ? P.yx : P;
+
+ // The depth range that the ray covers within this loop iteration.
+ // Assume that the ray is moving in increasing z and swap if backwards.
+ float rayZMin = prevZMaxEstimate;
+ // Compute the value at 1/2 pixel into the future
+ float rayZMax = (dQ.z * 0.5 + Q.z) / (dk * 0.5 + k);
+
prevZMaxEstimate = rayZMax;
if(rayZMin > rayZMax)
{
swap(rayZMin, rayZMax);
}

- hitPixel = permute ? PQk.yx : PQk.xy;
- // You may need hitPixel.y = depthBufferSize.y - hitPixel.y; here if your vertical axis
- // is different than ours in screen space
- //hitPixel.y = cb_depthBufferSize.y - hitPixel.y;
- sceneZMax = linearDepthTexelFetch(int2(hitPixel));
-
- PQk += dPQk;
+ // Camera-space z of the background
+ float sceneZMax = linearDepthTexelFetch(int2(hitPixel));
+
+ float sceneZMin = sceneZMax - cb_zThickness;
+
+ if (((rayZMax >= sceneZMin) && (rayZMin <= sceneZMax)) || (sceneZMax == 0))
+ break;
}

// Advance Q based on the number of steps
Q.xy += dQ.xy * stepCount;
- hitPoint = Q * (1.0f / PQk.w);
- return intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax);
+ hitPoint = Q / k;
+ return all(abs(hitPixel - (cb_depthBufferSize * 0.5)) <= cb_depthBufferSize * 0.5);
}


@@ -326,28 +317,25 @@
float4 v8 : TEXCOORD6,
linear centroid float4 v9 : TEXCOORD7,
out float4 o0 : SV_Target0,
- float4 v10 : TEXCOORD8, //viewPosition
- float3 v11 : TEXCOORD9, //viewNormal
- float3 v12 : TEXCOORD10) //csPos
+ float4 viewPosition : TEXCOORD8,
+ float3 normalVS : TEXCOORD9,
+ float3 csPosition : TEXCOORD10)
{
float4 r0,r1,r2,r3,r4,r5;
uint4 bitmask, uiDest;
float4 fDest;

+ DepthBuffer.GetDimensions(cb_depthBufferSize.x, cb_depthBufferSize.y);
+ cb_maxSteps = cb_depthBufferSize.y;


- float4 viewPosition = v10;
- float3 viewNormal = v11;
- float3 csPosition = v12;
-
float2 hitPixel = float2(0.0f, 0.0f);
float3 hitPoint = float3(0.0f, 0.0f, 0.0f);

int3 loadIndices = int3(v0.xy, 0);
- float3 normalVS = viewNormal;

float depth = DepthBuffer.Load(loadIndices);
- float3 rayOriginVS = csPosition * linearizeDepth(depth);
+ float3 rayOriginVS = viewPosition * linearizeDepth(depth);



@@ -367,7 +355,7 @@
//float jitter = cb_stride > 1.0f ? float(int(v0.x + v0.y) & 1) * 0.5f : 0.0f;
float jitter = 0;
// perform ray tracing - true if hit found, false otherwise
- bool intersection = traceScreenSpaceRay(rayOriginVS, rayDirectionVS, jitter, hitPixel, hitPoint);
+ bool intersection = traceScreenSpaceRay(-rayOriginVS, -rayDirectionVS, jitter, hitPixel, hitPoint);


depth = DepthBuffer.Load(int3(hitPixel, 0));
@@ -375,8 +363,7 @@
// move hit pixel from pixel position to UVs
//hitPixel *= float2(1920,1080);

- o0.xyz = HDRTex.Sample(HDRTex_s, hitPixel);
- //o0.xyz = HDRTex.Load(float3(hitPixel, 0));
+ o0.xyz = HDRTex.Load(float3(hitPixel, 0));
o0.w = 1;
return;

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/16/2017 03:37 PM   
Wow, that's a lot of precious info. I'll respond to it in the evening. You said it's a glsl, but the last link I provided is actually a hlsl adaptation of the same paper, so there are some differences. I took it from here: http://roar11.com/2015/07/screen-space-glossy-reflections/ The thing I'm certainly wrong here is an interpretation of the input vectors csOrig and csDir and the tranform matrix from inside the function viewToTextureSpaceMatrix. Also I wrongly set up the constant buffer with the variables in the second link, so I made them all static constants in the 3rd. You said it's tracing is wrong, so it makes me wonder if you've tried only the first link as the 2 and 3rd should trace in a linear manner.
Wow, that's a lot of precious info. I'll respond to it in the evening. You said it's a glsl, but the last link I provided is actually a hlsl adaptation of the same paper, so there are some differences. I took it from here:


http://roar11.com/2015/07/screen-space-glossy-reflections/


The thing I'm certainly wrong about here is the interpretation of the input vectors csOrig and csDir and the transform matrix from inside the function
viewToTextureSpaceMatrix.

Also I wrongly set up the constant buffer with the variables in the second link, so I made them all static constants in the 3rd.

You said its tracing is wrong, so it makes me wonder if you've tried only the first link, as the 2nd and 3rd should trace in a linear manner.

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/16/2017 04:07 PM   
I guess you posted before refreshing and reading the last post?
I guess you posted before refreshing and reading the last post?

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/16/2017 04:51 PM   
Yes, indeed, I didn't refresh, I was busy investigating broken locks on our garages from the last night. Can't wait to try it!!! My daughter is occupying my pc atm, And it's not easy to kick her out when she is on an online gaming spree :)
Yes, indeed, I didn't refresh, I was busy investigating broken locks on our garages from last night. Can't wait to try it!!! My daughter is occupying my PC atm, and it's not easy to kick her out when she is on an online gaming spree :)

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/16/2017 04:58 PM   
Sweet - attached is a version that is accurate in stereo 3D (so long as you get the depth and accumulation buffers for the correct eye) :) The only change I made: [code] --- 3ce369598f34925e-ps_replace.txt.badstereo 2017-01-17 04:15:05.641044900 +1100 +++ 3ce369598f34925e-ps_replace.txt 2017-01-17 04:16:55.926193300 +1100 @@ -325,6 +325,11 @@ uint4 bitmask, uiDest; float4 fDest; + float4 stereo = StereoParams.Load(0); + float separation = stereo.x * (leftEye ? -1 : 1); + float convergence = stereo.y; + viewPosition.x += separation * convergence * inverseProj._m00; + DepthBuffer.GetDimensions(cb_depthBufferSize.x, cb_depthBufferSize.y); cb_maxSteps = cb_depthBufferSize.y; [/code] I'm just adjusting the viewPosition to take into account the stereo camera position - this sort of adjustment can move reflections from surface depth to correct depth in a *lot* of games (actually, the car windows in this game could probably use it). Normally I would try to apply it to the game's camera position (and more often than not that would be in world-space coordinates), but in this case that is not used as an input to the reflections so I had to keep looking to find the correct thing to adjust. This is pretty significant - the main reason I was helping you out with these was because I wanted to see why these were broken in so many games and how to fix them properly :) For other shaderhackers playing along at home - since this engine has a stereo correction built into the projection matrix, an adjustment to anything that uses the projection matrix would also normally be necessary, but here the engine takes care of that part for us. Also, note that the leftEye here is specific to this engine, and would not normally be required for games using 3D Vision Automatic. Also, for anyone looking at these, be sure to take a look at the two patterns of screen space reflections I managed to accurately fix in WATCH_DOGS2 recently, which work a little different to these. 
Edit: Deleted and reattached the shader - I think I had the wrong version attached.
Sweet - attached is a version that is accurate in stereo 3D (so long as you get the depth and accumulation buffers for the correct eye) :)

The only change I made:
--- 3ce369598f34925e-ps_replace.txt.badstereo	2017-01-17 04:15:05.641044900 +1100
+++ 3ce369598f34925e-ps_replace.txt 2017-01-17 04:16:55.926193300 +1100
@@ -325,6 +325,11 @@
uint4 bitmask, uiDest;
float4 fDest;

+ float4 stereo = StereoParams.Load(0);
+ float separation = stereo.x * (leftEye ? -1 : 1);
+ float convergence = stereo.y;
+ viewPosition.x += separation * convergence * inverseProj._m00;
+
DepthBuffer.GetDimensions(cb_depthBufferSize.x, cb_depthBufferSize.y);
cb_maxSteps = cb_depthBufferSize.y;


I'm just adjusting the viewPosition to take into account the stereo camera position - this sort of adjustment can move reflections from surface depth to correct depth in a *lot* of games (actually, the car windows in this game could probably use it). Normally I would try to apply it to the game's camera position (and more often than not that would be in world-space coordinates), but in this case that is not used as an input to the reflections so I had to keep looking to find the correct thing to adjust.

This is pretty significant - the main reason I was helping you out with these was because I wanted to see why these were broken in so many games and how to fix them properly :) For other shaderhackers playing along at home - since this engine has a stereo correction built into the projection matrix, an adjustment to anything that uses the projection matrix would also normally be necessary, but here the engine takes care of that part for us. Also, note that the leftEye here is specific to this engine, and would not normally be required for games using 3D Vision Automatic. Also, for anyone looking at these, be sure to take a look at the two patterns of screen space reflections I managed to accurately fix in WATCH_DOGS2 recently, which work a little different to these.


Edit: Deleted and reattached the shader - I think I had the wrong version attached.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/16/2017 05:27 PM   
[quote="DarkStarSword"]This might work better for linear depth (you can optimise this if you wanted, but it's a starting point showing how to turn a Z buffer value into a linear depth using the projection matrix that will work in a lot of games... except those that do reverse Z projection which will get a divide by zero if you try this... and of course the usual quirk that the arguments to mul() may be swapped around in some games): [/quote] I was wondering for a while where can I get the clipping distances from, good to know. [quote="DarkStarSword"]Sweet - attached is a version that is accurate in stereo 3D (so long as you get the depth and accumulation buffers for the correct eye) :) [/quote] I wasn't expecting that to happen so quickly :) I sold my 3DTV so I have no means to test it. How does it look in 3D? Is the resource I'm copying from changing or staying the same? That game as you can see uses 2 independent reflection textures for each eye. Do I need to do the same? As for the artiffacts, there are some which will not be visible after adding ripples (it's just original ripples with mirror finish atm, still need to combine it with my water shader) [img]https://s27.postimg.org/b17l5yalf/dirt3_game_2017_01_16_23_59_01_210_crop.png[/img] and some other more prominent, like the shift between db and the texture in Y when the camera is above the lake surface. [img]https://s27.postimg.org/63u0ku8mb/dirt3_game_2017_01_17_00_06_15_991_crop.png[/img]
DarkStarSword said:This might work better for linear depth (you can optimise this if you wanted, but it's a starting point showing how to turn a Z buffer value into a linear depth using the projection matrix that will work in a lot of games... except those that do reverse Z projection which will get a divide by zero if you try this... and of course the usual quirk that the arguments to mul() may be swapped around in some games):

I was wondering for a while where can I get the clipping distances from, good to know.

DarkStarSword said:Sweet - attached is a version that is accurate in stereo 3D (so long as you get the depth and accumulation buffers for the correct eye) :)

I wasn't expecting that to happen so quickly :) I sold my 3DTV so I have no means to test it. How does it look in 3D? Is the resource I'm copying from changing or staying the same? That game as you can see uses 2 independent reflection textures for each eye. Do I need to do the same?

As for the artifacts, there are some which will not be visible after adding ripples (it's just the original ripples with a mirror finish atm; I still need to combine it with my water shader)

Image

and some other more prominent, like the shift between db and the texture in Y when the camera is above the lake surface.

Image

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/17/2017 12:27 AM   
[quote="DarkStarSword"]BTW you are fetching the depth buffer after drawing part of the water... In 2D that means you will be using a frame old depth buffer, but the way the engine works in 3D that means you will be using the depth buffer from the other eye. Is there a reason you need the depth buffer from that point? If you need the depth of the water surface I would think it would be easier to just pass that from the vertex shader to the pixel shader since that is what you are drawing. Just copying the currently assigned depth buffer seems to work well enough for me... though I guess you wanted to reduce the number of copy operations?[/quote] No, I need a complete db with all the partial water shader depth passes to sample from, the one from the current water shader contains only a partial water depth information, the rest of the screen contains data from the ground shader so the bottom of the lake gets sampled in the SSR. I use the db from the last water shader in the rendering queue. I wish to avoid the 1 frame delay though. What do you suggest in that case? [quote="DarkStarSword"]This might work better for linear depth (you can optimise this if you wanted, but it's a starting point showing how to turn a Z buffer value into a linear depth using the projection matrix that will work in a lot of games... except those that do reverse Z projection which will get a divide by zero if you try this... and of course the usual quirk that the arguments to mul() may be swapped around in some games): [code] float linearizeDepth(float depth) { float4 tmp = mul(inverseProj, float4(0, 0, depth, 1)); return mul(projection, tmp / tmp.w).w; } [/code] Alternatively, check what calculations the game does and copy that (light shaders are usually a good place to look). In other news it looks like this game implements the nvidia formula (or equivalent), so convergence matches what we know it to mean. 
Separation will always be positive, but the leftEye value in the CameraParamsConstantBuffer can be used to negate it for the left eye.[/quote] Which values from that matrix contain far and near values? Do you know? [quote="DarkStarSword"]You shouldn't need max_copies_per_frame on ResourceDepthBuffer2 at all - the shader you are copying that from is only run once per eye. Hmmm... Why do you have max_executions_per_frame in a resource section? That's only for custom shaders... I think... I really need to go and document some of my features better, and keep working to make the Ini Parser warn about more problems ;-)[/quote] Don't worry about it. It's just the remainings of the illogical attempts to try to resolve msaa before i knew what was causing the whole screen erase. [quote="DarkStarSword"] I also have plans to add conditional logic and expressions to 3DMigoto in the near future, which could potentially be used to implement your own copy limit and reset it when encountering that shader...[/quote] This sounds interesting. I smell a lot of potential.
DarkStarSword said:BTW you are fetching the depth buffer after drawing part of the water... In 2D that means you will be using a frame old depth buffer, but the way the engine works in 3D that means you will be using the depth buffer from the other eye. Is there a reason you need the depth buffer from that point? If you need the depth of the water surface I would think it would be easier to just pass that from the vertex shader to the pixel shader since that is what you are drawing. Just copying the currently assigned depth buffer seems to work well enough for me... though I guess you wanted to reduce the number of copy operations?

No, I need a complete db with all the partial water shader depth passes to sample from; the one from the current water shader contains only partial water depth information, and the rest of the screen contains data from the ground shader, so the bottom of the lake gets sampled in the SSR. I use the db from the last water shader in the rendering queue. I wish to avoid the 1 frame delay though. What do you suggest in that case?

DarkStarSword said:This might work better for linear depth (you can optimise this if you wanted, but it's a starting point showing how to turn a Z buffer value into a linear depth using the projection matrix that will work in a lot of games... except those that do reverse Z projection which will get a divide by zero if you try this... and of course the usual quirk that the arguments to mul() may be swapped around in some games):

float linearizeDepth(float depth)
{
float4 tmp = mul(inverseProj, float4(0, 0, depth, 1));
return mul(projection, tmp / tmp.w).w;
}


Alternatively, check what calculations the game does and copy that (light shaders are usually a good place to look).

In other news it looks like this game implements the nvidia formula (or equivalent), so convergence matches what we know it to mean. Separation will always be positive, but the leftEye value in the CameraParamsConstantBuffer can be used to negate it for the left eye.

Which values from that matrix contain far and near values? Do you know?

DarkStarSword said:You shouldn't need max_copies_per_frame on ResourceDepthBuffer2 at all - the shader you are copying that from is only run once per eye.

Hmmm... Why do you have max_executions_per_frame in a resource section? That's only for custom shaders...


I think... I really need to go and document some of my features better, and keep working to make the Ini Parser warn about more problems ;-)

Don't worry about it. It's just the remnants of the illogical attempts to try to resolve MSAA before I knew what was causing the whole screen erase.

DarkStarSword said: I also have plans to add conditional logic and expressions to 3DMigoto in the near future, which could potentially be used to implement your own copy limit and reset it when encountering that shader...

This sounds interesting. I smell a lot of potential.

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/17/2017 11:02 AM   
[quote="Oomek"]No, I need a complete db with all the partial water shader depth passes to sample from, the one from the current water shader contains only a partial water depth information, the rest of the screen contains data from the ground shader so the bottom of the lake gets sampled in the SSR. I use the db from the last water shader in the rendering queue. I wish to avoid the 1 frame delay though. What do you suggest in that case?[/quote]I think you can probably pass the output W coordinate from the vertex shader to the pixel shader and use that for the depth of the current pixel, and the depth buffer for anything else as it won't matter if it is missing the water depth (unless you need the depth of other water pixels?). Alternatively, at least in the case of the near shore pixel shader I was looking at the depth buffer seemed to already contain the depth of the water surface as I was just able to do 'ps-t100 = oD', but I don't know if that holds true for all the water shaders. [quote]Which values from that matrix contain far and near values? Do you know?[/quote]I posted about this a while back - just use that exact same function, passing depth=0 to find the near clipping plane and depth=1 to find the far clipping plane. The near and far clipping planes are usually encoded in two fields of the projection matrix, so it can also be possible to derive them using a bit of algebra, but this technique is simpler and also has the advantage that it works even if you don't have a pure projection matrix - it works with view-projection matrices, model-view-projection matrices, etc - so long as it ends in projection. [quote][quote="DarkStarSword"] I also have plans to add conditional logic and expressions to 3DMigoto in the near future, which could potentially be used to implement your own copy limit and reset it when encountering that shader...[/quote] This sounds interesting. I smell a lot of potential.[/quote]Indeed :)
Oomek said:No, I need a complete db with all the partial water shader depth passes to sample from, the one from the current water shader contains only a partial water depth information, the rest of the screen contains data from the ground shader so the bottom of the lake gets sampled in the SSR. I use the db from the last water shader in the rendering queue. I wish to avoid the 1 frame delay though. What do you suggest in that case?
I think you can probably pass the output W coordinate from the vertex shader to the pixel shader and use that for the depth of the current pixel, and the depth buffer for anything else as it won't matter if it is missing the water depth (unless you need the depth of other water pixels?). Alternatively, at least in the case of the near shore pixel shader I was looking at the depth buffer seemed to already contain the depth of the water surface as I was just able to do 'ps-t100 = oD', but I don't know if that holds true for all the water shaders.

Which values from that matrix contain far and near values? Do you know?
I posted about this a while back - just use that exact same function, passing depth=0 to find the near clipping plane and depth=1 to find the far clipping plane. The near and far clipping planes are usually encoded in two fields of the projection matrix, so it can also be possible to derive them using a bit of algebra, but this technique is simpler and also has the advantage that it works even if you don't have a pure projection matrix - it works with view-projection matrices, model-view-projection matrices, etc - so long as it ends in projection.

DarkStarSword said: I also have plans to add conditional logic and expressions to 3DMigoto in the near future, which could potentially be used to implement your own copy limit and reset it when encountering that shader...

This sounds interesting. I smell a lot of potential.
Indeed :)

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/17/2017 01:18 PM   
[quote="DarkStarSword"]I think you can probably pass the output W coordinate from the vertex shader to the pixel shader and use that for the depth of the current pixel, and the depth buffer for anything else as it won't matter if it is missing the water depth (unless you need the depth of other water pixels?). Alternatively, at least in the case of the near shore pixel shader I was looking at the depth buffer seemed to already contain the depth of the water surface as I was just able to do 'ps-t100 = oD', but I don't know if that holds true for all the water shaders.[/quote]I used the W value initially for the current pixel, but I'm not entirely sure if it matters for an intersection finding loop that a db lacks a water surface. The reflection vector goes the other way, so it shouldn't matter. Tell me if i'm wrong. I'm more concerned about the accumulation color buffer delay though, but I cannot see if it's late until I fix that Y shift I posted a screenshot of in my previous post. Do you know what might be causing it. Do you have any clues? You can see that while driving near a slope on the left in the middle of a first track. Edit: i've just found out that the db of a water shader is missing a lot of elements, especially the far tree line, so I cannot use it anyway.
DarkStarSword said:I think you can probably pass the output W coordinate from the vertex shader to the pixel shader and use that for the depth of the current pixel, and the depth buffer for anything else as it won't matter if it is missing the water depth (unless you need the depth of other water pixels?). Alternatively, at least in the case of the near shore pixel shader I was looking at the depth buffer seemed to already contain the depth of the water surface as I was just able to do 'ps-t100 = oD', but I don't know if that holds true for all the water shaders.
I used the W value initially for the current pixel, but I'm not entirely sure if it matters for an intersection finding loop that a db lacks a water surface. The reflection vector goes the other way, so it shouldn't matter. Tell me if I'm wrong. I'm more concerned about the accumulation color buffer delay though, but I cannot see if it's late until I fix that Y shift I posted a screenshot of in my previous post. Do you know what might be causing it? Do you have any clues? You can see that while driving near a slope on the left in the middle of the first track.

Edit: i've just found out that the db of a water shader is missing a lot of elements, especially the far tree line, so I cannot use it anyway.

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/17/2017 02:23 PM   
Walking through a few ways to derive the near & far clipping planes from the projection matrix. Ordinarily a projection matrix will look similar to this (from MSDN): [code] fw 0 0 0 0 fh 0 0 0 0 q 1 0 0 -q*near 0 [/code] Where q = far / (far - near) With a bit of algebra we can work out how to derive near & far [b](don't stop reading at this point)[/b]: [code] 1-> m22 = far / (far - near) far #1: m22 = ------------ (far - near) 1-> m32 = -m22 * near #2: m32 = -m22*near 2-> near -m32 #2: near = ---- m22 2-> #1 far #1: m22 = ------------ (far - near) 1-> replace near with #2 far #1: m22 = ----------- m32 (far + ---) m22 1-> far m32 #1: far = --------- (1 - m22) [/code] However there are some variations on the projection matrix that will trip this up, so let's take a quick look at this one I dumped out from Akiba's Trip and happen to have handy - notice where we expect to see a 1 in _m23 there is actually a -1: [code] // Projection matrix: // cb1[8].x: 1.0296495 // cb1[8].y: 0 // cb1[8].z: 0 // cb1[8].w: 0 // cb1[9].x: 0 // cb1[9].y: 1.83048797 // cb1[9].z: 0 // cb1[9].w: 0 // cb1[10].x: 0 // cb1[10].y: 0 // cb1[10].z: -1.00001001 // m22 // cb1[10].w: -0.100001 // m32 // cb1[11].x: 0 // cb1[11].y: 0 // cb1[11].z: -1 // cb1[11].w: 0 [/code] First, let's try the technique I outlined in the post above: [code] In [2]: import numpy as np In [3]: projection = np.matrix([[1.0296495, 0, 0, 0], [0, 1.83048797, 0, 0], [0, 0, -1.00001001, -1], [0, 0, -0.100001, 0]]) In [4]: def linear_depth(depth): tmp = [0, 0, depth, 1] * projection.I return (tmp / tmp[0,3] * projection)[0,3] In [5]: print(linear_depth(0)) # near 0.099999999 In [6]: print(linear_depth(1)) # far 9990.10989015 [/code] Those values look sane for a near and far clipping plane, so that worked in this case without modification. 
But, if we try the algebra: [code] 1-> #2 -m32 #2: near = ---- m22 2-> calculate Enter m22: -1.00001001 Enter m32: -0.100001 near = -0.09999999900001 2-> #1 m32 #1: far = --------- (1 - m22) 1-> calculate Enter m22: -1.00001001 Enter m32: -0.100001 far = -0.05000024974875 [/code] We get a negated near and a *very* wrong far. Since m23 was -1, let's see what happens if we negate m22 (negating m32 won't give the right answer - trust me): [code] 1-> #2 -m32 #2: near = ---- m22 2-> calculate Enter m22: 1.00001001 Enter m32: -0.100001 near = 0.09999999900001 2-> #1 m32 #1: far = --------- (1 - m22) 1-> calculate Enter m22: 1.00001001 Enter m32: -0.100001 far = 9990.1098901052 [/code] Success :) So, maybe the answer is that we need to use m22 * m23 everywhere I put m22 above. Maybe - it certainly looks that way, but I'm not willing to call it without understanding why m23 is negated in the first place. ... BUT, that's not the only curve ball that games will throw at you. Let's take a look at the projection matrix from ABZU: [code] cb1[24].x: 1.01477683 cb1[24].y: 0 cb1[24].z: 0 cb1[24].w: 0 cb1[25].x: 0 cb1[25].y: 1.8040477 cb1[25].z: 0 cb1[25].w: 0 cb1[26].x: 0 cb1[26].y: 0 cb1[26].z: 0 cb1[26].w: 1 cb1[27].x: 0 cb1[27].y: 0 cb1[27].z: 10 cb1[27].w: 0 [/code] Alright, this should be fairly straight forwa... uhh, maybe not - m22 is 0... Whaaaatt? :-/ Oh well, let's see what happens: [code] In [7]: projection = np.matrix([[1.01477683, 0, 0, 0], [0, 1.8040477, 0, 0], [0, 0, 0, 1], [0, 0, 10, 0]]) In [8]: print(linear_depth(0)) # near nan -c:3: RuntimeWarning: divide by zero encountered in true_divide -c:3: RuntimeWarning: invalid value encountered in true_divide In [9]: print(linear_depth(1)) # far 10.0 [/code] Clearly not right - near got a divide by zero and I can tell you with certainty that there are things further away than 10 units... 
Ok, how about our algebra solution: [code] 1-> #2 -m32 #2: near = ---- m22 2-> calculate Enter m22: 0 Enter m32: 10 Warning: Division by zero. near = -inf 2-> #1 m32 #1: far = --------- (1 - m22) 1-> calculate Enter m22: 0 Enter m32: 10 far = 10 [/code] Nope, same result. So, what's actually going on here? This game uses something called reverse Z projection, which means our assumption of how the projection matrix was composed is wrong. Let's work the other way and pass a couple of coordinates through the projection matrix to see where they end up: [code] In [4]: [0,0,0,1] * projection Out[4]: matrix([[ 0., 0., 10., 0.]]) [/code] A divide by zero at depth = 0 (there's a reason the near clipping plane cannot be 0 - the maths doesn't work there) [code] In [5]: [0,0,1,1] * projection Out[5]: matrix([[ 0., 0., 10., 1.]]) [/code] Linear depth = 1 produces a depth buffer value 10 / 1 = 10 (outside the valid range of 0-1, so clipped) [code] In [6]: [0,0,10,1] * projection Out[6]: matrix([[ 0., 0., 10., 10.]]) [/code] Linear depth = 10 produces a depth buffer value 10 / 10 = 1 [code] In [7]: [0,0,100,1] * projection Out[7]: matrix([[ 0., 0., 10., 100.]]) [/code] Linear depth = 100 produces a depth buffer value 10 / 100 = 0.1 [code] In [8]: [0,0,10000,1] * projection Out[8]: matrix([[ 0., 0., 10., 10000.]]) [/code] Linear depth = 10000 produces a depth buffer value 10 / 10000 = 0.001 So, now we see what's happening - as linear depth increases, the value on the depth buffer decreases, so you could consider that it has near and far swapped around in the projection, and if we look at the numbers near (where depth buffer = 1.0) is 10, and far (where depth buffer = 0.0) is infinity... so, our calculations were actually kind of right after all, just swapped around @_@
Walking through a few ways to derive the near & far clipping planes from the projection matrix. Ordinarily a projection matrix will look similar to this (from MSDN):

fw   0        0  0
0 fh 0 0
0 0 q 1
0 0 -q*near 0


Where q = far / (far - near)


With a bit of algebra we can work out how to derive near & far (don't stop reading at this point):

1-> m22 = far / (far - near)

far
#1: m22 = ------------
(far - near)

1-> m32 = -m22 * near

#2: m32 = -m22*near

2-> near

-m32
#2: near = ----
m22

2-> #1

far
#1: m22 = ------------
(far - near)

1-> replace near with #2

far
#1: m22 = -----------
m32
(far + ---)
m22

1-> far

m32
#1: far = ---------
(1 - m22)


However there are some variations on the projection matrix that will trip this up, so let's take a quick look at this one I dumped out from Akiba's Trip and happen to have handy - notice where we expect to see a 1 in _m23 there is actually a -1:

// Projection matrix:
// cb1[8].x: 1.0296495
// cb1[8].y: 0
// cb1[8].z: 0
// cb1[8].w: 0
// cb1[9].x: 0
// cb1[9].y: 1.83048797
// cb1[9].z: 0
// cb1[9].w: 0
// cb1[10].x: 0
// cb1[10].y: 0
// cb1[10].z: -1.00001001 // m22
// cb1[10].w: -0.100001 // m32
// cb1[11].x: 0
// cb1[11].y: 0
// cb1[11].z: -1
// cb1[11].w: 0


First, let's try the technique I outlined in the post above:

In [2]: import numpy as np

In [3]: projection = np.matrix([[1.0296495, 0, 0, 0],
[0, 1.83048797, 0, 0],
[0, 0, -1.00001001, -1],
[0, 0, -0.100001, 0]])

In [4]: def linear_depth(depth):
tmp = [0, 0, depth, 1] * projection.I
return (tmp / tmp[0,3] * projection)[0,3]

In [5]: print(linear_depth(0)) # near

0.099999999

In [6]: print(linear_depth(1)) # far

9990.10989015


Those values look sane for a near and far clipping plane, so that worked in this case without modification.

But, if we try the algebra:

1-> #2

-m32
#2: near = ----
m22

2-> calculate
Enter m22: -1.00001001
Enter m32: -0.100001
near = -0.09999999900001
2-> #1

m32
#1: far = ---------
(1 - m22)

1-> calculate
Enter m22: -1.00001001
Enter m32: -0.100001
far = -0.05000024974875


We get a negated near and a *very* wrong far. Since m23 was -1, let's see what happens if we negate m22 (negating m32 won't give the right answer - trust me):

1-> #2

-m32
#2: near = ----
m22

2-> calculate
Enter m22: 1.00001001
Enter m32: -0.100001
near = 0.09999999900001
2-> #1

m32
#1: far = ---------
(1 - m22)

1-> calculate
Enter m22: 1.00001001
Enter m32: -0.100001
far = 9990.1098901052


Success :)

So, maybe the answer is that we need to use m22 * m23 everywhere I put m22 above. Maybe - it certainly looks that way, but I'm not willing to call it without understanding why m23 is negated in the first place.


... BUT, that's not the only curve ball that games will throw at you. Let's take a look at the projection matrix from ABZU:

cb1[24].x: 1.01477683
cb1[24].y: 0
cb1[24].z: 0
cb1[24].w: 0
cb1[25].x: 0
cb1[25].y: 1.8040477
cb1[25].z: 0
cb1[25].w: 0
cb1[26].x: 0
cb1[26].y: 0
cb1[26].z: 0
cb1[26].w: 1
cb1[27].x: 0
cb1[27].y: 0
cb1[27].z: 10
cb1[27].w: 0


Alright, this should be fairly straight forwa... uhh, maybe not - m22 is 0... Whaaaatt? :-/

Oh well, let's see what happens:

In [7]: projection = np.matrix([[1.01477683, 0, 0, 0],
[0, 1.8040477, 0, 0],
[0, 0, 0, 1],
[0, 0, 10, 0]])

In [8]: print(linear_depth(0)) # near

nan

-c:3: RuntimeWarning: divide by zero encountered in true_divide
-c:3: RuntimeWarning: invalid value encountered in true_divide

In [9]: print(linear_depth(1)) # far

10.0


Clearly not right - near got a divide by zero and I can tell you with certainty that there are things further away than 10 units...

Ok, how about our algebra solution:
1-> #2

-m32
#2: near = ----
m22

2-> calculate
Enter m22: 0
Enter m32: 10
Warning: Division by zero.
near = -inf
2-> #1

m32
#1: far = ---------
(1 - m22)

1-> calculate
Enter m22: 0
Enter m32: 10
far = 10


Nope, same result. So, what's actually going on here? This game uses something called reverse Z projection, which means our assumption of how the projection matrix was composed is wrong. Let's work the other way and pass a couple of coordinates through the projection matrix to see where they end up:

In [4]: [0,0,0,1] * projection
Out[4]: matrix([[ 0., 0., 10., 0.]])


A divide by zero at depth = 0 (there's a reason the near clipping plane cannot be 0 - the maths doesn't work there)

In [5]: [0,0,1,1] * projection
Out[5]: matrix([[ 0., 0., 10., 1.]])


Linear depth = 1 produces a depth buffer value 10 / 1 = 10 (outside the valid range of 0-1, so clipped)

In [6]: [0,0,10,1] * projection
Out[6]: matrix([[ 0., 0., 10., 10.]])


Linear depth = 10 produces a depth buffer value 10 / 10 = 1

In [7]: [0,0,100,1] * projection
Out[7]: matrix([[ 0., 0., 10., 100.]])


Linear depth = 100 produces a depth buffer value 10 / 100 = 0.1

In [8]: [0,0,10000,1] * projection
Out[8]: matrix([[ 0., 0., 10., 10000.]])


Linear depth = 10000 produces a depth buffer value 10 / 10000 = 0.001


So, now we see what's happening - as linear depth increases, the value on the depth buffer decreases, so you could consider that it has near and far swapped around in the projection, and if we look at the numbers near (where depth buffer = 1.0) is 10, and far (where depth buffer = 0.0) is infinity... so, our calculations were actually kind of right after all, just swapped around @_@

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/17/2017 02:59 PM   
[quote="Oomek"]I used the W value initially for the current pixel, but I'm not entirely sure if it matters for an intersection finding loop that a db lacks a water surface. The reflection vector goes the other way, so it shouldn't matter. Tell me if i'm wrong.[/quote]I agree - it shouldn't matter since the reflection should always be at an angle - it would only ever travel along the water surface if the camera was looking flat on the surface, in which case the surface won't be visible anyway. [quote]I cannot see if it's late until I fix that Y shift I posted a screenshot of in my previous post. Do you know what might be causing it. Do you have any clues? You can see that while driving near a slope on the left in the middle of a first track.[/quote]No, I haven't looked at that yet - I only looked at the water right at the start of the track. [quote]Edit: i've just found out that the db of a water shader is missing a lot of elements, especially the far tree line, so I cannot use it anyway.[/quote]Bugger. I noticed it was missing grass and rocks, but it did have the trees where I was looking, which seemed like the most important thing. That will be tricky... I was thinking you could maybe delay drawing the water reflections and do them in a post pass, but you would have to be able to find all the water pixels to shade again, and not draw over the top of anything since drawn in front of the water. You could probably draw the water depth to a dedicated buffer then compare that depth to the current depth in the depth buffer in a post pass, but if there are any transparent effects drawn in front of the water they will complicate things. Hmmm... I wonder if that is why WATCH_DOGS2 uses a frame old buffer for their screen space reflections...?
Oomek said:I used the W value initially for the current pixel, but I'm not entirely sure if it matters for an intersection finding loop that a db lacks a water surface. The reflection vector goes the other way, so it shouldn't matter. Tell me if i'm wrong.
I agree - it shouldn't matter since the reflection should always be at an angle - it would only ever travel along the water surface if the camera was looking flat on the surface, in which case the surface won't be visible anyway.

I cannot see if it's late until I fix that Y shift I posted a screenshot of in my previous post. Do you know what might be causing it. Do you have any clues? You can see that while driving near a slope on the left in the middle of a first track.
No, I haven't looked at that yet - I only looked at the water right at the start of the track.

Edit: i've just found out that the db of a water shader is missing a lot of elements, especially the far tree line, so I cannot use it anyway.
Bugger. I noticed it was missing grass and rocks, but it did have the trees where I was looking, which seemed like the most important thing. That will be tricky... I was thinking you could maybe delay drawing the water reflections and do them in a post pass, but you would have to be able to find all the water pixels to shade again, and not draw over the top of anything since drawn in front of the water. You could probably draw the water depth to a dedicated buffer then compare that depth to the current depth in the depth buffer in a post pass, but if there are any transparent effects drawn in front of the water they will complicate things.

Hmmm... I wonder if that is why WATCH_DOGS2 uses a frame old buffer for their screen space reflections...?

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/17/2017 03:21 PM   
Same in the Witcher 3, they even render reflections with some noise pattern and interpolate to save some cycles. You can see it when you render only the reflections.
Same in the Witcher 3, they even render reflections with some noise pattern and interpolate to save some cycles. You can see it when you render only the reflections.

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/17/2017 03:30 PM   
Btw, your post about extracting clipping distances from the projection matrix deserves a special place in the wiki even though it's not directly related to the 3DM syntax. It would be a huge waste if it got buried inside that thread :D It was a pleasure reading it.
Btw, your post about extracting clipping distances from the projection matrix deserves a special place in the wiki even though it's not directly related to the 3DM syntax. It would be a huge waste if it got buried inside that thread :D It was a pleasure reading it.

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 01/17/2017 03:38 PM   
[quote]Success :) So, maybe the answer is that we need to use m22 * m23 everywhere I put m22 above. Maybe - it certainly looks that way, but I'm not willing to call it without understanding why m23 is negated in the first place.[/quote] For Akiba's trip, that looks like they are using a Right-handed coordinate system to me, instead of the normal Left-handed one that DirectX uses by default. With the RH coordinates, the Z is inverted, which is where the negative comes from. If that is correct, then m22 * m23 should always work. [url]https://msdn.microsoft.com/en-us/library/windows/desktop/bb205355(v=vs.85).aspx[/url] Super great post, thanks for writing that up.
Success :)

So, maybe the answer is that we need to use m22 * m23 everywhere I put m22 above. Maybe - it certainly looks that way, but I'm not willing to call it without understanding why m23 is negated in the first place.

For Akiba's trip, that looks like they are using a Right-handed coordinate system to me, instead of the normal Left-handed one that DirectX uses by default.

With the RH coordinates, the Z is inverted, which is where the negative comes from. If that is correct, then m22 * m23 should always work.

https://msdn.microsoft.com/en-us/library/windows/desktop/bb205355(v=vs.85).aspx


Super great post, thanks for writing that up.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 01/18/2017 02:37 AM   
[b][center][color="orange"][size="XL"]3DMigoto 1.2.54[/size][/color] [size="M"][url]https://github.com/bo3b/3Dmigoto/releases[/url][/size] [/center][/b] - Adds support for key combinations (e.g. key=ctrl 1) - ... and exclusions (e.g. key=no_ctrl 1) - Adds a shorthand to exclude all standard modifiers (e.g. key=no_modifiers F1) - Template d3dx.ini now excludes modifiers on all standard bindings (removes conflict between F11 to switch SBS mode and Ctrl+Alt+F11 to switch compatibility mode) - Adds "ctrl" as an alias for "control" - Fixes a problem where certain blend states (blend_factor and all the src1 options) did not work correctly in custom shader sections - Ini handler will now warn about unrecognised sections (no more [ShaderOverideFoo] typos costing time) - Ini handler will now sound an audible warning about ShaderOverride/TextureOverride sections missing hash= or Key sections missing key= - New [CommandList] section type to group commonly used commands together. Use it in the same way as custom shader sections (e.g. run=CommandListFoo)


- Adds support for key combinations (e.g. key=ctrl 1)
- ... and exclusions (e.g. key=no_ctrl 1)
- Adds a shorthand to exclude all standard modifiers (e.g. key=no_modifiers F1)
- Template d3dx.ini now excludes modifiers on all standard bindings (removes conflict between F11 to switch SBS mode and Ctrl+Alt+F11 to switch compatibility mode)
- Adds "ctrl" as an alias for "control"
- Fixes a problem where certain blend states (blend_factor and all the src1 options) did not work correctly in custom shader sections
- Ini handler will now warn about unrecognised sections (no more [ShaderOverideFoo] typos costing time)
- Ini handler will now sound an audible warning about ShaderOverride/TextureOverride sections missing hash= or Key sections missing key=
- New [CommandList] section type to group commonly used commands together. Use it in the same way as custom shader sections (e.g. run=CommandListFoo)

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 01/18/2017 04:52 AM   
  84 / 143    
Scroll To Top