Nier Automata
  3 / 10    
Ok so for the most part i managed to fix the shadows but they're still a bit off on objects that are closer to the camera. It's almost perfect, at least it's good enough to be able to play on. I'm guessing that the r1 value for position is in view space since it is multiplied by g_ViewInverseMatrix to turn it into world coordinates which is then multiplied by g_ShadowViewProj to go into light space and calculate the rest of the shadows. So i used the r1.z value for depth to calculate separation value in view space then divide it by g_Proj._m00 and apply it to r1. [code] r1.x += (stereo.x * (r1.z - stereo.y))/g_Proj._m00;[/code] but as i said shadows that are close up are a bit off (to much depth). All the farther away shadows are perfect. I'm not sure why this is happening, maybe the shadows aren't encoded in linear depth? I've played the game for quite a bit and if the shadows get fixed that solves like 99% of the issues. I believe there's another shadow shader for the flying sections but it's hard to test changes since i keep getting shot at and can't really stay still. we could probably just disable that shader since the shadows don't add to much to the overhead flying sections and they're not that common mostly just the very start of the game. Anyhow here's the full shader for 40285a8c8fd28555-ps_replace. relevant code on lines 64-65; [code]//Nier Automata. Shadows 1b. 
// ---- Created with 3Dmigoto v1.2.56 on Fri Mar 17 16:38:25 2017 cbuffer SceneBuffer : register(b0) { float4x4 g_View : packoffset(c0); float4x4 g_Proj : packoffset(c4); float4x4 g_ViewProjection : packoffset(c8); float4x4 g_ViewInverseMatrix : packoffset(c12); } cbuffer CamParam_HPixel_Buffer : register(b13) { float4 g_CameraParam : packoffset(c0); float4 g_CameraVec : packoffset(c1); } cbuffer ShadowView_Buffer : register(b10) { float4 g_ShadowFarInv : packoffset(c0); float4 g_DepthOffset : packoffset(c1); float4 g_BokeOffset : packoffset(c2); float4x4 g_ShadowView : packoffset(c3); float4x4 g_ShadowViewProj[4] : packoffset(c7); } SamplerState g_Z_TexSampler_s : register(s6); SamplerComparisonState g_Shadow_TexSampler_s : register(s11); Texture2D<float4> g_Z_Tex : register(t6); Texture2D<float4> g_Shadow_Tex : register(t11); // 3Dmigoto declarations #define cmp - Texture1D<float4> IniParams : register(t120); Texture2D<float4> StereoParams : register(t125); void main( float4 v0 : SV_POSITION0, float4 v1 : TEXCOORD0, float4 v2 : TEXCOORD1, out float4 o0 : SV_Target0) { const float4 icb[] = { { 1.000000, 0, 0, 0}, { 0, 1.000000, 0, 0}, { 0, 0, 1.000000, 0}, { 0, 0, 0, 1.000000} }; float4 r0,r1,r2,r3,r4,r5,r6,r7,r8; uint4 bitmask, uiDest; float4 fDest; float4 x0[4]; r0.x = g_Z_Tex.Sample(g_Z_TexSampler_s, v1.xy).x; r0.x = r0.x * g_CameraParam.y + g_CameraParam.x; r0.yz = v2.xy * r0.xx; r1.xy = v2.zw * r0.yz; r1.z = -r0.x; r1.w = 1; // r1 is in view space so use the z value for depth to calculate separation value in view space then divide it by g_Proj._m00 and apply it to r1 float4 stereo = StereoParams.Load(0); r1.x += (stereo.x * (r1.z - stereo.y))/g_Proj._m00; r0.x = dot(r1.xyzw, g_ViewInverseMatrix._m00_m10_m20_m30); r0.y = dot(r1.xyzw, g_ViewInverseMatrix._m01_m11_m21_m31); r0.z = dot(r1.xyzw, g_ViewInverseMatrix._m02_m12_m22_m32); r0.w = 1; r1.x = dot(r0.xyzw, g_ShadowViewProj[0]._m00_m10_m20_m30); r1.y = dot(r0.xyzw, g_ShadowViewProj[0]._m01_m11_m21_m31); r1.w 
= dot(r0.xyzw, g_ShadowViewProj[0]._m02_m12_m22_m32); r1.z = dot(r0.xyzw, g_ShadowViewProj[0]._m03_m13_m23_m33); x0[0].xyw = r1.xyz; r1.x = r1.w / r1.z; r2.xyzw = g_DepthOffset.xyzw * g_ShadowFarInv.xyzw; r1.x = -r2.x * 0.100000001 + r1.x; x0[0].z = r1.x; r1.x = dot(r0.xyzw, g_ShadowViewProj[1]._m00_m10_m20_m30); r1.y = dot(r0.xyzw, g_ShadowViewProj[1]._m01_m11_m21_m31); r1.w = dot(r0.xyzw, g_ShadowViewProj[1]._m02_m12_m22_m32); r1.z = dot(r0.xyzw, g_ShadowViewProj[1]._m03_m13_m23_m33); x0[1].xyw = r1.xyz; r1.x = r1.w / r1.z; r1.x = -r2.y * 0.100000001 + r1.x; x0[1].z = r1.x; r1.x = dot(r0.xyzw, g_ShadowViewProj[2]._m00_m10_m20_m30); r1.y = dot(r0.xyzw, g_ShadowViewProj[2]._m01_m11_m21_m31); r1.w = dot(r0.xyzw, g_ShadowViewProj[2]._m02_m12_m22_m32); r1.z = dot(r0.xyzw, g_ShadowViewProj[2]._m03_m13_m23_m33); x0[2].xyw = r1.xyz; r1.x = r1.w / r1.z; r1.x = -r2.z * 0.100000001 + r1.x; x0[2].z = r1.x; r1.x = dot(r0.xyzw, g_ShadowViewProj[3]._m00_m10_m20_m30); r1.y = dot(r0.xyzw, g_ShadowViewProj[3]._m01_m11_m21_m31); r1.w = dot(r0.xyzw, g_ShadowViewProj[3]._m02_m12_m22_m32); r1.z = dot(r0.xyzw, g_ShadowViewProj[3]._m03_m13_m23_m33); x0[3].xyw = r1.xyz; r0.x = r1.w / r1.z; r0.x = -r2.w * 0.100000001 + r0.x; x0[3].z = r0.x; r0.xyzw = float4(0,0,0,0); while (true) { r1.x = cmp((uint)r0.y >= 4); if (r1.x != 0) break; r1.xyzw = x0[r0.y+0].xyzw; r1.xy = r1.xy / r1.ww; r1.z = max(abs(r1.y), abs(r1.z)); r1.z = max(abs(r1.x), r1.z); r1.z = cmp(0.99000001 >= r1.z); if (r1.z != 0) { r0.zw = r1.xy; break; } r0.xy = (int2)r0.xy + int2(1,1); r0.zw = r1.xy; } r0.y = cmp((uint)r0.x < 4); if (r0.y != 0) { r0.y = 1 + r0.z; r0.z = (int)r0.x & 1; r0.z = (uint)r0.z; r0.z = 0.5 * r0.z; r1.x = r0.y * 0.25 + r0.z; r0.y = 1 + -r0.w; r0.z = (uint)r0.x >> 1; r0.z = (uint)r0.z; r0.z = 0.5 * r0.z; r1.y = r0.y * 0.25 + r0.z; r1.z = x0[r0.x+0].z; r0.x = dot(g_BokeOffset.xyzw, icb[r0.x+0].xyzw); r0.yzw = r0.xxx * float3(0.5,0.5,0) + r1.xyz; r2.xyz = r0.xxx * float3(-0.5,-0.5,0) + r1.xyz; r3.xyz = 
r0.xxx * float3(-0.5,0.5,0) + r1.xyz; r4.xyz = r0.xxx * float3(0.5,-0.5,0) + r1.xyz; r5.xyz = r0.xxx * float3(-1.5,0.5,0) + r1.xyz; r6.xyz = r0.xxx * float3(1.5,-0.5,0) + r1.xyz; r7.xyz = r0.xxx * float3(-0.5,1.5,0) + r1.xyz; r8.xyz = r0.xxx * float3(0.5,-1.5,0) + r1.xyz; r1.w = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r1.xy, r1.z).x; r0.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r0.yz, r0.w).x; r0.y = r0.y * 0.800000012 + r1.w; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r2.xy, r2.z).x; r0.y = r0.z * 0.800000012 + r0.y; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r3.xy, r3.z).x; r0.y = r0.z * 0.800000012 + r0.y; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r4.xy, r4.z).x; r0.y = r0.z * 0.800000012 + r0.y; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r5.xy, r5.z).x; r0.y = r0.z * 0.449999988 + r0.y; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r6.xy, r6.z).x; r0.y = r0.z * 0.449999988 + r0.y; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r7.xy, r7.z).x; r0.y = r0.z * 0.449999988 + r0.y; r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r8.xy, r8.z).x; r0.y = r0.z * 0.449999988 + r0.y; r0.z = cmp(r0.y != 0.000000); if (r0.z != 0) { r2.xyz = r0.xxx * float3(1.5,1.5,0) + r1.xyz; r3.xyz = r0.xxx * float3(-1.5,-1.5,0) + r1.xyz; r4.xyz = r0.xxx * float3(-1.5,1.5,0) + r1.xyz; r5.xyz = r0.xxx * float3(1.5,-1.5,0) + r1.xyz; r6.xyz = r0.xxx * float3(1.5,0.5,0) + r1.xyz; r7.xyz = r0.xxx * float3(-1.5,-0.5,0) + r1.xyz; r8.xyz = r0.xxx * float3(0.5,1.5,0) + r1.xyz; r0.xzw = r0.xxx * float3(-0.5,-1.5,0) + r1.xyz; r1.x = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r2.xy, r2.z).x; r1.x = r1.x * 0.150000006 + r0.y; r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r3.xy, r3.z).x; r1.x = r1.y * 0.150000006 + r1.x; r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r4.xy, r4.z).x; r1.x = r1.y * 0.150000006 + 
r1.x; r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r5.xy, r5.z).x; r1.x = r1.y * 0.150000006 + r1.x; r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r6.xy, r6.z).x; r1.x = r1.y * 0.449999988 + r1.x; r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r7.xy, r7.z).x; r1.x = r1.y * 0.449999988 + r1.x; r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r8.xy, r8.z).x; r1.x = r1.y * 0.449999988 + r1.x; r0.x = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r0.xz, r0.w).x; r0.x = r0.x * 0.449999988 + r1.x; r0.x = 0.119047619 * r0.x; } else { r0.x = 0.166666672 * r0.y; } r0.x = 1 + -r0.x; } else { r0.x = 1; } o0.xyzw = r0.xxxx; return; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 // // using 3Dmigoto v1.2.56 on Fri Mar 17 16:38:25 2017 // // // Buffer Definitions: // // cbuffer SceneBuffer // { // // float4x4 g_View; // Offset: 0 Size: 64 [unused] // float4x4 g_Proj; // Offset: 64 Size: 64 [unused] // float4x4 g_ViewProjection; // Offset: 128 Size: 64 [unused] // float4x4 g_ViewInverseMatrix; // Offset: 192 Size: 64 // // } // // cbuffer CamParam_HPixel_Buffer // { // // float4 g_CameraParam; // Offset: 0 Size: 16 // float4 g_CameraVec; // Offset: 16 Size: 16 [unused] // // } // // cbuffer ShadowView_Buffer // { // // float4 g_ShadowFarInv; // Offset: 0 Size: 16 // float4 g_DepthOffset; // Offset: 16 Size: 16 // float4 g_BokeOffset; // Offset: 32 Size: 16 // float4x4 g_ShadowView; // Offset: 48 Size: 64 [unused] // float4x4 g_ShadowViewProj[4]; // Offset: 112 Size: 256 // // } // // // Resource Bindings: // // Name Type Format Dim Slot Elements // ------------------------------ ---------- ------- ----------- ---- -------- // g_Z_TexSampler sampler NA NA 6 1 // g_Shadow_TexSampler sampler_c NA NA 11 1 // g_Z_Tex texture float4 2d 6 1 // g_Shadow_Tex texture float4 2d 11 1 // SceneBuffer cbuffer NA NA 0 1 // 
ShadowView_Buffer cbuffer NA NA 10 1 // CamParam_HPixel_Buffer cbuffer NA NA 13 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xyzw 2 NONE float xyzw // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_immediateConstantBuffer { { 1.000000, 0, 0, 0}, { 0, 1.000000, 0, 0}, { 0, 0, 1.000000, 0}, { 0, 0, 0, 1.000000} } dcl_constantbuffer cb0[15], immediateIndexed dcl_constantbuffer cb13[1], immediateIndexed dcl_constantbuffer cb10[23], immediateIndexed dcl_sampler s6, mode_default dcl_sampler s11, mode_comparison dcl_resource_texture2d (float,float,float,float) t6 dcl_resource_texture2d (float,float,float,float) t11 dcl_input_ps linear v1.xy dcl_input_ps linear v2.xyzw dcl_output o0.xyzw dcl_temps 9 dcl_indexableTemp x0[4], 4 sample_indexable(texture2d)(float,float,float,float) r0.x, v1.xyxx, t6.xyzw, s6 mad r0.x, r0.x, cb13[0].y, cb13[0].x mul r0.yz, r0.xxxx, v2.xxyx mul r1.xy, r0.yzyy, v2.zwzz mov r1.z, -r0.x mov r1.w, l(1.000000) dp4 r0.x, r1.xyzw, cb0[12].xyzw dp4 r0.y, r1.xyzw, cb0[13].xyzw dp4 r0.z, r1.xyzw, cb0[14].xyzw mov r0.w, l(1.000000) dp4 r1.x, r0.xyzw, cb10[7].xyzw dp4 r1.y, r0.xyzw, cb10[8].xyzw dp4 r1.w, r0.xyzw, cb10[9].xyzw dp4 r1.z, r0.xyzw, cb10[10].xyzw mov x0[0].xyw, r1.xyxz div r1.x, r1.w, r1.z mul r2.xyzw, cb10[0].xyzw, cb10[1].xyzw mad r1.x, -r2.x, l(0.100000), r1.x mov x0[0].z, r1.x dp4 r1.x, r0.xyzw, cb10[11].xyzw dp4 r1.y, r0.xyzw, cb10[12].xyzw dp4 r1.w, r0.xyzw, cb10[13].xyzw dp4 r1.z, r0.xyzw, cb10[14].xyzw mov x0[1].xyw, r1.xyxz div r1.x, r1.w, r1.z mad r1.x, -r2.y, l(0.100000), r1.x mov x0[1].z, r1.x dp4 r1.x, r0.xyzw, cb10[15].xyzw dp4 r1.y, r0.xyzw, cb10[16].xyzw dp4 
r1.w, r0.xyzw, cb10[17].xyzw dp4 r1.z, r0.xyzw, cb10[18].xyzw mov x0[2].xyw, r1.xyxz div r1.x, r1.w, r1.z mad r1.x, -r2.z, l(0.100000), r1.x mov x0[2].z, r1.x dp4 r1.x, r0.xyzw, cb10[19].xyzw dp4 r1.y, r0.xyzw, cb10[20].xyzw dp4 r1.w, r0.xyzw, cb10[21].xyzw dp4 r1.z, r0.xyzw, cb10[22].xyzw mov x0[3].xyw, r1.xyxz div r0.x, r1.w, r1.z mad r0.x, -r2.w, l(0.100000), r0.x mov x0[3].z, r0.x mov r0.xyzw, l(0,0,0,0) loop uge r1.x, r0.y, l(4) breakc_nz r1.x mov r1.xyzw, x0[r0.y + 0].xyzw div r1.xy, r1.xyxx, r1.wwww max r1.z, |r1.z|, |r1.y| max r1.z, r1.z, |r1.x| ge r1.z, l(0.990000), r1.z if_nz r1.z mov r0.zw, r1.xxxy break endif iadd r0.xy, r0.xyxx, l(1, 1, 0, 0) mov r0.zw, r1.xxxy endloop ult r0.y, r0.x, l(4) if_nz r0.y add r0.y, r0.z, l(1.000000) and r0.z, r0.x, l(1) utof r0.z, r0.z mul r0.z, r0.z, l(0.500000) mad r1.x, r0.y, l(0.250000), r0.z add r0.y, -r0.w, l(1.000000) ushr r0.z, r0.x, l(1) utof r0.z, r0.z mul r0.z, r0.z, l(0.500000) mad r1.y, r0.y, l(0.250000), r0.z mov r1.z, x0[r0.x + 0].z dp4 r0.x, cb10[2].xyzw, icb[r0.x + 0].xyzw mad r0.yzw, r0.xxxx, l(0.000000, 0.500000, 0.500000, 0.000000), r1.xxyz mad r2.xyz, r0.xxxx, l(-0.500000, -0.500000, 0.000000, 0.000000), r1.xyzx mad r3.xyz, r0.xxxx, l(-0.500000, 0.500000, 0.000000, 0.000000), r1.xyzx mad r4.xyz, r0.xxxx, l(0.500000, -0.500000, 0.000000, 0.000000), r1.xyzx mad r5.xyz, r0.xxxx, l(-1.500000, 0.500000, 0.000000, 0.000000), r1.xyzx mad r6.xyz, r0.xxxx, l(1.500000, -0.500000, 0.000000, 0.000000), r1.xyzx mad r7.xyz, r0.xxxx, l(-0.500000, 1.500000, 0.000000, 0.000000), r1.xyzx mad r8.xyz, r0.xxxx, l(0.500000, -1.500000, 0.000000, 0.000000), r1.xyzx sample_c_lz_indexable(texture2d)(float,float,float,float) r1.w, r1.xyxx, t11.xxxx, s11, r1.z sample_c_lz_indexable(texture2d)(float,float,float,float) r0.y, r0.yzyy, t11.xxxx, s11, r0.w mad r0.y, r0.y, l(0.800000), r1.w sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r2.xyxx, t11.xxxx, s11, r2.z mad r0.y, r0.z, l(0.800000), r0.y 
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r3.xyxx, t11.xxxx, s11, r3.z mad r0.y, r0.z, l(0.800000), r0.y sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r4.xyxx, t11.xxxx, s11, r4.z mad r0.y, r0.z, l(0.800000), r0.y sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r5.xyxx, t11.xxxx, s11, r5.z mad r0.y, r0.z, l(0.450000), r0.y sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r6.xyxx, t11.xxxx, s11, r6.z mad r0.y, r0.z, l(0.450000), r0.y sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r7.xyxx, t11.xxxx, s11, r7.z mad r0.y, r0.z, l(0.450000), r0.y sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r8.xyxx, t11.xxxx, s11, r8.z mad r0.y, r0.z, l(0.450000), r0.y ne r0.z, r0.y, l(0.000000) if_nz r0.z mad r2.xyz, r0.xxxx, l(1.500000, 1.500000, 0.000000, 0.000000), r1.xyzx mad r3.xyz, r0.xxxx, l(-1.500000, -1.500000, 0.000000, 0.000000), r1.xyzx mad r4.xyz, r0.xxxx, l(-1.500000, 1.500000, 0.000000, 0.000000), r1.xyzx mad r5.xyz, r0.xxxx, l(1.500000, -1.500000, 0.000000, 0.000000), r1.xyzx mad r6.xyz, r0.xxxx, l(1.500000, 0.500000, 0.000000, 0.000000), r1.xyzx mad r7.xyz, r0.xxxx, l(-1.500000, -0.500000, 0.000000, 0.000000), r1.xyzx mad r8.xyz, r0.xxxx, l(0.500000, 1.500000, 0.000000, 0.000000), r1.xyzx mad r0.xzw, r0.xxxx, l(-0.500000, 0.000000, -1.500000, 0.000000), r1.xxyz sample_c_lz_indexable(texture2d)(float,float,float,float) r1.x, r2.xyxx, t11.xxxx, s11, r2.z mad r1.x, r1.x, l(0.150000), r0.y sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r3.xyxx, t11.xxxx, s11, r3.z mad r1.x, r1.y, l(0.150000), r1.x sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r4.xyxx, t11.xxxx, s11, r4.z mad r1.x, r1.y, l(0.150000), r1.x sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r5.xyxx, t11.xxxx, s11, r5.z mad r1.x, r1.y, l(0.150000), r1.x sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r6.xyxx, t11.xxxx, s11, r6.z mad r1.x, 
r1.y, l(0.450000), r1.x sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r7.xyxx, t11.xxxx, s11, r7.z mad r1.x, r1.y, l(0.450000), r1.x sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r8.xyxx, t11.xxxx, s11, r8.z mad r1.x, r1.y, l(0.450000), r1.x sample_c_lz_indexable(texture2d)(float,float,float,float) r0.x, r0.xzxx, t11.xxxx, s11, r0.w mad r0.x, r0.x, l(0.450000), r1.x mul r0.x, r0.x, l(0.119047619) else mul r0.x, r0.y, l(0.166666672) endif add r0.x, -r0.x, l(1.000000) else mov r0.x, l(1.000000) endif mov o0.xyzw, r0.xxxx ret // Approximately 134 instruction slots used ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/[/code] Hopefully someone with more experience can shed some light. This is the first time i've even attempted to fix a shader. Cheers!
Ok, so for the most part I managed to fix the shadows, but they're still a bit off on objects that are closer to the camera. It's almost perfect; at least it's good enough to play on. I'm guessing that the r1 value for position is in view space, since it is multiplied by g_ViewInverseMatrix to turn it into world coordinates, which are then multiplied by g_ShadowViewProj to go into light space and calculate the rest of the shadows. So I used the r1.z value for depth to calculate the separation value in view space, then divided it by g_Proj._m00 and applied it to r1.

r1.x += (stereo.x * (r1.z - stereo.y))/g_Proj._m00;


But as I said, shadows that are close up are a bit off (too much depth). All the farther-away shadows are perfect. I'm not sure why this is happening; maybe the shadows aren't encoded in linear depth? I've played the game for quite a bit, and if the shadows get fixed, that solves like 99% of the issues. I believe there's another shadow shader for the flying sections, but it's hard to test changes since I keep getting shot at and can't really stay still. We could probably just disable that shader, since the shadows don't add too much to the overhead flying sections and they're not that common, mostly just the very start of the game. Anyhow, here's the full shader for 40285a8c8fd28555-ps_replace. Relevant code on lines 64-65:

//Nier Automata. Shadows 1b.
// ---- Created with 3Dmigoto v1.2.56 on Fri Mar 17 16:38:25 2017

// Scene matrices bound at b0. The compiler's buffer listing at the end of this
// shader marks g_View, g_Proj and g_ViewProjection as [unused] by the original
// shader; g_Proj is read only by the stereo correction that was added to main().
cbuffer SceneBuffer : register(b0)
{
float4x4 g_View : packoffset(c0);
float4x4 g_Proj : packoffset(c4); // _m00 is read by the stereo correction in main()
float4x4 g_ViewProjection : packoffset(c8);
float4x4 g_ViewInverseMatrix : packoffset(c12); // applied to the reconstructed position in main()
}

// Camera parameters bound at b13.
cbuffer CamParam_HPixel_Buffer : register(b13)
{
float4 g_CameraParam : packoffset(c0); // main() computes depth = g_Z_Tex sample * .y + .x
float4 g_CameraVec : packoffset(c1); // not read by this shader
}

// Shadow parameters bound at b10. main() uses component x/y/z/w of the float4
// members together with g_ShadowViewProj[0/1/2/3] respectively.
cbuffer ShadowView_Buffer : register(b10)
{
float4 g_ShadowFarInv : packoffset(c0); // multiplied component-wise with g_DepthOffset in main()
float4 g_DepthOffset : packoffset(c1); // (g_DepthOffset * g_ShadowFarInv) * 0.1 is subtracted from the projected depth
float4 g_BokeOffset : packoffset(c2); // component i scales the shadow-map sample offsets for index i
float4x4 g_ShadowView : packoffset(c3); // not read by this shader
float4x4 g_ShadowViewProj[4] : packoffset(c7); // each matrix projects the position produced by g_ViewInverseMatrix
}

// Samplers and textures read by main(): the depth texture (t6/s6) and the
// shadow map (t11) with its comparison sampler (s11).
SamplerState g_Z_TexSampler_s : register(s6);
SamplerComparisonState g_Shadow_TexSampler_s : register(s11);
Texture2D<float4> g_Z_Tex : register(t6);
Texture2D<float4> g_Shadow_Tex : register(t11);


// 3Dmigoto declarations
// cmp(expr) expands to -(expr); the decompiled code only ever tests the result
// against zero, so the sign flip does not change which branch is taken.
#define cmp -
// IniParams is declared by 3Dmigoto but not read by this shader.
Texture1D<float4> IniParams : register(t120);
// main() loads texel 0 of StereoParams for the stereo correction.
Texture2D<float4> StereoParams : register(t125);


// Shadow resolve pixel shader (3Dmigoto decompilation) with a stereo correction.
//
// Inputs:
//   v0 - SV_POSITION (not read).
//   v1 - .xy is the texture coordinate used to sample g_Z_Tex.
//   v2 - .xy and .zw are multiplied together with the linear depth to rebuild
//        the position's x/y.
// Output:
//   o0 - 1 minus the weighted, normalised sum of shadow-comparison samples,
//        replicated to all four channels; 1 when none of the four
//        g_ShadowViewProj projections contains the position.
void main(
float4 v0 : SV_POSITION0,
float4 v1 : TEXCOORD0,
float4 v2 : TEXCOORD1,
out float4 o0 : SV_Target0)
{
  // One-hot rows used to pick a single component of g_BokeOffset by index.
  const float4 icb[] = { { 1.000000, 0, 0, 0},
                         { 0, 1.000000, 0, 0},
                         { 0, 0, 1.000000, 0},
                         { 0, 0, 0, 1.000000} };
  float4 r0,r1,r2,r3,r4,r5,r6,r7,r8;
  uint4 bitmask, uiDest;
  float4 fDest;


  // Per-projection results: .xy = projected x/y, .w = projected w,
  // .z = projected z/w minus the depth bias.
  float4 x0[4];

  // Depth sample converted with g_CameraParam (scale in .y, offset in .x).
  r0.x = g_Z_Tex.Sample(g_Z_TexSampler_s, v1.xy).x;
  r0.x = r0.x * g_CameraParam.y + g_CameraParam.x;
  // Rebuild the position: x/y scale with the depth, z is the NEGATED depth.
  r0.yz = v2.xy * r0.xx;
  r1.xy = v2.zw * r0.yz;
  r1.z = -r0.x;
  r1.w = 1;

  // Stereo correction. r1 is treated as a view-space position (it is fed
  // through g_ViewInverseMatrix below). 3Dmigoto's StereoParams texel 0 holds
  // separation in .x and convergence in .y. The clip-space stereo offset is
  // separation * (depth - convergence); dividing by g_Proj._m00 converts it to
  // a view-space x offset, which is removed from the position.
  // The depth must be the positive value -r1.z: using r1.z directly yields
  // -separation * (depth + convergence), which is off by a constant
  // 2 * separation * convergence and is only noticeable close to the camera.
  // NOTE(review): if the result looks inverted in game, flip this sign.
  float4 stereo = StereoParams.Load(0);
  r1.x -= stereo.x * (-r1.z - stereo.y) / g_Proj._m00;

  // Apply g_ViewInverseMatrix to the corrected position.
  r0.x = dot(r1.xyzw, g_ViewInverseMatrix._m00_m10_m20_m30);
  r0.y = dot(r1.xyzw, g_ViewInverseMatrix._m01_m11_m21_m31);
  r0.z = dot(r1.xyzw, g_ViewInverseMatrix._m02_m12_m22_m32);
  r0.w = 1;


  // Project with each of the four shadow matrices. r1.w receives the z row and
  // r1.z the w row, so r1.w / r1.z is z/w. The bias is
  // (g_DepthOffset * g_ShadowFarInv) * 0.1, taken per component.
  r1.x = dot(r0.xyzw, g_ShadowViewProj[0]._m00_m10_m20_m30);
  r1.y = dot(r0.xyzw, g_ShadowViewProj[0]._m01_m11_m21_m31);
  r1.w = dot(r0.xyzw, g_ShadowViewProj[0]._m02_m12_m22_m32);
  r1.z = dot(r0.xyzw, g_ShadowViewProj[0]._m03_m13_m23_m33);
  x0[0].xyw = r1.xyz;
  r1.x = r1.w / r1.z;
  r2.xyzw = g_DepthOffset.xyzw * g_ShadowFarInv.xyzw;
  r1.x = -r2.x * 0.100000001 + r1.x;
  x0[0].z = r1.x;
  r1.x = dot(r0.xyzw, g_ShadowViewProj[1]._m00_m10_m20_m30);
  r1.y = dot(r0.xyzw, g_ShadowViewProj[1]._m01_m11_m21_m31);
  r1.w = dot(r0.xyzw, g_ShadowViewProj[1]._m02_m12_m22_m32);
  r1.z = dot(r0.xyzw, g_ShadowViewProj[1]._m03_m13_m23_m33);
  x0[1].xyw = r1.xyz;
  r1.x = r1.w / r1.z;
  r1.x = -r2.y * 0.100000001 + r1.x;
  x0[1].z = r1.x;
  r1.x = dot(r0.xyzw, g_ShadowViewProj[2]._m00_m10_m20_m30);
  r1.y = dot(r0.xyzw, g_ShadowViewProj[2]._m01_m11_m21_m31);
  r1.w = dot(r0.xyzw, g_ShadowViewProj[2]._m02_m12_m22_m32);
  r1.z = dot(r0.xyzw, g_ShadowViewProj[2]._m03_m13_m23_m33);
  x0[2].xyw = r1.xyz;
  r1.x = r1.w / r1.z;
  r1.x = -r2.z * 0.100000001 + r1.x;
  x0[2].z = r1.x;
  r1.x = dot(r0.xyzw, g_ShadowViewProj[3]._m00_m10_m20_m30);
  r1.y = dot(r0.xyzw, g_ShadowViewProj[3]._m01_m11_m21_m31);
  r1.w = dot(r0.xyzw, g_ShadowViewProj[3]._m02_m12_m22_m32);
  r1.z = dot(r0.xyzw, g_ShadowViewProj[3]._m03_m13_m23_m33);
  x0[3].xyw = r1.xyz;
  r0.x = r1.w / r1.z;
  r0.x = -r2.w * 0.100000001 + r0.x;
  x0[3].z = r0.x;

  // Pick the first projection whose x/w, y/w and biased depth all lie within
  // [-0.99, 0.99]. r0.x and r0.y both count the index; r0.zw keeps x/w, y/w.
  r0.xyzw = float4(0,0,0,0);
  while (true) {
    r1.x = cmp((uint)r0.y >= 4);
    if (r1.x != 0) break;
    r1.xyzw = x0[r0.y+0].xyzw;
    r1.xy = r1.xy / r1.ww;
    r1.z = max(abs(r1.y), abs(r1.z));
    r1.z = max(abs(r1.x), r1.z);
    r1.z = cmp(0.99000001 >= r1.z);
    if (r1.z != 0) {
      r0.zw = r1.xy;
      break;
    }
    r0.xy = (int2)r0.xy + int2(1,1);
    r0.zw = r1.xy;
  }
  r0.y = cmp((uint)r0.x < 4);
  if (r0.y != 0) {
    // Map into a 2x2 layout of the shadow texture:
    // u = (1 + x) * 0.25 + 0.5 * (index & 1), v = (1 - y) * 0.25 + 0.5 * (index >> 1).
    r0.y = 1 + r0.z;
    r0.z = (int)r0.x & 1;
    r0.z = (uint)r0.z;
    r0.z = 0.5 * r0.z;
    r1.x = r0.y * 0.25 + r0.z;
    r0.y = 1 + -r0.w;
    r0.z = (uint)r0.x >> 1;
    r0.z = (uint)r0.z;
    r0.z = 0.5 * r0.z;
    r1.y = r0.y * 0.25 + r0.z;
    r1.z = x0[r0.x+0].z;
    // r0.x becomes the g_BokeOffset component for the chosen index; it scales
    // every sample offset below.
    r0.x = dot(g_BokeOffset.xyzw, icb[r0.x+0].xyzw);
    r0.yzw = r0.xxx * float3(0.5,0.5,0) + r1.xyz;
    r2.xyz = r0.xxx * float3(-0.5,-0.5,0) + r1.xyz;
    r3.xyz = r0.xxx * float3(-0.5,0.5,0) + r1.xyz;
    r4.xyz = r0.xxx * float3(0.5,-0.5,0) + r1.xyz;
    r5.xyz = r0.xxx * float3(-1.5,0.5,0) + r1.xyz;
    r6.xyz = r0.xxx * float3(1.5,-0.5,0) + r1.xyz;
    r7.xyz = r0.xxx * float3(-0.5,1.5,0) + r1.xyz;
    r8.xyz = r0.xxx * float3(0.5,-1.5,0) + r1.xyz;
    // First pass: 9 comparison samples, weights 1 + 4 * 0.8 + 4 * 0.45 = 6.
    r1.w = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r1.xy, r1.z).x;
    r0.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r0.yz, r0.w).x;
    r0.y = r0.y * 0.800000012 + r1.w;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r2.xy, r2.z).x;
    r0.y = r0.z * 0.800000012 + r0.y;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r3.xy, r3.z).x;
    r0.y = r0.z * 0.800000012 + r0.y;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r4.xy, r4.z).x;
    r0.y = r0.z * 0.800000012 + r0.y;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r5.xy, r5.z).x;
    r0.y = r0.z * 0.449999988 + r0.y;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r6.xy, r6.z).x;
    r0.y = r0.z * 0.449999988 + r0.y;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r7.xy, r7.z).x;
    r0.y = r0.z * 0.449999988 + r0.y;
    r0.z = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r8.xy, r8.z).x;
    r0.y = r0.z * 0.449999988 + r0.y;
    r0.z = cmp(r0.y != 0.000000);
    if (r0.z != 0) {
      // Second pass, taken only when the first pass sum is non-zero: 8 more
      // samples (4 * 0.15 + 4 * 0.45), total weight 8.4, hence the 1/8.4 scale.
      r2.xyz = r0.xxx * float3(1.5,1.5,0) + r1.xyz;
      r3.xyz = r0.xxx * float3(-1.5,-1.5,0) + r1.xyz;
      r4.xyz = r0.xxx * float3(-1.5,1.5,0) + r1.xyz;
      r5.xyz = r0.xxx * float3(1.5,-1.5,0) + r1.xyz;
      r6.xyz = r0.xxx * float3(1.5,0.5,0) + r1.xyz;
      r7.xyz = r0.xxx * float3(-1.5,-0.5,0) + r1.xyz;
      r8.xyz = r0.xxx * float3(0.5,1.5,0) + r1.xyz;
      r0.xzw = r0.xxx * float3(-0.5,-1.5,0) + r1.xyz;
      r1.x = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r2.xy, r2.z).x;
      r1.x = r1.x * 0.150000006 + r0.y;
      r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r3.xy, r3.z).x;
      r1.x = r1.y * 0.150000006 + r1.x;
      r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r4.xy, r4.z).x;
      r1.x = r1.y * 0.150000006 + r1.x;
      r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r5.xy, r5.z).x;
      r1.x = r1.y * 0.150000006 + r1.x;
      r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r6.xy, r6.z).x;
      r1.x = r1.y * 0.449999988 + r1.x;
      r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r7.xy, r7.z).x;
      r1.x = r1.y * 0.449999988 + r1.x;
      r1.y = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r8.xy, r8.z).x;
      r1.x = r1.y * 0.449999988 + r1.x;
      r0.x = g_Shadow_Tex.SampleCmpLevelZero(g_Shadow_TexSampler_s, r0.xz, r0.w).x;
      r0.x = r0.x * 0.449999988 + r1.x;
      r0.x = 0.119047619 * r0.x;
    } else {
      // First pass only: normalise by its total weight of 6.
      r0.x = 0.166666672 * r0.y;
    }
    r0.x = 1 + -r0.x;
  } else {
    // No projection contained the position.
    r0.x = 1;
  }
  o0.xyzw = r0.xxxx;
  return;
}

/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
// using 3Dmigoto v1.2.56 on Fri Mar 17 16:38:25 2017
//
//
// Buffer Definitions:
//
// cbuffer SceneBuffer
// {
//
// float4x4 g_View; // Offset: 0 Size: 64 [unused]
// float4x4 g_Proj; // Offset: 64 Size: 64 [unused]
// float4x4 g_ViewProjection; // Offset: 128 Size: 64 [unused]
// float4x4 g_ViewInverseMatrix; // Offset: 192 Size: 64
//
// }
//
// cbuffer CamParam_HPixel_Buffer
// {
//
// float4 g_CameraParam; // Offset: 0 Size: 16
// float4 g_CameraVec; // Offset: 16 Size: 16 [unused]
//
// }
//
// cbuffer ShadowView_Buffer
// {
//
// float4 g_ShadowFarInv; // Offset: 0 Size: 16
// float4 g_DepthOffset; // Offset: 16 Size: 16
// float4 g_BokeOffset; // Offset: 32 Size: 16
// float4x4 g_ShadowView; // Offset: 48 Size: 64 [unused]
// float4x4 g_ShadowViewProj[4]; // Offset: 112 Size: 256
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// g_Z_TexSampler sampler NA NA 6 1
// g_Shadow_TexSampler sampler_c NA NA 11 1
// g_Z_Tex texture float4 2d 6 1
// g_Shadow_Tex texture float4 2d 11 1
// SceneBuffer cbuffer NA NA 0 1
// ShadowView_Buffer cbuffer NA NA 10 1
// CamParam_HPixel_Buffer cbuffer NA NA 13 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float
// TEXCOORD 0 xy 1 NONE float xy
// TEXCOORD 1 xyzw 2 NONE float xyzw
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Target 0 xyzw 0 TARGET float xyzw
//
ps_5_0
dcl_globalFlags refactoringAllowed
dcl_immediateConstantBuffer { { 1.000000, 0, 0, 0},
{ 0, 1.000000, 0, 0},
{ 0, 0, 1.000000, 0},
{ 0, 0, 0, 1.000000} }
dcl_constantbuffer cb0[15], immediateIndexed
dcl_constantbuffer cb13[1], immediateIndexed
dcl_constantbuffer cb10[23], immediateIndexed
dcl_sampler s6, mode_default
dcl_sampler s11, mode_comparison
dcl_resource_texture2d (float,float,float,float) t6
dcl_resource_texture2d (float,float,float,float) t11
dcl_input_ps linear v1.xy
dcl_input_ps linear v2.xyzw
dcl_output o0.xyzw
dcl_temps 9
dcl_indexableTemp x0[4], 4
sample_indexable(texture2d)(float,float,float,float) r0.x, v1.xyxx, t6.xyzw, s6
mad r0.x, r0.x, cb13[0].y, cb13[0].x
mul r0.yz, r0.xxxx, v2.xxyx
mul r1.xy, r0.yzyy, v2.zwzz
mov r1.z, -r0.x
mov r1.w, l(1.000000)
dp4 r0.x, r1.xyzw, cb0[12].xyzw
dp4 r0.y, r1.xyzw, cb0[13].xyzw
dp4 r0.z, r1.xyzw, cb0[14].xyzw
mov r0.w, l(1.000000)
dp4 r1.x, r0.xyzw, cb10[7].xyzw
dp4 r1.y, r0.xyzw, cb10[8].xyzw
dp4 r1.w, r0.xyzw, cb10[9].xyzw
dp4 r1.z, r0.xyzw, cb10[10].xyzw
mov x0[0].xyw, r1.xyxz
div r1.x, r1.w, r1.z
mul r2.xyzw, cb10[0].xyzw, cb10[1].xyzw
mad r1.x, -r2.x, l(0.100000), r1.x
mov x0[0].z, r1.x
dp4 r1.x, r0.xyzw, cb10[11].xyzw
dp4 r1.y, r0.xyzw, cb10[12].xyzw
dp4 r1.w, r0.xyzw, cb10[13].xyzw
dp4 r1.z, r0.xyzw, cb10[14].xyzw
mov x0[1].xyw, r1.xyxz
div r1.x, r1.w, r1.z
mad r1.x, -r2.y, l(0.100000), r1.x
mov x0[1].z, r1.x
dp4 r1.x, r0.xyzw, cb10[15].xyzw
dp4 r1.y, r0.xyzw, cb10[16].xyzw
dp4 r1.w, r0.xyzw, cb10[17].xyzw
dp4 r1.z, r0.xyzw, cb10[18].xyzw
mov x0[2].xyw, r1.xyxz
div r1.x, r1.w, r1.z
mad r1.x, -r2.z, l(0.100000), r1.x
mov x0[2].z, r1.x
dp4 r1.x, r0.xyzw, cb10[19].xyzw
dp4 r1.y, r0.xyzw, cb10[20].xyzw
dp4 r1.w, r0.xyzw, cb10[21].xyzw
dp4 r1.z, r0.xyzw, cb10[22].xyzw
mov x0[3].xyw, r1.xyxz
div r0.x, r1.w, r1.z
mad r0.x, -r2.w, l(0.100000), r0.x
mov x0[3].z, r0.x
mov r0.xyzw, l(0,0,0,0)
loop
uge r1.x, r0.y, l(4)
breakc_nz r1.x
mov r1.xyzw, x0[r0.y + 0].xyzw
div r1.xy, r1.xyxx, r1.wwww
max r1.z, |r1.z|, |r1.y|
max r1.z, r1.z, |r1.x|
ge r1.z, l(0.990000), r1.z
if_nz r1.z
mov r0.zw, r1.xxxy
break
endif
iadd r0.xy, r0.xyxx, l(1, 1, 0, 0)
mov r0.zw, r1.xxxy
endloop
ult r0.y, r0.x, l(4)
if_nz r0.y
add r0.y, r0.z, l(1.000000)
and r0.z, r0.x, l(1)
utof r0.z, r0.z
mul r0.z, r0.z, l(0.500000)
mad r1.x, r0.y, l(0.250000), r0.z
add r0.y, -r0.w, l(1.000000)
ushr r0.z, r0.x, l(1)
utof r0.z, r0.z
mul r0.z, r0.z, l(0.500000)
mad r1.y, r0.y, l(0.250000), r0.z
mov r1.z, x0[r0.x + 0].z
dp4 r0.x, cb10[2].xyzw, icb[r0.x + 0].xyzw
mad r0.yzw, r0.xxxx, l(0.000000, 0.500000, 0.500000, 0.000000), r1.xxyz
mad r2.xyz, r0.xxxx, l(-0.500000, -0.500000, 0.000000, 0.000000), r1.xyzx
mad r3.xyz, r0.xxxx, l(-0.500000, 0.500000, 0.000000, 0.000000), r1.xyzx
mad r4.xyz, r0.xxxx, l(0.500000, -0.500000, 0.000000, 0.000000), r1.xyzx
mad r5.xyz, r0.xxxx, l(-1.500000, 0.500000, 0.000000, 0.000000), r1.xyzx
mad r6.xyz, r0.xxxx, l(1.500000, -0.500000, 0.000000, 0.000000), r1.xyzx
mad r7.xyz, r0.xxxx, l(-0.500000, 1.500000, 0.000000, 0.000000), r1.xyzx
mad r8.xyz, r0.xxxx, l(0.500000, -1.500000, 0.000000, 0.000000), r1.xyzx
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.w, r1.xyxx, t11.xxxx, s11, r1.z
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.y, r0.yzyy, t11.xxxx, s11, r0.w
mad r0.y, r0.y, l(0.800000), r1.w
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r2.xyxx, t11.xxxx, s11, r2.z
mad r0.y, r0.z, l(0.800000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r3.xyxx, t11.xxxx, s11, r3.z
mad r0.y, r0.z, l(0.800000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r4.xyxx, t11.xxxx, s11, r4.z
mad r0.y, r0.z, l(0.800000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r5.xyxx, t11.xxxx, s11, r5.z
mad r0.y, r0.z, l(0.450000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r6.xyxx, t11.xxxx, s11, r6.z
mad r0.y, r0.z, l(0.450000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r7.xyxx, t11.xxxx, s11, r7.z
mad r0.y, r0.z, l(0.450000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.z, r8.xyxx, t11.xxxx, s11, r8.z
mad r0.y, r0.z, l(0.450000), r0.y
ne r0.z, r0.y, l(0.000000)
if_nz r0.z
mad r2.xyz, r0.xxxx, l(1.500000, 1.500000, 0.000000, 0.000000), r1.xyzx
mad r3.xyz, r0.xxxx, l(-1.500000, -1.500000, 0.000000, 0.000000), r1.xyzx
mad r4.xyz, r0.xxxx, l(-1.500000, 1.500000, 0.000000, 0.000000), r1.xyzx
mad r5.xyz, r0.xxxx, l(1.500000, -1.500000, 0.000000, 0.000000), r1.xyzx
mad r6.xyz, r0.xxxx, l(1.500000, 0.500000, 0.000000, 0.000000), r1.xyzx
mad r7.xyz, r0.xxxx, l(-1.500000, -0.500000, 0.000000, 0.000000), r1.xyzx
mad r8.xyz, r0.xxxx, l(0.500000, 1.500000, 0.000000, 0.000000), r1.xyzx
mad r0.xzw, r0.xxxx, l(-0.500000, 0.000000, -1.500000, 0.000000), r1.xxyz
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.x, r2.xyxx, t11.xxxx, s11, r2.z
mad r1.x, r1.x, l(0.150000), r0.y
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r3.xyxx, t11.xxxx, s11, r3.z
mad r1.x, r1.y, l(0.150000), r1.x
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r4.xyxx, t11.xxxx, s11, r4.z
mad r1.x, r1.y, l(0.150000), r1.x
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r5.xyxx, t11.xxxx, s11, r5.z
mad r1.x, r1.y, l(0.150000), r1.x
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r6.xyxx, t11.xxxx, s11, r6.z
mad r1.x, r1.y, l(0.450000), r1.x
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r7.xyxx, t11.xxxx, s11, r7.z
mad r1.x, r1.y, l(0.450000), r1.x
sample_c_lz_indexable(texture2d)(float,float,float,float) r1.y, r8.xyxx, t11.xxxx, s11, r8.z
mad r1.x, r1.y, l(0.450000), r1.x
sample_c_lz_indexable(texture2d)(float,float,float,float) r0.x, r0.xzxx, t11.xxxx, s11, r0.w
mad r0.x, r0.x, l(0.450000), r1.x
mul r0.x, r0.x, l(0.119047619)
else
mul r0.x, r0.y, l(0.166666672)
endif
add r0.x, -r0.x, l(1.000000)
else
mov r0.x, l(1.000000)
endif
mov o0.xyzw, r0.xxxx
ret
// Approximately 134 instruction slots used

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/


Hopefully someone with more experience can shed some light. This is the first time i've even attempted to fix a shader.

Cheers!

Like my work? You can send a donation via Paypal to sgs.rules@gmail.com

Windows 7 Pro 64x - Nvidia Driver 398.82 - EVGA 980Ti SC - Optoma HD26 with Edid override - 3D Vision 2 - i7-8700K CPU at 5.0Ghz - ASROCK Z370 Ext 4 Motherboard - 32 GB RAM Corsair Vengeance - 512 GB Samsung SSD 850 Pro - Creative Sound Blaster Z

#31
Posted 03/18/2017 09:27 AM   
here are some screenshots. Sorry i forgot how to post jps files and it's way too late to look up how to so i just attached them. *Edit: nevermind I'm getting an "increase max_post_size" error when trying to add an attachment. fuckit i'm going to bed :/
here are some screenshots. Sorry i forgot how to post jps files and it's way too late to look up how to so i just attached them.

*Edit: nevermind I'm getting an "increase max_post_size" error when trying to add an attachment. fuckit i'm going to bed :/

Like my work? You can send a donation via Paypal to sgs.rules@gmail.com

Windows 7 Pro 64x - Nvidia Driver 398.82 - EVGA 980Ti SC - Optoma HD26 with Edid override - 3D Vision 2 - i7-8700K CPU at 5.0Ghz - ASROCK Z370 Ext 4 Motherboard - 32 GB RAM Corsair Vengeance - 512 GB Samsung SSD 850 Pro - Creative Sound Blaster Z

#32
Posted 03/18/2017 09:45 AM   
@sgsrules Try this (g_Proj needs to divide only that part, not the entire formula): [code]float4 stereo = StereoParams.Load(0); r1.x += stereo.x * (r1.z - stereo.y)/g_Proj._m00;[/code] If that doesn't work properly you need to change signs and test.
@sgsrules

Try this (g_Proj needs to divide only that part, not the entire formula):
float4 stereo = StereoParams.Load(0);
r1.x += stereo.x * (r1.z - stereo.y)/g_Proj._m00;


If that doesn't work properly you need to change signs and test.

MY WEB

Helix Mod - Making 3D Better

My 3D Screenshot Gallery

Like my fixes? you can donate to Paypal: dhr.donation@gmail.com

#33
Posted 03/18/2017 09:49 AM   
[code]r1.x += (stereo.x * (r1.z - stereo.y))/g_Proj._m00;[/code] and [code]r1.x += stereo.x * (r1.z - stereo.y)/g_Proj._m00;[/code] are mathematically the same, i really didn't need the parentheses to begin with. But you were right about the signs. I don't know how i missed that. Anyhow the following: [code]r1.x += stereo.x * (r1.z + stereo.y)/g_Proj._m00;[/code] works perfectly! thanks for pointing that out DHR! as i mentioned before now that the shadows are fixed the game is almost perfect. I fixed the other shadow shader using the same pattern, it affects some of the flying sections as well as a few spotlights in the main game. I'm not sure how to fix the blood splatter decals from enemies though since it happens so quickly, but it's a really minor issue. How do I go about posting this on the helix blog? Woohoo my first fix! and all it took was two lines of code lol!
r1.x += (stereo.x * (r1.z - stereo.y))/g_Proj._m00;

and
r1.x += stereo.x * (r1.z - stereo.y)/g_Proj._m00;

are mathematically the same, i really didn't need the parentheses to begin with. But you were right about the signs. I don't know how i missed that. Anyhow the following:
r1.x += stereo.x * (r1.z + stereo.y)/g_Proj._m00;

works perfectly! thanks for pointing that out DHR!
as i mentioned before now that the shadows are fixed the game is almost perfect. I fixed the other shadow shader using the same pattern, it affects some of the flying sections as well as a few spotlights in the main game. I'm not sure how to fix the blood splatter decals from enemies though since it happens so quickly, but its a really minor issue. How do I go about posting this on the helix blog?

Woohoo my first fix! and all it took was two lines of code lol!

Like my work? You can send a donation via Paypal to sgs.rules@gmail.com

Windows 7 Pro 64x - Nvidia Driver 398.82 - EVGA 980Ti SC - Optoma HD26 with Edid override - 3D Vision 2 - i7-8700K CPU at 5.0Ghz - ASROCK Z370 Ext 4 Motherboard - 32 GB RAM Corsair Vengeance - 512 GB Samsung SSD 850 Pro - Creative Sound Blaster Z

#34
Posted 03/18/2017 10:59 AM   
[quote="sgsrules"] Woohoo my first fix! and all it took was two lines of code lol! [/quote] Congratulations! [quote="sgsrules"]How do I go about posting this on the helix blog?[/quote] Wait a second. Why don't you first wait until you have advanced more in the game or finished it? More issues can arise (like water if you haven't seen it yet, for example). Also, features like HUD depth hotkeys would be cool too, as well as convergence hotkeys (one for gameplay and one for cutscenes at least). You can post a WIP fix here in the meantime. About the blood decals that happen quickly, what happens when you pause the game? If it's only the interface on top of the game image, you can disable its shaders (with an optional hotkey) to hunt the blood shader more easily. If the game stops rendering the game when paused, then you are screwed and will have to hunt very slowly, cycling shaders one by one as you receive hits.
sgsrules said:
Woohoo my first fix! and all it took was two lines of code lol!


Congratulations!

sgsrules said:How do I go about posting this on the helix blog?


Wait a second. Why don't you first wait until you have advanced more in the game or finished it? More issues can arise (like water if you haven't seen it yet, for example). Also, features like HUD depth hotkeys would be cool too, as well as convergence hotkeys (one for gameplay and one for cutscenes at least).

You can post a WIP fix here in the meantime.

About the blood decals that happen quickly, what happens when you pause the game? If it's only the interface on top of the game image, you can disable its shaders (with an optional hotkey) to hunt the blood shader more easily. If the game stops rendering the game when paused, then you are screwed and will have to hunt very slowly, cycling shaders one by one as you receive hits.

CPU: Intel Core i7 7700K @ 4.9GHz
Motherboard: Gigabyte Aorus GA-Z270X-Gaming 5
RAM: GSKILL Ripjaws Z 16GB 3866MHz CL18
GPU: Gainward Phoenix 1080 GLH
Monitor: Asus PG278QR
Speakers: Logitech Z506
Donations account: masterotakusuko@gmail.com

#35
Posted 03/18/2017 11:57 AM   
Congrats on fixing the shadows, sgrules! You can use 3Dmigoto's Frame Analysis feature for those hard to catch issues. In the d3dx.ini file, enable the F8 function: [code]; Dumps out the contents of each render target after every immediate draw call ; for the next frame. Takes up a large amount of space, so disabled by default. analyse_frame = no_modifiers VK_F8[/code] Also enable the Dump All Shaders feature: [code]; save all shaders seen as HLSL code, autofixed or not. 1= HLSL only, 2=HLSL+OriginalASM, 3=HLSL+OriginalASM+RecompiledASM export_hlsl=2[/code] And when the blood comes on the screen, quickly press 'F8'. It will take around a minute and the render targets will be saved as JPS files The files will be dumped to a folder named "FrameAnalysis-..." in the game directory. The folder itself will probably be around 300 MB and will have hundreds of image files. The files will be named something like "001323-o0-vs=b0aeec301e43566e-ps=2b8e85e540f08e32.jps". Once you've identified the shaders for the blood, open the ShaderCache directory & find the files. Copy those files to the ShaderFixes directory. If you only need one eye images, you can add 'mono' to the analyse_options in the d3dx.ini file. For example: [code]analyse_options = log dump_rt_jps clear_rt mono [/code] This doesn't take as long to dump (maybe a few seconds) and also doesn't take up as much space on your drive. If you get few or no image files in the FrameAnalysis folder, you may have to dump the render targets as DDS files instead and view them with a DDS viewer.
Congrats on fixing the shadows, sgrules!

You can use 3Dmigoto's Frame Analysis feature for those hard to catch issues. In the d3dx.ini file, enable the F8 function:

; Dumps out the contents of each render target after every immediate draw call
; for the next frame. Takes up a large amount of space, so disabled by default.

analyse_frame = no_modifiers VK_F8


Also enable the Dump All Shaders feature:

; save all shaders seen as HLSL code, autofixed or not. 1= HLSL only, 2=HLSL+OriginalASM, 3=HLSL+OriginalASM+RecompiledASM
export_hlsl=2



And when the blood comes on the screen, quickly press 'F8'. It will take around a minute and the render targets will be saved as JPS files. The files will be dumped to a folder named "FrameAnalysis-..." in the game directory. The folder itself will probably be around 300 MB and will have hundreds of image files. The files will be named something like "001323-o0-vs=b0aeec301e43566e-ps=2b8e85e540f08e32.jps". Once you've identified the shaders for the blood, open the ShaderCache directory & find the files. Copy those files to the ShaderFixes directory.

If you only need one eye images, you can add 'mono' to the analyse_options in the d3dx.ini file. For example:
analyse_options = log dump_rt_jps clear_rt mono

This doesn't take as long to dump (maybe a few seconds) and also doesn't take up as much space on your drive.



If you get few or no image files in the FrameAnalysis folder, you may have to dump the render targets as DDS files instead and view them with a DDS viewer.

Dual boot Win 7 x64 & Win 10 (1809) | Geforce Drivers 417.35

#36
Posted 03/18/2017 12:34 PM   
@sgsrules Great that it works!! I know that they are mathematically the same...but I've seen some strange behavior a couple of times when using formulas in shaders - using () -, so it is always better to put a "clean" formula. Go further in the game before posting on the blog. Like masterotaku says, more issues can appear. I've never seen a game fixed (even a WIP) with only 2 shaders.
@sgsrules
Great that it works!!

I know that they are mathematically the same...but I've seen some strange behavior a couple of times when using formulas in shaders - using () -, so it is always better to put a "clean" formula.

Go further in the game before posting on the blog. Like masterotaku says, more issues can appear. I've never seen a game fixed (even a WIP) with only 2 shaders.

MY WEB

Helix Mod - Making 3D Better

My 3D Screenshot Gallery

Like my fixes? you can donate to Paypal: dhr.donation@gmail.com

#37
Posted 03/18/2017 12:55 PM   
@sgsrules Thanks just post a WIP in the forums.
@sgsrules

Thanks just post a WIP in the forums.

Gigabyte Z370 Gaming 7 32GB Ram i9-9900K GigaByte Aorus Extreme Gaming 2080TI (single) Game Blaster Z Windows 10 X64 build #17763.195 Define R6 Blackout Case Corsair H110i GTX Sandisk 1TB (OS) SanDisk 2TB SSD (Games) Seagate EXOs 8 and 12 TB drives Samsung UN46c7000 HD TV Samsung UN55HU9000 UHD TVCurrently using ACER PASSIVE EDID override on 3D TVs LG 55

#38
Posted 03/18/2017 01:05 PM   
Good Job, but I would also wait. They might release a patch or 2 and break the fix.
Good Job, but I would also wait. They might release a patch or 2 and break the fix.

#39
Posted 03/18/2017 03:53 PM   
@sgrules Well done, congratulations and thanks !!
@sgrules

Well done, congratulations and thanks !!

Win7 64bit Pro
CPU: 4790K 4.8 GHZ
GPU: Aurus 1080 TI 2.08 GHZ - 100% Watercooled !
Monitor: Asus PG278QR
And lots of ram and HD's ;)

#40
Posted 03/18/2017 04:27 PM   
[quote] Wait a second. Why don't you first wait until you have advanced more in the game or finished it? More issues can arise (like water if you haven't seen it yet, for example). Also, features like HUD depth hotkeys would be cool too, as well as convergence hotkeys (one for gameplay and one for cutscenes at least). [/quote] Yes, I meant posting a WIP since i haven't completed the main game. I've played for about 8 hours and after glancing at a walkthrough it looks like i'm about 75% through the main game, (I've skipped most of the side missions). Amazingly enough the only issues i've run into are the two shadow shaders, the decals which you very rarely see, and one random vertex shader that shows up in a small comet during an ingame cutscene. I think it fairly safe to assume that there won't be to many more issues. 99% of the cut scenes are prerendered so there's not really a reason to add convergence hotkeys. One thing i would like to add would be a way to adjust HUD depth. Right now most HUD elements render at screen depth. It'd be great if some of them like objective pointers or enemy hit points would render at the same depth as their target but i have no idea how to do this. 4everAwake thanks for the pointers, I remember glancing over some posts about frame analysis but had no idea what it was for or how to use it, so this info has definitely helped. I managed to find the broken shader for the decals , I'm not sure if there are more. I tried fixing it by applying the stereo adjustment in view space to the r1 variable before line 74, but it didn't affect anything at all. I also tried to bypass it completely by having the shader just output a float4(0,0,0,0) value for o0 and that made no difference whatsoever. I'm pretty sure it's the right shader because when i have the shader selected in game with 3dm the decals show up as pink boxes. 
I suspect 3dm is not dumping the shader properly becuase there's a bunch of commented text that says "Known bad code for instruction (needs manual fix):" so it's just skipping the shader fix entirely. Is there a way to tell if the shader fix has compilation errors? I'm guessing that it will use the original shader if it does. When i can't tell that a shaders been compiled i test it by modifying one of the variables and reloading it, But I'm sure there's a better way. Anyhow here's the decal shader: [code]// decals? // ---- Created with 3Dmigoto v1.2.56 on Sat Mar 18 08:10:24 2017 cbuffer SceneBuffer : register(b0) { float4x4 g_View : packoffset(c0); float4x4 g_Proj : packoffset(c4); float4x4 g_ViewProjection : packoffset(c8); float4x4 g_ViewInverseMatrix : packoffset(c12); } cbuffer HPixel_Buffer : register(b12) { float4 g_TargetUvParam : packoffset(c0); } cbuffer CamParam_HPixel_Buffer : register(b13) { float4 g_CameraParam : packoffset(c0); float4 g_CameraVec : packoffset(c1); } cbuffer EffectParameterIndex : register(b1) { uint g_EffectParameterIndex : packoffset(c0); } cbuffer ExposureBuffer : register(b7) { float4 g_Exposure : packoffset(c0); } SamplerState g_ColorTextureSampler_s : register(s0); Texture2D<float4> g_ColorTexture : register(t0); Texture2D<float4> g_ZTexture : register(t13); StructuredBuffer<g_EffectPixelCommonBuffers> g_EffectPixelCommonBuffers : register(t27); StructuredBuffer<g_EffectPixelProjectionBuffers> g_EffectPixelProjectionBuffers : register(t28); // 3Dmigoto declarations #define cmp - Texture1D<float4> IniParams : register(t120); Texture2D<float4> StereoParams : register(t125); void main( float4 v0 : SV_POSITION0, float4 v1 : TEXCOORD0, out float4 o0 : SV_Target0) { // Needs manual fix for instruction: // unknown dcl_: dcl_resource_structured t27, 112 // Needs manual fix for instruction: // unknown dcl_: dcl_resource_structured t28, 96 float4 r0,r1,r2,r3,r4; uint4 bitmask, uiDest; float4 fDest; r0.xy = g_TargetUvParam.xy * v0.xy; 
r0.zw = r0.xy + r0.xy; g_ZTexture.GetDimensions(0, fDest.x, fDest.y, fDest.z); r1.xy = fDest.xy; r0.zw = r1.xy * r0.zw; r1.xy = (int2)r0.zw; r1.zw = float2(0,0); r0.z = g_ZTexture.Load(r1.xyz).x; r0.z = r0.z * g_CameraParam.y + g_CameraParam.x; r0.xy = float2(4,4) * r0.xy; r0.xy = r0.xy * float2(1,-1) + float2(-1,1); r0.xy = r0.xy * r0.zz; r1.xy = g_CameraParam.zw * r0.xy; r1.z = -r0.z; r1.w = 1; r0.x = dot(r1.xyzw, g_ViewInverseMatrix._m00_m10_m20_m30); r0.y = dot(r1.xyzw, g_ViewInverseMatrix._m01_m11_m21_m31); r0.z = dot(r1.xyzw, g_ViewInverseMatrix._m02_m12_m22_m32); r0.w = dot(r1.xyzw, g_ViewInverseMatrix._m03_m13_m23_m33); // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.xyzw, cb1[0].x, l(0), t28.xyzw r1.x = g_ColorTextureSampler[]..swiz; r1.y = g_ColorTextureSampler[]..swiz; r1.z = g_ColorTextureSampler[]..swiz; r1.w = g_ColorTextureSampler[]..swiz; // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xyzw, cb1[0].x, l(16), t28.xyzw r2.x = g_ColorTextureSampler[]..swiz; r2.y = g_ColorTextureSampler[]..swiz; r2.z = g_ColorTextureSampler[]..swiz; r2.w = g_ColorTextureSampler[]..swiz; r1.x = dot(r0.xyzw, r1.xyzw); r1.x = 1 + r1.x; r0.w = dot(r0.xyzw, r2.xyzw); r1.y = 1 + r0.w; // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.zw, cb1[0].x, l(80), t28.xxxy r1.z = g_ColorTextureSampler[]..swiz; r1.w = g_ColorTextureSampler[]..swiz; // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xy, cb1[0].x, l(64), t28.xyxx r2.x = g_ColorTextureSampler[]..swiz; r2.y = g_ColorTextureSampler[]..swiz; r1.zw = r1.xy * r1.zw + r2.xy; r2.xyzw = g_ColorTexture.Sample(g_ColorTextureSampler_s, r1.zw).xyzw; r0.w = r2.w * r2.w; // Known bad code for 
instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.z, cb1[0].x, l(64), t27.xxxx r1.z = g_ColorTextureSampler[]..swiz; r1.w = -r2.w * r2.w + r2.w; r0.w = r1.z * r1.w + r0.w; // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r3.xyz, cb1[0].x, l(32), t28.xyzx r3.x = g_ColorTextureSampler[]..swiz; r3.y = g_ColorTextureSampler[]..swiz; r3.z = g_ColorTextureSampler[]..swiz; // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r4.xyzw, cb1[0].x, l(48), t28.xyzw r4.x = g_ColorTextureSampler[]..swiz; r4.y = g_ColorTextureSampler[]..swiz; r4.z = g_ColorTextureSampler[]..swiz; r4.w = g_ColorTextureSampler[]..swiz; r0.xyz = -r3.xyz + r0.xyz; r0.x = dot(r0.xyz, r4.xyz); r0.x = saturate(abs(r0.x) * r4.w); r0.yz = float2(1,1) + -r1.xy; r0.yz = saturate(floor(r0.yz)); r0.xyz = float3(1,1,1) + -r0.xyz; r1.xy = saturate(floor(r1.xy)); r1.xy = float2(1,1) + -r1.xy; r0.y = r0.y * r0.z; r0.y = r0.y * r1.x; r0.y = r0.y * r1.y; r0.x = r0.x * r0.y; r2.w = r0.w * r0.x; r0.xyzw = v1.xyzw * r2.xyzw; r1.x = cmp(9.99999975e-005 >= r0.w); if (r1.x != 0) discard; r1.xyz = r0.xyz * g_Exposure.xxx + -r0.xyz; r1.xyz = g_Exposure.yyy * r1.xyz; // Known bad code for instruction (needs manual fix): ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.w, cb1[0].x, l(76), t27.xxxx r1.w = g_ColorTextureSampler[]..swiz; o0.xyz = abs(r1.www) * r1.xyz + r0.xyz; o0.w = saturate(r0.w); return; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 // // using 3Dmigoto v1.2.56 on Sat Mar 18 08:10:24 2017 // // // Buffer Definitions: // // cbuffer SceneBuffer // { // // float4x4 g_View; // Offset: 0 Size: 64 [unused] // float4x4 g_Proj; // Offset: 64 Size: 64 [unused] 
// float4x4 g_ViewProjection; // Offset: 128 Size: 64 [unused] // float4x4 g_ViewInverseMatrix; // Offset: 192 Size: 64 // // } // // cbuffer HPixel_Buffer // { // // float4 g_TargetUvParam; // Offset: 0 Size: 16 // // } // // cbuffer CamParam_HPixel_Buffer // { // // float4 g_CameraParam; // Offset: 0 Size: 16 // float4 g_CameraVec; // Offset: 16 Size: 16 [unused] // // } // // cbuffer EffectParameterIndex // { // // uint g_EffectParameterIndex; // Offset: 0 Size: 4 // // } // // cbuffer ExposureBuffer // { // // float4 g_Exposure; // Offset: 0 Size: 16 // // } // // Resource bind info for g_EffectPixelCommonBuffers // { // // struct EffectPixelCommonBuffer // { // // float4 m_ToneCurveRate; // Offset: 0 // float4 m_DepthRate; // Offset: 16 // float4 m_ColorCorrection; // Offset: 32 // float4 m_FogColor; // Offset: 48 // float4 m_Gamma; // Offset: 64 // float4 m_AlphaCurveRate; // Offset: 80 // float4 m_DitherRate; // Offset: 96 // // } $Element; // Offset: 0 Size: 112 // // } // // Resource bind info for g_EffectPixelProjectionBuffers // { // // struct EffectPixelProjectionBuffer // { // // float4 m_WorldInverseMatrixCol0;// Offset: 0 // float4 m_WorldInverseMatrixCol1;// Offset: 16 // float4 m_ProjectionCenter; // Offset: 32 // float4 m_ProjectionDir; // Offset: 48 // float4 m_ProjectionUvOffset; // Offset: 64 // float4 m_ProjectionUvTile; // Offset: 80 // // } $Element; // Offset: 0 Size: 96 // // } // // // Resource Bindings: // // Name Type Format Dim Slot Elements // ------------------------------ ---------- ------- ----------- ---- -------- // g_ColorTextureSampler sampler NA NA 0 1 // g_ColorTexture texture float4 2d 0 1 // g_ZTexture texture float4 2d 13 1 // g_EffectPixelCommonBuffers texture struct r/o 27 1 // g_EffectPixelProjectionBuffers texture struct r/o 28 1 // SceneBuffer cbuffer NA NA 0 1 // EffectParameterIndex cbuffer NA NA 1 1 // ExposureBuffer cbuffer NA NA 7 1 // HPixel_Buffer cbuffer NA NA 12 1 // CamParam_HPixel_Buffer cbuffer NA NA 13 1 
// // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float xy // TEXCOORD 0 xyzw 1 NONE float xyzw // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer cb0[16], immediateIndexed dcl_constantbuffer cb12[1], immediateIndexed dcl_constantbuffer cb13[1], immediateIndexed dcl_constantbuffer cb1[1], immediateIndexed dcl_constantbuffer cb7[1], immediateIndexed dcl_sampler s0, mode_default dcl_resource_texture2d (float,float,float,float) t0 dcl_resource_texture2d (float,float,float,float) t13 dcl_resource_structured t27, 112 dcl_resource_structured t28, 96 dcl_input_ps_siv linear noperspective v0.xy, position dcl_input_ps linear v1.xyzw dcl_output o0.xyzw dcl_temps 5 mul r0.xy, v0.xyxx, cb12[0].xyxx add r0.zw, r0.xxxy, r0.xxxy resinfo_indexable(texture2d)(float,float,float,float) r1.xy, l(0), t13.xyzw mul r0.zw, r0.zzzw, r1.xxxy ftoi r1.xy, r0.zwzz mov r1.zw, l(0,0,0,0) ld_indexable(texture2d)(float,float,float,float) r0.z, r1.xyzw, t13.yzxw mad r0.z, r0.z, cb13[0].y, cb13[0].x mul r0.xy, r0.xyxx, l(4.000000, 4.000000, 0.000000, 0.000000) mad r0.xy, r0.xyxx, l(1.000000, -1.000000, 0.000000, 0.000000), l(-1.000000, 1.000000, 0.000000, 0.000000) mul r0.xy, r0.zzzz, r0.xyxx mul r1.xy, r0.xyxx, cb13[0].zwzz mov r1.z, -r0.z mov r1.w, l(1.000000) dp4 r0.x, r1.xyzw, cb0[12].xyzw dp4 r0.y, r1.xyzw, cb0[13].xyzw dp4 r0.z, r1.xyzw, cb0[14].xyzw dp4 r0.w, r1.xyzw, cb0[15].xyzw ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.xyzw, cb1[0].x, l(0), t28.xyzw ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xyzw, cb1[0].x, l(16), t28.xyzw dp4 r1.x, r0.xyzw, r1.xyzw add r1.x, r1.x, l(1.000000) dp4 r0.w, 
r0.xyzw, r2.xyzw add r1.y, r0.w, l(1.000000) ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.zw, cb1[0].x, l(80), t28.xxxy ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xy, cb1[0].x, l(64), t28.xyxx mad r1.zw, r1.xxxy, r1.zzzw, r2.xxxy sample_indexable(texture2d)(float,float,float,float) r2.xyzw, r1.zwzz, t0.xyzw, s0 mul r0.w, r2.w, r2.w ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.z, cb1[0].x, l(64), t27.xxxx mad r1.w, -r2.w, r2.w, r2.w mad r0.w, r1.z, r1.w, r0.w ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r3.xyz, cb1[0].x, l(32), t28.xyzx ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r4.xyzw, cb1[0].x, l(48), t28.xyzw add r0.xyz, r0.xyzx, -r3.xyzx dp3 r0.x, r0.xyzx, r4.xyzx mul_sat r0.x, r4.w, |r0.x| add r0.yz, -r1.xxyx, l(0.000000, 1.000000, 1.000000, 0.000000) round_ni_sat r0.yz, r0.yyzy add r0.xyz, -r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000) round_ni_sat r1.xy, r1.xyxx add r1.xy, -r1.xyxx, l(1.000000, 1.000000, 0.000000, 0.000000) mul r0.y, r0.z, r0.y mul r0.y, r1.x, r0.y mul r0.y, r1.y, r0.y mul r0.x, r0.y, r0.x mul r2.w, r0.x, r0.w mul r0.xyzw, r2.xyzw, v1.xyzw ge r1.x, l(0.000100), r0.w discard_nz r1.x mad r1.xyz, r0.xyzx, cb7[0].xxxx, -r0.xyzx mul r1.xyz, r1.xyzx, cb7[0].yyyy ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.w, cb1[0].x, l(76), t27.xxxx mad o0.xyz, |r1.wwww|, r1.xyzx, r0.xyzx mov_sat o0.w, r0.w ret // Approximately 56 instruction slots used ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ [/code]

Wait a second. Why don't you first wait until you have advanced more in the game or finished it? More issues can arise (like water if you haven't seen it yet, for example). Also, features like HUD depth hotkeys would be cool too, as well as convergence hotkeys (one for gameplay and one for cutscenes at least).


Yes, I meant posting a WIP since i haven't completed the main game. I've played for about 8 hours and after glancing at a walkthrough it looks like i'm about 75% through the main game, (I've skipped most of the side missions). Amazingly enough the only issues i've run into are the two shadow shaders, the decals which you very rarely see, and one random vertex shader that shows up in a small comet during an ingame cutscene. I think it's fairly safe to assume that there won't be too many more issues. 99% of the cut scenes are prerendered so there's not really a reason to add convergence hotkeys.

One thing i would like to add would be a way to adjust HUD depth. Right now most HUD elements render at screen depth. It'd be great if some of them like objective pointers or enemy hit points would render at the same depth as their target but i have no idea how to do this.

4everAwake thanks for the pointers, I remember glancing over some posts about frame analysis but had no idea what it was for or how to use it, so this info has definitely helped.

I managed to find the broken shader for the decals, I'm not sure if there are more. I tried fixing it by applying the stereo adjustment in view space to the r1 variable before line 74, but it didn't affect anything at all. I also tried to bypass it completely by having the shader just output a float4(0,0,0,0) value for o0 and that made no difference whatsoever. I'm pretty sure it's the right shader because when i have the shader selected in game with 3dm the decals show up as pink boxes. I suspect 3dm is not dumping the shader properly because there's a bunch of commented text that says "Known bad code for instruction (needs manual fix):" so it's just skipping the shader fix entirely.

Is there a way to tell if the shader fix has compilation errors? I'm guessing that it will use the original shader if it does. When i can't tell whether a shader has been compiled i test it by modifying one of the variables and reloading it, but I'm sure there's a better way.

Anyhow here's the decal shader:
// decals?
// ---- Created with 3Dmigoto v1.2.56 on Sat Mar 18 08:10:24 2017

cbuffer SceneBuffer : register(b0)
{
  float4x4 g_View : packoffset(c0);
  float4x4 g_Proj : packoffset(c4);
  float4x4 g_ViewProjection : packoffset(c8);
  float4x4 g_ViewInverseMatrix : packoffset(c12);
}

cbuffer HPixel_Buffer : register(b12)
{
  float4 g_TargetUvParam : packoffset(c0);
}

cbuffer CamParam_HPixel_Buffer : register(b13)
{
  float4 g_CameraParam : packoffset(c0);
  float4 g_CameraVec : packoffset(c1);
}

cbuffer EffectParameterIndex : register(b1)
{
  uint g_EffectParameterIndex : packoffset(c0);
}

cbuffer ExposureBuffer : register(b7)
{
  float4 g_Exposure : packoffset(c0);
}

// Element layout of t27 (stride 112), copied from the shader's reflection
// data ("Resource bind info for g_EffectPixelCommonBuffers"). The decompiler
// did not emit this struct, which left the StructuredBuffer declaration
// referring to an undefined type.
struct EffectPixelCommonBuffer
{
  float4 m_ToneCurveRate;    // Offset:  0
  float4 m_DepthRate;        // Offset: 16
  float4 m_ColorCorrection;  // Offset: 32
  float4 m_FogColor;         // Offset: 48
  float4 m_Gamma;            // Offset: 64
  float4 m_AlphaCurveRate;   // Offset: 80
  float4 m_DitherRate;       // Offset: 96
};

// Element layout of t28 (stride 96), copied from the shader's reflection
// data ("Resource bind info for g_EffectPixelProjectionBuffers").
struct EffectPixelProjectionBuffer
{
  float4 m_WorldInverseMatrixCol0;  // Offset:  0
  float4 m_WorldInverseMatrixCol1;  // Offset: 16
  float4 m_ProjectionCenter;        // Offset: 32
  float4 m_ProjectionDir;           // Offset: 48
  float4 m_ProjectionUvOffset;      // Offset: 64
  float4 m_ProjectionUvTile;        // Offset: 80
};

SamplerState g_ColorTextureSampler_s : register(s0);
Texture2D<float4> g_ColorTexture : register(t0);
Texture2D<float4> g_ZTexture : register(t13);
StructuredBuffer<EffectPixelCommonBuffer> g_EffectPixelCommonBuffers : register(t27);
StructuredBuffer<EffectPixelProjectionBuffer> g_EffectPixelProjectionBuffers : register(t28);


// 3Dmigoto declarations
#define cmp -
Texture1D<float4> IniParams : register(t120);
Texture2D<float4> StereoParams : register(t125);


// Projected decal pixel shader.
//
// Reads the depth buffer at the current pixel, rebuilds a position from it,
// transforms that position by g_ViewInverseMatrix, projects it into the
// decal's UV space using the per-effect projection buffer, samples the decal
// texture and outputs the result modulated by v1.
//
// The decompiler output contained raw "ld_structured_indexable" assembly
// lines and "g_ColorTextureSampler[]..swiz" placeholders, which are not valid
// HLSL. Each of those loads is replaced below with the struct member that
// sits at the byte offset used by the original assembly instruction.
void main(
  float4 v0 : SV_POSITION0,
  float4 v1 : TEXCOORD0,
  out float4 o0 : SV_Target0)
{
  float4 r0,r1,r2,r3,r4;
  float4 fDest;

  // Both structured buffers are indexed by cb1[0].x in the original assembly,
  // which is g_EffectParameterIndex.
  const EffectPixelCommonBuffer fxCommon = g_EffectPixelCommonBuffers[g_EffectParameterIndex];
  const EffectPixelProjectionBuffer fxProj = g_EffectPixelProjectionBuffers[g_EffectParameterIndex];

  // Fetch the depth texel for this pixel and apply the g_CameraParam.xy
  // scale/offset to it.
  r0.xy = g_TargetUvParam.xy * v0.xy;
  r0.zw = r0.xy + r0.xy;
  g_ZTexture.GetDimensions(0, fDest.x, fDest.y, fDest.z);
  r1.xy = fDest.xy;
  r0.zw = r1.xy * r0.zw;
  r1.xy = (int2)r0.zw;
  r1.zw = float2(0,0);
  r0.z = g_ZTexture.Load(r1.xyz).x;
  r0.z = r0.z * g_CameraParam.y + g_CameraParam.x;

  // Rebuild the position that is fed to g_ViewInverseMatrix.
  r0.xy = float2(4,4) * r0.xy;
  r0.xy = r0.xy * float2(1,-1) + float2(-1,1);
  r0.xy = r0.xy * r0.zz;
  r1.xy = g_CameraParam.zw * r0.xy;
  r1.z = -r0.z;
  r1.w = 1;

  // NOTE(review): r1 is built like the position in the shadow shader from
  // this thread, so the stereo correction that worked there presumably
  // belongs here too. It is left disabled because it is untested on this
  // shader; confirm the sign in game before enabling:
  //   float4 stereo = StereoParams.Load(0);
  //   r1.x += stereo.x * (r1.z + stereo.y) / g_Proj._m00;

  r0.x = dot(r1.xyzw, g_ViewInverseMatrix._m00_m10_m20_m30);
  r0.y = dot(r1.xyzw, g_ViewInverseMatrix._m01_m11_m21_m31);
  r0.z = dot(r1.xyzw, g_ViewInverseMatrix._m02_m12_m22_m32);
  r0.w = dot(r1.xyzw, g_ViewInverseMatrix._m03_m13_m23_m33);

  // ld_structured ... l(0), t28.xyzw  /  l(16), t28.xyzw
  r1.xyzw = fxProj.m_WorldInverseMatrixCol0.xyzw;
  r2.xyzw = fxProj.m_WorldInverseMatrixCol1.xyzw;
  r1.x = dot(r0.xyzw, r1.xyzw);
  r1.x = 1 + r1.x;
  r0.w = dot(r0.xyzw, r2.xyzw);
  r1.y = 1 + r0.w;

  // ld_structured r1.zw, l(80), t28.xxxy  /  r2.xy, l(64), t28.xyxx
  r1.zw = fxProj.m_ProjectionUvTile.xy;
  r2.xy = fxProj.m_ProjectionUvOffset.xy;
  r1.zw = r1.xy * r1.zw + r2.xy;
  r2.xyzw = g_ColorTexture.Sample(g_ColorTextureSampler_s, r1.zw).xyzw;
  r0.w = r2.w * r2.w;

  // ld_structured r1.z, l(64), t27.xxxx
  r1.z = fxCommon.m_Gamma.x;
  r1.w = -r2.w * r2.w + r2.w;
  r0.w = r1.z * r1.w + r0.w;

  // ld_structured r3.xyz, l(32), t28.xyzx  /  r4.xyzw, l(48), t28.xyzw
  r3.xyz = fxProj.m_ProjectionCenter.xyz;
  r4.xyzw = fxProj.m_ProjectionDir.xyzw;
  r0.xyz = -r3.xyz + r0.xyz;
  r0.x = dot(r0.xyz, r4.xyz);
  r0.x = saturate(abs(r0.x) * r4.w);

  // Build a 0/1 mask from r1.xy (the values used to form the decal UV above);
  // the alpha is multiplied by it.
  r0.yz = float2(1,1) + -r1.xy;
  r0.yz = saturate(floor(r0.yz));
  r0.xyz = float3(1,1,1) + -r0.xyz;
  r1.xy = saturate(floor(r1.xy));
  r1.xy = float2(1,1) + -r1.xy;
  r0.y = r0.y * r0.z;
  r0.y = r0.y * r1.x;
  r0.y = r0.y * r1.y;
  r0.x = r0.x * r0.y;
  r2.w = r0.w * r0.x;
  r0.xyzw = v1.xyzw * r2.xyzw;

  // Drop pixels whose alpha is at or below the 1e-4 threshold.
  r1.x = cmp(9.99999975e-005 >= r0.w);
  if (r1.x != 0) discard;

  r1.xyz = r0.xyz * g_Exposure.xxx + -r0.xyz;
  r1.xyz = g_Exposure.yyy * r1.xyz;

  // ld_structured r1.w, l(76), t27.xxxx  (offset 76 = m_Gamma at 64, +12 bytes)
  r1.w = fxCommon.m_Gamma.w;
  o0.xyz = abs(r1.www) * r1.xyz + r0.xyz;
  o0.w = saturate(r0.w);
  return;
}

/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
// using 3Dmigoto v1.2.56 on Sat Mar 18 08:10:24 2017
//
//
// Buffer Definitions:
//
// cbuffer SceneBuffer
// {
//
// float4x4 g_View; // Offset: 0 Size: 64 [unused]
// float4x4 g_Proj; // Offset: 64 Size: 64 [unused]
// float4x4 g_ViewProjection; // Offset: 128 Size: 64 [unused]
// float4x4 g_ViewInverseMatrix; // Offset: 192 Size: 64
//
// }
//
// cbuffer HPixel_Buffer
// {
//
// float4 g_TargetUvParam; // Offset: 0 Size: 16
//
// }
//
// cbuffer CamParam_HPixel_Buffer
// {
//
// float4 g_CameraParam; // Offset: 0 Size: 16
// float4 g_CameraVec; // Offset: 16 Size: 16 [unused]
//
// }
//
// cbuffer EffectParameterIndex
// {
//
// uint g_EffectParameterIndex; // Offset: 0 Size: 4
//
// }
//
// cbuffer ExposureBuffer
// {
//
// float4 g_Exposure; // Offset: 0 Size: 16
//
// }
//
// Resource bind info for g_EffectPixelCommonBuffers
// {
//
// struct EffectPixelCommonBuffer
// {
//
// float4 m_ToneCurveRate; // Offset: 0
// float4 m_DepthRate; // Offset: 16
// float4 m_ColorCorrection; // Offset: 32
// float4 m_FogColor; // Offset: 48
// float4 m_Gamma; // Offset: 64
// float4 m_AlphaCurveRate; // Offset: 80
// float4 m_DitherRate; // Offset: 96
//
// } $Element; // Offset: 0 Size: 112
//
// }
//
// Resource bind info for g_EffectPixelProjectionBuffers
// {
//
// struct EffectPixelProjectionBuffer
// {
//
// float4 m_WorldInverseMatrixCol0;// Offset: 0
// float4 m_WorldInverseMatrixCol1;// Offset: 16
// float4 m_ProjectionCenter; // Offset: 32
// float4 m_ProjectionDir; // Offset: 48
// float4 m_ProjectionUvOffset; // Offset: 64
// float4 m_ProjectionUvTile; // Offset: 80
//
// } $Element; // Offset: 0 Size: 96
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// g_ColorTextureSampler sampler NA NA 0 1
// g_ColorTexture texture float4 2d 0 1
// g_ZTexture texture float4 2d 13 1
// g_EffectPixelCommonBuffers texture struct r/o 27 1
// g_EffectPixelProjectionBuffers texture struct r/o 28 1
// SceneBuffer cbuffer NA NA 0 1
// EffectParameterIndex cbuffer NA NA 1 1
// ExposureBuffer cbuffer NA NA 7 1
// HPixel_Buffer cbuffer NA NA 12 1
// CamParam_HPixel_Buffer cbuffer NA NA 13 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float xy
// TEXCOORD 0 xyzw 1 NONE float xyzw
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Target 0 xyzw 0 TARGET float xyzw
//
ps_5_0
dcl_globalFlags refactoringAllowed
dcl_constantbuffer cb0[16], immediateIndexed
dcl_constantbuffer cb12[1], immediateIndexed
dcl_constantbuffer cb13[1], immediateIndexed
dcl_constantbuffer cb1[1], immediateIndexed
dcl_constantbuffer cb7[1], immediateIndexed
dcl_sampler s0, mode_default
dcl_resource_texture2d (float,float,float,float) t0
dcl_resource_texture2d (float,float,float,float) t13
dcl_resource_structured t27, 112
dcl_resource_structured t28, 96
dcl_input_ps_siv linear noperspective v0.xy, position
dcl_input_ps linear v1.xyzw
dcl_output o0.xyzw
dcl_temps 5
mul r0.xy, v0.xyxx, cb12[0].xyxx
add r0.zw, r0.xxxy, r0.xxxy
resinfo_indexable(texture2d)(float,float,float,float) r1.xy, l(0), t13.xyzw
mul r0.zw, r0.zzzw, r1.xxxy
ftoi r1.xy, r0.zwzz
mov r1.zw, l(0,0,0,0)
ld_indexable(texture2d)(float,float,float,float) r0.z, r1.xyzw, t13.yzxw
mad r0.z, r0.z, cb13[0].y, cb13[0].x
mul r0.xy, r0.xyxx, l(4.000000, 4.000000, 0.000000, 0.000000)
mad r0.xy, r0.xyxx, l(1.000000, -1.000000, 0.000000, 0.000000), l(-1.000000, 1.000000, 0.000000, 0.000000)
mul r0.xy, r0.zzzz, r0.xyxx
mul r1.xy, r0.xyxx, cb13[0].zwzz
mov r1.z, -r0.z
mov r1.w, l(1.000000)
dp4 r0.x, r1.xyzw, cb0[12].xyzw
dp4 r0.y, r1.xyzw, cb0[13].xyzw
dp4 r0.z, r1.xyzw, cb0[14].xyzw
dp4 r0.w, r1.xyzw, cb0[15].xyzw
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.xyzw, cb1[0].x, l(0), t28.xyzw
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xyzw, cb1[0].x, l(16), t28.xyzw
dp4 r1.x, r0.xyzw, r1.xyzw
add r1.x, r1.x, l(1.000000)
dp4 r0.w, r0.xyzw, r2.xyzw
add r1.y, r0.w, l(1.000000)
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.zw, cb1[0].x, l(80), t28.xxxy
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xy, cb1[0].x, l(64), t28.xyxx
mad r1.zw, r1.xxxy, r1.zzzw, r2.xxxy
sample_indexable(texture2d)(float,float,float,float) r2.xyzw, r1.zwzz, t0.xyzw, s0
mul r0.w, r2.w, r2.w
ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.z, cb1[0].x, l(64), t27.xxxx
mad r1.w, -r2.w, r2.w, r2.w
mad r0.w, r1.z, r1.w, r0.w
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r3.xyz, cb1[0].x, l(32), t28.xyzx
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r4.xyzw, cb1[0].x, l(48), t28.xyzw
add r0.xyz, r0.xyzx, -r3.xyzx
dp3 r0.x, r0.xyzx, r4.xyzx
mul_sat r0.x, r4.w, |r0.x|
add r0.yz, -r1.xxyx, l(0.000000, 1.000000, 1.000000, 0.000000)
round_ni_sat r0.yz, r0.yyzy
add r0.xyz, -r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000)
round_ni_sat r1.xy, r1.xyxx
add r1.xy, -r1.xyxx, l(1.000000, 1.000000, 0.000000, 0.000000)
mul r0.y, r0.z, r0.y
mul r0.y, r1.x, r0.y
mul r0.y, r1.y, r0.y
mul r0.x, r0.y, r0.x
mul r2.w, r0.x, r0.w
mul r0.xyzw, r2.xyzw, v1.xyzw
ge r1.x, l(0.000100), r0.w
discard_nz r1.x
mad r1.xyz, r0.xyzx, cb7[0].xxxx, -r0.xyzx
mul r1.xyz, r1.xyzx, cb7[0].yyyy
ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.w, cb1[0].x, l(76), t27.xxxx
mad o0.xyz, |r1.wwww|, r1.xyzx, r0.xyzx
mov_sat o0.w, r0.w
ret
// Approximately 56 instruction slots used

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

Like my work? You can send a donation via Paypal to sgs.rules@gmail.com

Windows 7 Pro 64x - Nvidia Driver 398.82 - EVGA 980Ti SC - Optoma HD26 with Edid override - 3D Vision 2 - i7-8700K CPU at 5.0Ghz - ASROCK Z370 Ext 4 Motherboard - 32 GB RAM Corsair Vengeance - 512 GB Samsung SSD 850 Pro - Creative Sound Blaster Z

#41
Posted 03/19/2017 12:43 AM   
what settings do you use? SGSrules?
what settings do you use? SGSrules?

I'm ishiki, forum screwed up my name.

7700k @4.7 GHZ, 16GBDDR4@3466MHZ, 2080 Ti

#42
Posted 03/19/2017 12:51 AM   
@ishiki, game settings? I'm running everything maxed out at 720p with 8x AA and get a solid 60 fps with 3D on. I normally use 1440p with DSR in other games, but in this game doing so drops my fps to the 40 to 60 range, and with 3D on it's 20 to the mid 30s, so I'd rather get a higher framerate at a lower resolution. It looks like the game is highly dependent on resolution. Not the best port, but it works; let's hope the dev patches it soon.
@ishiki, game settings? I'm running everything maxed out at 720p with 8x AA and get a solid 60 fps with 3D on. I normally use 1440p with DSR in other games, but in this game doing so drops my fps to the 40 to 60 range, and with 3D on it's 20 to the mid 30s, so I'd rather get a higher framerate at a lower resolution. It looks like the game is highly dependent on resolution. Not the best port, but it works; let's hope the dev patches it soon.

Like my work? You can send a donation via Paypal to sgs.rules@gmail.com

Windows 7 Pro 64x - Nvidia Driver 398.82 - EVGA 980Ti SC - Optoma HD26 with Edid override - 3D Vision 2 - i7-8700K CPU at 5.0Ghz - ASROCK Z370 Ext 4 Motherboard - 32 GB RAM Corsair Vengeance - 512 GB Samsung SSD 850 Pro - Creative Sound Blaster Z

#43
Posted 03/19/2017 12:59 AM   
It's now set up on the shared accounts. If any other shaderhackers want to take a look, it's ready to go. Thanks End0fw0r1d!
It's now set up on the shared accounts. If any other shaderhackers want to take a look, it's ready to go. Thanks End0fw0r1d!

#44
Posted 03/19/2017 04:01 AM   
[quote="sgsrules"]One thing i would like to add would be a way to adjust HUD depth. Right now most HUD elements render at screen depth. It'd be great if some of them like objective pointers or enemy hit points would render at the same depth as their target but i have no idea how to do this.[/quote] It might look ok if you just place the floating HUD elements at a fixed depth. Post the VS for the floating HUD elements if you need further assistance with this. Regarding auto-hud depth adjustments, someone else can give a better answer, since I don't have that much experience with that in 3Dmigoto. [quote="sgsrules"]I managed to find the broken shader for the decals , I'm not sure if there are more. I tried fixing it by applying the stereo adjustment in view space to the r1 variable before line 74, but it didn't affect anything at all. I also tried to bypass it completely by having the shader just output a float4(0,0,0,0) value for o0 and that made no difference whatsoever. I'm pretty sure it's the right shader because when i have the shader selected in game with 3dm the decals show up as pink boxes. I suspect 3dm is not dumping the shader properly becuase there's a bunch of commented text that says "Known bad code for instruction (needs manual fix):" so it's just skipping the shader fix entirely. Is there a way to tell if the shader fix has compilation errors? I'm guessing that it will use the original shader if it does. When i can't tell that a shaders been compiled i test it by modifying one of the variables and reloading it, But I'm sure there's a better way.[/quote] It can be tricky finding HLSL compiling errors. In some cases, 3Dmigoto won't detect any errors with the shaders, but will still have compiling issues. (You may see flickering, distorted or missing textures). If your case, as you correctly stated, 3Dmigoto is defaulting to the original shader because of the errors compiling the HLSL. 
For any HLSL issues, you can try editing the ASM version of the shader instead. 1) In the ShaderFixes directory, delete "_replace" at the end of the filename (for example: change [i] 05d54df8550849bf-ps_replace.txt[/i] to [i]05d54df8550849bf-ps.txt[/i] ) 2) Open the shader file and remove the HLSL portion. 3) Edit the ASM decal shader like this: [code]// decals? // Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 // // using 3Dmigoto v1.2.56 on Sat Mar 18 08:10:24 2017 // // // Buffer Definitions: // // cbuffer SceneBuffer // { // // float4x4 g_View; // Offset: 0 Size: 64 [unused] // float4x4 g_Proj; // Offset: 64 Size: 64 [unused] // float4x4 g_ViewProjection; // Offset: 128 Size: 64 [unused] // float4x4 g_ViewInverseMatrix; // Offset: 192 Size: 64 // // } // // cbuffer HPixel_Buffer // { // // float4 g_TargetUvParam; // Offset: 0 Size: 16 // // } // // cbuffer CamParam_HPixel_Buffer // { // // float4 g_CameraParam; // Offset: 0 Size: 16 // float4 g_CameraVec; // Offset: 16 Size: 16 [unused] // // } // // cbuffer EffectParameterIndex // { // // uint g_EffectParameterIndex; // Offset: 0 Size: 4 // // } // // cbuffer ExposureBuffer // { // // float4 g_Exposure; // Offset: 0 Size: 16 // // } // // Resource bind info for g_EffectPixelCommonBuffers // { // // struct EffectPixelCommonBuffer // { // // float4 m_ToneCurveRate; // Offset: 0 // float4 m_DepthRate; // Offset: 16 // float4 m_ColorCorrection; // Offset: 32 // float4 m_FogColor; // Offset: 48 // float4 m_Gamma; // Offset: 64 // float4 m_AlphaCurveRate; // Offset: 80 // float4 m_DitherRate; // Offset: 96 // // } $Element; // Offset: 0 Size: 112 // // } // // Resource bind info for g_EffectPixelProjectionBuffers // { // // struct EffectPixelProjectionBuffer // { // // float4 m_WorldInverseMatrixCol0;// Offset: 0 // float4 m_WorldInverseMatrixCol1;// Offset: 16 // float4 m_ProjectionCenter; // Offset: 32 // float4 m_ProjectionDir; // Offset: 48 // float4 m_ProjectionUvOffset; // Offset: 64 // float4 
m_ProjectionUvTile; // Offset: 80 // // } $Element; // Offset: 0 Size: 96 // // } // // // Resource Bindings: // // Name Type Format Dim Slot Elements // ------------------------------ ---------- ------- ----------- ---- -------- // g_ColorTextureSampler sampler NA NA 0 1 // g_ColorTexture texture float4 2d 0 1 // g_ZTexture texture float4 2d 13 1 // g_EffectPixelCommonBuffers texture struct r/o 27 1 // g_EffectPixelProjectionBuffers texture struct r/o 28 1 // SceneBuffer cbuffer NA NA 0 1 // EffectParameterIndex cbuffer NA NA 1 1 // ExposureBuffer cbuffer NA NA 7 1 // HPixel_Buffer cbuffer NA NA 12 1 // CamParam_HPixel_Buffer cbuffer NA NA 13 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float xy // TEXCOORD 0 xyzw 1 NONE float xyzw // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer cb0[16], immediateIndexed dcl_constantbuffer cb12[1], immediateIndexed dcl_constantbuffer cb13[1], immediateIndexed dcl_constantbuffer cb1[1], immediateIndexed dcl_constantbuffer cb7[1], immediateIndexed dcl_sampler s0, mode_default dcl_resource_texture2d (float,float,float,float) t0 dcl_resource_texture2d (float,float,float,float) t13 dcl_resource_structured t27, 112 dcl_resource_structured t28, 96 dcl_input_ps_siv linear noperspective v0.xy, position dcl_input_ps linear v1.xyzw dcl_output o0.xyzw //declare 3d vision sampler dcl_resource_texture2d (float,float,float,float) t125 //increase temporary registers from 5 to 6 dcl_temps 6 mul r0.xy, v0.xyxx, cb12[0].xyxx add r0.zw, r0.xxxy, r0.xxxy resinfo_indexable(texture2d)(float,float,float,float) r1.xy, l(0), t13.xyzw mul r0.zw, r0.zzzw, r1.xxxy ftoi r1.xy, r0.zwzz mov r1.zw, l(0,0,0,0) 
ld_indexable(texture2d)(float,float,float,float) r0.z, r1.xyzw, t13.yzxw mad r0.z, r0.z, cb13[0].y, cb13[0].x mul r0.xy, r0.xyxx, l(4.000000, 4.000000, 0.000000, 0.000000) mad r0.xy, r0.xyxx, l(1.000000, -1.000000, 0.000000, 0.000000), l(-1.000000, 1.000000, 0.000000, 0.000000) mul r0.xy, r0.zzzz, r0.xyxx mul r1.xy, r0.xyxx, cb13[0].zwzz mov r1.z, -r0.z mov r1.w, l(1.000000) //This is the ASM version of the formula you used for the shadows. //(I'm not sure if this will work) ld_indexable(texture2d)(float,float,float,float) r5.xyzw, l(0, 0, 0, 0), t125.xyzw add r5.y, r1.z, r5.y mul r5.x, r5.x, r5.y rcp r5.w, cb0[4].x mul r5.x, r5.x, r5.w add r1.x, r1.x, r5.x dp4 r0.x, r1.xyzw, cb0[12].xyzw dp4 r0.y, r1.xyzw, cb0[13].xyzw dp4 r0.z, r1.xyzw, cb0[14].xyzw dp4 r0.w, r1.xyzw, cb0[15].xyzw ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.xyzw, cb1[0].x, l(0), t28.xyzw ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xyzw, cb1[0].x, l(16), t28.xyzw dp4 r1.x, r0.xyzw, r1.xyzw add r1.x, r1.x, l(1.000000) dp4 r0.w, r0.xyzw, r2.xyzw add r1.y, r0.w, l(1.000000) ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.zw, cb1[0].x, l(80), t28.xxxy ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xy, cb1[0].x, l(64), t28.xyxx mad r1.zw, r1.xxxy, r1.zzzw, r2.xxxy sample_indexable(texture2d)(float,float,float,float) r2.xyzw, r1.zwzz, t0.xyzw, s0 mul r0.w, r2.w, r2.w ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.z, cb1[0].x, l(64), t27.xxxx mad r1.w, -r2.w, r2.w, r2.w mad r0.w, r1.z, r1.w, r0.w ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r3.xyz, cb1[0].x, l(32), t28.xyzx ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r4.xyzw, cb1[0].x, l(48), t28.xyzw add r0.xyz, r0.xyzx, -r3.xyzx dp3 r0.x, r0.xyzx, r4.xyzx mul_sat r0.x, r4.w, |r0.x| add r0.yz, -r1.xxyx, 
l(0.000000, 1.000000, 1.000000, 0.000000) round_ni_sat r0.yz, r0.yyzy add r0.xyz, -r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000) round_ni_sat r1.xy, r1.xyxx add r1.xy, -r1.xyxx, l(1.000000, 1.000000, 0.000000, 0.000000) mul r0.y, r0.z, r0.y mul r0.y, r1.x, r0.y mul r0.y, r1.y, r0.y mul r0.x, r0.y, r0.x mul r2.w, r0.x, r0.w mul r0.xyzw, r2.xyzw, v1.xyzw ge r1.x, l(0.000100), r0.w discard_nz r1.x mad r1.xyz, r0.xyzx, cb7[0].xxxx, -r0.xyzx mul r1.xyz, r1.xyzx, cb7[0].yyyy ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.w, cb1[0].x, l(76), t27.xxxx mad o0.xyz, |r1.wwww|, r1.xyzx, r0.xyzx mov_sat o0.w, r0.w ret // Approximately 56 instruction slots used ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ [/code]
sgsrules said:One thing i would like to add would be a way to adjust HUD depth. Right now most HUD elements render at screen depth. It'd be great if some of them like objective pointers or enemy hit points would render at the same depth as their target but i have no idea how to do this.


It might look ok if you just place the floating HUD elements at a fixed depth. Post the VS for the floating HUD elements if you need further assistance with this. Regarding auto-hud depth adjustments, someone else can give a better answer, since I don't have that much experience with that in 3Dmigoto.

sgsrules said: I managed to find the broken shader for the decals; I'm not sure if there are more. I tried fixing it by applying the stereo adjustment in view space to the r1 variable before line 74, but it didn't affect anything at all. I also tried to bypass it completely by having the shader just output a float4(0,0,0,0) value for o0, and that made no difference whatsoever. I'm pretty sure it's the right shader, because when I have the shader selected in game with 3dm the decals show up as pink boxes. I suspect 3dm is not dumping the shader properly, because there's a bunch of commented text that says "Known bad code for instruction (needs manual fix):", so it's just skipping the shader fix entirely.

Is there a way to tell if the shader fix has compilation errors? I'm guessing that it will use the original shader if it does. When I can't tell whether a shader has been compiled, I test it by modifying one of the variables and reloading it, but I'm sure there's a better way.


It can be tricky finding HLSL compiling errors. In some cases, 3Dmigoto won't detect any errors with the shaders, but will still have compiling issues. (You may see flickering, distorted or missing textures).
In your case, as you correctly stated, 3Dmigoto is defaulting to the original shader because of the errors compiling the HLSL. For any HLSL issues, you can try editing the ASM version of the shader instead.

1) In the ShaderFixes directory, delete "_replace" at the end of the filename (for example: change 05d54df8550849bf-ps_replace.txt to 05d54df8550849bf-ps.txt )
2) Open the shader file and remove the HLSL portion.

3) Edit the ASM decal shader like this:
// decals?
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
// using 3Dmigoto v1.2.56 on Sat Mar 18 08:10:24 2017
//
//
// Buffer Definitions:
//
// cbuffer SceneBuffer
// {
//
// float4x4 g_View; // Offset: 0 Size: 64 [unused]
// float4x4 g_Proj; // Offset: 64 Size: 64 [unused]
// float4x4 g_ViewProjection; // Offset: 128 Size: 64 [unused]
// float4x4 g_ViewInverseMatrix; // Offset: 192 Size: 64
//
// }
//
// cbuffer HPixel_Buffer
// {
//
// float4 g_TargetUvParam; // Offset: 0 Size: 16
//
// }
//
// cbuffer CamParam_HPixel_Buffer
// {
//
// float4 g_CameraParam; // Offset: 0 Size: 16
// float4 g_CameraVec; // Offset: 16 Size: 16 [unused]
//
// }
//
// cbuffer EffectParameterIndex
// {
//
// uint g_EffectParameterIndex; // Offset: 0 Size: 4
//
// }
//
// cbuffer ExposureBuffer
// {
//
// float4 g_Exposure; // Offset: 0 Size: 16
//
// }
//
// Resource bind info for g_EffectPixelCommonBuffers
// {
//
// struct EffectPixelCommonBuffer
// {
//
// float4 m_ToneCurveRate; // Offset: 0
// float4 m_DepthRate; // Offset: 16
// float4 m_ColorCorrection; // Offset: 32
// float4 m_FogColor; // Offset: 48
// float4 m_Gamma; // Offset: 64
// float4 m_AlphaCurveRate; // Offset: 80
// float4 m_DitherRate; // Offset: 96
//
// } $Element; // Offset: 0 Size: 112
//
// }
//
// Resource bind info for g_EffectPixelProjectionBuffers
// {
//
// struct EffectPixelProjectionBuffer
// {
//
// float4 m_WorldInverseMatrixCol0;// Offset: 0
// float4 m_WorldInverseMatrixCol1;// Offset: 16
// float4 m_ProjectionCenter; // Offset: 32
// float4 m_ProjectionDir; // Offset: 48
// float4 m_ProjectionUvOffset; // Offset: 64
// float4 m_ProjectionUvTile; // Offset: 80
//
// } $Element; // Offset: 0 Size: 96
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// g_ColorTextureSampler sampler NA NA 0 1
// g_ColorTexture texture float4 2d 0 1
// g_ZTexture texture float4 2d 13 1
// g_EffectPixelCommonBuffers texture struct r/o 27 1
// g_EffectPixelProjectionBuffers texture struct r/o 28 1
// SceneBuffer cbuffer NA NA 0 1
// EffectParameterIndex cbuffer NA NA 1 1
// ExposureBuffer cbuffer NA NA 7 1
// HPixel_Buffer cbuffer NA NA 12 1
// CamParam_HPixel_Buffer cbuffer NA NA 13 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float xy
// TEXCOORD 0 xyzw 1 NONE float xyzw
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Target 0 xyzw 0 TARGET float xyzw
//
// Pixel shader, Shader Model 5.0. Declarations for the decal shader.
// Register-to-resource mapping below follows the "Resource Bindings" table
// in this file's header comment.
ps_5_0
dcl_globalFlags refactoringAllowed
// cb0 = SceneBuffer (16 float4 rows: g_View, g_Proj, g_ViewProjection, g_ViewInverseMatrix)
dcl_constantbuffer cb0[16], immediateIndexed
// cb12 = HPixel_Buffer (g_TargetUvParam)
dcl_constantbuffer cb12[1], immediateIndexed
// cb13 = CamParam_HPixel_Buffer (g_CameraParam)
dcl_constantbuffer cb13[1], immediateIndexed
// cb1 = EffectParameterIndex (g_EffectParameterIndex, used as the structured-buffer element index)
dcl_constantbuffer cb1[1], immediateIndexed
// cb7 = ExposureBuffer (g_Exposure)
dcl_constantbuffer cb7[1], immediateIndexed
// s0 = g_ColorTextureSampler
dcl_sampler s0, mode_default
// t0 = g_ColorTexture
dcl_resource_texture2d (float,float,float,float) t0
// t13 = g_ZTexture
dcl_resource_texture2d (float,float,float,float) t13
// t27 = g_EffectPixelCommonBuffers (112-byte elements)
dcl_resource_structured t27, 112
// t28 = g_EffectPixelProjectionBuffers (96-byte elements)
dcl_resource_structured t28, 96
// v0 = SV_POSITION (only xy is read), v1 = TEXCOORD0
dcl_input_ps_siv linear noperspective v0.xy, position
dcl_input_ps linear v1.xyzw
// o0 = SV_Target0
dcl_output o0.xyzw

// Added for the stereo fix: declare the texture bound at t125. This is a
// texture resource, not a sampler; the HLSL shaders in this thread declare the
// same slot as "Texture2D<float4> StereoParams : register(t125)".
dcl_resource_texture2d (float,float,float,float) t125

// Added for the stereo fix: raise the temp register count from 5 to 6 so that
// r5 is available as scratch space for the stereo correction.
dcl_temps 6

// --- Rebuild a position from the depth texture -------------------------------
// r0.xy = pixel position (v0.xy) scaled by g_TargetUvParam.xy (cb12[0].xy).
mul r0.xy, v0.xyxx, cb12[0].xyxx
// r0.zw = 2 * r0.xy
add r0.zw, r0.xxxy, r0.xxxy
// r1.xy = dimensions of g_ZTexture (t13) at mip level 0.
resinfo_indexable(texture2d)(float,float,float,float) r1.xy, l(0), t13.xyzw
// Scale by the texture size and truncate to integer texel coordinates.
mul r0.zw, r0.zzzw, r1.xxxy
ftoi r1.xy, r0.zwzz
mov r1.zw, l(0,0,0,0)
// Fetch that texel; the .yzxw swizzle routes the texture's x channel into r0.z.
ld_indexable(texture2d)(float,float,float,float) r0.z, r1.xyzw, t13.yzxw
// r0.z = r0.z * g_CameraParam.y + g_CameraParam.x
// (presumably converts the stored value to a linear depth - TODO confirm)
mad r0.z, r0.z, cb13[0].y, cb13[0].x
// r0.xy = (r0.xy * 4) * (1,-1) + (-1,1): remap with the y axis flipped.
mul r0.xy, r0.xyxx, l(4.000000, 4.000000, 0.000000, 0.000000)
mad r0.xy, r0.xyxx, l(1.000000, -1.000000, 0.000000, 0.000000), l(-1.000000, 1.000000, 0.000000, 0.000000)
// Scale xy by the depth and by g_CameraParam.zw.
mul r0.xy, r0.zzzz, r0.xyxx
mul r1.xy, r0.xyxx, cb13[0].zwzz
// r1 = (x, y, -depth, 1). The thread treats this as a view-space position,
// since it is multiplied by g_ViewInverseMatrix below.
mov r1.z, -r0.z
mov r1.w, l(1.000000)

// --- Stereo correction (added by the fix) ------------------------------------
// Computes r1.x += r5.x * (r1.z + r5.y) / g_Proj[0] using r5 as scratch.
// r5 = texel (0,0) of t125; presumably r5.x = separation and r5.y = convergence,
// matching stereo.x / stereo.y in the HLSL shadow fix - TODO confirm.
// NOTE(review): the HLSL formula quoted in this thread is
//   r1.x += (stereo.x * (r1.z - stereo.y)) / g_Proj._m00;
// whereas the add below computes (r1.z + r5.y). r1.z holds the NEGATED depth,
// so confirm which sign of the convergence term is intended before changing it.
// (Original author's caveat: "I'm not sure if this will work".)
ld_indexable(texture2d)(float,float,float,float) r5.xyzw, l(0, 0, 0, 0), t125.xyzw
// r5.y = r1.z + r5.y
add r5.y, r1.z, r5.y
// r5.x = r5.x * r5.y
mul r5.x, r5.x, r5.y
// r5.w = 1 / cb0[4].x; cb0[4] is the first row of g_Proj (byte offset 64),
// which the header comment still lists as [unused].
rcp r5.w, cb0[4].x
mul r5.x, r5.x, r5.w
// Apply the horizontal offset to the reconstructed position.
add r1.x, r1.x, r5.x


// --- Transform by g_ViewInverseMatrix (cb0[12..15]) --------------------------
// r0 = the position after the inverse view transform, one dot product per row.
dp4 r0.x, r1.xyzw, cb0[12].xyzw
dp4 r0.y, r1.xyzw, cb0[13].xyzw
dp4 r0.z, r1.xyzw, cb0[14].xyzw
dp4 r0.w, r1.xyzw, cb0[15].xyzw
// --- Project the position into the decal's UV space --------------------------
// All structured loads index the element selected by g_EffectParameterIndex
// (cb1[0].x). Byte offsets map to struct members per the header comment.
// t28 offset 0  = m_WorldInverseMatrixCol0, offset 16 = m_WorldInverseMatrixCol1.
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.xyzw, cb1[0].x, l(0), t28.xyzw
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xyzw, cb1[0].x, l(16), t28.xyzw
// r1.x = dot(r0, Col0) + 1
dp4 r1.x, r0.xyzw, r1.xyzw
add r1.x, r1.x, l(1.000000)
// r1.y = dot(r0, Col1) + 1  (r0.w is used as a temporary and overwritten)
dp4 r0.w, r0.xyzw, r2.xyzw
add r1.y, r0.w, l(1.000000)
// t28 offset 80 = m_ProjectionUvTile.xy -> r1.zw; offset 64 = m_ProjectionUvOffset.xy -> r2.xy.
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r1.zw, cb1[0].x, l(80), t28.xxxy
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r2.xy, cb1[0].x, l(64), t28.xyxx
// uv = r1.xy * tile + offset
mad r1.zw, r1.xxxy, r1.zzzw, r2.xxxy
// r2 = g_ColorTexture sampled at uv with g_ColorTextureSampler.
sample_indexable(texture2d)(float,float,float,float) r2.xyzw, r1.zwzz, t0.xyzw, s0

// --- Alpha curve -------------------------------------------------------------
// With a = r2.w: r0.w = a*a + m_Gamma.x * (a - a*a).
// t27 offset 64 = m_Gamma.x.
mul r0.w, r2.w, r2.w
ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.z, cb1[0].x, l(64), t27.xxxx
mad r1.w, -r2.w, r2.w, r2.w
mad r0.w, r1.z, r1.w, r0.w

// --- Fade along the projection direction -------------------------------------
// t28 offset 32 = m_ProjectionCenter.xyz -> r3; offset 48 = m_ProjectionDir.xyzw -> r4.
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r3.xyz, cb1[0].x, l(32), t28.xyzx
ld_structured_indexable(structured_buffer, stride=96)(mixed,mixed,mixed,mixed) r4.xyzw, cb1[0].x, l(48), t28.xyzw
// r0.x = saturate(|dot(pos - center, dir.xyz)| * dir.w)
add r0.xyz, r0.xyzx, -r3.xyzx
dp3 r0.x, r0.xyzx, r4.xyzx
mul_sat r0.x, r4.w, |r0.x|

// --- Bounds mask -------------------------------------------------------------
// r0.yz = saturate(floor(1 - r1.xy)); round_ni_sat is floor followed by saturate.
add r0.yz, -r1.xxyx, l(0.000000, 1.000000, 1.000000, 0.000000)
round_ni_sat r0.yz, r0.yyzy
// r0.xyz = 1 - r0.xyz: inverts the direction fade (x) and the two mask terms (y, z).
add r0.xyz, -r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000)
// r1.xy = 1 - saturate(floor(r1.xy))
round_ni_sat r1.xy, r1.xyxx
add r1.xy, -r1.xyxx, l(1.000000, 1.000000, 0.000000, 0.000000)
// The product of the four mask terms is 1 only when both r1.x and r1.y lie
// strictly between 0 and 1, and 0 otherwise.
mul r0.y, r0.z, r0.y
mul r0.y, r1.x, r0.y
mul r0.y, r1.y, r0.y
// Combine mask, direction fade and curved alpha into the texel's alpha.
mul r0.x, r0.y, r0.x
mul r2.w, r0.x, r0.w

// --- Final colour ------------------------------------------------------------
// Modulate the texel by the interpolated TEXCOORD0 value (v1).
mul r0.xyzw, r2.xyzw, v1.xyzw
// Discard the pixel when the resulting alpha is <= 0.0001.
ge r1.x, l(0.000100), r0.w
discard_nz r1.x
// r1.xyz = (rgb * g_Exposure.x - rgb) * g_Exposure.y
mad r1.xyz, r0.xyzx, cb7[0].xxxx, -r0.xyzx
mul r1.xyz, r1.xyzx, cb7[0].yyyy
// t27 byte offset 76 = m_Gamma.w (m_Gamma starts at 64; +12 bytes is the 4th float).
ld_structured_indexable(structured_buffer, stride=112)(mixed,mixed,mixed,mixed) r1.w, cb1[0].x, l(76), t27.xxxx
// o0.rgb = rgb + |m_Gamma.w| * exposure delta; o0.a = saturate(alpha).
mad o0.xyz, |r1.wwww|, r1.xyzx, r0.xyzx
mov_sat o0.w, r0.w
ret
// Approximately 56 instruction slots used

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

Dual boot Win 7 x64 & Win 10 (1809) | Geforce Drivers 417.35

#45
Posted 03/19/2017 04:23 AM   
  3 / 10    
Scroll To Top