3Dmigoto now open-source...
  14 / 143    
Another interesting case. This one doesn't compile because the sampler is specified incorrectly by the decompiler. I added this as a sample shader to the project, as one that doesn't decompile right. BTW. The first line was a warning not an error. The key distinction being that the warnings, especially for truncation, almost never matter. Here is the hand-fixed code that will compile and generate the same ASM. [code]// Manually fixed shader. // key instructions are the same after recompile with fxc. Texture2D<float4> t1 : register(t1); Texture2D<float4> t0 : register(t0); // Used as a SampleCmpLevelZero, so needs to be comparison type SamplerComparisonState s1 : register(s1); SamplerState s0 : register(s0); cbuffer cb2 : register(b2) { float4 cb2[4]; } cbuffer cb1 : register(b1) { float4 cb1[25]; } cbuffer cb0 : register(b0) { float4 cb0[8]; } Texture2D<float4> StereoParams : register(t125); void main( float4 v0 : SV_POSITION0, float2 v1 : TEXCOORD0, float3 v2 : TEXCOORD1, out float4 o0 : SV_Target0) { float4 r0,r1,r2,r3,r4; uint4 bitmask; r0.xyzw = t0.Sample(s0, v1.xy).xyzw; //------------------------warning (fixed) r0.x = cb0[7].x * r0.x + cb0[7].y; r0.x = 1.000000e+000 / r0.x; r0.yzw = v2.xyz * r0.xxx; r0.x = 1.000000000e+000 + -r0.x; r1.xy = float2(1.000000e+000,2.550000e+002) * r0.xx; r1.xy = frac(r1.xy); r2.xyzw = cb2[1].xyzw * r0.zzzz; r2.xyzw = cb2[0].xyzw * r0.yyyy + r2.xyzw; r0.xyzw = cb2[2].xyzw * r0.wwww + r2.xyzw; r0.xyzw = cb2[3].xyzw + r0.xyzw; r2.xyz = -cb1[0].xyz + r0.xyz; r2.x = dot(r2.xyz, r2.xyz); r3.xyz = -cb1[1].xyz + r0.xyz; r2.y = dot(r3.xyz, r3.xyz); r3.xyz = -cb1[2].xyz + r0.xyz; r2.z = dot(r3.xyz, r3.xyz); r3.xyz = -cb1[3].xyz + r0.xyz; r2.w = dot(r3.xyz, r3.xyz); r2.x = r2.x < cb1[4].x; r2.y = r2.y < cb1[4].y; r2.z = r2.z < cb1[4].z; r2.w = r2.w < cb1[4].w; r3.xyz = r2.xyz ? float3(-1.000000e+000,-1.000000e+000,-1.000000e+000) : float3(-0.000000e+000,-0.000000e+000,-0.000000e+000); r2.x = r2.x ? 1 : 0; r2.y = r2.y ? 1 : 0; r2.z = r2.z ? 
1 : 0; r2.w = r2.w ? 1 : 0; r2.yzw = r2.yzw + r3.xyz; r2.yzw = max(r2.yzw, float3(0.000000e+000,0.000000e+000,0.000000e+000)); r3.xyz = cb1[13].xyz * r0.yyy; r3.xyz = cb1[12].xyz * r0.xxx + r3.xyz; r3.xyz = cb1[14].xyz * r0.zzz + r3.xyz; r3.xyz = cb1[15].xyz * r0.www + r3.xyz; r3.xyz = r3.xyz * r2.yyy; r4.xyz = cb1[9].xyz * r0.yyy; r4.xyz = cb1[8].xyz * r0.xxx + r4.xyz; r4.xyz = cb1[10].xyz * r0.zzz + r4.xyz; r4.xyz = cb1[11].xyz * r0.www + r4.xyz; r3.xyz = r4.xyz * r2.xxx + r3.xyz; r4.xyz = cb1[17].xyz * r0.yyy; r4.xyz = cb1[16].xyz * r0.xxx + r4.xyz; r4.xyz = cb1[18].xyz * r0.zzz + r4.xyz; r4.xyz = cb1[19].xyz * r0.www + r4.xyz; r2.xyz = r4.xyz * r2.zzz + r3.xyz; r3.xyz = cb1[21].xyz * r0.yyy; r3.xyz = cb1[20].xyz * r0.xxx + r3.xyz; r0.xyz = cb1[22].xyz * r0.zzz + r3.xyz; r0.xyz = cb1[23].xyz * r0.www + r0.xyz; r0.xyz = r0.xyz * r2.www + r2.xyz; // sample_c_lz r0.x, r0.xyxx, t1.xxxx, s1, r0.z r0.x = t1.SampleCmpLevelZero(s1, r0.xy, r0.z).x; //------------------------ERROR (fixed) r0.y = 1.000000000e+000 + -cb1[24].x; o0.x = r0.x * r0.y + cb1[24].x; o0.z = -r1.y * 3.921568859e-003 + r1.x; o0.w = r1.y; o0.y = 1.000000000e+000; return; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Generated by Microsoft (R) D3D Shader Disassembler // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xyz 2 NONE float xyz // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_4_0 dcl_constantbuffer cb0[8], immediateIndexed dcl_constantbuffer cb1[25], immediateIndexed dcl_constantbuffer cb2[4], immediateIndexed dcl_sampler s0, mode_default dcl_sampler s1, mode_comparison dcl_resource_texture2d (float,float,float,float) t0 
dcl_resource_texture2d (float,float,float,float) t1 dcl_input_ps linear v1.xy dcl_input_ps linear v2.xyz dcl_output o0.xyzw dcl_temps 5 sample r0.xyzw, v1.xyxx, t0.xyzw, s0 mad r0.x, cb0[7].x, r0.x, cb0[7].y div r0.x, l(1.000000, 1.000000, 1.000000, 1.000000), r0.x mul r0.yzw, r0.xxxx, v2.xxyz add r0.x, -r0.x, l(1.000000) mul r1.xy, r0.xxxx, l(1.000000, 255.000000, 0.000000, 0.000000) frc r1.xy, r1.xyxx mul r2.xyzw, r0.zzzz, cb2[1].xyzw mad r2.xyzw, cb2[0].xyzw, r0.yyyy, r2.xyzw mad r0.xyzw, cb2[2].xyzw, r0.wwww, r2.xyzw add r0.xyzw, r0.xyzw, cb2[3].xyzw add r2.xyz, r0.xyzx, -cb1[0].xyzx dp3 r2.x, r2.xyzx, r2.xyzx add r3.xyz, r0.xyzx, -cb1[1].xyzx dp3 r2.y, r3.xyzx, r3.xyzx add r3.xyz, r0.xyzx, -cb1[2].xyzx dp3 r2.z, r3.xyzx, r3.xyzx add r3.xyz, r0.xyzx, -cb1[3].xyzx dp3 r2.w, r3.xyzx, r3.xyzx lt r2.xyzw, r2.xyzw, cb1[4].xyzw movc r3.xyz, r2.xyzx, l(-1.000000,-1.000000,-1.000000,0), l(-0.000000,-0.000000,-0.000000,0) and r2.xyzw, r2.xyzw, l(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000) add r2.yzw, r3.xxyz, r2.yyzw max r2.yzw, r2.yyzw, l(0.000000, 0.000000, 0.000000, 0.000000) mul r3.xyz, r0.yyyy, cb1[13].xyzx mad r3.xyz, cb1[12].xyzx, r0.xxxx, r3.xyzx mad r3.xyz, cb1[14].xyzx, r0.zzzz, r3.xyzx mad r3.xyz, cb1[15].xyzx, r0.wwww, r3.xyzx mul r3.xyz, r2.yyyy, r3.xyzx mul r4.xyz, r0.yyyy, cb1[9].xyzx mad r4.xyz, cb1[8].xyzx, r0.xxxx, r4.xyzx mad r4.xyz, cb1[10].xyzx, r0.zzzz, r4.xyzx mad r4.xyz, cb1[11].xyzx, r0.wwww, r4.xyzx mad r3.xyz, r4.xyzx, r2.xxxx, r3.xyzx mul r4.xyz, r0.yyyy, cb1[17].xyzx mad r4.xyz, cb1[16].xyzx, r0.xxxx, r4.xyzx mad r4.xyz, cb1[18].xyzx, r0.zzzz, r4.xyzx mad r4.xyz, cb1[19].xyzx, r0.wwww, r4.xyzx mad r2.xyz, r4.xyzx, r2.zzzz, r3.xyzx mul r3.xyz, r0.yyyy, cb1[21].xyzx mad r3.xyz, cb1[20].xyzx, r0.xxxx, r3.xyzx mad r0.xyz, cb1[22].xyzx, r0.zzzz, r3.xyzx mad r0.xyz, cb1[23].xyzx, r0.wwww, r0.xyzx mad r0.xyz, r0.xyzx, r2.wwww, r2.xyzx sample_c_lz r0.x, r0.xyxx, t1.xxxx, s1, r0.z add r0.y, -cb1[24].x, l(1.000000) mad o0.x, r0.x, r0.y, cb1[24].x 
mad o0.z, -r1.y, l(0.003922), r1.x mov o0.w, r1.y mov o0.y, l(1.000000) ret // Approximately 0 instruction slots used */[/code]
Another interesting case. This one doesn't compile because the sampler is specified incorrectly by the decompiler. I added this as a sample shader to the project, as one that doesn't decompile right.

BTW, the first line was a warning, not an error. The key distinction is that warnings, especially for truncation, almost never matter.


Here is the hand-fixed code that will compile and generate the same ASM.

// Manually fixed shader.
// key instructions are the same after recompile with fxc.

// Textures and samplers. The slots match the dcl_resource_texture2d and
// dcl_sampler declarations in the disassembly listing that follows the shader.

// Read in main() only through SampleCmpLevelZero (with sampler s1).
Texture2D<float4> t1 : register(t1);

// Read in main() with a regular Sample at the interpolated v1.xy coordinate.
Texture2D<float4> t0 : register(t0);

// Used as a SampleCmpLevelZero, so needs to be comparison type
// (the disassembly declares s1 as mode_comparison).
SamplerComparisonState s1 : register(s1);

// Non-comparison sampler (mode_default in the disassembly); paired with t0.
SamplerState s0 : register(s0);

// Four float4 rows. main() computes row0*a + row1*b + row2*c + row3, where
// (a, b, c) is the v2 input scaled by a value derived from the t0 sample,
// i.e. the rows are used like a 4x4 transform of that vector.
// NOTE(review): which spaces this maps between is not visible here - confirm.
cbuffer cb2 : register(b2)
{
float4 cb2[4];
}

// 25 float4 rows. Layout as implied by how main() indexes them:
//   [0..3]   .xyz  four reference points; main() takes the squared distance
//                  from the transformed position to each of them
//   [4]      .xyzw thresholds those four squared distances are compared against
//   [8..23]  four groups of four rows ([8..11], [12..15], [16..19], [20..23]),
//            each applied to the transformed position like a 4-row transform
//   [24]     .x    value the final output takes when the comparison sample is 0
// Rows 5..7 are not referenced by this shader.
// NOTE(review): presumably cascaded shadow-map data whose names were lost in
// decompilation - confirm against the game's original shader if available.
cbuffer cb1 : register(b1)
{
float4 cb1[25];
}

// Only row 7 is read: main() evaluates 1 / (cb0[7].x * sample.x + cb0[7].y)
// on the t0 sample.
// NOTE(review): looks like a depth-linearization scale/bias pair - confirm.
cbuffer cb0 : register(b0)
{
float4 cb0[8];
}


// Bound at t125. main() in this shader never reads it.
// NOTE(review): presumably the stereo-parameter texture that stereo fixes read
// with StereoParams.Load(0) - confirm.
Texture2D<float4> StereoParams : register(t125);

// Pixel shader entry point (ps_4_0 according to the disassembly listing below).
//
// Inputs:
//   v0 - SV_POSITION0; not read by the body.
//   v1 - TEXCOORD0; coordinate used to sample t0.
//   v2 - TEXCOORD1; vector scaled by the value derived from the t0 sample.
// Output:
//   o0 - SV_Target0:
//        x   = s * (1 - cb1[24].x) + cb1[24].x, s being the comparison-sample result
//        y   = constant 1.0
//        z,w = two-channel packing of (1 - r0.x) built from frac(value * (1, 255))
//
// The statements map one-to-one onto the instructions in the disassembly, so
// keep their order if the recompiled ASM is expected to stay the same.
void main(
float4 v0 : SV_POSITION0,
float2 v1 : TEXCOORD0,
float3 v2 : TEXCOORD1,
out float4 o0 : SV_Target0)
{
float4 r0,r1,r2,r3,r4;
// Not used by any statement below.
uint4 bitmask;
r0.xyzw = t0.Sample(s0, v1.xy).xyzw; //------------------------warning (fixed)
// r0.x = 1 / (cb0[7].x * sample.x + cb0[7].y)
r0.x = cb0[7].x * r0.x + cb0[7].y;
r0.x = 1.000000e+000 / r0.x;
// Scale the interpolated v2 vector by that value; the result lives in r0.yzw.
r0.yzw = v2.xyz * r0.xxx;
// r1.xy = frac((1 - r0.x) * (1, 255)); consumed only when writing o0.z / o0.w.
r0.x = 1.000000000e+000 + -r0.x;
r1.xy = float2(1.000000e+000,2.550000e+002) * r0.xx;
r1.xy = frac(r1.xy);
// r0 = cb2[0]*r0.y + cb2[1]*r0.z + cb2[2]*r0.w + cb2[3]
// (4-row transform of the scaled v2 vector).
r2.xyzw = cb2[1].xyzw * r0.zzzz;
r2.xyzw = cb2[0].xyzw * r0.yyyy + r2.xyzw;
r0.xyzw = cb2[2].xyzw * r0.wwww + r2.xyzw;
r0.xyzw = cb2[3].xyzw + r0.xyzw;
// r2.xyzw = squared distance from r0.xyz to each of cb1[0..3].xyz.
r2.xyz = -cb1[0].xyz + r0.xyz;
r2.x = dot(r2.xyz, r2.xyz);
r3.xyz = -cb1[1].xyz + r0.xyz;
r2.y = dot(r3.xyz, r3.xyz);
r3.xyz = -cb1[2].xyz + r0.xyz;
r2.z = dot(r3.xyz, r3.xyz);
r3.xyz = -cb1[3].xyz + r0.xyz;
r2.w = dot(r3.xyz, r3.xyz);
// Compare each squared distance with its threshold in cb1[4]
// (single 'lt' instruction in the disassembly).
r2.x = r2.x < cb1[4].x;
r2.y = r2.y < cb1[4].y;
r2.z = r2.z < cb1[4].z;
r2.w = r2.w < cb1[4].w;
// r3.xyz = -1 where tests 0..2 passed, otherwise -0 ('movc' in the disassembly).
r3.xyz = r2.xyz ? float3(-1.000000e+000,-1.000000e+000,-1.000000e+000) : float3(-0.000000e+000,-0.000000e+000,-0.000000e+000);
// Turn the test results into 1.0 / 0.0 ('and' with 0x3f800000 in the disassembly).
r2.x = r2.x ? 1 : 0;
r2.y = r2.y ? 1 : 0;
r2.z = r2.z ? 1 : 0;
r2.w = r2.w ? 1 : 0;
// Subtract the previous test from each later one and clamp at 0, so that
// r2.y/z/w are 1 only when their own test passed and the preceding one did not.
r2.yzw = r2.yzw + r3.xyz;
r2.yzw = max(r2.yzw, float3(0.000000e+000,0.000000e+000,0.000000e+000));
// Transform r0 by rows cb1[12..15], weighted by r2.y.
r3.xyz = cb1[13].xyz * r0.yyy;
r3.xyz = cb1[12].xyz * r0.xxx + r3.xyz;
r3.xyz = cb1[14].xyz * r0.zzz + r3.xyz;
r3.xyz = cb1[15].xyz * r0.www + r3.xyz;
r3.xyz = r3.xyz * r2.yyy;
// Add the transform by rows cb1[8..11], weighted by r2.x.
r4.xyz = cb1[9].xyz * r0.yyy;
r4.xyz = cb1[8].xyz * r0.xxx + r4.xyz;
r4.xyz = cb1[10].xyz * r0.zzz + r4.xyz;
r4.xyz = cb1[11].xyz * r0.www + r4.xyz;
r3.xyz = r4.xyz * r2.xxx + r3.xyz;
// Add the transform by rows cb1[16..19], weighted by r2.z.
// The sum is stored in r2.xyz; r2.w is left intact for the next group.
r4.xyz = cb1[17].xyz * r0.yyy;
r4.xyz = cb1[16].xyz * r0.xxx + r4.xyz;
r4.xyz = cb1[18].xyz * r0.zzz + r4.xyz;
r4.xyz = cb1[19].xyz * r0.www + r4.xyz;
r2.xyz = r4.xyz * r2.zzz + r3.xyz;
// Add the transform by rows cb1[20..23], weighted by r2.w.
// r0.xyz now holds the weighted sum of all four transforms.
r3.xyz = cb1[21].xyz * r0.yyy;
r3.xyz = cb1[20].xyz * r0.xxx + r3.xyz;
r0.xyz = cb1[22].xyz * r0.zzz + r3.xyz;
r0.xyz = cb1[23].xyz * r0.www + r0.xyz;
r0.xyz = r0.xyz * r2.www + r2.xyz;

// Comparison sample of t1 at r0.xy against reference value r0.z. This needs
// s1 to be a SamplerComparisonState, which is the hand fix described above.
// sample_c_lz r0.x, r0.xyxx, t1.xxxx, s1, r0.z
r0.x = t1.SampleCmpLevelZero(s1, r0.xy, r0.z).x; //------------------------ERROR (fixed)

// o0.x = r0.x * (1 - cb1[24].x) + cb1[24].x
r0.y = 1.000000000e+000 + -cb1[24].x;
o0.x = r0.x * r0.y + cb1[24].x;
// 3.921568859e-003 is 1/255: o0.z = r1.x - r1.y / 255, o0.w = r1.y.
o0.z = -r1.y * 3.921568859e-003 + r1.x;
o0.w = r1.y;
o0.y = 1.000000000e+000;
return;
}

/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// Generated by Microsoft (R) D3D Shader Disassembler
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float
// TEXCOORD 0 xy 1 NONE float xy
// TEXCOORD 1 xyz 2 NONE float xyz
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Target 0 xyzw 0 TARGET float xyzw
//
ps_4_0
dcl_constantbuffer cb0[8], immediateIndexed
dcl_constantbuffer cb1[25], immediateIndexed
dcl_constantbuffer cb2[4], immediateIndexed
dcl_sampler s0, mode_default
dcl_sampler s1, mode_comparison
dcl_resource_texture2d (float,float,float,float) t0
dcl_resource_texture2d (float,float,float,float) t1
dcl_input_ps linear v1.xy
dcl_input_ps linear v2.xyz
dcl_output o0.xyzw
dcl_temps 5
sample r0.xyzw, v1.xyxx, t0.xyzw, s0
mad r0.x, cb0[7].x, r0.x, cb0[7].y
div r0.x, l(1.000000, 1.000000, 1.000000, 1.000000), r0.x
mul r0.yzw, r0.xxxx, v2.xxyz
add r0.x, -r0.x, l(1.000000)
mul r1.xy, r0.xxxx, l(1.000000, 255.000000, 0.000000, 0.000000)
frc r1.xy, r1.xyxx
mul r2.xyzw, r0.zzzz, cb2[1].xyzw
mad r2.xyzw, cb2[0].xyzw, r0.yyyy, r2.xyzw
mad r0.xyzw, cb2[2].xyzw, r0.wwww, r2.xyzw
add r0.xyzw, r0.xyzw, cb2[3].xyzw
add r2.xyz, r0.xyzx, -cb1[0].xyzx
dp3 r2.x, r2.xyzx, r2.xyzx
add r3.xyz, r0.xyzx, -cb1[1].xyzx
dp3 r2.y, r3.xyzx, r3.xyzx
add r3.xyz, r0.xyzx, -cb1[2].xyzx
dp3 r2.z, r3.xyzx, r3.xyzx
add r3.xyz, r0.xyzx, -cb1[3].xyzx
dp3 r2.w, r3.xyzx, r3.xyzx
lt r2.xyzw, r2.xyzw, cb1[4].xyzw
movc r3.xyz, r2.xyzx, l(-1.000000,-1.000000,-1.000000,0), l(-0.000000,-0.000000,-0.000000,0)
and r2.xyzw, r2.xyzw, l(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000)
add r2.yzw, r3.xxyz, r2.yyzw
max r2.yzw, r2.yyzw, l(0.000000, 0.000000, 0.000000, 0.000000)
mul r3.xyz, r0.yyyy, cb1[13].xyzx
mad r3.xyz, cb1[12].xyzx, r0.xxxx, r3.xyzx
mad r3.xyz, cb1[14].xyzx, r0.zzzz, r3.xyzx
mad r3.xyz, cb1[15].xyzx, r0.wwww, r3.xyzx
mul r3.xyz, r2.yyyy, r3.xyzx
mul r4.xyz, r0.yyyy, cb1[9].xyzx
mad r4.xyz, cb1[8].xyzx, r0.xxxx, r4.xyzx
mad r4.xyz, cb1[10].xyzx, r0.zzzz, r4.xyzx
mad r4.xyz, cb1[11].xyzx, r0.wwww, r4.xyzx
mad r3.xyz, r4.xyzx, r2.xxxx, r3.xyzx
mul r4.xyz, r0.yyyy, cb1[17].xyzx
mad r4.xyz, cb1[16].xyzx, r0.xxxx, r4.xyzx
mad r4.xyz, cb1[18].xyzx, r0.zzzz, r4.xyzx
mad r4.xyz, cb1[19].xyzx, r0.wwww, r4.xyzx
mad r2.xyz, r4.xyzx, r2.zzzz, r3.xyzx
mul r3.xyz, r0.yyyy, cb1[21].xyzx
mad r3.xyz, cb1[20].xyzx, r0.xxxx, r3.xyzx
mad r0.xyz, cb1[22].xyzx, r0.zzzz, r3.xyzx
mad r0.xyz, cb1[23].xyzx, r0.wwww, r0.xyzx
mad r0.xyz, r0.xyzx, r2.wwww, r2.xyzx
sample_c_lz r0.x, r0.xyxx, t1.xxxx, s1, r0.z
add r0.y, -cb1[24].x, l(1.000000)
mad o0.x, r0.x, r0.y, cb1[24].x
mad o0.z, -r1.y, l(0.003922), r1.x
mov o0.w, r1.y
mov o0.y, l(1.000000)
ret
// Approximately 0 instruction slots used
*/

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 10/27/2014 06:44 AM   
I tested it out and the shader works now. Thanks bo3b!
I tested it out and the shader works now. Thanks bo3b!

Dual boot Win 7 x64 & Win 10 (1809) | Geforce Drivers 417.35

Posted 10/27/2014 03:10 PM   
If I want to shader hunt in Assassins Creed 3, how exactly do I go about doing that? I tried different releases but everything crashes.
If I want to shader hunt in Assassins Creed 3, how exactly do I go about doing that? I tried different releases but everything crashes.

1080 Ti - i7 5820k - 16Gb RAM - Win 10 version 1607 - ASUS VG236H (1920x1080@120Hz)

Posted 11/06/2014 06:19 PM   
[quote="Jan-Itor"]If I want to shader hunt in Assassins Creed 3, how exactly do I go about doing that? I tried different releases but everything crashes.[/quote]Even though Assassins Creed 3 is already fixed? The code there is the oldest we've got, and I will update the fix at some point with a more modern version. Until then, you can use any recent release of the 3Dmigoto, but best bet would be to use the recently updated AC4 dlls: [url]https://github.com/bo3b/3Dmigoto/releases/tag/0.98-beta[/url] Use everything from that fix, except for the ShaderFixes folder. The AC3 ShaderFixes folder will still work with this latest code. This latest code base fixes some requirements we had earlier, no need for KB platform update, and it works OK on 8.1 now. Your crashes are nearly sure to be one of those two problems. Using that AC4 version, you can edit the .ini file to enable hunting=1, and it should work pretty well. You [i]might [/i]need to also enable force_cpu_affinity=1 while hunting, as AC3 had a weird flickering shader when selected. Let me know if that doesn't work for you.
Jan-Itor said:If I want to shader hunt in Assassins Creed 3, how exactly do I go about doing that? I tried different releases but everything crashes.
Even though Assassins Creed 3 is already fixed?

The code there is the oldest we've got, and I will update the fix at some point with a more modern version.

Until then, you can use any recent release of the 3Dmigoto, but best bet would be to use the recently updated AC4 dlls: https://github.com/bo3b/3Dmigoto/releases/tag/0.98-beta

Use everything from that fix, except for the ShaderFixes folder. The AC3 ShaderFixes folder will still work with this latest code.


This latest code base fixes some requirements we had earlier, no need for KB platform update, and it works OK on 8.1 now. Your crashes are nearly sure to be one of those two problems.


Using that AC4 version, you can edit the .ini file to enable hunting=1, and it should work pretty well. You might need to also enable force_cpu_affinity=1 while hunting, as AC3 had a weird flickering shader when selected.

Let me know if that doesn't work for you.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 11/06/2014 10:49 PM   
[quote="bo3b"]Even though Assassins Creed 3 is already fixed?[/quote] I wanna remove these white outlines around NPCs: http://i.minus.com/ibwL6VOBJ5y9rK.png I've done it for all the DX9 Assassins Creeds with Helix wrapper, now I'm trying to do it for the DX11 games :) [quote="bo3b"]The code there is the oldest we've got, and I will update the fix at some point with a more modern version. Until then, you can use any recent release of the 3Dmigoto, but best bet would be to use the recently updated AC4 dlls: [url]https://github.com/bo3b/3Dmigoto/releases/tag/0.98-beta[/url] Use everything from that fix, except for the ShaderFixes folder. The AC3 ShaderFixes folder will still work with this latest code. This latest code base fixes some requirements we had earlier, no need for KB platform update, and it works OK on 8.1 now. Your crashes are nearly sure to be one of those two problems. Using that AC4 version, you can edit the .ini file to enable hunting=1, and it should work pretty well. You [i]might [/i]need to also enable force_cpu_affinity=1 while hunting, as AC3 had a weird flickering shader when selected. Let me know if that doesn't work for you.[/quote] Well it doesn't crash anymore, but shader hunting doesn't seem to be working, nothing happens when I press the numpad keys. (I've set hunting=1) Thanks for your help
bo3b said:Even though Assassins Creed 3 is already fixed?

I wanna remove these white outlines around NPCs:
http://i.minus.com/ibwL6VOBJ5y9rK.png

I've done it for all the DX9 Assassins Creeds with Helix wrapper, now I'm trying to do it for the DX11 games :)

bo3b said:The code there is the oldest we've got, and I will update the fix at some point with a more modern version.

Until then, you can use any recent release of the 3Dmigoto, but best bet would be to use the recently updated AC4 dlls: https://github.com/bo3b/3Dmigoto/releases/tag/0.98-beta

Use everything from that fix, except for the ShaderFixes folder. The AC3 ShaderFixes folder will still work with this latest code.


This latest code base fixes some requirements we had earlier, no need for KB platform update, and it works OK on 8.1 now. Your crashes are nearly sure to be one of those two problems.


Using that AC4 version, you can edit the .ini file to enable hunting=1, and it should work pretty well. You might need to also enable force_cpu_affinity=1 while hunting, as AC3 had a weird flickering shader when selected.

Let me know if that doesn't work for you.


Well it doesn't crash anymore, but shader hunting doesn't seem to be working, nothing happens when I press the numpad keys. (I've set hunting=1)

Thanks for your help

1080 Ti - i7 5820k - 16Gb RAM - Win 10 version 1607 - ASUS VG236H (1920x1080@120Hz)

Posted 11/07/2014 11:01 AM   
OK, cool, we love to see any changes you like to make it better. Since then I've also implemented constants, so we can make your change an optional setting via the .ini file. I went ahead and tested it, to see how it works with the current codebase. It looks good to me. Runs without crashing, shaderhunting was working OK for me. I did get the flicker on shaders, instead of blanking them, and had to set the affinity flag as well to make it searchable. (really hammers performance of course). I also got to be reminded why I hate both Steam and UPlay now, because they both have to launch, and both want to put up overlays. All the people involved with this double DRM thing are evil, hateful people. Did you disable the fake-3D with ctrl-alt-F11? I don't think shader hunting works in CM. That's all I can think of at the moment, the newer code base is simpler and much more compatible with all games.
OK, cool, we love to see any changes you like to make it better.

Since then I've also implemented constants, so we can make your change an optional setting via the .ini file.


I went ahead and tested it, to see how it works with the current codebase. It looks good to me.

Runs without crashing, shaderhunting was working OK for me. I did get the flicker on shaders, instead of blanking them, and had to set the affinity flag as well to make it searchable. (really hammers performance of course).

I also got to be reminded why I hate both Steam and UPlay now, because they both have to launch, and both want to put up overlays. All the people involved with this double DRM thing are evil, hateful people.


Did you disable the fake-3D with ctrl-alt-F11? I don't think shader hunting works in CM. That's all I can think of at the moment, the newer code base is simpler and much more compatible with all games.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 11/07/2014 11:35 AM   
[quote="bo3b"]the newer code base is simpler and much more compatible with all games.[/quote] So that's in development? Maybe I should just wait for that instead :P
bo3b said:the newer code base is simpler and much more compatible with all games.


So that's in development? Maybe I should just wait for that instead :P

1080 Ti - i7 5820k - 16Gb RAM - Win 10 version 1607 - ASUS VG236H (1920x1080@120Hz)

Posted 11/07/2014 12:15 PM   
[quote="Jan-Itor"][quote="bo3b"]the newer code base is simpler and much more compatible with all games.[/quote] So that's in development? Maybe I should just wait for that instead :P[/quote]No sorry, when I say newer code base, it's newer than the AC3 fix, but is the same as the current AC4 fix. I made a lot of changes to support games like Saints Row and WatchDogs, and think the current code base is pretty solid. I used the AC4 dlls, and it worked well on AC3.
Jan-Itor said:
bo3b said:the newer code base is simpler and much more compatible with all games.


So that's in development? Maybe I should just wait for that instead :P
No sorry, when I say newer code base, it's newer than the AC3 fix, but is the same as the current AC4 fix. I made a lot of changes to support games like Saints Row and WatchDogs, and think the current code base is pretty solid.

I used the AC4 dlls, and it worked well on AC3.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 11/07/2014 01:06 PM   
So I took a look at the recent mordor fix and have a few questions: 1.For this part: [code] float4 stereo = StereoParams.Load(0); float4 r23 = r1; r1.x += stereo.x * (r1.w - stereo.y); o0.xyzw = r23.xyzw; //o0.xyzw = r1.xyzw;[/code] Can't you just put [code] float4 stereo = StereoParams.Load(0); o0.xyzw = r1.xyzw; r1.x += stereo.x * (r1.w - stereo.y); [/code] instead or is it necessary to create the r23 variable? 2. Something else new that I saw was that you guys straight up created a new variable to be exported. Here's the full code for the shader: [code] //Shadows cbuffer _Globals : register(b0) { float k_fInvLightRadius : packoffset(c0); row_major float3x4 k_mCubeProjector_LightTransform : packoffset(c1); row_major float3x4 k_mDirectional_ObjectToTex : packoffset(c4); row_major float4x4 k_mSpotProjector_LightTransform : packoffset(c7); float4 k_vSpotProjector_TanXYRadiusZW : packoffset(c11); row_major float3x4 k_mWorldToVolume : packoffset(c12); float3 k_vDirectional_Dir : packoffset(c15); float4 k_vObjectLightColor : packoffset(c16); float3 k_vObjectSpaceLightPos : packoffset(c17); float4 k_vSpecularColor : packoffset(c18); float4 k_vSpotProjector_ClipFar : packoffset(c19); float4 k_vSpotProjector_ClipNear : packoffset(c20); float3 k_vHalfDims : packoffset(c21); float4 k_vShadowMapParams : packoffset(c22); float4 k_vShadowMapParams2 : packoffset(c23); row_major float4x4 k_mShadowMapMatrix : packoffset(c24); float4 k_vShadowCascadeOffsetX : packoffset(c28); float4 k_vShadowCascadeOffsetZ : packoffset(c29); float k_fStaticLightVolume_Mip : packoffset(c30); float4 k_vShadowSplitDistSquared : packoffset(c31); float4 k_vShadowSplitDistSelect : packoffset(c32); float4 k_vShadowMoveX : packoffset(c33); float4 k_vShadowMoveY : packoffset(c34); float4 k_vShadowMoveZ : packoffset(c35); float4 k_vShadowScaleXY : packoffset(c36); float4 k_vShadowScaleX : packoffset(c37); float4 k_vShadowScaleY : packoffset(c38); float4 k_vShadowScaleZ : packoffset(c39); float3 
k_vLightBleed_Offset : packoffset(c40); float4 k_vStaticLightVolume_PivotContrast : packoffset(c41); float k_fLightLayer : packoffset(c42); row_major float3x4 k_mViewToLight : packoffset(c43); row_major float4x4 k_mObjectToClip : packoffset(c46); row_major float3x4 k_mObjectToView : packoffset(c50); row_major float3x4 k_mObjectToWorld : packoffset(c53); float4 k_vObjectColor : packoffset(c56); float3 k_vObjectSpaceEyePos : packoffset(c57); row_major float4x4 k_mPrevObjectToClip : packoffset(c58); row_major float3x4 k_mPrevObjectToView : packoffset(c62); row_major float3x4 k_mPrevObjectToWorld : packoffset(c65); } cbuffer CBuffer_View : register(b12) { row_major float4x4 k_mWorldToClip : packoffset(c0); row_major float3x4 k_mViewToWorld : packoffset(c4); row_major float3x4 k_mWorldToView : packoffset(c7); row_major float4x4 k_mDrawPrimToClip : packoffset(c10); float4 k_vFogRayleigh : packoffset(c14); float4 k_vFogMieA : packoffset(c15); float4 k_vFogMieB : packoffset(c16); float4 k_vFogMieC : packoffset(c17); float4 k_vFogSky : packoffset(c18); float4 k_vFogSunColor : packoffset(c19); float3 k_vFogSunDir : packoffset(c20); float4 k_vFullViewPlane : packoffset(c21); float2 k_vPerspOrthoMask : packoffset(c22); float2 k_vScene_ScreenRes : packoffset(c23); float2 k_vScene_TexCoordScale : packoffset(c24); float3 k_vScene_ZRange : packoffset(c25); float3 k_vWorldSpaceCameraDir : packoffset(c26); float4 k_vHDRLuminanceWeights : packoffset(c27); float4 k_vHDRBloomParams : packoffset(c28); float4 k_vHDRBloomParams2 : packoffset(c29); float4 k_vWindDirection : packoffset(c30); float4 k_vVegetationForce1 : packoffset(c31); float4 k_vVegetationForce2 : packoffset(c32); float4 k_vVegetationRadius : packoffset(c33); row_major float4x4 k_mSkyShadowMapMatrix : packoffset(c34); float4 k_vSkyShadowParams : packoffset(c38); float4 k_vParaboloidShadowParams : packoffset(c39); row_major float4x4 k_mPrevWorldToClip : packoffset(c40); row_major float3x4 k_mPrevViewToWorld : 
packoffset(c44); row_major float3x4 k_mPrevWorldToView : packoffset(c47); float4 k_vMotionBlurParams : packoffset(c50); } SamplerState sBilinear_CLAMPCLAMPCLAMP_Sampler_s : register(s0); SamplerState sPoint_CLAMPCLAMPCLAMP_Sampler_s : register(s2); SamplerComparisonState sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s : register(s1); Texture2D<float4> tSpotProjector_LightMap : register(t0); Texture2DArray<float4> tShadowMapLinear : register(t1); Texture2D<float4> tDepthMap : register(t2); Texture2D<float4> tDeferredDiffuse : register(t3); Texture2D<float4> tDeferredNormals : register(t4); Texture2D<float4> tDeferredSpecular : register(t5); Texture2D<float4> tAmbientOcclusion : register(t6); Texture2D<float4> StereoParams : register(t125); void main( float4 v0 : SV_POSITION0, float4 v1 : TEXCOORD0, float2 v2 : TEXCOORD1, float3 v3 : TEXCOORD2, float4 v4 : TEXCOORD3, out float4 o0 : SV_Target0) { float4 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9; uint4 bitmask; r0.xy = v1.xy / v1.ww; r0.zw = v2.xy / v1.ww; r1.z = tDepthMap.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).x; r1.xy = r1.zz * r0.zw; float4 stereo = StereoParams.Load(0); r1.x -= stereo.x * (r1.z - stereo.y)*v4.z/2; //k_vFullViewPlane.z/2; //*0.187; r2.xyzw = tDeferredSpecular.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).xyzw; r0.z = r2.w >= 5.000000000e-001; r0.z = r0.z ? 
1.000000 : 0; r3.xyz = r0.zzz * k_vLightBleed_Offset.xyz + r1.xyz; r3.w = 1.000000000e+000; r4.x = dot(k_mShadowMapMatrix._m00_m01_m02_m03, r3.xyzw); r4.y = dot(k_mShadowMapMatrix._m10_m11_m12_m13, r3.xyzw); r4.z = dot(k_mShadowMapMatrix._m20_m21_m22_m23, r3.xyzw); r0.z = dot(k_mShadowMapMatrix._m30_m31_m32_m33, r3.xyzw); r3.xyz = r4.xyz / r0.zzz; r4.xyzw = r3.xyxy * float4(5.000000e-001,-5.000000e-001,5.000000e-001,-5.000000e-001) + float4(5.000000e-001,5.000000e-001,5.000000e-001,5.000000e-001); r0.z = 1.000000000e+000 + -r3.z; r3.xyzw = k_vShadowMapParams2.zwzw * float4(5.000000e-001,-5.000000e-001,-5.000000e-001,-5.000000e-001) + r4.zwzw; r5.xy = r3.zw; r5.z = k_vShadowCascadeOffsetZ.x; r0.w = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r5.xyz, r0.z).x; r3.z = k_vShadowCascadeOffsetZ.x; r3.x = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r3.xyz, r0.z).x; r4.xyzw = k_vShadowMapParams2.zwzw * float4(5.000000e-001,5.000000e-001,-5.000000e-001,5.000000e-001) + r4.zwxy; r5.xy = r4.zw; r5.z = k_vShadowCascadeOffsetZ.x; r3.y = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r5.xyz, r0.z).x; r4.z = k_vShadowCascadeOffsetZ.x; r0.z = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r4.xyz, r0.z).x; r0.w = r3.x + r0.w; r0.w = r0.w + r3.y; r0.z = r0.w + r0.z; r0.z = -r0.z * 2.500000000e-001 + 1.000000000e+000; r0.w = 1.000000000e+000 + -k_vObjectLightColor.w; r0.z = k_vObjectLightColor.w * r0.z + r0.w; r3.xyz = k_vObjectSpaceLightPos.xyz + -r1.xyz; r3.xyz = k_fInvLightRadius * r3.xyz; r4.x = dot(k_mViewToWorld._m00_m01_m02, r3.xyz); r4.y = dot(k_mViewToWorld._m10_m11_m12, r3.xyz); r4.z = dot(k_mViewToWorld._m20_m21_m22, r3.xyz); r0.w = dot(r4.xyz, r4.xyz); r3.x = min(r0.w, 1.000000000e+000); r3.x = 1.000000000e+000 + -r3.x; r3.x = r3.x * r3.x; r1.w = 1.000000000e+000; r5.x = dot(k_mSpotProjector_LightTransform._m00_m01_m02_m03, r1.xyzw); 
r5.y = dot(k_mSpotProjector_LightTransform._m10_m11_m12_m13, r1.xyzw); r1.x = dot(k_mSpotProjector_LightTransform._m30_m31_m32_m33, r1.xyzw); r1.xy = r5.xy / r1.xx; r1.xyzw = tSpotProjector_LightMap.SampleLevel(sBilinear_CLAMPCLAMPCLAMP_Sampler_s, r1.xy, 0.000000000e+000).xyzw; r1.xyz = k_vObjectLightColor.xyz * r1.xyz; r5.xyzw = tDeferredDiffuse.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).xyzw; r3.yzw = r5.xyz * r1.xyz; r4.w = dot(v3.xyz, v3.xyz); r4.w = rsqrt(r4.w); r6.xyzw = tDeferredNormals.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).xyzw; r6.xyz = r6.xyz * float3(2.000000e+000,2.000000e+000,2.000000e+000) + float3(-1.000000e+000,-1.000000e+000,-1.000000e+000); r7.x = dot(r6.xyz, r6.xyz); r7.x = rsqrt(r7.x); r6.xyz = r7.xxx * r6.xyz; r0.w = rsqrt(r0.w); r4.xyz = r4.xyz * r0.www; r7.xyz = k_vSpecularColor.xyz * r1.www; r0.w = r2.x * r2.x; r0.xy = tAmbientOcclusion.SampleLevel(sBilinear_CLAMPCLAMPCLAMP_Sampler_s, r0.xy, 0.000000000e+000).xy; r8.xyz = v3.xyz * r4.www + r4.xyz; r1.w = dot(r8.xyz, r8.xyz); r1.w = rsqrt(r1.w); r8.xyz = r8.xyz * r1.www; r1.w = saturate(dot(r6.xyz, r8.xyz)); r9.xy = r0.ww * float2(4.096000e+003,4.096000e+003) + float2(4.000000e-003,2.004000e+000); r0.w = 1.250000000e-001 * r9.y; r1.w = log2(r1.w); r1.w = r9.x * r1.w; r1.w = exp2(r1.w); r0.w = r1.w * r0.w; r1.w = 1.000000000e+000 + -r2.z; r2.x = saturate(dot(r4.xyz, r8.xyz)); r2.x = 1.000000000e+000 + -r2.x; r4.w = r2.x * r2.x; r4.w = r4.w * r4.w; r2.x = r4.w * r2.x; r1.w = r1.w * r2.x + r2.z; r0.w = r1.w * r0.w; r7.xyz = r0.xxx * r7.xyz; r2.xyz = r7.xyz * r2.yyy; r0.x = dot(r6.xyz, r4.xyz); r1.w = r0.x + r5.w; r1.w = -1.000000000e+000 + r1.w; r1.w = saturate(r1.w / r5.w); r2.xyz = r2.xyz * r0.www + r3.yzw; r2.xyz = r2.xyz * r1.www; r0.w = 0.000000000e+000 < r2.w; if (r0.w != 0) { r0.w = r6.w < 5.098039508e-001; r3.yzw = r3.yzw + r3.yzw; r1.w = max(r5.z, r5.y); r1.w = max(r1.w, r5.x); r4.xyz = saturate(r5.xyz / r1.www); r1.xyz = saturate(r1.xyz); r4.xyz = r4.xyz * r4.xyz; r1.xyz 
= r4.xyz * r1.xyz; r1.xyz = float3(1.500000e-001,1.500000e-001,1.500000e-001) * r1.xyz; r1.x = r0.w ? r3.y : r1.x; r1.y = r0.w ? r3.z : r1.y; r1.z = r0.w ? r3.w : r1.z; r0.w = saturate(-5.000000000e-001 + r2.w); r3.yzw = r1.xyz * r0.www; r4.xy = saturate(float2(2.500000e-001,1.000000e+000) + -r0.xx); r3.yzw = r3.yzw * r4.xxx + r2.xyz; r0.w = min(r2.w, 5.000000000e-001); r1.xyz = r1.xyz * r0.www; r1.xyz = r1.xyz * r4.yyy; r0.x = saturate(2.500000000e-001 + r0.x); r2.xyz = r1.xyz * r0.xxx + r3.yzw; } r1.xyz = r3.xxx * r2.xyz; r0.xzw = r1.xyz * r0.zzz; r0.y = saturate(3.330000043e-001 + r0.y); o0.xyz = r0.xzw * r0.yyy; o0.w = 1.000000000e+000; // o0=0; return; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Generated by Microsoft (R) HLSL Shader Compiler 9.30.9200.20546 // // // Buffer Definitions: // // cbuffer $Globals // { // // float k_fInvLightRadius; // Offset: 0 Size: 4 // row_major float3x4 k_mCubeProjector_LightTransform;// Offset: 16 Size: 48 [unused] // row_major float3x4 k_mDirectional_ObjectToTex;// Offset: 64 Size: 48 [unused] // row_major float4x4 k_mSpotProjector_LightTransform;// Offset: 112 Size: 64 // float4 k_vSpotProjector_TanXYRadiusZW;// Offset: 176 Size: 16 [unused] // row_major float3x4 k_mWorldToVolume;// Offset: 192 Size: 48 [unused] // float3 k_vDirectional_Dir; // Offset: 240 Size: 12 [unused] // float4 k_vObjectLightColor; // Offset: 256 Size: 16 // float3 k_vObjectSpaceLightPos; // Offset: 272 Size: 12 // float4 k_vSpecularColor; // Offset: 288 Size: 16 // float4 k_vSpotProjector_ClipFar; // Offset: 304 Size: 16 [unused] // float4 k_vSpotProjector_ClipNear; // Offset: 320 Size: 16 [unused] // float3 k_vHalfDims; // Offset: 336 Size: 12 [unused] // float4 k_vShadowMapParams; // Offset: 352 Size: 16 [unused] // float4 k_vShadowMapParams2; // Offset: 368 Size: 16 // row_major float4x4 k_mShadowMapMatrix;// Offset: 384 Size: 64 // float4 k_vShadowCascadeOffsetX; // Offset: 448 Size: 16 [unused] // float4 
k_vShadowCascadeOffsetZ; // Offset: 464 Size: 16 // float k_fStaticLightVolume_Mip; // Offset: 480 Size: 4 [unused] // float4 k_vShadowSplitDistSquared; // Offset: 496 Size: 16 [unused] // float4 k_vShadowSplitDistSelect; // Offset: 512 Size: 16 [unused] // float4 k_vShadowMoveX; // Offset: 528 Size: 16 [unused] // float4 k_vShadowMoveY; // Offset: 544 Size: 16 [unused] // float4 k_vShadowMoveZ; // Offset: 560 Size: 16 [unused] // float4 k_vShadowScaleXY; // Offset: 576 Size: 16 [unused] // float4 k_vShadowScaleX; // Offset: 592 Size: 16 [unused] // float4 k_vShadowScaleY; // Offset: 608 Size: 16 [unused] // float4 k_vShadowScaleZ; // Offset: 624 Size: 16 [unused] // float3 k_vLightBleed_Offset; // Offset: 640 Size: 12 // float4 k_vStaticLightVolume_PivotContrast;// Offset: 656 Size: 16 [unused] // float k_fLightLayer; // Offset: 672 Size: 4 [unused] // row_major float3x4 k_mViewToLight; // Offset: 688 Size: 48 [unused] // row_major float4x4 k_mObjectToClip;// Offset: 736 Size: 64 [unused] // row_major float3x4 k_mObjectToView;// Offset: 800 Size: 48 [unused] // row_major float3x4 k_mObjectToWorld;// Offset: 848 Size: 48 [unused] // float4 k_vObjectColor; // Offset: 896 Size: 16 [unused] // float3 k_vObjectSpaceEyePos; // Offset: 912 Size: 12 [unused] // row_major float4x4 k_mPrevObjectToClip;// Offset: 928 Size: 64 [unused] // row_major float3x4 k_mPrevObjectToView;// Offset: 992 Size: 48 [unused] // row_major float3x4 k_mPrevObjectToWorld;// Offset: 1040 Size: 48 [unused] // // } // // cbuffer CBuffer_View // { // // row_major float4x4 k_mWorldToClip; // Offset: 0 Size: 64 [unused] // row_major float3x4 k_mViewToWorld; // Offset: 64 Size: 48 // row_major float3x4 k_mWorldToView; // Offset: 112 Size: 48 [unused] // row_major float4x4 k_mDrawPrimToClip;// Offset: 160 Size: 64 [unused] // float4 k_vFogRayleigh; // Offset: 224 Size: 16 [unused] // float4 k_vFogMieA; // Offset: 240 Size: 16 [unused] // float4 k_vFogMieB; // Offset: 256 Size: 16 [unused] // float4 
k_vFogMieC; // Offset: 272 Size: 16 [unused] // float4 k_vFogSky; // Offset: 288 Size: 16 [unused] // float4 k_vFogSunColor; // Offset: 304 Size: 16 [unused] // float3 k_vFogSunDir; // Offset: 320 Size: 12 [unused] // float4 k_vFullViewPlane; // Offset: 336 Size: 16 [unused] // float2 k_vPerspOrthoMask; // Offset: 352 Size: 8 [unused] // float2 k_vScene_ScreenRes; // Offset: 368 Size: 8 [unused] // float2 k_vScene_TexCoordScale; // Offset: 384 Size: 8 [unused] // float3 k_vScene_ZRange; // Offset: 400 Size: 12 [unused] // float3 k_vWorldSpaceCameraDir; // Offset: 416 Size: 12 [unused] // float4 k_vHDRLuminanceWeights; // Offset: 432 Size: 16 [unused] // float4 k_vHDRBloomParams; // Offset: 448 Size: 16 [unused] // float4 k_vHDRBloomParams2; // Offset: 464 Size: 16 [unused] // float4 k_vWindDirection; // Offset: 480 Size: 16 [unused] // float4 k_vVegetationForce1; // Offset: 496 Size: 16 [unused] // float4 k_vVegetationForce2; // Offset: 512 Size: 16 [unused] // float4 k_vVegetationRadius; // Offset: 528 Size: 16 [unused] // row_major float4x4 k_mSkyShadowMapMatrix;// Offset: 544 Size: 64 [unused] // float4 k_vSkyShadowParams; // Offset: 608 Size: 16 [unused] // float4 k_vParaboloidShadowParams; // Offset: 624 Size: 16 [unused] // row_major float4x4 k_mPrevWorldToClip;// Offset: 640 Size: 64 [unused] // row_major float3x4 k_mPrevViewToWorld;// Offset: 704 Size: 48 [unused] // row_major float3x4 k_mPrevWorldToView;// Offset: 752 Size: 48 [unused] // float4 k_vMotionBlurParams; // Offset: 800 Size: 16 [unused] // // } // // // Resource Bindings: // // Name Type Format Dim Slot Elements // ------------------------------ ---------- ------- ----------- ---- -------- // sBilinear_CLAMPCLAMPCLAMP_Sampler sampler NA NA 0 1 // sBilinear_CLAMPCLAMPCLAMP_CompareSampler sampler_c NA NA 1 1 // sPoint_CLAMPCLAMPCLAMP_Sampler sampler NA NA 2 1 // tSpotProjector_LightMap texture float4 2d 0 1 // tShadowMapLinear texture float4 2darray 1 1 // tDepthMap texture float4 2d 2 1 // 
tDeferredDiffuse texture float4 2d 3 1 // tDeferredNormals texture float4 2d 4 1 // tDeferredSpecular texture float4 2d 5 1 // tAmbientOcclusion texture float4 2d 6 1 // $Globals cbuffer NA NA 0 1 // CBuffer_View cbuffer NA NA 12 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float // TEXCOORD 0 xyzw 1 NONE float xy w // TEXCOORD 1 xy 2 NONE float xy // TEXCOORD 2 xyz 3 NONE float xyz // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer cb0[41], immediateIndexed dcl_constantbuffer cb12[7], immediateIndexed dcl_sampler s0, mode_default dcl_sampler s1, mode_comparison dcl_sampler s2, mode_default dcl_resource_texture2d (float,float,float,float) t0 dcl_resource_texture2darray (float,float,float,float) t1 dcl_resource_texture2d (float,float,float,float) t2 dcl_resource_texture2d (float,float,float,float) t3 dcl_resource_texture2d (float,float,float,float) t4 dcl_resource_texture2d (float,float,float,float) t5 dcl_resource_texture2d (float,float,float,float) t6 dcl_input_ps linear v1.xyw dcl_input_ps linear v2.xy dcl_input_ps linear v3.xyz dcl_output o0.xyzw dcl_temps 10 div r0.xy, v1.xyxx, v1.wwww div r0.zw, v2.xxxy, v1.wwww sample_indexable(texture2d)(float,float,float,float) r1.z, r0.xyxx, t2.yzxw, s2 mul r1.xy, r0.zwzz, r1.zzzz sample_indexable(texture2d)(float,float,float,float) r2.xyzw, r0.xyxx, t5.xyzw, s2 ge r0.z, r2.w, l(0.500000) and r0.z, r0.z, l(0x3f800000) mad r3.xyz, r0.zzzz, cb0[40].xyzx, r1.xyzx mov r3.w, l(1.000000) dp4 r4.x, cb0[24].xyzw, r3.xyzw dp4 r4.y, cb0[25].xyzw, r3.xyzw dp4 r4.z, cb0[26].xyzw, r3.xyzw dp4 r0.z, cb0[27].xyzw, r3.xyzw div r3.xyz, r4.xyzx, r0.zzzz mad r4.xyzw, r3.xyxy, l(0.500000, -0.500000, 0.500000, 
-0.500000), l(0.500000, 0.500000, 0.500000, 0.500000) add r0.z, -r3.z, l(1.000000) mad r3.xyzw, cb0[23].zwzw, l(0.500000, -0.500000, -0.500000, -0.500000), r4.zwzw mov r5.xy, r3.zwzz mov r5.z, cb0[29].x sample_c_lz_indexable(texture2darray)(float,float,float,float) r0.w, r5.xyzx, t1.xxxx, s1, r0.z mov r3.z, cb0[29].x sample_c_lz_indexable(texture2darray)(float,float,float,float) r3.x, r3.xyzx, t1.xxxx, s1, r0.z mad r4.xyzw, cb0[23].zwzw, l(0.500000, 0.500000, -0.500000, 0.500000), r4.zwxy mov r5.xy, r4.zwzz mov r5.z, cb0[29].x sample_c_lz_indexable(texture2darray)(float,float,float,float) r3.y, r5.xyzx, t1.xxxx, s1, r0.z mov r4.z, cb0[29].x sample_c_lz_indexable(texture2darray)(float,float,float,float) r0.z, r4.xyzx, t1.xxxx, s1, r0.z add r0.w, r0.w, r3.x add r0.w, r3.y, r0.w add r0.z, r0.z, r0.w mad r0.z, -r0.z, l(0.250000), l(1.000000) add r0.w, -cb0[16].w, l(1.000000) mad r0.z, cb0[16].w, r0.z, r0.w add r3.xyz, -r1.xyzx, cb0[17].xyzx mul r3.xyz, r3.xyzx, cb0[0].xxxx dp3 r4.x, cb12[4].xyzx, r3.xyzx dp3 r4.y, cb12[5].xyzx, r3.xyzx dp3 r4.z, cb12[6].xyzx, r3.xyzx dp3 r0.w, r4.xyzx, r4.xyzx min r3.x, r0.w, l(1.000000) add r3.x, -r3.x, l(1.000000) mul r3.x, r3.x, r3.x mov r1.w, l(1.000000) dp4 r5.x, cb0[7].xyzw, r1.xyzw dp4 r5.y, cb0[8].xyzw, r1.xyzw dp4 r1.x, cb0[10].xyzw, r1.xyzw div r1.xy, r5.xyxx, r1.xxxx sample_l_indexable(texture2d)(float,float,float,float) r1.xyzw, r1.xyxx, t0.xyzw, s0, l(0.000000) mul r1.xyz, r1.xyzx, cb0[16].xyzx sample_indexable(texture2d)(float,float,float,float) r5.xyzw, r0.xyxx, t3.xyzw, s2 mul r3.yzw, r1.xxyz, r5.xxyz dp3 r4.w, v3.xyzx, v3.xyzx rsq r4.w, r4.w sample_indexable(texture2d)(float,float,float,float) r6.xyzw, r0.xyxx, t4.xyzw, s2 mad r6.xyz, r6.xyzx, l(2.000000, 2.000000, 2.000000, 0.000000), l(-1.000000, -1.000000, -1.000000, 0.000000) dp3 r7.x, r6.xyzx, r6.xyzx rsq r7.x, r7.x mul r6.xyz, r6.xyzx, r7.xxxx rsq r0.w, r0.w mul r4.xyz, r0.wwww, r4.xyzx mul r7.xyz, r1.wwww, cb0[18].xyzx mul r0.w, r2.x, r2.x 
sample_l_indexable(texture2d)(float,float,float,float) r0.xy, r0.xyxx, t6.xyzw, s0, l(0.000000) mad r8.xyz, v3.xyzx, r4.wwww, r4.xyzx dp3 r1.w, r8.xyzx, r8.xyzx rsq r1.w, r1.w mul r8.xyz, r1.wwww, r8.xyzx dp3_sat r1.w, r6.xyzx, r8.xyzx mad r9.xy, r0.wwww, l(4096.000000, 4096.000000, 0.000000, 0.000000), l(0.004000, 2.004000, 0.000000, 0.000000) mul r0.w, r9.y, l(0.125000) log r1.w, r1.w mul r1.w, r1.w, r9.x exp r1.w, r1.w mul r0.w, r0.w, r1.w add r1.w, -r2.z, l(1.000000) dp3_sat r2.x, r4.xyzx, r8.xyzx add r2.x, -r2.x, l(1.000000) mul r4.w, r2.x, r2.x mul r4.w, r4.w, r4.w mul r2.x, r2.x, r4.w mad r1.w, r1.w, r2.x, r2.z mul r0.w, r0.w, r1.w mul r7.xyz, r7.xyzx, r0.xxxx mul r2.xyz, r2.yyyy, r7.xyzx dp3 r0.x, r6.xyzx, r4.xyzx add r1.w, r5.w, r0.x add r1.w, r1.w, l(-1.000000) div_sat r1.w, r1.w, r5.w mad r2.xyz, r2.xyzx, r0.wwww, r3.yzwy mul r2.xyz, r1.wwww, r2.xyzx lt r0.w, l(0.000000), r2.w if_nz r0.w lt r0.w, r6.w, l(0.509804) add r3.yzw, r3.yyzw, r3.yyzw max r1.w, r5.z, r5.y max r1.w, r1.w, r5.x div_sat r4.xyz, r5.xyzx, r1.wwww mov_sat r1.xyz, r1.xyzx mul r4.xyz, r4.xyzx, r4.xyzx mul r1.xyz, r1.xyzx, r4.xyzx mul r1.xyz, r1.xyzx, l(0.150000, 0.150000, 0.150000, 0.000000) movc r1.xyz, r0.wwww, r3.yzwy, r1.xyzx add_sat r0.w, r2.w, l(-0.500000) mul r3.yzw, r0.wwww, r1.xxyz add_sat r4.xy, -r0.xxxx, l(0.250000, 1.000000, 0.000000, 0.000000) mad r3.yzw, r3.yyzw, r4.xxxx, r2.xxyz min r0.w, r2.w, l(0.500000) mul r1.xyz, r0.wwww, r1.xyzx mul r1.xyz, r4.yyyy, r1.xyzx add_sat r0.x, r0.x, l(0.250000) mad r2.xyz, r1.xyzx, r0.xxxx, r3.yzwy endif mul r1.xyz, r2.xyzx, r3.xxxx mul r0.xzw, r0.zzzz, r1.xxyz add_sat r0.y, r0.y, l(0.333000) mul o0.xyz, r0.yyyy, r0.xzwx mov o0.w, l(1.000000) ret // Approximately 119 instruction slots used ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ [/code] I'm talking about the "[b]float4 v4 : TEXCOORD3[/b]" whose z parameter was used for the stereo fix. How did you know that that could help to fix the shader for 3d? 3. 
Can I assume that this line: [code]r1.xyz = r0.www ? r3.yzw : r1.xyz;[/code] being converted to [code]r1.x = r0.w ? r3.y : r1.x; r1.y = r0.w ? r3.z : r1.y; r1.z = r0.w ? r3.w : r1.z; [/code] is just manual decompiler fix or is it part of the 3d fix? I'm leaning on the decompiler fix side. 4. I'm really confused by this part of a fix: [code] o0.xyzw = r0.xyzw; o1.zw = r0.zw; r1.xy = k_vScene_TexCoordScale.xy * r0.ww; r0.zw = k_vFullViewPlane.xy * r0.ww; r1.zw = k_vScene_TexCoordScale.xy * float2(1.000000e+000,-1.000000e+000); r0.xy = r0.xy * r1.zw + r1.xy; o1.xy = r0.xy; o2.xy = r0.xy * k_vFullViewPlane.zw + r0.zw; return;[/code] that becomes: [code]float4 stereo = StereoParams.Load(0); float4 r23, r20; r23.xyzw = r0.xyzw; r23.x += stereo.x * (r23.w - stereo.y); o0.xyzw = r0.xyzw; // o0.xyzw = r0.xyzw; o1.zw = r0.zw; r1.xy = k_vScene_TexCoordScale.xy * r0.ww; r0.zw = k_vFullViewPlane.xy * r0.ww; r1.zw = k_vScene_TexCoordScale.xy * float2(1.000000e+000,-1.000000e+000); r0.xy = r0.xy * r1.zw + r1.xy; r20.xy = r23.xy*r1.zw + r1.xy; o1.xy = r20.xy; o2.xy = r20.xy * k_vFullViewPlane.zw + r0.zw; return;[/code] Can you maybe give me a simple breakdown of one would arrive to this fix? If my questions are annoying you bo3b/mike then just tell me so and I won't post any more. I'm just curious on how 3d fixes work. Thanks.
So I took a look at the recent mordor fix and have a few questions:

1. For this part:
float4 stereo = StereoParams.Load(0);
float4 r23 = r1;
r1.x += stereo.x * (r1.w - stereo.y);
o0.xyzw = r23.xyzw;

//o0.xyzw = r1.xyzw;


Can't you just put
float4 stereo = StereoParams.Load(0);
o0.xyzw = r1.xyzw;
r1.x += stereo.x * (r1.w - stereo.y);

instead or is it necessary to create the r23 variable?


2. Something else new that I saw was that you guys straight up created a new variable to be exported. Here's the full code for the shader:
//Shadows
// Constant buffer bound to slot b0 (reported as "$Globals" in the compiler
// listing further down). Every member is pinned with an explicit packoffset;
// packoffset(cN) corresponds to byte offset N*16 in that listing (c1 <-> 16,
// c7 <-> 112, ...), so the declarations must not be re-packed.
// Per the listing, this shader only reads: k_fInvLightRadius,
// k_mSpotProjector_LightTransform, k_vObjectLightColor,
// k_vObjectSpaceLightPos, k_vSpecularColor, k_vShadowMapParams2,
// k_mShadowMapMatrix, k_vShadowCascadeOffsetZ and k_vLightBleed_Offset.
// The remaining members are marked [unused] there and are kept only so the
// layout is preserved.
cbuffer _Globals : register(b0)
{
float k_fInvLightRadius : packoffset(c0);
row_major float3x4 k_mCubeProjector_LightTransform : packoffset(c1);
row_major float3x4 k_mDirectional_ObjectToTex : packoffset(c4);
row_major float4x4 k_mSpotProjector_LightTransform : packoffset(c7);
float4 k_vSpotProjector_TanXYRadiusZW : packoffset(c11);
row_major float3x4 k_mWorldToVolume : packoffset(c12);
float3 k_vDirectional_Dir : packoffset(c15);
float4 k_vObjectLightColor : packoffset(c16);
float3 k_vObjectSpaceLightPos : packoffset(c17);
float4 k_vSpecularColor : packoffset(c18);
float4 k_vSpotProjector_ClipFar : packoffset(c19);
float4 k_vSpotProjector_ClipNear : packoffset(c20);
float3 k_vHalfDims : packoffset(c21);
float4 k_vShadowMapParams : packoffset(c22);
float4 k_vShadowMapParams2 : packoffset(c23);
row_major float4x4 k_mShadowMapMatrix : packoffset(c24);
float4 k_vShadowCascadeOffsetX : packoffset(c28);
float4 k_vShadowCascadeOffsetZ : packoffset(c29);
float k_fStaticLightVolume_Mip : packoffset(c30);
float4 k_vShadowSplitDistSquared : packoffset(c31);
float4 k_vShadowSplitDistSelect : packoffset(c32);
float4 k_vShadowMoveX : packoffset(c33);
float4 k_vShadowMoveY : packoffset(c34);
float4 k_vShadowMoveZ : packoffset(c35);
float4 k_vShadowScaleXY : packoffset(c36);
float4 k_vShadowScaleX : packoffset(c37);
float4 k_vShadowScaleY : packoffset(c38);
float4 k_vShadowScaleZ : packoffset(c39);
float3 k_vLightBleed_Offset : packoffset(c40);
float4 k_vStaticLightVolume_PivotContrast : packoffset(c41);
float k_fLightLayer : packoffset(c42);
row_major float3x4 k_mViewToLight : packoffset(c43);
row_major float4x4 k_mObjectToClip : packoffset(c46);
row_major float3x4 k_mObjectToView : packoffset(c50);
row_major float3x4 k_mObjectToWorld : packoffset(c53);
float4 k_vObjectColor : packoffset(c56);
float3 k_vObjectSpaceEyePos : packoffset(c57);
row_major float4x4 k_mPrevObjectToClip : packoffset(c58);
row_major float3x4 k_mPrevObjectToView : packoffset(c62);
row_major float3x4 k_mPrevObjectToWorld : packoffset(c65);
}

// Per-view constant buffer bound to slot b12. Members are pinned with
// explicit packoffset (cN <-> byte offset N*16 in the compiler listing
// further down). Per that listing, k_mViewToWorld is the only member this
// shader reads; everything else is marked [unused] and is kept only so the
// layout is preserved.
cbuffer CBuffer_View : register(b12)
{
row_major float4x4 k_mWorldToClip : packoffset(c0);
row_major float3x4 k_mViewToWorld : packoffset(c4);
row_major float3x4 k_mWorldToView : packoffset(c7);
row_major float4x4 k_mDrawPrimToClip : packoffset(c10);
float4 k_vFogRayleigh : packoffset(c14);
float4 k_vFogMieA : packoffset(c15);
float4 k_vFogMieB : packoffset(c16);
float4 k_vFogMieC : packoffset(c17);
float4 k_vFogSky : packoffset(c18);
float4 k_vFogSunColor : packoffset(c19);
float3 k_vFogSunDir : packoffset(c20);
float4 k_vFullViewPlane : packoffset(c21);
float2 k_vPerspOrthoMask : packoffset(c22);
float2 k_vScene_ScreenRes : packoffset(c23);
float2 k_vScene_TexCoordScale : packoffset(c24);
float3 k_vScene_ZRange : packoffset(c25);
float3 k_vWorldSpaceCameraDir : packoffset(c26);
float4 k_vHDRLuminanceWeights : packoffset(c27);
float4 k_vHDRBloomParams : packoffset(c28);
float4 k_vHDRBloomParams2 : packoffset(c29);
float4 k_vWindDirection : packoffset(c30);
float4 k_vVegetationForce1 : packoffset(c31);
float4 k_vVegetationForce2 : packoffset(c32);
float4 k_vVegetationRadius : packoffset(c33);
row_major float4x4 k_mSkyShadowMapMatrix : packoffset(c34);
float4 k_vSkyShadowParams : packoffset(c38);
float4 k_vParaboloidShadowParams : packoffset(c39);
row_major float4x4 k_mPrevWorldToClip : packoffset(c40);
row_major float3x4 k_mPrevViewToWorld : packoffset(c44);
row_major float3x4 k_mPrevWorldToView : packoffset(c47);
float4 k_vMotionBlurParams : packoffset(c50);
}
// Samplers. Slots s0/s1/s2 match the "Resource Bindings" table in the
// compiler listing further down. s1 is declared as a comparison sampler
// because it is only used with SampleCmpLevelZero in main().
SamplerState sBilinear_CLAMPCLAMPCLAMP_Sampler_s : register(s0);
SamplerState sPoint_CLAMPCLAMPCLAMP_Sampler_s : register(s2);
SamplerComparisonState sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s : register(s1);
// Textures t0..t6, again matching the listing's binding table. Note that t1
// is a 2D texture *array*; main() supplies the slice index explicitly.
Texture2D<float4> tSpotProjector_LightMap : register(t0);
Texture2DArray<float4> tShadowMapLinear : register(t1);
Texture2D<float4> tDepthMap : register(t2);
Texture2D<float4> tDeferredDiffuse : register(t3);
Texture2D<float4> tDeferredNormals : register(t4);
Texture2D<float4> tDeferredSpecular : register(t5);
Texture2D<float4> tAmbientOcclusion : register(t6);

// Not present in the listing's binding table, i.e. not part of the original
// shader. main() reads texel 0 of it for the stereo correction.
// NOTE(review): presumably supplied by the 3Dmigoto wrapper on slot t125 - confirm.
Texture2D<float4> StereoParams : register(t125);

// Pixel shader entry point (ps_5_0 per the compiler listing further down).
// Reads depth plus the deferred diffuse/normal/specular/AO textures, computes
// a shadow-mapped, projector-textured light contribution and writes one
// colour to SV_Target0 with alpha forced to 1.
//
// Inputs:
//   v0 (SV_POSITION0) - not referenced in the body.
//   v1 (TEXCOORD0)    - .xy / .w gives the texture coordinate used for all
//                       screen-space texture reads; .w also divides v2.
//   v2 (TEXCOORD1)    - .xy / v1.w is scaled by the sampled depth to build
//                       the position in r1.xy.
//   v3 (TEXCOORD2)    - a direction; it is normalised before use.
//   v4 (TEXCOORD3)    - absent from the input signature in the listing, so
//                       it is an addition relative to the original shader.
//                       Only v4.z is read, in the stereo correction.
// Output:
//   o0 (SV_Target0)   - lit colour in .xyz, 1.0 in .w.
//
// This is decompiler output: the rN temporaries are reused for unrelated
// values, so statement order matters (e.g. r0.xy is the screen UV until the
// ambient-occlusion read overwrites it, and r0.x later becomes N.L).
void main(
float4 v0 : SV_POSITION0,
float4 v1 : TEXCOORD0,
float2 v2 : TEXCOORD1,
float3 v3 : TEXCOORD2,
float4 v4 : TEXCOORD3,
out float4 o0 : SV_Target0)
{
float4 r0,r1,r2,r3,r4,r5,r6,r7,r8,r9;
// bitmask is declared but never used in this body.
uint4 bitmask;
// Perspective divide of both interpolants by v1.w.
r0.xy = v1.xy / v1.ww;
r0.zw = v2.xy / v1.ww;
// r1.z = sampled depth; r1.xy = depth * r0.zw, so r1.xyz is a position.
// NOTE(review): presumably view-space position reconstructed from depth - confirm.
r1.z = tDepthMap.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).x;
r1.xy = r1.zz * r0.zw;

// Stereo correction (not in the original disassembly): shift the position's
// x by stereo.x * (depth - stereo.y), scaled by v4.z / 2. Everything below
// that consumes r1.xyz therefore sees the shifted position.
// NOTE(review): stereo.x / stereo.y are presumably separation / convergence - confirm.
// The trailing commented-out factors look like earlier scale attempts.
float4 stereo = StereoParams.Load(0);
r1.x -= stereo.x * (r1.z - stereo.y)*v4.z/2; //k_vFullViewPlane.z/2; //*0.187;

// r2 = deferred specular texel. r0.z becomes 1.0 when r2.w >= 0.5, else 0,
// and gates whether k_vLightBleed_Offset is added to the position.
r2.xyzw = tDeferredSpecular.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).xyzw;
r0.z = r2.w >= 5.000000000e-001;
r0.z = r0.z ? 1.000000 : 0;
r3.xyz = r0.zzz * k_vLightBleed_Offset.xyz + r1.xyz;
r3.w = 1.000000000e+000;

// Transform the (offset) position by k_mShadowMapMatrix and divide by the
// result of row 3.
r4.x = dot(k_mShadowMapMatrix._m00_m01_m02_m03, r3.xyzw);
r4.y = dot(k_mShadowMapMatrix._m10_m11_m12_m13, r3.xyzw);
r4.z = dot(k_mShadowMapMatrix._m20_m21_m22_m23, r3.xyzw);
r0.z = dot(k_mShadowMapMatrix._m30_m31_m32_m33, r3.xyzw);
r3.xyz = r4.xyz / r0.zzz;
// Remap to texture space: x*0.5+0.5, y*-0.5+0.5 (duplicated into .zw).
r4.xyzw = r3.xyxy * float4(5.000000e-001,-5.000000e-001,5.000000e-001,-5.000000e-001) + float4(5.000000e-001,5.000000e-001,5.000000e-001,5.000000e-001);
// Comparison reference for all four taps: 1 - projected z.
r0.z = 1.000000000e+000 + -r3.z;
// Four comparison taps at the base coordinate +/- 0.5 * k_vShadowMapParams2.zw
// in x and y. The array slice is k_vShadowCascadeOffsetZ.x for every tap.
r3.xyzw = k_vShadowMapParams2.zwzw * float4(5.000000e-001,-5.000000e-001,-5.000000e-001,-5.000000e-001) + r4.zwzw;
r5.xy = r3.zw;
r5.z = k_vShadowCascadeOffsetZ.x;
r0.w = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r5.xyz, r0.z).x;
r3.z = k_vShadowCascadeOffsetZ.x;
r3.x = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r3.xyz, r0.z).x;
r4.xyzw = k_vShadowMapParams2.zwzw * float4(5.000000e-001,5.000000e-001,-5.000000e-001,5.000000e-001) + r4.zwxy;
r5.xy = r4.zw;
r5.z = k_vShadowCascadeOffsetZ.x;
r3.y = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r5.xyz, r0.z).x;
r4.z = k_vShadowCascadeOffsetZ.x;
// The last tap overwrites r0.z (the reference value is no longer needed).
r0.z = tShadowMapLinear.SampleCmpLevelZero(sBilinear_CLAMPCLAMPCLAMP_CompareSampler_s, r4.xyz, r0.z).x;
// r0.z = 1 - (sum of the four taps) / 4.
r0.w = r3.x + r0.w;
r0.w = r0.w + r3.y;
r0.z = r0.w + r0.z;
r0.z = -r0.z * 2.500000000e-001 + 1.000000000e+000;
// Blend towards 1 by k_vObjectLightColor.w: r0.z = w * r0.z + (1 - w).
// r0.z is the shadow factor applied at the very end.
r0.w = 1.000000000e+000 + -k_vObjectLightColor.w;
r0.z = k_vObjectLightColor.w * r0.z + r0.w;
// Vector from the position to k_vObjectSpaceLightPos, scaled by
// k_fInvLightRadius and rotated by the 3x3 part of k_mViewToWorld.
r3.xyz = k_vObjectSpaceLightPos.xyz + -r1.xyz;
r3.xyz = k_fInvLightRadius * r3.xyz;
r4.x = dot(k_mViewToWorld._m00_m01_m02, r3.xyz);
r4.y = dot(k_mViewToWorld._m10_m11_m12, r3.xyz);
r4.z = dot(k_mViewToWorld._m20_m21_m22, r3.xyz);
// r0.w = squared length; r3.x = (1 - min(r0.w, 1))^2 is the distance falloff.
r0.w = dot(r4.xyz, r4.xyz);
r3.x = min(r0.w, 1.000000000e+000);
r3.x = 1.000000000e+000 + -r3.x;
r3.x = r3.x * r3.x;
// Project the position with rows 0, 1 and 3 of k_mSpotProjector_LightTransform
// (row 2 is not used), divide, and read the projector light map at mip 0.
r1.w = 1.000000000e+000;
r5.x = dot(k_mSpotProjector_LightTransform._m00_m01_m02_m03, r1.xyzw);
r5.y = dot(k_mSpotProjector_LightTransform._m10_m11_m12_m13, r1.xyzw);
r1.x = dot(k_mSpotProjector_LightTransform._m30_m31_m32_m33, r1.xyzw);
r1.xy = r5.xy / r1.xx;
// From here on r1 holds the light-map texel, not the position.
r1.xyzw = tSpotProjector_LightMap.SampleLevel(sBilinear_CLAMPCLAMPCLAMP_Sampler_s, r1.xy, 0.000000000e+000).xyzw;
r1.xyz = k_vObjectLightColor.xyz * r1.xyz;
// r5 = deferred diffuse texel; r3.yzw = diffuse * light colour.
r5.xyzw = tDeferredDiffuse.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).xyzw;
r3.yzw = r5.xyz * r1.xyz;
// r4.w = 1 / length(v3).
r4.w = dot(v3.xyz, v3.xyz);
r4.w = rsqrt(r4.w);
// r6 = deferred normal texel; xyz is expanded from [0,1] to [-1,1] and
// normalised.
r6.xyzw = tDeferredNormals.Sample(sPoint_CLAMPCLAMPCLAMP_Sampler_s, r0.xy).xyzw;
r6.xyz = r6.xyz * float3(2.000000e+000,2.000000e+000,2.000000e+000) + float3(-1.000000e+000,-1.000000e+000,-1.000000e+000);
r7.x = dot(r6.xyz, r6.xyz);
r7.x = rsqrt(r7.x);
r6.xyz = r7.xxx * r6.xyz;
// Normalise the light vector in r4.xyz.
r0.w = rsqrt(r0.w);
r4.xyz = r4.xyz * r0.www;
// r7 = specular colour * light-map alpha; r0.w = (specular texel .x)^2.
r7.xyz = k_vSpecularColor.xyz * r1.www;
r0.w = r2.x * r2.x;
// Ambient-occlusion read. This overwrites the screen UV in r0.xy.
r0.xy = tAmbientOcclusion.SampleLevel(sBilinear_CLAMPCLAMPCLAMP_Sampler_s, r0.xy, 0.000000000e+000).xy;
// r8 = normalize(normalize(v3) + light direction).
r8.xyz = v3.xyz * r4.www + r4.xyz;
r1.w = dot(r8.xyz, r8.xyz);
r1.w = rsqrt(r1.w);
r8.xyz = r8.xyz * r1.www;
// Specular lobe: saturate(dot(normal, r8)) raised to the exponent r9.x via
// exp2(log2(x) * e), multiplied by r9.y / 8, where
// r9.xy = r0.w * 4096 + (0.004, 2.004).
r1.w = saturate(dot(r6.xyz, r8.xyz));
r9.xy = r0.ww * float2(4.096000e+003,4.096000e+003) + float2(4.000000e-003,2.004000e+000);
r0.w = 1.250000000e-001 * r9.y;
r1.w = log2(r1.w);
r1.w = r9.x * r1.w;
r1.w = exp2(r1.w);
r0.w = r1.w * r0.w;
// r1.w = r2.z + (1 - r2.z) * (1 - saturate(dot(light, r8)))^5.
// NOTE(review): this has the form of Schlick's Fresnel approximation with
// r2.z as the base reflectance - confirm intent.
r1.w = 1.000000000e+000 + -r2.z;
r2.x = saturate(dot(r4.xyz, r8.xyz));
r2.x = 1.000000000e+000 + -r2.x;
r4.w = r2.x * r2.x;
r4.w = r4.w * r4.w;
r2.x = r4.w * r2.x;
r1.w = r1.w * r2.x + r2.z;
r0.w = r1.w * r0.w;
// Specular colour scaled by AO.x and by the specular texel's .y.
r7.xyz = r0.xxx * r7.xyz;
r2.xyz = r7.xyz * r2.yyy;
// r0.x now becomes dot(normal, light); AO.x is no longer available.
r0.x = dot(r6.xyz, r4.xyz);
// r1.w = saturate((N.L + r5.w - 1) / r5.w), using the diffuse texel's alpha.
r1.w = r0.x + r5.w;
r1.w = -1.000000000e+000 + r1.w;
r1.w = saturate(r1.w / r5.w);
// r2.xyz = (specular * r0.w + diffuse * light) * r1.w.
r2.xyz = r2.xyz * r0.www + r3.yzw;
r2.xyz = r2.xyz * r1.www;
// Extra contribution, only when the specular texel's .w is > 0.
r0.w = 0.000000000e+000 < r2.w;
if (r0.w != 0) {
// r0.w selects between two candidate colours based on the normal texel's .w.
r0.w = r6.w < 5.098039508e-001;
// Candidate A: 2 * diffuse * light.
r3.yzw = r3.yzw + r3.yzw;
// Candidate B: 0.15 * saturate(light) * saturate(diffuse / max component)^2.
r1.w = max(r5.z, r5.y);
r1.w = max(r1.w, r5.x);
r4.xyz = saturate(r5.xyz / r1.www);
r1.xyz = saturate(r1.xyz);
r4.xyz = r4.xyz * r4.xyz;
r1.xyz = r4.xyz * r1.xyz;
r1.xyz = float3(1.500000e-001,1.500000e-001,1.500000e-001) * r1.xyz;
// Per-component select; this is a single movc on r0.wwww in the listing.
r1.x = r0.w ? r3.y : r1.x;
r1.y = r0.w ? r3.z : r1.y;
r1.z = r0.w ? r3.w : r1.z;
// First term: colour * saturate(r2.w - 0.5) * saturate(0.25 - N.L), added
// to the running result in r2.xyz.
r0.w = saturate(-5.000000000e-001 + r2.w);
r3.yzw = r1.xyz * r0.www;
r4.xy = saturate(float2(2.500000e-001,1.000000e+000) + -r0.xx);
r3.yzw = r3.yzw * r4.xxx + r2.xyz;
// Second term: colour * min(r2.w, 0.5) * saturate(1 - N.L) * saturate(0.25 + N.L).
r0.w = min(r2.w, 5.000000000e-001);
r1.xyz = r1.xyz * r0.www;
r1.xyz = r1.xyz * r4.yyy;
r0.x = saturate(2.500000000e-001 + r0.x);
r2.xyz = r1.xyz * r0.xxx + r3.yzw;
}
// Final: result * distance falloff (r3.x) * shadow factor (r0.z)
// * saturate(0.333 + AO.y). r0.y still holds AO.y from the AO read.
r1.xyz = r3.xxx * r2.xyz;
r0.xzw = r1.xyz * r0.zzz;
r0.y = saturate(3.330000043e-001 + r0.y);
o0.xyz = r0.xzw * r0.yyy;
o0.w = 1.000000000e+000;

// Disabled statement left in place; enabling it would zero the output.
// o0=0;
return;
}

/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.30.9200.20546
//
//
// Buffer Definitions:
//
// cbuffer $Globals
// {
//
// float k_fInvLightRadius; // Offset: 0 Size: 4
// row_major float3x4 k_mCubeProjector_LightTransform;// Offset: 16 Size: 48 [unused]
// row_major float3x4 k_mDirectional_ObjectToTex;// Offset: 64 Size: 48 [unused]
// row_major float4x4 k_mSpotProjector_LightTransform;// Offset: 112 Size: 64
// float4 k_vSpotProjector_TanXYRadiusZW;// Offset: 176 Size: 16 [unused]
// row_major float3x4 k_mWorldToVolume;// Offset: 192 Size: 48 [unused]
// float3 k_vDirectional_Dir; // Offset: 240 Size: 12 [unused]
// float4 k_vObjectLightColor; // Offset: 256 Size: 16
// float3 k_vObjectSpaceLightPos; // Offset: 272 Size: 12
// float4 k_vSpecularColor; // Offset: 288 Size: 16
// float4 k_vSpotProjector_ClipFar; // Offset: 304 Size: 16 [unused]
// float4 k_vSpotProjector_ClipNear; // Offset: 320 Size: 16 [unused]
// float3 k_vHalfDims; // Offset: 336 Size: 12 [unused]
// float4 k_vShadowMapParams; // Offset: 352 Size: 16 [unused]
// float4 k_vShadowMapParams2; // Offset: 368 Size: 16
// row_major float4x4 k_mShadowMapMatrix;// Offset: 384 Size: 64
// float4 k_vShadowCascadeOffsetX; // Offset: 448 Size: 16 [unused]
// float4 k_vShadowCascadeOffsetZ; // Offset: 464 Size: 16
// float k_fStaticLightVolume_Mip; // Offset: 480 Size: 4 [unused]
// float4 k_vShadowSplitDistSquared; // Offset: 496 Size: 16 [unused]
// float4 k_vShadowSplitDistSelect; // Offset: 512 Size: 16 [unused]
// float4 k_vShadowMoveX; // Offset: 528 Size: 16 [unused]
// float4 k_vShadowMoveY; // Offset: 544 Size: 16 [unused]
// float4 k_vShadowMoveZ; // Offset: 560 Size: 16 [unused]
// float4 k_vShadowScaleXY; // Offset: 576 Size: 16 [unused]
// float4 k_vShadowScaleX; // Offset: 592 Size: 16 [unused]
// float4 k_vShadowScaleY; // Offset: 608 Size: 16 [unused]
// float4 k_vShadowScaleZ; // Offset: 624 Size: 16 [unused]
// float3 k_vLightBleed_Offset; // Offset: 640 Size: 12
// float4 k_vStaticLightVolume_PivotContrast;// Offset: 656 Size: 16 [unused]
// float k_fLightLayer; // Offset: 672 Size: 4 [unused]
// row_major float3x4 k_mViewToLight; // Offset: 688 Size: 48 [unused]
// row_major float4x4 k_mObjectToClip;// Offset: 736 Size: 64 [unused]
// row_major float3x4 k_mObjectToView;// Offset: 800 Size: 48 [unused]
// row_major float3x4 k_mObjectToWorld;// Offset: 848 Size: 48 [unused]
// float4 k_vObjectColor; // Offset: 896 Size: 16 [unused]
// float3 k_vObjectSpaceEyePos; // Offset: 912 Size: 12 [unused]
// row_major float4x4 k_mPrevObjectToClip;// Offset: 928 Size: 64 [unused]
// row_major float3x4 k_mPrevObjectToView;// Offset: 992 Size: 48 [unused]
// row_major float3x4 k_mPrevObjectToWorld;// Offset: 1040 Size: 48 [unused]
//
// }
//
// cbuffer CBuffer_View
// {
//
// row_major float4x4 k_mWorldToClip; // Offset: 0 Size: 64 [unused]
// row_major float3x4 k_mViewToWorld; // Offset: 64 Size: 48
// row_major float3x4 k_mWorldToView; // Offset: 112 Size: 48 [unused]
// row_major float4x4 k_mDrawPrimToClip;// Offset: 160 Size: 64 [unused]
// float4 k_vFogRayleigh; // Offset: 224 Size: 16 [unused]
// float4 k_vFogMieA; // Offset: 240 Size: 16 [unused]
// float4 k_vFogMieB; // Offset: 256 Size: 16 [unused]
// float4 k_vFogMieC; // Offset: 272 Size: 16 [unused]
// float4 k_vFogSky; // Offset: 288 Size: 16 [unused]
// float4 k_vFogSunColor; // Offset: 304 Size: 16 [unused]
// float3 k_vFogSunDir; // Offset: 320 Size: 12 [unused]
// float4 k_vFullViewPlane; // Offset: 336 Size: 16 [unused]
// float2 k_vPerspOrthoMask; // Offset: 352 Size: 8 [unused]
// float2 k_vScene_ScreenRes; // Offset: 368 Size: 8 [unused]
// float2 k_vScene_TexCoordScale; // Offset: 384 Size: 8 [unused]
// float3 k_vScene_ZRange; // Offset: 400 Size: 12 [unused]
// float3 k_vWorldSpaceCameraDir; // Offset: 416 Size: 12 [unused]
// float4 k_vHDRLuminanceWeights; // Offset: 432 Size: 16 [unused]
// float4 k_vHDRBloomParams; // Offset: 448 Size: 16 [unused]
// float4 k_vHDRBloomParams2; // Offset: 464 Size: 16 [unused]
// float4 k_vWindDirection; // Offset: 480 Size: 16 [unused]
// float4 k_vVegetationForce1; // Offset: 496 Size: 16 [unused]
// float4 k_vVegetationForce2; // Offset: 512 Size: 16 [unused]
// float4 k_vVegetationRadius; // Offset: 528 Size: 16 [unused]
// row_major float4x4 k_mSkyShadowMapMatrix;// Offset: 544 Size: 64 [unused]
// float4 k_vSkyShadowParams; // Offset: 608 Size: 16 [unused]
// float4 k_vParaboloidShadowParams; // Offset: 624 Size: 16 [unused]
// row_major float4x4 k_mPrevWorldToClip;// Offset: 640 Size: 64 [unused]
// row_major float3x4 k_mPrevViewToWorld;// Offset: 704 Size: 48 [unused]
// row_major float3x4 k_mPrevWorldToView;// Offset: 752 Size: 48 [unused]
// float4 k_vMotionBlurParams; // Offset: 800 Size: 16 [unused]
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// sBilinear_CLAMPCLAMPCLAMP_Sampler sampler NA NA 0 1
// sBilinear_CLAMPCLAMPCLAMP_CompareSampler sampler_c NA NA 1 1
// sPoint_CLAMPCLAMPCLAMP_Sampler sampler NA NA 2 1
// tSpotProjector_LightMap texture float4 2d 0 1
// tShadowMapLinear texture float4 2darray 1 1
// tDepthMap texture float4 2d 2 1
// tDeferredDiffuse texture float4 2d 3 1
// tDeferredNormals texture float4 2d 4 1
// tDeferredSpecular texture float4 2d 5 1
// tAmbientOcclusion texture float4 2d 6 1
// $Globals cbuffer NA NA 0 1
// CBuffer_View cbuffer NA NA 12 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float
// TEXCOORD 0 xyzw 1 NONE float xy w
// TEXCOORD 1 xy 2 NONE float xy
// TEXCOORD 2 xyz 3 NONE float xyz
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Target 0 xyzw 0 TARGET float xyzw
//
ps_5_0
dcl_globalFlags refactoringAllowed
dcl_constantbuffer cb0[41], immediateIndexed
dcl_constantbuffer cb12[7], immediateIndexed
dcl_sampler s0, mode_default
dcl_sampler s1, mode_comparison
dcl_sampler s2, mode_default
dcl_resource_texture2d (float,float,float,float) t0
dcl_resource_texture2darray (float,float,float,float) t1
dcl_resource_texture2d (float,float,float,float) t2
dcl_resource_texture2d (float,float,float,float) t3
dcl_resource_texture2d (float,float,float,float) t4
dcl_resource_texture2d (float,float,float,float) t5
dcl_resource_texture2d (float,float,float,float) t6
dcl_input_ps linear v1.xyw
dcl_input_ps linear v2.xy
dcl_input_ps linear v3.xyz
dcl_output o0.xyzw
dcl_temps 10
div r0.xy, v1.xyxx, v1.wwww
div r0.zw, v2.xxxy, v1.wwww
sample_indexable(texture2d)(float,float,float,float) r1.z, r0.xyxx, t2.yzxw, s2
mul r1.xy, r0.zwzz, r1.zzzz
sample_indexable(texture2d)(float,float,float,float) r2.xyzw, r0.xyxx, t5.xyzw, s2
ge r0.z, r2.w, l(0.500000)
and r0.z, r0.z, l(0x3f800000)
mad r3.xyz, r0.zzzz, cb0[40].xyzx, r1.xyzx
mov r3.w, l(1.000000)
dp4 r4.x, cb0[24].xyzw, r3.xyzw
dp4 r4.y, cb0[25].xyzw, r3.xyzw
dp4 r4.z, cb0[26].xyzw, r3.xyzw
dp4 r0.z, cb0[27].xyzw, r3.xyzw
div r3.xyz, r4.xyzx, r0.zzzz
mad r4.xyzw, r3.xyxy, l(0.500000, -0.500000, 0.500000, -0.500000), l(0.500000, 0.500000, 0.500000, 0.500000)
add r0.z, -r3.z, l(1.000000)
mad r3.xyzw, cb0[23].zwzw, l(0.500000, -0.500000, -0.500000, -0.500000), r4.zwzw
mov r5.xy, r3.zwzz
mov r5.z, cb0[29].x
sample_c_lz_indexable(texture2darray)(float,float,float,float) r0.w, r5.xyzx, t1.xxxx, s1, r0.z
mov r3.z, cb0[29].x
sample_c_lz_indexable(texture2darray)(float,float,float,float) r3.x, r3.xyzx, t1.xxxx, s1, r0.z
mad r4.xyzw, cb0[23].zwzw, l(0.500000, 0.500000, -0.500000, 0.500000), r4.zwxy
mov r5.xy, r4.zwzz
mov r5.z, cb0[29].x
sample_c_lz_indexable(texture2darray)(float,float,float,float) r3.y, r5.xyzx, t1.xxxx, s1, r0.z
mov r4.z, cb0[29].x
sample_c_lz_indexable(texture2darray)(float,float,float,float) r0.z, r4.xyzx, t1.xxxx, s1, r0.z
add r0.w, r0.w, r3.x
add r0.w, r3.y, r0.w
add r0.z, r0.z, r0.w
mad r0.z, -r0.z, l(0.250000), l(1.000000)
add r0.w, -cb0[16].w, l(1.000000)
mad r0.z, cb0[16].w, r0.z, r0.w
add r3.xyz, -r1.xyzx, cb0[17].xyzx
mul r3.xyz, r3.xyzx, cb0[0].xxxx
dp3 r4.x, cb12[4].xyzx, r3.xyzx
dp3 r4.y, cb12[5].xyzx, r3.xyzx
dp3 r4.z, cb12[6].xyzx, r3.xyzx
dp3 r0.w, r4.xyzx, r4.xyzx
min r3.x, r0.w, l(1.000000)
add r3.x, -r3.x, l(1.000000)
mul r3.x, r3.x, r3.x
mov r1.w, l(1.000000)
dp4 r5.x, cb0[7].xyzw, r1.xyzw
dp4 r5.y, cb0[8].xyzw, r1.xyzw
dp4 r1.x, cb0[10].xyzw, r1.xyzw
div r1.xy, r5.xyxx, r1.xxxx
sample_l_indexable(texture2d)(float,float,float,float) r1.xyzw, r1.xyxx, t0.xyzw, s0, l(0.000000)
mul r1.xyz, r1.xyzx, cb0[16].xyzx
sample_indexable(texture2d)(float,float,float,float) r5.xyzw, r0.xyxx, t3.xyzw, s2
mul r3.yzw, r1.xxyz, r5.xxyz
dp3 r4.w, v3.xyzx, v3.xyzx
rsq r4.w, r4.w
sample_indexable(texture2d)(float,float,float,float) r6.xyzw, r0.xyxx, t4.xyzw, s2
mad r6.xyz, r6.xyzx, l(2.000000, 2.000000, 2.000000, 0.000000), l(-1.000000, -1.000000, -1.000000, 0.000000)
dp3 r7.x, r6.xyzx, r6.xyzx
rsq r7.x, r7.x
mul r6.xyz, r6.xyzx, r7.xxxx
rsq r0.w, r0.w
mul r4.xyz, r0.wwww, r4.xyzx
mul r7.xyz, r1.wwww, cb0[18].xyzx
mul r0.w, r2.x, r2.x
sample_l_indexable(texture2d)(float,float,float,float) r0.xy, r0.xyxx, t6.xyzw, s0, l(0.000000)
mad r8.xyz, v3.xyzx, r4.wwww, r4.xyzx
dp3 r1.w, r8.xyzx, r8.xyzx
rsq r1.w, r1.w
mul r8.xyz, r1.wwww, r8.xyzx
dp3_sat r1.w, r6.xyzx, r8.xyzx
mad r9.xy, r0.wwww, l(4096.000000, 4096.000000, 0.000000, 0.000000), l(0.004000, 2.004000, 0.000000, 0.000000)
mul r0.w, r9.y, l(0.125000)
log r1.w, r1.w
mul r1.w, r1.w, r9.x
exp r1.w, r1.w
mul r0.w, r0.w, r1.w
add r1.w, -r2.z, l(1.000000)
dp3_sat r2.x, r4.xyzx, r8.xyzx
add r2.x, -r2.x, l(1.000000)
mul r4.w, r2.x, r2.x
mul r4.w, r4.w, r4.w
mul r2.x, r2.x, r4.w
mad r1.w, r1.w, r2.x, r2.z
mul r0.w, r0.w, r1.w
mul r7.xyz, r7.xyzx, r0.xxxx
mul r2.xyz, r2.yyyy, r7.xyzx
dp3 r0.x, r6.xyzx, r4.xyzx
add r1.w, r5.w, r0.x
add r1.w, r1.w, l(-1.000000)
div_sat r1.w, r1.w, r5.w
mad r2.xyz, r2.xyzx, r0.wwww, r3.yzwy
mul r2.xyz, r1.wwww, r2.xyzx
lt r0.w, l(0.000000), r2.w
if_nz r0.w
lt r0.w, r6.w, l(0.509804)
add r3.yzw, r3.yyzw, r3.yyzw
max r1.w, r5.z, r5.y
max r1.w, r1.w, r5.x
div_sat r4.xyz, r5.xyzx, r1.wwww
mov_sat r1.xyz, r1.xyzx
mul r4.xyz, r4.xyzx, r4.xyzx
mul r1.xyz, r1.xyzx, r4.xyzx
mul r1.xyz, r1.xyzx, l(0.150000, 0.150000, 0.150000, 0.000000)
movc r1.xyz, r0.wwww, r3.yzwy, r1.xyzx
add_sat r0.w, r2.w, l(-0.500000)
mul r3.yzw, r0.wwww, r1.xxyz
add_sat r4.xy, -r0.xxxx, l(0.250000, 1.000000, 0.000000, 0.000000)
mad r3.yzw, r3.yyzw, r4.xxxx, r2.xxyz
min r0.w, r2.w, l(0.500000)
mul r1.xyz, r0.wwww, r1.xyzx
mul r1.xyz, r4.yyyy, r1.xyzx
add_sat r0.x, r0.x, l(0.250000)
mad r2.xyz, r1.xyzx, r0.xxxx, r3.yzwy
endif
mul r1.xyz, r2.xyzx, r3.xxxx
mul r0.xzw, r0.zzzz, r1.xxyz
add_sat r0.y, r0.y, l(0.333000)
mul o0.xyz, r0.yyyy, r0.xzwx
mov o0.w, l(1.000000)
ret
// Approximately 119 instruction slots used

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/


I'm talking about the "float4 v4 : TEXCOORD3" whose z parameter was used for the stereo fix. How did you know that that could help to fix the shader for 3d?


3. Can I assume that this line:
r1.xyz = r0.www ? r3.yzw : r1.xyz;


being converted to
r1.x = r0.w ? r3.y : r1.x; 
r1.y = r0.w ? r3.z : r1.y;
r1.z = r0.w ? r3.w : r1.z;

is just manual decompiler fix or is it part of the 3d fix? I'm leaning on the decompiler fix side.


4. I'm really confused by this part of a fix:
o0.xyzw = r0.xyzw;
o1.zw = r0.zw;
r1.xy = k_vScene_TexCoordScale.xy * r0.ww;
r0.zw = k_vFullViewPlane.xy * r0.ww;
r1.zw = k_vScene_TexCoordScale.xy * float2(1.000000e+000,-1.000000e+000);
r0.xy = r0.xy * r1.zw + r1.xy;
o1.xy = r0.xy;
o2.xy = r0.xy * k_vFullViewPlane.zw + r0.zw;
return;


that becomes:
float4 stereo = StereoParams.Load(0);
float4 r23, r20;
r23.xyzw = r0.xyzw;
r23.x += stereo.x * (r23.w - stereo.y);

o0.xyzw = r0.xyzw;

// o0.xyzw = r0.xyzw;
o1.zw = r0.zw;
r1.xy = k_vScene_TexCoordScale.xy * r0.ww;
r0.zw = k_vFullViewPlane.xy * r0.ww;
r1.zw = k_vScene_TexCoordScale.xy * float2(1.000000e+000,-1.000000e+000);
r0.xy = r0.xy * r1.zw + r1.xy;
r20.xy = r23.xy*r1.zw + r1.xy;
o1.xy = r20.xy;
o2.xy = r20.xy * k_vFullViewPlane.zw + r0.zw;
return;


Can you maybe give me a simple breakdown of how one would arrive at this fix?

If my questions are annoying you bo3b/mike then just tell me so and I won't post any more. I'm just curious on how 3d fixes work.

Thanks.

Posted 11/16/2014 01:05 AM   
No worries, I think it's good to explain some of these things in these threads, because it makes it easier for other people to learn as well, and they can be found with google searches. Sometimes we won't get back to you because of being too busy, but we always want to try to explain things. [quote="ForgottenProdigy"] Can't you just put [code] float4 stereo = StereoParams.Load(0); o0.xyzw = r1.xyzw; r1.x += stereo.x * (r1.w - stereo.y); [/code] instead or is it necessary to create the r23 variable?[/quote] Yep, you can do the simpler version. This is an artifact of copy and paste where early on we didn't quite understand how flexible HLSL is, and were using techniques that came from the ASM side, where you have to do it via temporary variables. This fix is similar to the blood spatters in Alien, where the output itself needs to be unchanged, but we want to modify the follow-on uses of that variable to make it stereo. [quote="ForgottenProdigy"] 2. Something else new that I saw was that you guys straight up created a new variable to be exported. Here's the full code for the shader: ... I'm talking about the "[b]float4 v4 : TEXCOORD3[/b]" whose z parameter was used for the stereo fix. How did you know that that could help to fix the shader for 3d?[/quote] This is actually fixing the problem in the corresponding PixelShader. The variable was added in the VertexShader associated with this effect, and is passing in the .z parameter as a specific matrix value to be used in the PixelShader. The VS had the proper matrix, the PS did not, so this is one way to pass that on where it's needed. For whatever reason, this seems to work more often in DX11/HLSL than in the ASM versions. Sometimes it just doesn't work in ASM, and it's not clear why. [quote="ForgottenProdigy"] 3. Can I assume that this line: [code]r1.xyz = r0.www ? r3.yzw : r1.xyz;[/code] being converted to [code]r1.x = r0.w ? r3.y : r1.x; r1.y = r0.w ? r3.z : r1.y; r1.z = r0.w ? 
r3.w : r1.z; [/code] is just manual decompiler fix or is it part of the 3d fix? I'm leaning on the decompiler fix side. [/quote] Yes, this is part of the back-and-forth that Mike and I do on fixes. Sometimes we find stuff that is broken because the Decompiler generated bad code, and I work out how to fix the code generation. This one is actually the opposite, the unrolled version is actually wrong, the rolled up version is correct. We have some in-progress stuff that we don't bother to clean up, as long as we know it's working. The unrolled version can do something like "r1.x = r1.x ? r2.x : r3.x", then in the next line, use r1.x again, and generate bad results. Since the output can possibly the test parameter, it's safer and generates the proper ASM to roll it back up. I hesitantly changed a bunch of these, because Chiri believes that they need to be unrolled, and wrote it that way originally. I've tested a lot of code, and am confident enough to leave them rolled up. For the last part of the question, I'll leave that to Mike, as I don't know what it's doing either.
No worries, I think it's good to explain some of these things in these threads, because it makes it easier for other people to learn as well, and they can be found with google searches. Sometimes we won't get back to you because of being too busy, but we always want to try to explain things.

ForgottenProdigy said:
Can't you just put
float4 stereo = StereoParams.Load(0);
o0.xyzw = r1.xyzw;
r1.x += stereo.x * (r1.w - stereo.y);

instead or is it necessary to create the r23 variable?

Yep, you can do the simpler version. This is an artifact of copy and paste where early on we didn't quite understand how flexible HLSL is, and were using techniques that came from the ASM side, where you have to do it via temporary variables.

This fix is similar to the blood spatters in Alien, where the output itself needs to be unchanged, but we want to modify the follow-on uses of that variable to make it stereo.


ForgottenProdigy said:
2. Something else new that I saw was that you guys straight up created a new variable to be exported. Here's the full code for the shader:
...
I'm talking about the "float4 v4 : TEXCOORD3" whose z parameter was used for the stereo fix. How did you know that that could help to fix the shader for 3d?

This is actually fixing the problem in the corresponding PixelShader. The variable was added in the VertexShader associated with this effect, and is passing in the .z parameter as a specific matrix value to be used in the PixelShader. The VS had the proper matrix, the PS did not, so this is one way to pass that on where it's needed.

For whatever reason, this seems to work more often in DX11/HLSL than in the ASM versions. Sometimes it just doesn't work in ASM, and it's not clear why.


ForgottenProdigy said:
3. Can I assume that this line:
r1.xyz = r0.www ? r3.yzw : r1.xyz;


being converted to
r1.x = r0.w ? r3.y : r1.x; 
r1.y = r0.w ? r3.z : r1.y;
r1.z = r0.w ? r3.w : r1.z;

is just manual decompiler fix or is it part of the 3d fix? I'm leaning on the decompiler fix side.

Yes, this is part of the back-and-forth that Mike and I do on fixes. Sometimes we find stuff that is broken because the Decompiler generated bad code, and I work out how to fix the code generation.

This one is actually the opposite, the unrolled version is actually wrong, the rolled up version is correct. We have some in-progress stuff that we don't bother to clean up, as long as we know it's working.

The unrolled version can do something like "r1.x = r1.x ? r2.x : r3.x", then in the next line, use r1.x again, and generate bad results. Since the output can possibly be the test parameter, it's safer and generates the proper ASM to roll it back up. I hesitantly changed a bunch of these, because Chiri believes that they need to be unrolled, and wrote it that way originally. I've tested a lot of code, and am confident enough to leave them rolled up.


For the last part of the question, I'll leave that to Mike, as I don't know what it's doing either.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 11/16/2014 03:11 AM   
Continued from here: [url]https://forums.geforce.com/default/topic/766890/3d-vision/bo3bs-school-for-shaderhackers/post/4365642/#4365642[/url] I have the debug & unbuffered flags set to 1 and export_hlsl is set to 3. Now the game crashes immediately.
Continued from here: https://forums.geforce.com/default/topic/766890/3d-vision/bo3bs-school-for-shaderhackers/post/4365642/#4365642

I have the debug & unbuffered flags set to 1 and export_hlsl is set to 3. Now the game crashes immediately.
Attachments

d3d11_log.txt.jpg

Dual boot Win 7 x64 & Win 10 (1809) | Geforce Drivers 417.35

Posted 11/17/2014 02:11 PM   
[quote="bo3b"]No worries, I think it's good to explain some of these things in these threads, because it makes it easier for other people to learn as well, and they can be found with google searches. Sometimes we won't get back to you because of being too busy, but we always want to try to explain things. [quote="ForgottenProdigy"] Can't you just put [code] float4 stereo = StereoParams.Load(0); o0.xyzw = r1.xyzw; r1.x += stereo.x * (r1.w - stereo.y); [/code] instead or is it necessary to create the r23 variable?[/quote] Yep, you can do the simpler version. This is an artifact of copy and paste where early on we didn't quite understand how flexible HLSL is, and were using techniques that came from the ASM side, where you have to do it via temporary variables. This fix is similar to the blood spatters in Alien, where the output itself needs to be unchanged, but we want to modify the follow-on uses of that variable to make it stereo. [quote="ForgottenProdigy"] 2. Something else new that I saw was that you guys straight up created a new variable to be exported. Here's the full code for the shader: ... I'm talking about the "[b]float4 v4 : TEXCOORD3[/b]" whose z parameter was used for the stereo fix. How did you know that that could help to fix the shader for 3d?[/quote] This is actually fixing the problem in the corresponding PixelShader. The variable was added in the VertexShader associated with this effect, and is passing in the .z parameter as a specific matrix value to be used in the PixelShader. The VS had the proper matrix, the PS did not, so this is one way to pass that on where it's needed. For whatever reason, this seems to work more often in DX11/HLSL than in the ASM versions. Sometimes it just doesn't work in ASM, and it's not clear why. [quote="ForgottenProdigy"] 3. Can I assume that this line: [code]r1.xyz = r0.www ? r3.yzw : r1.xyz;[/code] being converted to [code]r1.x = r0.w ? r3.y : r1.x; r1.y = r0.w ? r3.z : r1.y; r1.z = r0.w ? 
r3.w : r1.z; [/code] is just manual decompiler fix or is it part of the 3d fix? I'm leaning on the decompiler fix side. [/quote] Yes, this is part of the back-and-forth that Mike and I do on fixes. Sometimes we find stuff that is broken because the Decompiler generated bad code, and I work out how to fix the code generation. This one is actually the opposite, the unrolled version is actually wrong, the rolled up version is correct. We have some in-progress stuff that we don't bother to clean up, as long as we know it's working. The unrolled version can do something like "r1.x = r1.x ? r2.x : r3.x", then in the next line, use r1.x again, and generate bad results. Since the output can possibly the test parameter, it's safer and generates the proper ASM to roll it back up. I hesitantly changed a bunch of these, because Chiri believes that they need to be unrolled, and wrote it that way originally. I've tested a lot of code, and am confident enough to leave them rolled up. For the last part of the question, I'll leave that to Mike, as I don't know what it's doing either.[/quote] Regarding the last part... As bo3b notes we sometimes have artifacts of experimentation left in the code. If you work it through this code is exactly the same as just fixing the r0.xyzw variable, so I could not be bothered to put it back. There are a couple of shaders I found early that required one of the o1 or o2 variables to use the non-shifted r0, that's why I created the duplicate r20 variable. I then applied this fix to several other similar shaders, only to find that it screwed them up so I had to revert back. VS are much less consistent in their patterns than PS, so this happens a lot. On average though, autofixing and then correcting the few things that break is still far quicker and more efficient than individually tracking every shader down. Regarding the v4 variable I pass in. 
You will see some VS in the shaderfixes that pass out a TEXCOORD3 derived entirely from the k_vFullViewPlane constant. I found out that the z component has the FOV correction factor that is needed when correcting in View Space (which is what happens in the PS) so I pass that forward. To start with I copied the struct into the PS and was using the k_vFullViewPlane variable from there - this worked for some shaders, but not all. For the ones that did not work, the value was either always 0 so no correction was applied, or it changed to zero at some view angles which led to shadow/light 'snapping'. Because of this, I reverted most of them to use the TEXCOORD3 passing approach. I have had other issues with this particular struct (b(12) actually) not having any values in the VS for the Particle fixes, so I am forced to use a hardcoded magic number that only works for one FOV (normal gameplay) and not the benchmark (higher fov) or when using the bow (lower fov).
bo3b said: No worries, I think it's good to explain some of these things in these threads, because it makes it easier for other people to learn as well, and they can be found with google searches. Sometimes we won't get back to you because of being too busy, but we always want to try to explain things.

ForgottenProdigy said:
Can't you just put
float4 stereo = StereoParams.Load(0);
o0.xyzw = r1.xyzw;
r1.x += stereo.x * (r1.w - stereo.y);

instead or is it necessary to create the r23 variable?

Yep, you can do the simpler version. This is an artifact of copy and paste where early on we didn't quite understand how flexible HLSL is, and were using techniques that came from the ASM side, where you have to do it via temporary variables.

This fix is similar to the blood spatters in Alien, where the output itself needs to be unchanged, but we want to modify the follow-on uses of that variable to make it stereo.


ForgottenProdigy said:
2. Something else new that I saw was that you guys straight up created a new variable to be exported. Here's the full code for the shader:
...
I'm talking about the "float4 v4 : TEXCOORD3" whose z parameter was used for the stereo fix. How did you know that that could help to fix the shader for 3d?

This is actually fixing the problem in the corresponding PixelShader. The variable was added in the VertexShader associated with this effect, and is passing in the .z parameter as a specific matrix value to be used in the PixelShader. The VS had the proper matrix, the PS did not, so this is one way to pass that on where it's needed.

For whatever reason, this seems to work more often in DX11/HLSL than in the ASM versions. Sometimes it just doesn't work in ASM, and it's not clear why.


ForgottenProdigy said:
3. Can I assume that this line:
r1.xyz = r0.www ? r3.yzw : r1.xyz;


being converted to
r1.x = r0.w ? r3.y : r1.x; 
r1.y = r0.w ? r3.z : r1.y;
r1.z = r0.w ? r3.w : r1.z;

is just manual decompiler fix or is it part of the 3d fix? I'm leaning on the decompiler fix side.

Yes, this is part of the back-and-forth that Mike and I do on fixes. Sometimes we find stuff that is broken because the Decompiler generated bad code, and I work out how to fix the code generation.

This one is actually the opposite, the unrolled version is actually wrong, the rolled up version is correct. We have some in-progress stuff that we don't bother to clean up, as long as we know it's working.

The unrolled version can do something like "r1.x = r1.x ? r2.x : r3.x", then in the next line, use r1.x again, and generate bad results. Since the output can possibly be the test parameter, it's safer and generates the proper ASM to roll it back up. I hesitantly changed a bunch of these, because Chiri believes that they need to be unrolled, and wrote it that way originally. I've tested a lot of code, and am confident enough to leave them rolled up.


For the last part of the question, I'll leave that to Mike, as I don't know what it's doing either.


Regarding the last part... As bo3b notes we sometimes have artifacts of experimentation left in the code. If you work it through this code is exactly the same as just fixing the r0.xyzw variable, so I could not be bothered to put it back. There are a couple of shaders I found early that required one of the o1 or o2 variables to use the non-shifted r0, that's why I created the duplicate r20 variable. I then applied this fix to several other similar shaders, only to find that it screwed them up so I had to revert back. VS are much less consistent in their patterns than PS, so this happens a lot. On average though, autofixing and then correcting the few things that break is still far quicker and more efficient than individually tracking every shader down.

Regarding the v4 variable I pass in. You will see some VS in the shaderfixes that pass out a TEXCOORD3 derived entirely from the k_vFullViewPlane constant. I found out that the z component has the FOV correction factor that is needed when correcting in View Space (which is what happens in the PS) so I pass that forward. To start with I copied the struct into the PS and was using the k_vFullViewPlane variable from there - this worked for some shaders, but not all. For the ones that did not work, the value was either always 0 so no correction was applied, or it changed to zero at some view angles which led to shadow/light 'snapping'. Because of this, I reverted most of them to use the TEXCOORD3 passing approach. I have had other issues with this particular struct (b(12) actually) not having any values in the VS for the Particle fixes, so I am forced to use a hardcoded magic number that only works for one FOV (normal gameplay) and not the benchmark (higher fov) or when using the bow (lower fov).

Rig: Intel i7-8700K @4.7GHz, 16Gb Ram, SSD, GTX 1080Ti, Win10x64, Asus VG278

Posted 11/17/2014 02:29 PM   
[quote="4everAwake"]Continued from here: [url]https://forums.geforce.com/default/topic/766890/3d-vision/bo3bs-school-for-shaderhackers/post/4365642/#4365642[/url] I have the debug & unbuffered flags set to 1 and export_hlsl is set to 3. Now the game crashes immediately. [/quote] OK, thanks for that log. This one has a single shader that decompiles OK, then crashes on the next one it sees. This looks like a parse failure where it creates an exception, but that should have been caught by my exception handler and reported. Do you have any debugging tools installed that might interfere with exception handling? One last thing to try for me would be to set the affinity flag so that it runs mostly single threaded. This can help me tell if it might be some multi-threaded problem. Set all the flags to 1 in the Logging section, but especially force_cpu_affinity=1 for this. Thanks for the help. Edit: BTW, this debug build is worth trying in this case, because it might catch the crash earlier with an Assert, and includes a multi-threading Decompiler patch we needed for Mordor crashes. https://github.com/bo3b/3Dmigoto/releases/download/0.99.2-beta/3Dmigoto-Debug-0.99.2.zip
4everAwake said: Continued from here: https://forums.geforce.com/default/topic/766890/3d-vision/bo3bs-school-for-shaderhackers/post/4365642/#4365642

I have the debug & unbuffered flags set to 1 and export_hlsl is set to 3. Now the game crashes immediately.

OK, thanks for that log. This one has a single shader that decompiles OK, then crashes on the next one it sees.

This looks like a parse failure where it creates an exception, but that should have been caught by my exception handler and reported.


Do you have any debugging tools installed that might interfere with exception handling?

One last thing to try for me would be to set the affinity flag so that it runs mostly single threaded. This can help me tell if it might be some multi-threaded problem. Set all the flags to 1 in the Logging section, but especially force_cpu_affinity=1 for this. Thanks for the help.


Edit: BTW, this debug build is worth trying in this case, because it might catch the crash earlier with an Assert, and includes a multi-threading Decompiler patch we needed for Mordor crashes.

https://github.com/bo3b/3Dmigoto/releases/download/0.99.2-beta/3Dmigoto-Debug-0.99.2.zip

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 11/18/2014 04:19 AM   
Ok I'm using version 0.99.2. I have all flags in the Logging section set to 1 and export_hlsl still set to 3. Now when I run the game, a dialog box pops up "Video Card does not meet the minimum requirements for DX 11 Switching to DX 9". The game still starts, but I'm not sure if 3dMigoto is hooked (nothing blinks out when I cycle through shaders).
Ok I'm using version 0.99.2. I have all flags in the Logging section set to 1 and export_hlsl still set to 3. Now when I run the game, a dialog box pops up "Video Card does not meet the minimum requirements for DX 11 Switching to DX 9". The game still starts, but I'm not sure if 3Dmigoto is hooked (nothing blinks out when I cycle through shaders).
Attachments

d3d11_log.txt.jpg

Dual boot Win 7 x64 & Win 10 (1809) | Geforce Drivers 417.35

Posted 11/19/2014 05:05 PM   
Totally weird. I have no idea what happened there to make it decide that it wasn't DX11. This might be the peril of working on a pre-release game, not sure. Once it decided to switch to DX9 the logging stops because there is no longer any DX9 hook in 3Dmigoto. It might be the affinity flag that makes it think to switch to DX9, hard to say. Worth experimenting with force_cpu_affinity=0, but still use the debug version of 0.99.2.
Totally weird. I have no idea what happened there to make it decide that it wasn't DX11. This might be the peril of working on a pre-release game, not sure.

Once it decided to switch to DX9 the logging stops because there is no longer any DX9 hook in 3Dmigoto.

It might be the affinity flag that makes it think to switch to DX9, hard to say. Worth experimenting with force_cpu_affinity=0, but still use the debug version of 0.99.2.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

Posted 11/20/2014 12:24 AM   
  14 / 143    
Scroll To Top