3Dmigoto now open-source...
  41 / 143    
Question: is it possible to increase the shadows resolution with pixel/vertex shader modifications? Shadows on "High" in Dark Souls 2 SOTFS are still at a pretty low resolution. For example, I could make the shadow filtering very smooth and antialiased, but the resolution of the shadows themselves didn't change (it looked like the polygon warping of the first Playstation in conjunction with high resolution). I could also change the distance where the shadows change to a lower resolution version, but it has bugs, like unwanted shadows appearing, some shadows completely disappearing at certain distances, etc (I guess I didn't do it correctly). Code of the pixel shader: [code] //Shadows PS. cbuffer cbBase : register(b0) { float4 FC_DifColMul : packoffset(c0); float FC_GlobalTime : packoffset(c1); row_major float3x4 FC_MatrixViewT : packoffset(c2); float4 FC_FarClipInfo : packoffset(c5); float4 FC_ShadowMapParam : packoffset(c6); float4 FC_ShadowColor : packoffset(c7); float4 FC_ScreenSize : packoffset(c8); float4 FC_FinalColorMult : packoffset(c9); } cbuffer cbScreenSpaceShadow : register(b6) { row_major float4x4 g_InvProjMatrixT : packoffset(c0); row_major float4x4 g_ProjSpaceToShadowMatrixT[4] : packoffset(c4); float4 g_CascadeSelectDist : packoffset(c20); float4 g_vDepthComputeParam : packoffset(c21); float4 g_avSampleOffsets[16] : packoffset(c22); } SamplerState g_DepthMapSampler_sampler_s : register(s1); SamplerState g_SSAOMap_sampler_s : register(s13); SamplerComparisonState g_ShadowMap_sampler_s : register(s12); Texture2D<float4> g_DepthMapSampler_texture : register(t1); Texture2D<float4> g_ShadowMap_texture : register(t12); Texture2D<float4> g_SSAOMap_texture : register(t13); Texture2D<float4> StereoParams : register(t125); Texture1D<float4> IniParams : register(t120); void main( float4 v0 : SV_Position0, float4 v1 : TEXCOORD0, float4 v2 : TEXCOORD1, out float4 o0 : SV_Target0) { float4 r0,r1,r2,r3,r4; uint4 bitmask, uiDest; float4 fDest; float4 stereo = 
StereoParams.Load(0); float separation = stereo.x; float convergence = stereo.y; r0.z = g_DepthMapSampler_texture.Sample(g_DepthMapSampler_sampler_s, v1.xy).x; r1.x = g_vDepthComputeParam.x + -r0.z; r1.x = -g_vDepthComputeParam.y / r1.x; //High quality shadow distance multiplier. Multiplying "g_CascadeSelectDist.xyzw" by 1.4 for example works. //r2.xyzw = g_CascadeSelectDist.xyzw < r1.xxxx; r2.xyzw = (g_CascadeSelectDist.xyzw < r1.xxxx) ? -1 : 0; // <---- I saw that bo3b in github said that it's more accurate this way (the commented line above is the default code as it was dumped). r1.x = FC_ShadowMapParam.x + -r1.x; r1.x = saturate(FC_ShadowMapParam.y * r1.x); r2.xyzw = r2.xyzw ? 1.00000 : 0; r1.y = dot(r2.xyzw, float4(1,1,1,1)); r1.y = (int)r1.y; r1.z = (int)r1.y == 4; r1.y = (uint)r1.y << 2; r1.y = r1.z ? 12 : r1.y; if (r1.z != 0) discard; r0.xy = v2.xy; r0.w = 1; float depth=dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m30_m31_m32_m33, r0.xyzw); r0.x+=separation*(depth*convergence-1); r2.x = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m00_m01_m02_m03, r0.xyzw); r2.y = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m10_m11_m12_m13, r0.xyzw); r2.z = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m20_m21_m22_m23, r0.xyzw); r2.w = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m30_m31_m32_m33, r0.xyzw); r0.xy = FC_ShadowMapParam.zw * r2.ww; r0.w = -r0.y; r0.z = 0; r3.xyzw = r2.xyzw + r0.xwzz; r0.w = 1 / r3.w; r1.yzw = r3.xyz * r0.www; r3.y = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r1.yz, r1.w).x; //<--- Multiplying this and other lines that look like this one by something makes the shadow filtering be at higher resolution. 
r4.xyzw = r2.xyzw + r0.xyzz; r0.w = 1 / r4.w; r1.yzw = r4.xyz * r0.www; r3.x = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r1.yz, r1.w).x; r4.xyzw = r0.xyzz * float4(-1,1,1,1) + r2.xyzw; r0.xyzw = r0.xyzz * float4(-1,-1,1,1) + r2.xyzw; r1.y = 1 / r4.w; r1.yzw = r4.xyz * r1.yyy; r3.z = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r1.yz, r1.w).x; r0.w = 1 / r0.w; r0.xyz = r0.xyz * r0.www; r3.w = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r0.xy, r0.z).x; r0.x = dot(r3.xyzw, float4(0.25,0.25,0.25,0.25)); o0.xyz = -r1.xxx * r0.xxx + float3(1,1,1); r0.x = g_SSAOMap_texture.Sample(g_SSAOMap_sampler_s, v1.xy).w; o0.w = r0.x; return; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111 // // using 3Dmigoto v1.2.6 on Mon Nov 02 01:04:06 2015 // // // Buffer Definitions: // // cbuffer cbBase // { // // float4 FC_DifColMul; // Offset: 0 Size: 16 [unused] // float FC_GlobalTime; // Offset: 16 Size: 4 [unused] // row_major float3x4 FC_MatrixViewT; // Offset: 32 Size: 48 [unused] // float4 FC_FarClipInfo; // Offset: 80 Size: 16 [unused] // float4 FC_ShadowMapParam; // Offset: 96 Size: 16 // float4 FC_ShadowColor; // Offset: 112 Size: 16 [unused] // float4 FC_ScreenSize; // Offset: 128 Size: 16 [unused] // float4 FC_FinalColorMult; // Offset: 144 Size: 16 [unused] // // } // // cbuffer cbScreenSpaceShadow // { // // row_major float4x4 g_InvProjMatrixT;// Offset: 0 Size: 64 [unused] // row_major float4x4 g_ProjSpaceToShadowMatrixT[4];// Offset: 64 Size: 256 // float4 g_CascadeSelectDist; // Offset: 320 Size: 16 // float4 g_vDepthComputeParam; // Offset: 336 Size: 16 // float4 g_avSampleOffsets[16]; // Offset: 352 Size: 256 [unused] // // } // // // Resource Bindings: // // Name Type Format Dim Slot Elements // ------------------------------ ---------- ------- ----------- ---- -------- // g_DepthMapSampler_sampler sampler NA NA 1 1 // g_ShadowMap_sampler 
sampler_c NA NA 12 1 // g_SSAOMap_sampler sampler NA NA 13 1 // g_DepthMapSampler_texture texture float4 2d 1 1 // g_ShadowMap_texture texture float4 2d 12 1 // g_SSAOMap_texture texture float4 2d 13 1 // cbBase cbuffer NA NA 0 1 // cbScreenSpaceShadow cbuffer NA NA 6 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xyzw 2 NONE float xy // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer cb0[7], immediateIndexed dcl_constantbuffer cb6[22], dynamicIndexed dcl_sampler s1, mode_default dcl_sampler s12, mode_comparison dcl_sampler s13, mode_default dcl_resource_texture2d (float,float,float,float) t1 dcl_resource_texture2d (float,float,float,float) t12 dcl_resource_texture2d (float,float,float,float) t13 dcl_input_ps linear v1.xy dcl_input_ps linear v2.xy dcl_output o0.xyzw dcl_temps 5 sample_indexable(texture2d)(float,float,float,float) r0.z, v1.xyxx, t1.yzxw, s1 add r1.x, -r0.z, cb6[21].x div r1.x, -cb6[21].y, r1.x lt r2.xyzw, cb6[20].xyzw, r1.xxxx add r1.x, -r1.x, cb0[6].x mul_sat r1.x, r1.x, cb0[6].y and r2.xyzw, r2.xyzw, l(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000) dp4 r1.y, r2.xyzw, l(1.000000, 1.000000, 1.000000, 1.000000) ftoi r1.y, r1.y ieq r1.z, r1.y, l(4) ishl r1.y, r1.y, l(2) movc r1.y, r1.z, l(12), r1.y discard_nz r1.z mov r0.xy, v2.xyxx mov r0.w, l(1.000000) dp4 r2.x, cb6[r1.y + 4].xyzw, r0.xyzw dp4 r2.y, cb6[r1.y + 5].xyzw, r0.xyzw dp4 r2.z, cb6[r1.y + 6].xyzw, r0.xyzw dp4 r2.w, cb6[r1.y + 7].xyzw, r0.xyzw mul r0.xy, r2.wwww, cb0[6].zwzz mov r0.w, -r0.y mov r0.z, l(0) add r3.xyzw, r0.xwzz, r2.xyzw div r0.w, l(1.000000, 1.000000, 1.000000, 1.000000), r3.w mul r1.yzw, 
r0.wwww, r3.xxyz sample_c_indexable(texture2d)(float,float,float,float) r3.y, r1.yzyy, t12.xxxx, s12, r1.w add r4.xyzw, r0.xyzz, r2.xyzw div r0.w, l(1.000000, 1.000000, 1.000000, 1.000000), r4.w mul r1.yzw, r0.wwww, r4.xxyz sample_c_indexable(texture2d)(float,float,float,float) r3.x, r1.yzyy, t12.xxxx, s12, r1.w mad r4.xyzw, r0.xyzz, l(-1.000000, 1.000000, 1.000000, 1.000000), r2.xyzw mad r0.xyzw, r0.xyzz, l(-1.000000, -1.000000, 1.000000, 1.000000), r2.xyzw div r1.y, l(1.000000, 1.000000, 1.000000, 1.000000), r4.w mul r1.yzw, r1.yyyy, r4.xxyz sample_c_indexable(texture2d)(float,float,float,float) r3.z, r1.yzyy, t12.xxxx, s12, r1.w div r0.w, l(1.000000, 1.000000, 1.000000, 1.000000), r0.w mul r0.xyz, r0.wwww, r0.xyzx sample_c_indexable(texture2d)(float,float,float,float) r3.w, r0.xyxx, t12.xxxx, s12, r0.z dp4 r0.x, r3.xyzw, l(0.250000, 0.250000, 0.250000, 0.250000) mad o0.xyz, -r1.xxxx, r0.xxxx, l(1.000000, 1.000000, 1.000000, 0.000000) sample_indexable(texture2d)(float,float,float,float) r0.x, v1.xyxx, t13.wxyz, s13 mov o0.w, r0.x ret // Approximately 43 instruction slots used ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ [/code]
Question: is it possible to increase the shadows resolution with pixel/vertex shader modifications?

Shadows on "High" in Dark Souls 2 SOTFS are still at a pretty low resolution. For example, I could make the shadow filtering very smooth and antialiased, but the resolution of the shadows themselves didn't change (it looked like the polygon warping of the first Playstation in conjunction with high resolution).

I could also change the distance where the shadows change to a lower resolution version, but it has bugs, like unwanted shadows appearing, some shadows completely disappearing at certain distances, etc (I guess I didn't do it correctly).

Code of the pixel shader:

//Shadows PS.
// Constant buffer bound to slot b0.
// According to the compiler's "Buffer Definitions" dump in the block comment
// after the shader, FC_ShadowMapParam is the only member this shader reads;
// every other member is flagged [unused] there and is declared only so the
// packoffset layout matches the game's buffer.
cbuffer cbBase : register(b0)
{
float4 FC_DifColMul : packoffset(c0);
float FC_GlobalTime : packoffset(c1);
row_major float3x4 FC_MatrixViewT : packoffset(c2);
float4 FC_FarClipInfo : packoffset(c5);
// .x/.y: offset and scale applied to the computed depth value to build the
//        saturated factor that multiplies the shadow term in main().
// .z/.w: multiplied by the projected w in main() to produce the x/y offsets
//        of the four shadow-map taps.
float4 FC_ShadowMapParam : packoffset(c6);
float4 FC_ShadowColor : packoffset(c7);
float4 FC_ScreenSize : packoffset(c8);
float4 FC_FinalColorMult : packoffset(c9);
}

// Constant buffer bound to slot b6 holding the shadow lookup parameters.
// g_InvProjMatrixT and g_avSampleOffsets are flagged [unused] in the
// compiler's "Buffer Definitions" dump that follows the shader.
cbuffer cbScreenSpaceShadow : register(b6)
{
row_major float4x4 g_InvProjMatrixT : packoffset(c0);
// One matrix per shadow slice; main() picks the entry by counting how many
// components of g_CascadeSelectDist are below the computed depth value.
row_major float4x4 g_ProjSpaceToShadowMatrixT[4] : packoffset(c4);
// Four distance thresholds compared component-wise against the depth value.
float4 g_CascadeSelectDist : packoffset(c20);
// Only .x and .y are read: main() computes -y / (x - sampledDepth).
// NOTE(review): presumably this linearises the depth-buffer value - confirm.
float4 g_vDepthComputeParam : packoffset(c21);
float4 g_avSampleOffsets[16] : packoffset(c22);
}
// Samplers and textures used by main(). Slot numbers match the
// "Resource Bindings" table in the compiler dump after the shader.
SamplerState g_DepthMapSampler_sampler_s : register(s1);
SamplerState g_SSAOMap_sampler_s : register(s13);
// Comparison sampler: used with SampleCmp for the shadow-map taps.
SamplerComparisonState g_ShadowMap_sampler_s : register(s12);
Texture2D<float4> g_DepthMapSampler_texture : register(t1);
Texture2D<float4> g_ShadowMap_texture : register(t12);
Texture2D<float4> g_SSAOMap_texture : register(t13);

// Extra resources that are not part of the original shader (they do not appear
// in the Resource Bindings table of the dump): t125 is StereoParams and t120
// is IniParams. IniParams is declared here but not read by main().
Texture2D<float4> StereoParams : register(t125);
Texture1D<float4> IniParams : register(t120);

// Pixel shader entry point: computes a shadow term for the current pixel.
//
// Inputs:
//   v0 - SV_Position (not read in the body).
//   v1 - TEXCOORD0; .xy is the lookup coordinate for the depth and SSAO maps.
//   v2 - TEXCOORD1; .xy is combined with the sampled depth and w = 1 to form
//        the position that is transformed by g_ProjSpaceToShadowMatrixT.
// Output:
//   o0.xyz - 1 - (fade factor * average of four shadow-map comparisons).
//   o0.w   - the .w channel of the SSAO map at v1.xy.
// The pixel is discarded when all four g_CascadeSelectDist thresholds are
// below the computed depth value.
void main(
float4 v0 : SV_Position0,
float4 v1 : TEXCOORD0,
float4 v2 : TEXCOORD1,
out float4 o0 : SV_Target0)
{
// r0..r4 mirror the five temporaries of the disassembly (dcl_temps 5) and are
// reused heavily, so statement order matters throughout.
float4 r0,r1,r2,r3,r4;
// bitmask, uiDest and fDest are declared but never used in this body.
uint4 bitmask, uiDest;
float4 fDest;

// Stereo parameters: .x is used as separation, .y as convergence.
float4 stereo = StereoParams.Load(0);
float separation = stereo.x;
float convergence = stereo.y;

// Sample the depth map and remap it: r1.x = -P.y / (P.x - depth),
// with P = g_vDepthComputeParam.
r0.z = g_DepthMapSampler_texture.Sample(g_DepthMapSampler_sampler_s, v1.xy).x;
r1.x = g_vDepthComputeParam.x + -r0.z;
r1.x = -g_vDepthComputeParam.y / r1.x;
//High quality shadow distance multiplier. Multiplying "g_CascadeSelectDist.xyzw" by 1.4 for example works.
//r2.xyzw = g_CascadeSelectDist.xyzw < r1.xxxx;
r2.xyzw = (g_CascadeSelectDist.xyzw < r1.xxxx) ? -1 : 0; // <---- I saw that bo3b in github said that it's more accurate this way (the commented line above is the default code as it was dumped).
// Fade factor: saturate(FC_ShadowMapParam.y * (FC_ShadowMapParam.x - r1.x)).
// It scales the shadow contribution in the final o0.xyz expression.
r1.x = FC_ShadowMapParam.x + -r1.x;
r1.x = saturate(FC_ShadowMapParam.y * r1.x);
// Turn the comparison mask into 1.0/0.0 and sum it: r1.y becomes the number
// of thresholds (0..4) that are below the depth value.
r2.xyzw = r2.xyzw ? 1.00000 : 0;
r1.y = dot(r2.xyzw, float4(1,1,1,1));
r1.y = (int)r1.y;
// r1.z flags the "all four thresholds passed" case, which is discarded below.
r1.z = (int)r1.y == 4;
// Multiply the count by 4 (shift left by 2). In the disassembly this is the
// row offset into cb6; here it is divided by 4 again when indexing the array.
r1.y = (uint)r1.y << 2;
r1.y = r1.z ? 12 : r1.y;
if (r1.z != 0) discard;
// Build the position to transform: (v2.x, v2.y, sampled depth, 1).
r0.xy = v2.xy;
r0.w = 1;
// Stereo adjustment of r0.x. The next two statements have no counterpart in
// the disassembly below, i.e. they were added on top of the decompiled shader.
// "depth" is the w row of the selected matrix applied to the unadjusted position.
float depth=dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m30_m31_m32_m33, r0.xyzw);
r0.x+=separation*(depth*convergence-1);
// Transform the position by the selected matrix, one row per component.
r2.x = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m00_m01_m02_m03, r0.xyzw);
r2.y = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m10_m11_m12_m13, r0.xyzw);
r2.z = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m20_m21_m22_m23, r0.xyzw);
r2.w = dot(g_ProjSpaceToShadowMatrixT[r1.y/4]._m30_m31_m32_m33, r0.xyzw);
// Tap offsets: r0.xy = FC_ShadowMapParam.zw * w, r0.w = -r0.y, r0.z = 0.
// The z and w components of every offset are 0, so only x/y are displaced.
r0.xy = FC_ShadowMapParam.zw * r2.ww;
r0.w = -r0.y;
r0.z = 0;
// Tap 1: offset (+x, -y). Divide by w, then compare z against the shadow map
// at xy. Result stored in r3.y.
r3.xyzw = r2.xyzw + r0.xwzz;
r0.w = 1 / r3.w;
r1.yzw = r3.xyz * r0.www;
r3.y = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r1.yz, r1.w).x; //<--- Multiplying this and other lines that look like this one by something makes the shadow filtering be at higher resolution.
// Tap 2: offset (+x, +y). Result stored in r3.x.
r4.xyzw = r2.xyzw + r0.xyzz;
r0.w = 1 / r4.w;
r1.yzw = r4.xyz * r0.www;
r3.x = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r1.yz, r1.w).x;
// Prepare taps 3 and 4 together: r4 uses offset (-x, +y), r0 is overwritten
// with the position offset by (-x, -y).
r4.xyzw = r0.xyzz * float4(-1,1,1,1) + r2.xyzw;
r0.xyzw = r0.xyzz * float4(-1,-1,1,1) + r2.xyzw;
// Tap 3: result stored in r3.z.
r1.y = 1 / r4.w;
r1.yzw = r4.xyz * r1.yyy;
r3.z = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r1.yz, r1.w).x;
// Tap 4: result stored in r3.w.
r0.w = 1 / r0.w;
r0.xyz = r0.xyz * r0.www;
r3.w = g_ShadowMap_texture.SampleCmp(g_ShadowMap_sampler_s, r0.xy, r0.z).x;
// Average the four comparison results with equal 0.25 weights.
r0.x = dot(r3.xyzw, float4(0.25,0.25,0.25,0.25));
// Final colour: 1 - fade * averaged comparison result, replicated to xyz.
o0.xyz = -r1.xxx * r0.xxx + float3(1,1,1);
// Alpha is passed through from the .w channel of the SSAO map.
r0.x = g_SSAOMap_texture.Sample(g_SSAOMap_sampler_s, v1.xy).w;
o0.w = r0.x;
return;
}

/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111
//
// using 3Dmigoto v1.2.6 on Mon Nov 02 01:04:06 2015
//
//
// Buffer Definitions:
//
// cbuffer cbBase
// {
//
// float4 FC_DifColMul; // Offset: 0 Size: 16 [unused]
// float FC_GlobalTime; // Offset: 16 Size: 4 [unused]
// row_major float3x4 FC_MatrixViewT; // Offset: 32 Size: 48 [unused]
// float4 FC_FarClipInfo; // Offset: 80 Size: 16 [unused]
// float4 FC_ShadowMapParam; // Offset: 96 Size: 16
// float4 FC_ShadowColor; // Offset: 112 Size: 16 [unused]
// float4 FC_ScreenSize; // Offset: 128 Size: 16 [unused]
// float4 FC_FinalColorMult; // Offset: 144 Size: 16 [unused]
//
// }
//
// cbuffer cbScreenSpaceShadow
// {
//
// row_major float4x4 g_InvProjMatrixT;// Offset: 0 Size: 64 [unused]
// row_major float4x4 g_ProjSpaceToShadowMatrixT[4];// Offset: 64 Size: 256
// float4 g_CascadeSelectDist; // Offset: 320 Size: 16
// float4 g_vDepthComputeParam; // Offset: 336 Size: 16
// float4 g_avSampleOffsets[16]; // Offset: 352 Size: 256 [unused]
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim Slot Elements
// ------------------------------ ---------- ------- ----------- ---- --------
// g_DepthMapSampler_sampler sampler NA NA 1 1
// g_ShadowMap_sampler sampler_c NA NA 12 1
// g_SSAOMap_sampler sampler NA NA 13 1
// g_DepthMapSampler_texture texture float4 2d 1 1
// g_ShadowMap_texture texture float4 2d 12 1
// g_SSAOMap_texture texture float4 2d 13 1
// cbBase cbuffer NA NA 0 1
// cbScreenSpaceShadow cbuffer NA NA 6 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Position 0 xyzw 0 POS float
// TEXCOORD 0 xy 1 NONE float xy
// TEXCOORD 1 xyzw 2 NONE float xy
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Target 0 xyzw 0 TARGET float xyzw
//
ps_5_0
dcl_globalFlags refactoringAllowed
dcl_constantbuffer cb0[7], immediateIndexed
dcl_constantbuffer cb6[22], dynamicIndexed
dcl_sampler s1, mode_default
dcl_sampler s12, mode_comparison
dcl_sampler s13, mode_default
dcl_resource_texture2d (float,float,float,float) t1
dcl_resource_texture2d (float,float,float,float) t12
dcl_resource_texture2d (float,float,float,float) t13
dcl_input_ps linear v1.xy
dcl_input_ps linear v2.xy
dcl_output o0.xyzw
dcl_temps 5
sample_indexable(texture2d)(float,float,float,float) r0.z, v1.xyxx, t1.yzxw, s1
add r1.x, -r0.z, cb6[21].x
div r1.x, -cb6[21].y, r1.x
lt r2.xyzw, cb6[20].xyzw, r1.xxxx
add r1.x, -r1.x, cb0[6].x
mul_sat r1.x, r1.x, cb0[6].y
and r2.xyzw, r2.xyzw, l(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000)
dp4 r1.y, r2.xyzw, l(1.000000, 1.000000, 1.000000, 1.000000)
ftoi r1.y, r1.y
ieq r1.z, r1.y, l(4)
ishl r1.y, r1.y, l(2)
movc r1.y, r1.z, l(12), r1.y
discard_nz r1.z
mov r0.xy, v2.xyxx
mov r0.w, l(1.000000)
dp4 r2.x, cb6[r1.y + 4].xyzw, r0.xyzw
dp4 r2.y, cb6[r1.y + 5].xyzw, r0.xyzw
dp4 r2.z, cb6[r1.y + 6].xyzw, r0.xyzw
dp4 r2.w, cb6[r1.y + 7].xyzw, r0.xyzw
mul r0.xy, r2.wwww, cb0[6].zwzz
mov r0.w, -r0.y
mov r0.z, l(0)
add r3.xyzw, r0.xwzz, r2.xyzw
div r0.w, l(1.000000, 1.000000, 1.000000, 1.000000), r3.w
mul r1.yzw, r0.wwww, r3.xxyz
sample_c_indexable(texture2d)(float,float,float,float) r3.y, r1.yzyy, t12.xxxx, s12, r1.w
add r4.xyzw, r0.xyzz, r2.xyzw
div r0.w, l(1.000000, 1.000000, 1.000000, 1.000000), r4.w
mul r1.yzw, r0.wwww, r4.xxyz
sample_c_indexable(texture2d)(float,float,float,float) r3.x, r1.yzyy, t12.xxxx, s12, r1.w
mad r4.xyzw, r0.xyzz, l(-1.000000, 1.000000, 1.000000, 1.000000), r2.xyzw
mad r0.xyzw, r0.xyzz, l(-1.000000, -1.000000, 1.000000, 1.000000), r2.xyzw
div r1.y, l(1.000000, 1.000000, 1.000000, 1.000000), r4.w
mul r1.yzw, r1.yyyy, r4.xxyz
sample_c_indexable(texture2d)(float,float,float,float) r3.z, r1.yzyy, t12.xxxx, s12, r1.w
div r0.w, l(1.000000, 1.000000, 1.000000, 1.000000), r0.w
mul r0.xyz, r0.wwww, r0.xyzx
sample_c_indexable(texture2d)(float,float,float,float) r3.w, r0.xyxx, t12.xxxx, s12, r0.z
dp4 r0.x, r3.xyzw, l(0.250000, 0.250000, 0.250000, 0.250000)
mad o0.xyz, -r1.xxxx, r0.xxxx, l(1.000000, 1.000000, 1.000000, 0.000000)
sample_indexable(texture2d)(float,float,float,float) r0.x, v1.xyxx, t13.wxyz, s13
mov o0.w, r0.x
ret
// Approximately 43 instruction slots used

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

CPU: Intel Core i7 7700K @ 4.9GHz
Motherboard: Gigabyte Aorus GA-Z270X-Gaming 5
RAM: GSKILL Ripjaws Z 16GB 3866MHz CL18
GPU: MSI GeForce RTX 2080Ti Gaming X Trio
Monitor: Asus PG278QR
Speakers: Logitech Z506
Donations account: masterotakusuko@gmail.com

Posted 12/08/2015 04:46 PM   
[quote="bo3b"]OK, that shader is fairly busted up so I can't promise this is right, but here is a hand-fix version of that shader that is worth trying. ... If you have only a few of these to do, hand-fixing is probably the fastest way at the moment. If you have more than 10, you'll probably want to look at using the ASM version, as I don't think I can nail this down until next week. [/quote] Thank you sir. That fixed code was perfect, and with it I was able to fix the shader outright. Yay for progress! :) The next couple shaders I dumped were quite similar (pretty much all broken effects are types of fog/smoke or various auras, so I think that's to be expected), and I was able to make some of the same corrections you had shown me, but there's 1 specific difference that I can't quite wrap my head around... however, rather than ask you to hand fix this one, I'd like to ask how do I go about working on the ASM code. Specifically, how do I correctly declare the StereoParams and IniParams and then utilize them. I know the HelixMod way where you declare the sampler through register s0, and get the stereo parameters through texldl, but I'm assuming that's different from how 3DMigoto passes the stereo parameters. Using the MSDN reference, here's what I came up with, but doesn't seem to work (nothing seems to change at all). I'm assuming I'm just using the wrong declaration, so if you could kindly look and advise, would again be grateful. [code]vs_5_0 //Declare StereoParams from t125 register, IniParams from t120 register dcl_resource t125, Texture2D, float4 dcl_resource t120, Texture1D, float4 ...snip... mov r0, o1 mov r1, t125 add r1.y, r0.z, -r1.y mad r0.x, -r1.x, r1.y, r0.x mov o1, r0 ret [/code]
bo3b said:OK, that shader is fairly busted up so I can't promise this is right, but here is a hand-fix version of that shader that is worth trying.

...

If you have only a few of these to do, hand-fixing is probably the fastest way at the moment. If you have more than 10, you'll probably want to look at using the ASM version, as I don't think I can nail this down until next week.


Thank you sir. That fixed code was perfect, and with it I was able to fix the shader outright. Yay for progress! :)

The next couple shaders I dumped were quite similar (pretty much all broken effects are types of fog/smoke or various auras, so I think that's to be expected), and I was able to make some of the same corrections you had shown me, but there's 1 specific difference that I can't quite wrap my head around... however, rather than ask you to hand fix this one, I'd like to ask how do I go about working on the ASM code. Specifically, how do I correctly declare the StereoParams and IniParams and then utilize them.

I know the HelixMod way where you declare the sampler through register s0, and get the stereo parameters through texldl, but I'm assuming that's different from how 3DMigoto passes the stereo parameters.

Using the MSDN reference, here's what I came up with, but doesn't seem to work (nothing seems to change at all). I'm assuming I'm just using the wrong declaration, so if you could kindly look and advise, would again be grateful.

vs_5_0

//Declare StereoParams from t125 register, IniParams from t120 register
dcl_resource t125, Texture2D, float4
dcl_resource t120, Texture1D, float4

...snip...

mov r0, o1
mov r1, t125
add r1.y, r0.z, -r1.y
mad r0.x, -r1.x, r1.y, r0.x
mov o1, r0

ret

3D Gaming Rig: CPU: i7 7700K @ 4.9Ghz | Mobo: Asus Maximus Hero VIII | RAM: Corsair Dominator 16GB | GPU: 2 x GTX 1080 Ti SLI | 3xSSDs for OS and Apps, 2 x HDD's for 11GB storage | PSU: Seasonic X-1250 M2| Case: Corsair C70 | Cooling: Corsair H115i Hydro cooler | Displays: Asus PG278QR, BenQ XL2420TX & BenQ HT1075 | OS: Windows 10 Pro + Windows 7 dual boot

Like my fixes? Donations can be made to: www.paypal.me/DShanz or rshannonca@gmail.com
Like electronic music? Check out: www.soundcloud.com/dj-ryan-king

Posted 12/09/2015 04:31 AM   
[quote="masterotaku"]Question: is it possible to increase the shadows resolution with pixel/vertex shader modifications?[/quote]Currently no, but it might be doable if we add the ability for 3DMigoto to override a render target width/height and you use it to force the shadow maps to a higher resolution. I can't be certain if it would work, but I can't see any reason it wouldn't. [quote]Code of the pixel shader:[/quote]The way to do it would be to use a [TextureOverride] section to force g_ShadowMap_texture to a higher resolution. You can use ShaderUsage.txt or frame analysis to identify the hash of the texture in slot 12 in this shader for this. We already have support to override texture formats in this manner, so adding the ability to override their width & height might actually be fairly easy. In fact, I think it's easy enough that I'd even encourage you to download the source code to 3DMigoto from github and try to add it yourself. Since it can be a bit overwhelming looking at a new code base I'll give you some hints - the code you need to modify is: In DirectX11/globals.h: - Add and initialise entries for width and height in struct TextureOverride In DirectX11/IniHandler.cpp: - In ParseTextureOverrideSections() look for the code that parses the format and do the same thing for width & height - Add width and height to the TextureOverrideIniKeys list In DirectX11/HackerDevice.cpp: - In HackerDevice::CreateTexture2D find where textureOverride->format is used and add similar code to do the same thing for width and height If you are able to make it work, you can submit the code changes via a pull request, or bo3b could grant you commit access. On the other hand, if you've seen through my attempt to recruit a new coder and have no interest in doing this, let me know and I'll do it ;-)
masterotaku said:Question: is it possible to increase the shadows resolution with pixel/vertex shader modifications?
Currently no, but it might be doable if we add the ability for 3DMigoto to override a render target width/height and you use it to force the shadow maps to a higher resolution. I can't be certain if it would work, but I can't see any reason it wouldn't.

Code of the pixel shader:
The way to do it would be to use a [TextureOverride] section to force g_ShadowMap_texture to a higher resolution. You can use ShaderUsage.txt or frame analysis to identify the hash of the texture in slot 12 in this shader for this.

We already have support to override texture formats in this manner, so adding the ability to override their width & height might actually be fairly easy.

In fact, I think it's easy enough that I'd even encourage you to download the source code to 3DMigoto from github and try to add it yourself. Since it can be a bit overwhelming looking at a new code base I'll give you some hints - the code you need to modify is:

In DirectX11/globals.h:
- Add and initialise entries for width and height in struct TextureOverride

In DirectX11/IniHandler.cpp:
- In ParseTextureOverrideSections() look for the code that parses the format and do the same thing for width & height
- Add width and height to the TextureOverrideIniKeys list

In DirectX11/HackerDevice.cpp:
- In HackerDevice::CreateTexture2D find where textureOverride->format is used and add similar code to do the same thing for width and height

If you are able to make it work, you can submit the code changes via a pull request, or bo3b could grant you commit access.



On the other hand, if you've seen through my attempt to recruit a new coder and have no interest in doing this, let me know and I'll do it ;-)

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 12/09/2015 07:34 AM   
[quote="DJ-RK"]however, rather than ask you to hand fix this one, I'd like to ask how do I go about working on the ASM code. Specifically, how do I correctly declare the StereoParams and IniParams and then utilize them.[/quote] You declare them like this (t125 is StereoParams, t120 is IniParams): [code] dcl_resource_texture2d (float,float,float,float) t125 dcl_resource_texture1d (float,float,float,float) t120 [/code] And you load them like this: [code] ld_indexable(texture2d)(float,float,float,float) r17.xyzw, l(0, 0, 0, 0), t125.xyzw ld_indexable(texture1d)(float,float,float,float) r18.xyzw, l(0, 0, 0, 0), t120.xyzw [/code] Obviously you should change the temporary registers to whichever ones you are using, and remember that you need to bump dcl_temps up to EXACTLY ONE HIGHER than the highest temporary register number you have used. If you need to refer to any examples, the Witcher 3 fix includes a number of assembly shaders.
DJ-RK said:however, rather than ask you to hand fix this one, I'd like to ask how do I go about working on the ASM code. Specifically, how do I correctly declare the StereoParams and IniParams and then utilize them.


You declare them like this (t125 is StereoParams, t120 is IniParams):
dcl_resource_texture2d (float,float,float,float) t125
dcl_resource_texture1d (float,float,float,float) t120


And you load them like this:
ld_indexable(texture2d)(float,float,float,float) r17.xyzw, l(0, 0, 0, 0), t125.xyzw
ld_indexable(texture1d)(float,float,float,float) r18.xyzw, l(0, 0, 0, 0), t120.xyzw


Obviously you should change the temporary registers to whichever ones you are using, and remember that you need to bump dcl_temps up to EXACTLY ONE HIGHER than the highest temporary register number you have used.


If you need to refer to any examples, the Witcher 3 fix includes a number of assembly shaders.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 12/09/2015 07:48 AM   
[quote="DarkStarSword"][quote="DJ-RK"]however, rather than ask you to hand fix this one, I'd like to ask how do I go about working on the ASM code. Specifically, how do I correctly declare the StereoParams and IniParams and then utilize them.[/quote] You declare them like this (t125 is StereoParams, t120 is IniParams): [code] dcl_resource_texture2d (float,float,float,float) t125 dcl_resource_texture1d (float,float,float,float) t120 [/code] And you load them like this: [code] ld_indexable(texture2d)(float,float,float,float) r17.xyzw, l(0, 0, 0, 0), t125.xyzw ld_indexable(texture1d)(float,float,float,float) r18.xyzw, l(0, 0, 0, 0), t120.xyzw [/code] Obviously you should change the temporary registers to whichever ones you are using, and remember that you need to bump dcl_temps up to EXACTLY ONE HIGHER than the highest temporary register number you have used. If you need to refer to any examples, the Witcher 3 fix includes a number of assembly shaders.[/quote] TYVM, got it working, and a few additional shaders fixed. Now the hunt is on for the rest. I think I'm finally on my way to being able to bring a game into 3D Ready status! Cheers. Just wondering if any/what features exist within the tool to help me with tracking down shaders during short scenes, where no pause exists (in this case, during all the fatalities in the game). Would that be using the frame analysis feature? Is there any documentation on how that works?
DarkStarSword said:
DJ-RK said:however, rather than ask you to hand fix this one, I'd like to ask how do I go about working on the ASM code. Specifically, how do I correctly declare the StereoParams and IniParams and then utilize them.


You declare them like this (t125 is StereoParams, t120 is IniParams):
dcl_resource_texture2d (float,float,float,float) t125
dcl_resource_texture1d (float,float,float,float) t120


And you load them like this:
ld_indexable(texture2d)(float,float,float,float) r17.xyzw, l(0, 0, 0, 0), t125.xyzw
ld_indexable(texture1d)(float,float,float,float) r18.xyzw, l(0, 0, 0, 0), t120.xyzw


Obviously you should change the temporary registers to whichever ones you are using, and remember that you need to bump dcl_temps up to EXACTLY ONE HIGHER than the highest temporary register number you have used.


If you need to refer to any examples, the Witcher 3 fix includes a number of assembly shaders.


TYVM, got it working, and a few additional shaders fixed. Now the hunt is on for the rest. I think I'm finally on my way to being able to bring a game into 3D Ready status! Cheers.

Just wondering if any/what features exist within the tool to help me with tracking down shaders during short scenes, where no pause exists (in this case, during all the fatalities in the game). Would that be using the frame analysis feature? Is there any documentation on how that works?

3D Gaming Rig: CPU: i7 7700K @ 4.9Ghz | Mobo: Asus Maximus Hero VIII | RAM: Corsair Dominator 16GB | GPU: 2 x GTX 1080 Ti SLI | 3xSSDs for OS and Apps, 2 x HDD's for 11GB storage | PSU: Seasonic X-1250 M2| Case: Corsair C70 | Cooling: Corsair H115i Hydro cooler | Displays: Asus PG278QR, BenQ XL2420TX & BenQ HT1075 | OS: Windows 10 Pro + Windows 7 dual boot

Like my fixes? Donations can be made to: www.paypal.me/DShanz or rshannonca@gmail.com
Like electronic music? Check out: www.soundcloud.com/dj-ryan-king

Posted 12/10/2015 04:20 AM   
Yes, frame analysis is the way to track these down. Frame analysis has a lot of features to serve a lot of different needs, but for this type of analysis you generally want: analyse_frame = VK_F8 analysis_options = dump_rt clear_rt Then, when you see a broken effect press F8 (hunting must be fully enabled as well). The game will freeze and your disk light will go solid as 3DMigoto dumps out jps or dds files after every single draw call. You can then go through these files to locate the first time you see the broken effect - the filename will contain the hashes of all shaders used in the corresponding draw call. A few tips: - dump_rt will dump jps files for formats that DirectTK supports converting to jpeg, and will dump dds files for everything else. DDS files are huge and difficult to work with (I still don't have any good general recommendations for tools to open these, though I do have a python script (ddsinfo.py) that can convert some of them to png), so if you can get away with using dump_rt_jps instead I strongly suggest you do - in some games this will be fine, but in others (like Unity games) it will barely dump anything and you have to use the dds files. - If you do have to dump out dds files, lower your resolution - these files are not compressed, so dumping out a stereo pair of uncompressed 1920x1080 images for every draw call will eat your disk space very quickly and take forever. You can also use the mono keyword to just dump out the info for the right eye, which may be harder to see the broken effect, but if you know what you are looking for can still be enough. - If you look at all the images after the first dump you will notice that there are several distinct stages to drawing the frame. The first stage draws opaque geometry - this is the longest stage *by far* and is generally not interesting for our purposes - I have yet to see any broken effects drawn in this stage. 
If you can locate a shader that is first used immediately after this stage is complete (e.g. I've used the depth pass of HBAO+ for this in a few games) you can set up frame analysis so it only starts when that shader is used, saving a *lot* of time and disk space dumping further frames. To do this, set the global analyse_options to just "clear_rt", and add an analyse_options in the ShaderOverride for the shader you identified to "dump_rt clear_rt persist". - The first number in the filename is the draw number. If you find that the numbers aren't consecutive it means frame analysis was unable to dump out render targets for draw calls with the missing numbers (e.g. due to using dump_rt_jps when those particular draw calls could only be dumped as dds files, or for draw calls rendering to a 1D or 3D render target, or certain formats of anti-aliased render targets, or occasionally just because there was no render target assigned for the draw call). Keep this in mind if the first time you see a broken effect in the dump is immediately after one of these gaps, as it is possible it was one of the missing draw calls that broke it. - On occasion, a shader might render a broken effect to an alpha channel. These can be easy to miss since dumping the files as jps will strip the alpha channel, and even working with dds files can be hard to spot these since most image viewers use the alpha channel for transparency and don't have options to just show the contents of the channel. I don't have any good advice for when to check for this, but keep it in mind and be suspicious of effects drawn with multiple passes.
Yes, frame analysis is the way to track these down. Frame analysis has a lot of features to serve a lot of different needs, but for this type of analysis you generally want:

analyse_frame = VK_F8
analyse_options = dump_rt clear_rt

Then, when you see a broken effect press F8 (hunting must be fully enabled as well). The game will freeze and your disk light will go solid as 3DMigoto dumps out jps or dds files after every single draw call. You can then go through these files to locate the first time you see the broken effect - the filename will contain the hashes of all shaders used in the corresponding draw call.

A few tips:

- dump_rt will dump jps files for formats that DirectXTK supports converting to jpeg, and will dump dds files for everything else. DDS files are huge and difficult to work with (I still don't have any good general recommendations for tools to open these, though I do have a python script (ddsinfo.py) that can convert some of them to png), so if you can get away with using dump_rt_jps instead I strongly suggest you do - in some games this will be fine, but in others (like Unity games) it will barely dump anything and you have to use the dds files.

- If you do have to dump out dds files, lower your resolution - these files are not compressed, so dumping out a stereo pair of uncompressed 1920x1080 images for every draw call will eat your disk space very quickly and take forever. You can also use the mono keyword to just dump out the info for the right eye, which may make it harder to see the broken effect, but if you know what you are looking for it can still be enough.

- If you look at all the images after the first dump you will notice that there are several distinct stages to drawing the frame. The first stage draws opaque geometry - this is the longest stage *by far* and is generally not interesting for our purposes - I have yet to see any broken effects drawn in this stage. If you can locate a shader that is first used immediately after this stage is complete (e.g. I've used the depth pass of HBAO+ for this in a few games) you can set up frame analysis so it only starts when that shader is used, saving a *lot* of time and disk space dumping further frames. To do this, set the global analyse_options to just "clear_rt", and add an analyse_options in the ShaderOverride for the shader you identified to "dump_rt clear_rt persist".

- The first number in the filename is the draw number. If you find that the numbers aren't consecutive it means frame analysis was unable to dump out render targets for draw calls with the missing numbers (e.g. due to using dump_rt_jps when those particular draw calls could only be dumped as dds files, or for draw calls rendering to a 1D or 3D render target, or certain formats of anti-aliased render targets, or occasionally just because there was no render target assigned for the draw call). Keep this in mind if the first time you see a broken effect in the dump is immediately after one of these gaps, as it is possible it was one of the missing draw calls that broke it.

- On occasion, a shader might render a broken effect to an alpha channel. These can be easy to miss since dumping the files as jps will strip the alpha channel, and even working with dds files can be hard to spot these since most image viewers use the alpha channel for transparency and don't have options to just show the contents of the channel. I don't have any good advice for when to check for this, but keep it in mind and be suspicious of effects drawn with multiple passes.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 12/10/2015 05:01 AM   
Cool, I'll definitely mess around with it a bit. Quick question, on the most recent VS shader I've been working on (using the ASM code), when I'd try to run the stereo fix code on some of the outputs in the VS, it would completely lock up my PC altogether. That's definitely a showstopper, in the most literal sense. Are there any 'gotchas' that come to mind that might result in that? If there's anything I can do that will help avoid/prevent that, I'd be most grateful for that info.
Cool, I'll definitely mess around with it a bit.

Quick question, on the most recent VS shader I've been working on (using the ASM code), when I'd try to run the stereo fix code on some of the outputs in the VS, it would completely lock up my PC altogether. That's definitely a showstopper, in the most literal sense. Are there any 'gotchas' that come to mind that might result in that? If there's anything I can do that will help avoid/prevent that, I'd be most grateful for that info.

3D Gaming Rig: CPU: i7 7700K @ 4.9Ghz | Mobo: Asus Maximus Hero VIII | RAM: Corsair Dominator 16GB | GPU: 2 x GTX 1080 Ti SLI | 3xSSDs for OS and Apps, 2 x HDD's for 11GB storage | PSU: Seasonic X-1250 M2| Case: Corsair C70 | Cooling: Corsair H115i Hydro cooler | Displays: Asus PG278QR, BenQ XL2420TX & BenQ HT1075 | OS: Windows 10 Pro + Windows 7 dual boot

Like my fixes? Donations can be made to: www.paypal.me/DShanz or rshannonca@gmail.com
Like electronic music? Check out: www.soundcloud.com/dj-ryan-king

Posted 12/10/2015 06:03 AM   
Hmm, that sounds nasty. Can you post the code you're working on? Also check if the code in the reasm.txt file matches what you are working on. I might also need to see the original and reassembled bytecode to see if the disassembly / reassembly process has damaged it at all - you can dump the original bytecode out using export_binary=1 (an undocumented option), which will create .bin files in ShaderCache, and the reassembled bytecode will be in ShaderFixes with the _reasm.bin suffix.
Hmm, that sounds nasty. Can you post the code you're working on? Also check if the code in the reasm.txt file matches what you are working on.

I might also need to see the original and reassembled bytecode to see if the disassembly / reassembly process has damaged it at all - you can dump the original bytecode out using export_binary=1 (an undocumented option), which will create .bin files in ShaderCache, and the reassembled bytecode will be in ShaderFixes with the _reasm.bin suffix.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 12/10/2015 07:54 AM   
I meant to add - we don't fully understand all the possible gotchas of working with assembly in DX11 yet, especially since it is still a relatively new capability and one that most people still aren't using. We're using Flugan's assembler, which currently does not have any error checking at all, so it won't catch even trivial mistakes - either it will just silently drop instructions or it might outright crash. The second problem is that since there's no validation of the declarations we can easily mess up resource management in DirectX and the GPU if we make a mistake there causing problems that only manifest some of the time (which is why I keep reminding people about dcl_temps and declaring the StereoParams). The final problem is similar to what we face in DX9 where perfectly valid assembly might try to make the hardware do something it is not capable of. In DX9 they expected people to write assembly like this and for the most part avoided it being fatal, but in DX11 they expect that any shaders will have been produced by a compiler and don't expect it to be able to do certain things - and if we do those things it could cause all sorts of havoc, from CTDs to GPU hangs and quite possibly full system lock ups. It might be worth adding a page to the 3DMigoto wiki and documenting these types of gotchas as we find them. I would like to try to improve the assembler to catch more and more errors, but Flugan mentioned that it would probably need a full rewrite to make it much more robust against human error (it was originally written to cope with only what the disassembler would produce and cannot cope with much deviation from that), so this won't be coming any time soon.
I meant to add - we don't fully understand all the possible gotchas of working with assembly in DX11 yet, especially since it is still a relatively new capability and one that most people still aren't using.

We're using Flugan's assembler, which currently does not have any error checking at all, so it won't catch even trivial mistakes - either it will just silently drop instructions or it might outright crash.

The second problem is that since there's no validation of the declarations we can easily mess up resource management in DirectX and the GPU if we make a mistake there causing problems that only manifest some of the time (which is why I keep reminding people about dcl_temps and declaring the StereoParams).

The final problem is similar to what we face in DX9 where perfectly valid assembly might try to make the hardware do something it is not capable of. In DX9 they expected people to write assembly like this and for the most part avoided it being fatal, but in DX11 they expect that any shaders will have been produced by a compiler and don't expect it to be able to do certain things - and if we do those things it could cause all sorts of havoc, from CTDs to GPU hangs and quite possibly full system lock ups.

It might be worth adding a page to the 3DMigoto wiki and documenting these types of gotchas as we find them. I would like to try to improve the assembler to catch more and more errors, but Flugan mentioned that it would probably need a full rewrite to make it much more robust against human error (it was originally written to cope with only what the disassembler would produce and cannot cope with much deviation from that), so this won't be coming any time soon.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 12/10/2015 08:13 AM   
Wouldn't it be too much trouble to add a currently disabled shader's hash to the OSD?
Wouldn't it be too much trouble to add a currently disabled shader's hash to the OSD?

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 12/11/2015 12:40 AM   
Dear All, I would like to ask for informaton on how to use texture filtering properly in 3dMigoto. I'm trying to fix a game that is sharing pixel shaders a lot. Because of that, if I, for example fix the sky in the vertex shader, that modifies other objects' separation on the ground as well (those were rendered at the correct depth before the modification). The pixel shader is shared between the sky and theese other objects. I would like my modified vertex shader to be applied to the sky's textures only and leave everything else alone. I'm trying to use texture filtering to achieve this, but it doesn't work for me. The following is part of the ShaderUsage.txt for the relevant pixelshader: [code]<PixelShader hash="9c08d32567717c4e"> <ParentVertexShaders>40fbed84774fc706 4695208815cc03c8 6b437e672136d1a3 74e514d9873e5cf1 80438732be000cd7 8bca8392ff26cfc4 a668ac83cf99d30f b3487d202db807e0 cbf172eeeb352d82 ccc12064d35e2611 </ParentVertexShaders> <Register id=0 handle=0000000011717408 hash_contaminated=true>0c7fe159</Register> <Register id=0 handle=0000000011717648 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011717A88 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011717F08 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011718588 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011718C08 hash_contaminated=true>6f91832c</Register> <Register id=0 handle=0000000011719708 hash_contaminated=true>2ca71720</Register> <Register id=0 handle=0000000011719B48 hash_contaminated=true>ce7be65d</Register> <Register id=0 handle=000000001171B588 hash_contaminated=true>9e593f94</Register>[/code] And this is how I'm trying to use texture filtering in the d3dx.ini: [code][ShaderOverrideClouds] Hash=9c08d32567717c4e //pixel shader's hash x2=ps-t0 //not sure what x2 and t0 stand for? 
[TextureOverrideClouds] Hash=0c7fe159 //I would like to apply the modification set in vertex shader 40fbed84774fc706 only to theese listed textures Hash=6f91832c Hash=ce7be65d [/code] In the above example I'm trying to tell 3DMigoto to only apply the modified vertex shader (40fbed84774fc706) for the textures: "0c7fe159", "6f91832c", "ce7be65d" My problem is, I'm not sure how 3DMigoto knows that I would like to apply or not apply the vertex shader 40fbed84774fc706, as in the ShaderOverride section I included the pixel shader's hash only. I'm trying to follow the tutorial provided here: [url]https://github.com/bo3b/3Dmigoto/wiki/Texture-Filtering[/url], but I think I miss something important. Also, I would like to ask what this line means: x2=ps-t0? I checked other fixes and saw somebody used for example this: y2=ps-t1. What is the difference between x2 and y2? Thank you in advance for your help!
Dear All,

I would like to ask for information on how to use texture filtering properly in 3DMigoto. I'm trying to fix a game that is sharing pixel shaders a lot. Because of that, if I, for example, fix the sky in the vertex shader, that modifies other objects' separation on the ground as well (those were rendered at the correct depth before the modification). The pixel shader is shared between the sky and these other objects. I would like my modified vertex shader to be applied to the sky's textures only and leave everything else alone.

I'm trying to use texture filtering to achieve this, but it doesn't work for me.
The following is part of the ShaderUsage.txt for the relevant pixelshader:

<PixelShader hash="9c08d32567717c4e">
<ParentVertexShaders>40fbed84774fc706 4695208815cc03c8 6b437e672136d1a3 74e514d9873e5cf1 80438732be000cd7 8bca8392ff26cfc4 a668ac83cf99d30f b3487d202db807e0 cbf172eeeb352d82 ccc12064d35e2611 </ParentVertexShaders>
<Register id=0 handle=0000000011717408 hash_contaminated=true>0c7fe159</Register>
<Register id=0 handle=0000000011717648 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011717A88 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011717F08 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011718588 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011718C08 hash_contaminated=true>6f91832c</Register>
<Register id=0 handle=0000000011719708 hash_contaminated=true>2ca71720</Register>
<Register id=0 handle=0000000011719B48 hash_contaminated=true>ce7be65d</Register>
<Register id=0 handle=000000001171B588 hash_contaminated=true>9e593f94</Register>


And this is how I'm trying to use texture filtering in the d3dx.ini:

[ShaderOverrideClouds]
Hash=9c08d32567717c4e //pixel shader's hash
x2=ps-t0 //not sure what x2 and t0 stand for?

[TextureOverrideClouds]
Hash=0c7fe159 //I would like to apply the modification set in vertex shader 40fbed84774fc706 only to theese listed textures
Hash=6f91832c
Hash=ce7be65d


In the above example I'm trying to tell 3DMigoto to only apply the modified vertex shader (40fbed84774fc706) for the textures: "0c7fe159", "6f91832c", "ce7be65d"

My problem is, I'm not sure how 3DMigoto knows that I would like to apply or not apply the vertex shader 40fbed84774fc706, as in the ShaderOverride section I included the pixel shader's hash only. I'm trying to follow the tutorial provided here: https://github.com/bo3b/3Dmigoto/wiki/Texture-Filtering, but I think I'm missing something important.

Also, I would like to ask what this line means: x2=ps-t0?
I checked other fixes and saw somebody used for example this: y2=ps-t1.
What is the difference between x2 and y2?

Thank you in advance for your help!

Posted 12/11/2015 06:58 AM   
[quote]And this is how I'm trying to use texture filtering in the d3dx.ini: [code][ShaderOverrideClouds] Hash=9c08d32567717c4e //pixel shader's hash x2=ps-t0 //not sure what x2 and t0 stand for? [TextureOverrideClouds] Hash=0c7fe159 //I would like to apply the modification set in vertex shader 40fbed84774fc706 only to theese listed textures Hash=6f91832c Hash=ce7be65d [/code] [/quote]There's a few problems there - you will probably be hearing some error beeps from 3DMigoto when it parses that since it's not legal to have multiple Hash keys in the same section like that, and there will be a warning in the log file about it - those should be in three separate sections. That's also not the right way to do comments in an ini file - in some cases it might work, but it's basically relying on bugs in 3DMigoto's ini parsing that just happen to ignore the extra characters at the end of the line, in *some* cases. The correct way to comment an ini file is a semicolon at the start of a line. It should look more like this: [code][ShaderOverrideClouds] ; pixel shader's hash Hash=9c08d32567717c4e ; Texture filtering in IniParams.Load(int2(2,0)).x x2=ps-t0 ; Apply the modification set in vertex shader 40fbed84774fc706 only to theese listed textures: [TextureOverrideClouds1] Hash=0c7fe159 [TextureOverrideClouds2] Hash=6f91832c [TextureOverrideClouds3] Hash=ce7be65d [/code] [quote]In the above example I'm trying to tell 3DMigoto to only apply the modified vertex shader (40fbed84774fc706) for the textures: "0c7fe159", "6f91832c", "ce7be65d"[/quote]Some of the older filtering mechanisms work that way, but it was pretty limited what you could do with it so all the recent ones we've added work differently - instead of running the original or modified shader it will always use the modified shader, but it will pass some information in IniParams that you can check in the shader to decide what to do. [quote]Also, I would like to ask what this line means: x2=ps-t0? 
I checked other fixes and saw somebody used for example this: y2=ps-t1. What is the difference between x2 and y2?[/quote]The x2 and y2 indicate which part of the IniParams resource will be used to store the result of the texture filter. The IniParams resource contains 8 lots of 4 component registers (32 in total) that you can use - these are the same ones in the [Constants] section. In the shader you use IniParams.Load(int2(index, 0)).component to access them, e.g.: [code] float x = IniParams.Load(0).x; float x1 = IniParams.Load(int2(1, 0)).x; float x2 = IniParams.Load(int2(2, 0)).x; float y7 = IniParams.Load(int2(7, 0)).y; [/code] [quote="toomyxp"]The following is part of the ShaderUsage.txt for the relevant pixelshader: [code]<PixelShader hash="9c08d32567717c4e"> <ParentVertexShaders>40fbed84774fc706 4695208815cc03c8 6b437e672136d1a3 74e514d9873e5cf1 80438732be000cd7 8bca8392ff26cfc4 a668ac83cf99d30f b3487d202db807e0 cbf172eeeb352d82 ccc12064d35e2611 </ParentVertexShaders> <Register id=0 handle=0000000011717408 hash_contaminated=true>0c7fe159</Register> <Register id=0 handle=0000000011717648 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011717A88 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011717F08 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011718588 hash_contaminated=true>9e593f94</Register> <Register id=0 handle=0000000011718C08 hash_contaminated=true>6f91832c</Register> <Register id=0 handle=0000000011719708 hash_contaminated=true>2ca71720</Register> <Register id=0 handle=0000000011719B48 hash_contaminated=true>ce7be65d</Register> <Register id=0 handle=000000001171B588 hash_contaminated=true>9e593f94</Register>[/code] [/quote]Something to be aware of here - the hash_contaminated=true is a warning that recent versions of 3DMigoto adds if it notices that the game has overwritten the texture at some point, which may mean that the hash no longer matches the contents of the texture. 
If you see this you might need to enable track_texture_updates=1 and 3DMigoto will update the hash whenever the game updates the texture from the CPU, or performs a (full) copy on the GPU. This feature was added to make texture filtering reliable in Far Cry 4 - I haven't enabled it in the default d3dx.ini because I'm not sure yet if there are any possible downsides of using it.
And this is how I'm trying to use texture filtering in the d3dx.ini:

[ShaderOverrideClouds]
Hash=9c08d32567717c4e //pixel shader's hash
x2=ps-t0 //not sure what x2 and t0 stand for?

[TextureOverrideClouds]
Hash=0c7fe159 //I would like to apply the modification set in vertex shader 40fbed84774fc706 only to theese listed textures
Hash=6f91832c
Hash=ce7be65d

There's a few problems there - you will probably be hearing some error beeps from 3DMigoto when it parses that since it's not legal to have multiple Hash keys in the same section like that, and there will be a warning in the log file about it - those should be in three separate sections.

That's also not the right way to do comments in an ini file - in some cases it might work, but it's basically relying on bugs in 3DMigoto's ini parsing that just happen to ignore the extra characters at the end of the line, in *some* cases. The correct way to comment an ini file is a semicolon at the start of a line.

It should look more like this:
[ShaderOverrideClouds]
; pixel shader's hash
Hash=9c08d32567717c4e
; Texture filtering in IniParams.Load(int2(2,0)).x
x2=ps-t0

; Apply the modification set in vertex shader 40fbed84774fc706 only to these listed textures:
[TextureOverrideClouds1]
Hash=0c7fe159
[TextureOverrideClouds2]
Hash=6f91832c
[TextureOverrideClouds3]
Hash=ce7be65d

In the above example I'm trying to tell 3DMigoto to only apply the modified vertex shader (40fbed84774fc706) for the textures: "0c7fe159", "6f91832c", "ce7be65d"
Some of the older filtering mechanisms work that way, but it was pretty limited what you could do with it so all the recent ones we've added work differently - instead of running the original or modified shader it will always use the modified shader, but it will pass some information in IniParams that you can check in the shader to decide what to do.

Also, I would like to ask what this line means: x2=ps-t0?
I checked other fixes and saw somebody used for example this: y2=ps-t1.
What is the difference between x2 and y2?
The x2 and y2 indicate which part of the IniParams resource will be used to store the result of the texture filter. The IniParams resource contains 8 lots of 4 component registers (32 in total) that you can use - these are the same ones in the [Constants] section. In the shader you use IniParams.Load(int2(index, 0)).component to access them, e.g.:

float x = IniParams.Load(0).x;
float x1 = IniParams.Load(int2(1, 0)).x;
float x2 = IniParams.Load(int2(2, 0)).x;
float y7 = IniParams.Load(int2(7, 0)).y;


toomyxp said:The following is part of the ShaderUsage.txt for the relevant pixelshader:

<PixelShader hash="9c08d32567717c4e">
<ParentVertexShaders>40fbed84774fc706 4695208815cc03c8 6b437e672136d1a3 74e514d9873e5cf1 80438732be000cd7 8bca8392ff26cfc4 a668ac83cf99d30f b3487d202db807e0 cbf172eeeb352d82 ccc12064d35e2611 </ParentVertexShaders>
<Register id=0 handle=0000000011717408 hash_contaminated=true>0c7fe159</Register>
<Register id=0 handle=0000000011717648 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011717A88 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011717F08 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011718588 hash_contaminated=true>9e593f94</Register>
<Register id=0 handle=0000000011718C08 hash_contaminated=true>6f91832c</Register>
<Register id=0 handle=0000000011719708 hash_contaminated=true>2ca71720</Register>
<Register id=0 handle=0000000011719B48 hash_contaminated=true>ce7be65d</Register>
<Register id=0 handle=000000001171B588 hash_contaminated=true>9e593f94</Register>

Something to be aware of here - the hash_contaminated=true is a warning that recent versions of 3DMigoto add if they notice that the game has overwritten the texture at some point, which may mean that the hash no longer matches the contents of the texture. If you see this you might need to enable track_texture_updates=1 so that 3DMigoto will update the hash whenever the game updates the texture from the CPU, or performs a (full) copy on the GPU. This feature was added to make texture filtering reliable in Far Cry 4 - I haven't enabled it in the default d3dx.ini because I'm not sure yet if there are any possible downsides of using it.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

Posted 12/11/2015 12:45 PM   
Thank you very much DarkStarSword! Now I understand and managed to do a test filtering, which worked out great! Now, I need to browse through and pick the relevant textures from the shaderusage file. Thank you again for your work on this great tool! With it, I'm now closer than ever to fully fix a game. :)
Thank you very much DarkStarSword! Now I understand and managed to do a test filtering, which worked out great! Now, I need to browse through and pick the relevant textures from the shaderusage file.
Thank you again for your work on this great tool! With it, I'm now closer than ever to fully fix a game. :)

Posted 12/11/2015 06:33 PM   
[quote="DarkStarSword"]In fact, I think it's easy enough that I'd even encourage you to download the source code to 3DMigoto from github and try to add it yourself. Since it can be a bit overwhelming looking at a new code base I'll give you some hints - the code you need to modify is: In DirectX11/globals.h: - Add and initialise entries for width and height in struct TextureOverride In DirectX11/IniHandler.cpp: - In ParseTextureOverrideSections() look for the code that parses the format and do the same thing for width & height - Add width and height to the TextureOverrideIniKeys list In DirectX11/HackerDevice.cpp: - In HackerDevice::CreateTexture2D find where textureOverride->format is used and add similar code to do the same thing for width and height If you are able to make it work, you can submit the code changes via a pull request, or bo3b could grant you commit access.[/quote] Sorry for the late response. I have been busy these days. But this would be me trying something like that: [img]http://i.kinja-img.com/gawker-media/image/upload/japbcvpavbzau9dbuaxf.jpg[/img] [quote="DarkStarSword"]On the other hand, if you've seen through my attempt to recruit a new coder and have no interest in doing this, let me know and I'll do it ;-) [/quote] Hahaha :p. I am a web developer (html, css, javascript, php, java, etc), and it's totally different from these other things. I want to learn, but I'm not prepared to be in the team yet. I wish I had more free time. And yes, you're free to do that yourself :p. You'll do it faster and better than me, even in the case I could manage to make it work. By the way, another idea that may not exist in 3Dmigoto: OSD messages. I'd like to show messages like "Bloom off", "Blurry as DoF FXAA" and similar things. If it exists, great. If not, I don't mind. As a last wish, DX9 for 3Dmigoto so I don't have to deal with ASM and no beeps, lol (I know it only has a logging feature for now).
DarkStarSword said:In fact, I think it's easy enough that I'd even encourage you to download the source code to 3DMigoto from github and try to add it yourself. Since it can be a bit overwhelming looking at a new code base I'll give you some hints - the code you need to modify is:

In DirectX11/globals.h:
- Add and initialise entries for width and height in struct TextureOverride

In DirectX11/IniHandler.cpp:
- In ParseTextureOverrideSections() look for the code that parses the format and do the same thing for width & height
- Add width and height to the TextureOverrideIniKeys list

In DirectX11/HackerDevice.cpp:
- In HackerDevice::CreateTexture2D find where textureOverride->format is used and add similar code to do the same thing for width and height

If you are able to make it work, you can submit the code changes via a pull request, or bo3b could grant you commit access.


Sorry for the late response. I have been busy these days. But this would be me trying something like that:

Image


DarkStarSword said:On the other hand, if you've seen through my attempt to recruit a new coder and have no interest in doing this, let me know and I'll do it ;-)


Hahaha :p. I am a web developer (html, css, javascript, php, java, etc), and it's totally different from these other things. I want to learn, but I'm not prepared to be in the team yet. I wish I had more free time. And yes, you're free to do that yourself :p. You'll do it faster and better than me, even in the case I could manage to make it work.

By the way, another idea that may not exist in 3Dmigoto: OSD messages. I'd like to show messages like "Bloom off", "Blurry as DoF FXAA" and similar things. If it exists, great. If not, I don't mind.

As a last wish, DX9 for 3Dmigoto so I don't have to deal with ASM and no beeps, lol (I know it only has a logging feature for now).

CPU: Intel Core i7 7700K @ 4.9GHz
Motherboard: Gigabyte Aorus GA-Z270X-Gaming 5
RAM: GSKILL Ripjaws Z 16GB 3866MHz CL18
GPU: MSI GeForce RTX 2080Ti Gaming X Trio
Monitor: Asus PG278QR
Speakers: Logitech Z506
Donations account: masterotakusuko@gmail.com

Posted 12/11/2015 09:53 PM   
I don't know if this is a bug or not, but I feel like reporting it anyway. Here's how it works: 1. dump a shader 2. modify its output 3. delete the shader's file 4. press reload shaders button on a kb 5. dump shader again The result: file dumped will contain a previously modified file. I have shader cache disabled in NV control panel. Also cache_shaders=0
I don't know if this is a bug or not, but I feel like reporting it anyway.

Here's how it works:
1. dump a shader
2. modify its output
3. delete the shader's file
4. press reload shaders button on a kb
5. dump shader again

The result: file dumped will contain a previously modified file.

I have shader cache disabled in NV control panel. Also cache_shaders=0

EVGA GeForce GTX 980 SC
Core i5 2500K
MSI Z77A-G45
8GB DDR3
Windows 10 x64

Posted 12/11/2015 10:33 PM   
  41 / 143    
Scroll To Top