Flugan's Assembler
I want to shed some light on a major module that is often mentioned but rarely understood. In simple terms we need the actuall hash function used when compiling shaders. We also need to convert modified ASM shaders back into binary in such a way that it works as expected. Neither of these tasks are simple but when they are done there is little left to do. Major reverse engineering: ComputeHashRetail@D3DCompiler_47.dll Using trial version of IDA Pro [img]http://flugan.net/hash.png[/img] The code is not consistant and you really need to keep track to make the code version consistent. [code] vector<DWORD> ComputeHash(byte const* input, DWORD size) { DWORD esi; DWORD ebx; DWORD i = 0; DWORD edi; DWORD edx; DWORD processedSize = 0; DWORD sizeHash = size & 0x3F; bool sizeHash56 = sizeHash >= 56; DWORD restSize = sizeHash56 ? 120 - 56 : 56 - sizeHash; DWORD loopSize = (size + 8 + restSize) >> 6; DWORD Dst[16]; DWORD Data[] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; DWORD loopSize2 = loopSize - (sizeHash56 ? 
2 : 1); DWORD start_0 = 0; DWORD* pSrc = (DWORD*)input; DWORD h[] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 }; if (loopSize > 0) { while (i < loopSize) { if (i == loopSize2) { if (!sizeHash56) { Dst[0] = size << 3; DWORD remSize = size - processedSize; std::memcpy(&Dst[1], pSrc, remSize); std::memcpy(&Dst[1 + remSize / 4], Data, restSize); Dst[15] = (size * 2) | 1; pSrc = Dst; } else { DWORD remSize = size - processedSize; std::memcpy(&Dst[0], pSrc, remSize); std::memcpy(&Dst[remSize / 4], Data, 64 - remSize); pSrc = Dst; } } else if (i > loopSize2) { Dst[0] = size << 3; std::memcpy(&Dst[1], &Data[1], 56); Dst[15] = (size * 2) | 1; pSrc = Dst; } // initial values from memory edx = h[0]; ebx = h[1]; edi = h[2]; esi = h[3]; edx = _rotl((~ebx & esi | ebx & edi) + pSrc[0] + 0xD76AA478 + edx, 7) + ebx; esi = _rotl((~edx & edi | edx & ebx) + pSrc[1] + 0xE8C7B756 + esi, 12) + edx; edi = _rotr((~esi & ebx | esi & edx) + pSrc[2] + 0x242070DB + edi, 15) + esi; ebx = _rotr((~edi & edx | edi & esi) + pSrc[3] + 0xC1BDCEEE + ebx, 10) + edi; edx = _rotl((~ebx & esi | ebx & edi) + pSrc[4] + 0xF57C0FAF + edx, 7) + ebx; esi = _rotl((~edx & edi | ebx & edx) + pSrc[5] + 0x4787C62A + esi, 12) + edx; edi = _rotr((~esi & ebx | esi & edx) + pSrc[6] + 0xA8304613 + edi, 15) + esi; ebx = _rotr((~edi & edx | edi & esi) + pSrc[7] + 0xFD469501 + ebx, 10) + edi; edx = _rotl((~ebx & esi | ebx & edi) + pSrc[8] + 0x698098D8 + edx, 7) + ebx; esi = _rotl((~edx & edi | ebx & edx) + pSrc[9] + 0x8B44F7AF + esi, 12) + edx; edi = _rotr((~esi & ebx | esi & edx) + pSrc[10] + 0xFFFF5BB1 + edi, 15) + esi; ebx = _rotr((~edi & edx | edi & esi) + pSrc[11] + 0x895CD7BE + ebx, 10) + edi; edx = _rotl((~ebx & esi | ebx & edi) + pSrc[12] + 0x6B901122 + edx, 7) + ebx; esi = _rotl((~edx & edi | ebx & edx) + pSrc[13] + 0xFD987193 + esi, 12) + edx; edi = _rotr((~esi & ebx | esi & edx) + pSrc[14] + 0xA679438E + edi, 15) + esi; ebx = _rotr((~edi & edx | edi & esi) + pSrc[15] + 0x49B40821 + ebx, 10) + edi; edx = 
_rotl((~esi & edi | esi & ebx) + pSrc[1] + 0xF61E2562 + edx, 5) + ebx; esi = _rotl((~edi & ebx | edi & edx) + pSrc[6] + 0xC040B340 + esi, 9) + edx; edi = _rotl((~ebx & edx | ebx & esi) + pSrc[11] + 0x265E5A51 + edi, 14) + esi; ebx = _rotr((~edx & esi | edx & edi) + pSrc[0] + 0xE9B6C7AA + ebx, 12) + edi; edx = _rotl((~esi & edi | esi & ebx) + pSrc[5] + 0xD62F105D + edx, 5) + ebx; esi = _rotl((~edi & ebx | edi & edx) + pSrc[10] + 0x02441453 + esi, 9) + edx; edi = _rotl((~ebx & edx | ebx & esi) + pSrc[15] + 0xD8A1E681 + edi, 14) + esi; ebx = _rotr((~edx & esi | edx & edi) + pSrc[4] + 0xE7D3FBC8 + ebx, 12) + edi; edx = _rotl((~esi & edi | esi & ebx) + pSrc[9] + 0x21E1CDE6 + edx, 5) + ebx; esi = _rotl((~edi & ebx | edi & edx) + pSrc[14] + 0xC33707D6 + esi, 9) + edx; edi = _rotl((~ebx & edx | ebx & esi) + pSrc[3] + 0xF4D50D87 + edi, 14) + esi; ebx = _rotr((~edx & esi | edx & edi) + pSrc[8] + 0x455A14ED + ebx, 12) + edi; edx = _rotl((~esi & edi | esi & ebx) + pSrc[13] + 0xA9E3E905 + edx, 5) + ebx; esi = _rotl((~edi & ebx | edi & edx) + pSrc[2] + 0xFCEFA3F8 + esi, 9) + edx; edi = _rotl((~ebx & edx | ebx & esi) + pSrc[7] + 0x676F02D9 + edi, 14) + esi; ebx = _rotr((~edx & esi | edx & edi) + pSrc[12] + 0x8D2A4C8A + ebx, 12) + edi; edx = _rotl((esi ^ edi ^ ebx) + pSrc[5] + 0xFFFA3942 + edx, 4) + ebx; esi = _rotl((edi ^ ebx ^ edx) + pSrc[8] + 0x8771F681 + esi, 11) + edx; edi = _rotl((ebx ^ edx ^ esi) + pSrc[11] + 0x6D9D6122 + edi, 16) + esi; ebx = _rotr((edx ^ esi ^ edi) + pSrc[14] + 0xFDE5380C + ebx, 9) + edi; edx = _rotl((esi ^ edi ^ ebx) + pSrc[1] + 0xA4BEEA44 + edx, 4) + ebx; esi = _rotl((edi ^ ebx ^ edx) + pSrc[4] + 0x4BDECFA9 + esi, 11) + edx; edi = _rotl((ebx ^ edx ^ esi) + pSrc[7] + 0xF6BB4B60 + edi, 16) + esi; ebx = _rotr((edx ^ esi ^ edi) + pSrc[10] + 0xBEBFBC70 + ebx, 9) + edi; edx = _rotl((esi ^ edi ^ ebx) + pSrc[13] + 0x289B7EC6 + edx, 4) + ebx; esi = _rotl((edi ^ ebx ^ edx) + pSrc[0] + 0xEAA127FA + esi, 11) + edx; edi = _rotl((ebx ^ edx ^ esi) + pSrc[3] + 
0xD4EF3085 + edi, 16) + esi; ebx = _rotr((edx ^ esi ^ edi) + pSrc[6] + 0x04881D05 + ebx, 9) + edi; edx = _rotl((esi ^ edi ^ ebx) + pSrc[9] + 0xD9D4D039 + edx, 4) + ebx; esi = _rotl((edi ^ ebx ^ edx) + pSrc[12] + 0xE6DB99E5 + esi, 11) + edx; edi = _rotl((ebx ^ edx ^ esi) + pSrc[15] + 0x1FA27CF8 + edi, 16) + esi; ebx = _rotr((edx ^ esi ^ edi) + pSrc[2] + 0xC4AC5665 + ebx, 9) + edi; edx = _rotl(((~esi | ebx) ^ edi) + pSrc[0] + 0xF4292244 + edx, 6) + ebx; esi = _rotl(((~edi | edx) ^ ebx) + pSrc[7] + 0x432AFF97 + esi, 10) + edx; edi = _rotl(((~ebx | esi) ^ edx) + pSrc[14] + 0xAB9423A7 + edi, 15) + esi; ebx = _rotr(((~edx | edi) ^ esi) + pSrc[5] + 0xFC93A039 + ebx, 11) + edi; edx = _rotl(((~esi | ebx) ^ edi) + pSrc[12] + 0x655B59C3 + edx, 6) + ebx; esi = _rotl(((~edi | edx) ^ ebx) + pSrc[3] + 0x8F0CCC92 + esi, 10) + edx; edi = _rotl(((~ebx | esi) ^ edx) + pSrc[10] + 0xFFEFF47D + edi, 15) + esi; ebx = _rotr(((~edx | edi) ^ esi) + pSrc[1] + 0x85845DD1 + ebx, 11) + edi; edx = _rotl(((~esi | ebx) ^ edi) + pSrc[8] + 0x6FA87E4F + edx, 6) + ebx; esi = _rotl(((~edi | edx) ^ ebx) + pSrc[15] + 0xFE2CE6E0 + esi, 10) + edx; edi = _rotl(((~ebx | esi) ^ edx) + pSrc[6] + 0xA3014314 + edi, 15) + esi; ebx = _rotr(((~edx | edi) ^ esi) + pSrc[13] + 0x4E0811A1 + ebx, 11) + edi; edx = _rotl(((~esi | ebx) ^ edi) + pSrc[4] + 0xF7537E82 + edx, 6) + ebx; h[0] += edx; esi = _rotl(((~edi | edx) ^ ebx) + pSrc[11] + 0xBD3AF235 + esi, 10) + edx; h[3] += esi; edi = _rotl(((~ebx | esi) ^ edx) + pSrc[2] + 0x2AD7D2BB + edi, 15) + esi; h[2] += edi; ebx = _rotr(((~edx | edi) ^ esi) + pSrc[9] + 0xEB86D391 + ebx, 11) + edi; h[1] += ebx; processedSize += 0x40; pSrc += 16; i++; } } vector<DWORD> hash(4); std::memcpy(hash.data(), h, 16); return hash; } [/code] DarkStarSword asked me about computeshaders. At the time we had nothing that could handle anything other than ps and vs. I quickly realized it was a massive workload. 
I've never seen any of these shaders in my life and not suprisingly they introduced loads of new instructions. At this point I created the dump only versions of my wrappers which dumped all 6 types of shaders. [code] DX11: f:\SteamApps\common\Alien Isolation f:\SteamApps\common\Aliens vs Predator f:\Ubisoft\Related Designs\ANNO 2070 F:\Glyph\Games\ArcheAge f:\SteamApps\common\Arma 3 f:\SteamApps\common\Assassin's Creed 3 f:\Ubisoft\Ubisoft Game Launcher\games\Assassin's Creed Liberation HD f:\Ubisoft\Ubisoft Game Launcher\games\Assassin's Creed IV Black Flag Asia f:\Ubisoft\Ubisoft Game Launcher\games\Assassin's Creed Unity f:\SteamApps\common\assettocorsa f:\SteamApps\common\Batman Arkham City GOTY\Binaries\Win32 f:\SteamApps\common\Batman Arkham Origins\SinglePlayer\Binaries\Win32 f:\SteamApps\common\Batman Arkham Knight\Binaries\Win64 f:\Origin Games\Battlefield 3 f:\Origin Games\Battlefield 4 f:\Origin Games\Battlefield Bad Company 2 f:\SteamApps\common\BioShock Infinite\Binaries\Win32 f:\SteamApps\common\Call of Duty Black Ops II f:\SteamApps\common\Call of Duty Ghosts f:\SteamApps\common\Sid Meier's Civilization V f:\SteamApps\common\Sid Meier's Civilization Beyond Earth f:\SteamApps\common\Company of Heroes 2 f:\Origin Games\Crysis 2 f:\Origin Games\Crysis 3\Bin32 f:\SteamApps\common\Dark Souls II Scholar of the First Sin\Game f:\SteamApps\common\Deus Ex - Human Revolution f:\SteamApps\common\Deus Ex Human Revolution Director's Cut f:\SteamApps\common\DiRT 3 f:\SteamApps\common\DiRT 3 Complete Edition f:\SteamApps\common\DiRT Rally f:\SteamApps\common\DiRT Showdown f:\Origin Games\dragon Age Inquisition f:\SteamApps\common\Dungeons and Dragons Online f:\SteamApps\common\Dying Light f:\SteamApps\common\F.E.A.R. 
3 f:\Ubisoft\Ubisoft Game Launcher\games\Far Cry 3 f:\Ubisoft\Far Cry 3 Blood Dragon\bin f:\Ubisoft\Ubisoft Game Launcher\games\Far Cry 4\bin f:\Origin Games\FIFA 15 DEMO f:\SteamApps\common\Grand Theft Auto V f:\SteamApps\common\grid 2 f:\SteamApps\common\GRID Autosport f:\SteamApps\common\Hitman Absolution f:\SteamApps\common\HOMEFRONT\Binaries f:\SteamApps\common\L.A.Noire f:\SteamApps\common\Lords Of The Fallen f:\SteamApps\common\Lost Planet 2 f:\SteamApps\common\Max Payne 3\Max Payne 3 f:\Origin Games\Medal of Honor\MP f:\Origin Games\Medal of Honor Warfighter f:\SteamApps\common\Metro 2033 f:\SteamApps\common\Metro 2033 Redux f:\SteamApps\common\Metro Last Light f:\SteamApps\common\Metro Last Light Redux f:\SteamApps\common\ShadowOfMordor\x64 f:\SteamApps\common\MK10\Binaries\Retail f:\SteamApps\common\Murdered Soul Suspect\Binaries\Win64 f:\SteamApps\common\Cryptic Studios\Neverwinter\Live f:\SteamApps\common\Oil Rush\bin f:\SteamApps\common\pCars f:\SteamApps\common\red faction armageddon f:\SteamApps\common\Red Faction Guerrilla f:\SteamApps\common\Rise of Nations f:\SteamApps\common\Ryse Son of Rome\Bin64 f:\SteamApps\common\Saints Row the Third f:\SteamApps\common\Saints Row IV f:\SteamApps\common\SleepingDogs f:\SteamApps\common\SleepingDogsDefinitiveEdition f:\SteamApps\common\Project Silverado f:\SteamApps\common\strikesuitzero\pc\main\Binary f:\SteamApps\common\SSZ Directors Cut\pc\main\Binary f:\SteamApps\common\Star Trek Online\Star Trek Online\Live f:\SteamApps\common\Lord of the Rings Online f:\Games\The Secret World f:\GOG Games\The Witcher 3 Wild Hunt\bin\x64 f:\SteamApps\common\Thief\Binaries2\Win64 f:\Origin Games\Titanfall f:\Ubisoft\Tom Clancy's H.A.W.X. 
2 f:\SteamApps\common\Tomb Raider f:\SteamApps\common\Total War Rome II f:\SteamApps\common\Total War SHOGUN 2 f:\SteamApps\common\ToyboxTurbos f:\SteamApps\common\TrialsPC\datapack f:\SteamApps\common\Trials Fusion\datapack f:\Ubisoft\Ubisoft Game Launcher\games\Watch_Dogs\bin f:\Games\World of Warcraft f:\SteamApps\common\Tom Clancy's Splinter Cell Blacklist f:\SteamApps\common\The Bureau\Binaries\Win32 f:\SteamApps\common\War of the Roses f:\SteamApps\common\Shadow Warrior\dx11 DX10: f:\SteamApps\common\Age of Conan f:\Ubisoft\Related Designs\ANNO 1404 - Gold Edition f:\SteamApps\common\Assassins Creed f:\SteamApps\common\Bioshock\Builds\Release f:\SteamApps\common\BioShock 2\SP\Builds\Binaries f:\SteamApps\common\BioShock 2\MP\Builds\Binaries f:\SteamApps\common\Devil May Cry 4 f:\steamapps\common\Special Edition f:\SteamApps\common\Far Cry 2\bin f:\Ubisoft\James Cameron's AVATAR - THE GAME f:\SteamApps\common\Just Cause 2 f:\SteamApps\common\lost planet extreme condition f:\SteamApps\common\Stormrise [/code] After this massive dump process the next part was going through the shaders with the assembler. Initially there was a massive amount of errors and missing instructions. The assembler has Always been a slow iterative process. A simple change can heal a spectra of instructions while at the same time a single error can reoccur multiple times Before getting fixed. One drawback is that my tools don't tell me where the problem is not even which game. Batman Knight took a long time to fix mostly because the massive amount of shaders and the number of iteration. Sometimes you need to go through all shaders in all games to make sure a fix didn't cause something else to break. In the end I have yet to hear of any problems with the assembler. Due to the nature of the code I would consider it fragile but stable. My work on the wrappers are significant and has helped especially when dumping and playing certain games. 
The timing could hardly have been worse for my wrapper, as I started it only a few months before 3DMigoto went open source. If it had been the other way around I wouldn't have been motivated to start such a massive project. That would probably have meant no assembler in 3DMigoto, which would have been worse for everyone. I think Chiri, bo3b, DarkStarSword and I have each contributed what we are best at. I'm not saying the assembler wouldn't exist without me, but it would definitely look different, and I don't know who would have broken the hash function. The fight between Flugan "wrappers" and 3DMigoto was never fair. 3DMigoto has always been a wrapper, while I have never written a wrapper in my life. It took me quite a while to get stable with hooks and some other strange behaviour.
I want to shed some light on a major module that is often mentioned but rarely understood.
In simple terms, we need the actual hash function used when compiling shaders.
We also need to convert modified ASM shaders back into binary in such a way that it works as expected.
Neither of these tasks is simple, but once they are done there is little left to do.

Major reverse engineering:
ComputeHashRetail@D3DCompiler_47.dll
Using trial version of IDA Pro
Image
The code is not consistent, and you really need to keep track to make the code version consistent.
// Rotates a 32-bit value left by `shift` bits. `shift` must be in 1..31.
static inline DWORD HashRotateLeft(DWORD value, unsigned shift) {
	return (value << shift) | (value >> (32u - shift));
}

// Runs the standard 64-step MD5 compression function over one 64-byte block
// and folds the result into state[0..3].
//
// The block is loaded as sixteen 32-bit words in native byte order, exactly
// like the original code that read the data through a DWORD pointer; using
// memcpy for the load avoids unaligned/aliasing reads of the caller's buffer.
static void HashTransformBlock(DWORD state[4], const void* block) {
	// Per-step additive constants (unchanged from the unrolled version).
	static const DWORD kStepConstant[64] = {
		0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE,
		0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501,
		0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE,
		0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821,
		0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA,
		0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8,
		0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED,
		0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A,
		0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C,
		0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70,
		0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05,
		0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665,
		0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039,
		0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1,
		0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1,
		0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391
	};
	// Left-rotation amounts, four per round. The unrolled version expressed
	// some of these as right rotations (e.g. _rotr(x, 15) == rotate left 17).
	static const unsigned kRotation[4][4] = {
		{ 7, 12, 17, 22 },
		{ 5,  9, 14, 20 },
		{ 4, 11, 16, 23 },
		{ 6, 10, 15, 21 }
	};

	DWORD words[16];
	std::memcpy(words, block, sizeof(words));

	DWORD a = state[0];
	DWORD b = state[1];
	DWORD c = state[2];
	DWORD d = state[3];

	for (unsigned step = 0; step < 64; ++step) {
		const unsigned round = step >> 4;
		DWORD mixed;
		unsigned wordIndex;
		switch (round) {
		case 0:
			mixed = (b & c) | (~b & d);
			wordIndex = step;
			break;
		case 1:
			mixed = (d & b) | (~d & c);
			wordIndex = (5 * step + 1) & 15;
			break;
		case 2:
			mixed = b ^ c ^ d;
			wordIndex = (3 * step + 5) & 15;
			break;
		default:
			mixed = c ^ (b | ~d);
			wordIndex = (7 * step) & 15;
			break;
		}
		const DWORD rotated = HashRotateLeft(
			a + mixed + kStepConstant[step] + words[wordIndex],
			kRotation[round][step & 3]);
		// Rotate the four working registers for the next step.
		a = d;
		d = c;
		c = b;
		b += rotated;
	}

	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
}

// Computes the 128-bit shader hash over `size` bytes starting at `input`.
//
// The compression function is plain MD5; only the final padding differs:
//   * the message is terminated with a 0x80 byte and zero-filled as usual,
//   * the LAST 64-byte block starts with the 32-bit length in bits
//     (size << 3) and ends with the 32-bit value (size * 2) | 1,
//     leaving 56 bytes in between for message tail / padding.
// If the tail (size % 64) is 56 bytes or more it does not fit in that gap, so
// the tail + 0x80 + zeros form their own block, followed by a final block
// holding only the two length words around 56 zero bytes.
//
// Parameters:
//   input - pointer to the data; may be null only when size is 0.
//   size  - number of bytes to hash. The two length words are truncated to
//           32 bits, as in the original code.
// Returns:
//   four 32-bit words (h[0]..h[3]) in native byte order.
//
// Compared with the previous version the padding is positioned by byte
// offset instead of DWORD index, so sizes that are not a multiple of 4 no
// longer overwrite the last input bytes or hash uninitialised stack memory.
// Results for sizes that are a multiple of 4 are unchanged.
// Like the original, this assumes a little-endian host.
vector<DWORD> ComputeHash(byte const* input, DWORD size) {
	static_assert(sizeof(DWORD) == 4, "ComputeHash requires a 32-bit DWORD");

	DWORD state[4] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };

	// Complete 64-byte blocks are hashed straight from the caller's buffer.
	byte const* cursor = input;
	const DWORD wholeBlocks = size >> 6;
	for (DWORD block = 0; block < wholeBlocks; ++block) {
		HashTransformBlock(state, cursor);
		cursor += 64;
	}

	const DWORD tailSize = size & 0x3F;
	const DWORD lengthInBits = size << 3;
	const DWORD lengthTrailer = (size * 2) | 1;

	unsigned char finalBlock[64] = {};
	if (tailSize < 56) {
		// [bits][tail][0x80][zeros...][trailer] fits in one block.
		std::memcpy(finalBlock, &lengthInBits, sizeof(lengthInBits));
		if (tailSize != 0) {
			std::memcpy(finalBlock + 4, cursor, tailSize);
		}
		finalBlock[4 + tailSize] = 0x80;
		std::memcpy(finalBlock + 60, &lengthTrailer, sizeof(lengthTrailer));
		HashTransformBlock(state, finalBlock);
	} else {
		// Tail is too long: emit [tail][0x80][zeros...] first ...
		unsigned char tailBlock[64] = {};
		std::memcpy(tailBlock, cursor, tailSize);
		tailBlock[tailSize] = 0x80;
		HashTransformBlock(state, tailBlock);

		// ... then [bits][56 zero bytes][trailer].
		std::memcpy(finalBlock, &lengthInBits, sizeof(lengthInBits));
		std::memcpy(finalBlock + 60, &lengthTrailer, sizeof(lengthTrailer));
		HashTransformBlock(state, finalBlock);
	}

	return vector<DWORD>(state, state + 4);
}


DarkStarSword asked me about computeshaders.
At the time we had nothing that could handle anything other than ps and vs.

I quickly realized it was a massive workload.
I had never seen any of these shaders in my life and, not surprisingly, they introduced loads of new instructions.

At this point I created the dump only versions of my wrappers which dumped all 6 types of shaders.
DX11:
f:\SteamApps\common\Alien Isolation
f:\SteamApps\common\Aliens vs Predator
f:\Ubisoft\Related Designs\ANNO 2070
F:\Glyph\Games\ArcheAge
f:\SteamApps\common\Arma 3
f:\SteamApps\common\Assassin's Creed 3
f:\Ubisoft\Ubisoft Game Launcher\games\Assassin's Creed Liberation HD
f:\Ubisoft\Ubisoft Game Launcher\games\Assassin's Creed IV Black Flag Asia
f:\Ubisoft\Ubisoft Game Launcher\games\Assassin's Creed Unity
f:\SteamApps\common\assettocorsa
f:\SteamApps\common\Batman Arkham City GOTY\Binaries\Win32
f:\SteamApps\common\Batman Arkham Origins\SinglePlayer\Binaries\Win32
f:\SteamApps\common\Batman Arkham Knight\Binaries\Win64
f:\Origin Games\Battlefield 3
f:\Origin Games\Battlefield 4
f:\Origin Games\Battlefield Bad Company 2
f:\SteamApps\common\BioShock Infinite\Binaries\Win32
f:\SteamApps\common\Call of Duty Black Ops II
f:\SteamApps\common\Call of Duty Ghosts
f:\SteamApps\common\Sid Meier's Civilization V
f:\SteamApps\common\Sid Meier's Civilization Beyond Earth
f:\SteamApps\common\Company of Heroes 2
f:\Origin Games\Crysis 2
f:\Origin Games\Crysis 3\Bin32
f:\SteamApps\common\Dark Souls II Scholar of the First Sin\Game
f:\SteamApps\common\Deus Ex - Human Revolution
f:\SteamApps\common\Deus Ex Human Revolution Director's Cut
f:\SteamApps\common\DiRT 3
f:\SteamApps\common\DiRT 3 Complete Edition
f:\SteamApps\common\DiRT Rally
f:\SteamApps\common\DiRT Showdown
f:\Origin Games\dragon Age Inquisition
f:\SteamApps\common\Dungeons and Dragons Online
f:\SteamApps\common\Dying Light
f:\SteamApps\common\F.E.A.R. 3
f:\Ubisoft\Ubisoft Game Launcher\games\Far Cry 3
f:\Ubisoft\Far Cry 3 Blood Dragon\bin
f:\Ubisoft\Ubisoft Game Launcher\games\Far Cry 4\bin
f:\Origin Games\FIFA 15 DEMO
f:\SteamApps\common\Grand Theft Auto V
f:\SteamApps\common\grid 2
f:\SteamApps\common\GRID Autosport
f:\SteamApps\common\Hitman Absolution
f:\SteamApps\common\HOMEFRONT\Binaries
f:\SteamApps\common\L.A.Noire
f:\SteamApps\common\Lords Of The Fallen
f:\SteamApps\common\Lost Planet 2
f:\SteamApps\common\Max Payne 3\Max Payne 3
f:\Origin Games\Medal of Honor\MP
f:\Origin Games\Medal of Honor Warfighter
f:\SteamApps\common\Metro 2033
f:\SteamApps\common\Metro 2033 Redux
f:\SteamApps\common\Metro Last Light
f:\SteamApps\common\Metro Last Light Redux
f:\SteamApps\common\ShadowOfMordor\x64
f:\SteamApps\common\MK10\Binaries\Retail
f:\SteamApps\common\Murdered Soul Suspect\Binaries\Win64
f:\SteamApps\common\Cryptic Studios\Neverwinter\Live
f:\SteamApps\common\Oil Rush\bin
f:\SteamApps\common\pCars
f:\SteamApps\common\red faction armageddon
f:\SteamApps\common\Red Faction Guerrilla
f:\SteamApps\common\Rise of Nations
f:\SteamApps\common\Ryse Son of Rome\Bin64
f:\SteamApps\common\Saints Row the Third
f:\SteamApps\common\Saints Row IV
f:\SteamApps\common\SleepingDogs
f:\SteamApps\common\SleepingDogsDefinitiveEdition
f:\SteamApps\common\Project Silverado
f:\SteamApps\common\strikesuitzero\pc\main\Binary
f:\SteamApps\common\SSZ Directors Cut\pc\main\Binary
f:\SteamApps\common\Star Trek Online\Star Trek Online\Live
f:\SteamApps\common\Lord of the Rings Online
f:\Games\The Secret World
f:\GOG Games\The Witcher 3 Wild Hunt\bin\x64
f:\SteamApps\common\Thief\Binaries2\Win64
f:\Origin Games\Titanfall
f:\Ubisoft\Tom Clancy's H.A.W.X. 2
f:\SteamApps\common\Tomb Raider
f:\SteamApps\common\Total War Rome II
f:\SteamApps\common\Total War SHOGUN 2
f:\SteamApps\common\ToyboxTurbos
f:\SteamApps\common\TrialsPC\datapack
f:\SteamApps\common\Trials Fusion\datapack
f:\Ubisoft\Ubisoft Game Launcher\games\Watch_Dogs\bin
f:\Games\World of Warcraft
f:\SteamApps\common\Tom Clancy's Splinter Cell Blacklist
f:\SteamApps\common\The Bureau\Binaries\Win32
f:\SteamApps\common\War of the Roses
f:\SteamApps\common\Shadow Warrior\dx11

DX10:
f:\SteamApps\common\Age of Conan
f:\Ubisoft\Related Designs\ANNO 1404 - Gold Edition
f:\SteamApps\common\Assassins Creed
f:\SteamApps\common\Bioshock\Builds\Release
f:\SteamApps\common\BioShock 2\SP\Builds\Binaries
f:\SteamApps\common\BioShock 2\MP\Builds\Binaries
f:\SteamApps\common\Devil May Cry 4
f:\steamapps\common\Special Edition
f:\SteamApps\common\Far Cry 2\bin
f:\Ubisoft\James Cameron's AVATAR - THE GAME
f:\SteamApps\common\Just Cause 2
f:\SteamApps\common\lost planet extreme condition
f:\SteamApps\common\Stormrise

After this massive dump process the next part was going through the shaders with the assembler.
Initially there was a massive amount of errors and missing instructions.
The assembler has always been a slow, iterative process.
A simple change can fix a whole spectrum of instructions, while at the same time a single error can recur multiple times before getting fixed. One drawback is that my tools don't tell me where the problem is, not even in which game. Batman Arkham Knight took a long time to fix, mostly because of the massive number of shaders and the number of iterations. Sometimes you need to go through all shaders in all games to make sure a fix didn't cause something else to break.

In the end I have yet to hear of any problems with the assembler. Due to the nature of the code I would consider it fragile but stable.

My work on the wrappers is significant and has helped, especially when dumping and playing certain games. The timing could hardly have been worse for my wrapper, as I started it only a few months before 3DMigoto went open source. If it had been the other way around I wouldn't have been motivated to start such a massive project. That would probably have meant no assembler in 3DMigoto, which would have been worse for everyone.

I think Chiri, bo3b, DarkStarSword and I have each contributed what we are best at.
I'm not saying the assembler wouldn't exist without me, but it would definitely look different, and I don't know who would have broken the hash function.

The fight between Flugan "wrappers" and 3DMigoto was never fair. 3DMigoto has always been a wrapper, while I have never written a wrapper in my life. It took me quite a while to get stable with hooks and some other strange behaviour.

Thanks to everybody using my assembler; it warms my heart.
To have a critical piece of code that everyone can enjoy!
What more can you ask for?

donations: ulfjalmbrant@hotmail.com

#1
Posted 07/05/2016 02:48 AM   
I looked at the hash function back in April and cracked it, but I took a different approach to you - it was quite obvious to me that it was a obfuscated MD5 hash (the constant values give it away), so I started by implementing MD5 in Python (following the algorithm as documented on wikipedia) confirming that the result matched a reference implementation, then analysed their version (also using IDA) to work out what the difference was. Since I had already studied and implemented the official algorithm I could easily identify which parts of the disassembly followed the official algorithm and which part looked suspect (the padding), leading me to identify that the difference was in fact very small: [code] # append original length in bits mod (2 pow 64) to message # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX # XXX # XXX MS Implementation differs from RSA MD5 only in the way the size is # XXX used to pad the final block. # XXX # XXX The Real MD5 Implementation would use: # XXX message.extend(struct.pack('<Q', orig_len_bits)) # 64bit size # XXX # XXX But here they *insert* that at the *start* of the final 512bit block # XXX as a *32bit* little-endian value, and add a second *31bit* size in # XXX *bytes* at the end of the block shifted left with a final 1 added. # XXX # XXX I was wondering if they had simply made an error when implementing # XXX it, however, while standards can be hard to read and the reference # XXX implementation is needlessly complex - the part on padding with the # XXX size is pretty damn clear, and this is a little too bizzare to be by # XXX accident. Therefore, it appears they intentionally obfuscated it, for # XXX whatever pointless and misguided reason they thought they had. 
# XXX # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX if real_md5: message.extend(struct.pack('<Q', orig_len_bits)) else: message = message[:-56] + struct.pack('<I', orig_len_bits) + message[-56:] message.extend(struct.pack('<I', (orig_len_bytes << 1) | 1)) [/code] Otherwise it's a bog standard MD5. The complete implementation is: [url]https://github.com/DarkStarSword/3d-fixes/blob/master/dx11shaderanalyse.py#L197[/url] [code] def shader_hash(message, real_md5=False): ''' Follows the MD5 psuedocode from: https://en.wikipedia.org/wiki/Md5 If real_md5=False, will use a slight modification to the padding method to generate the same obfuscated MD5 hashes as d3dcompiler. ''' np.seterr(over='ignore') message = bytearray(message) # leftrotate function definition def leftrotate (x, c): return np.uint32(x << c) | np.uint32(x >> (32-c)) # Gotcha: length is in bits, not bytes: orig_len_bytes = len(message) orig_len_bits = np.uint64(orig_len_bytes * 8) # Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating # s specifies the per-round shift amounts s = [7, 12, 17, 22]*4 + [5, 9, 14, 20]*4 + [4, 11, 16, 23]*4 + [6, 10, 15, 21]*4 # Use binary integer part of the sines of integers (Radians) as constants: K = [ np.uint32(math.floor(2**32 * abs(math.sin(i)))) for i in range(1, 65) ] # Initialize variables: a0 = np.uint32(0x67452301) # A b0 = np.uint32(0xefcdab89) # B c0 = np.uint32(0x98badcfe) # C d0 = np.uint32(0x10325476) # D # Pre-processing: adding a single 1 bit # append "1" bit to message /* Notice: the input bytes are considered as bits # strings, where the first bit is the most significant bit of the byte. 
message.append(0x80) # Pre-processing: padding with zeros # append "0" bit until message length in bits ≡ 448 (mod 512) pad = 64 - (len(message) % 64) if pad < 8: message.extend([0] * (64 + pad - 8)) else: message.extend([0] * (pad - 8)) # append original length in bits mod (2 pow 64) to message # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX # XXX # XXX MS Implementation differs from RSA MD5 only in the way the size is # XXX used to pad the final block. # XXX # XXX The Real MD5 Implementation would use: # XXX message.extend(struct.pack('<Q', orig_len_bits)) # 64bit size # XXX # XXX But here they *insert* that at the *start* of the final 512bit block # XXX as a *32bit* little-endian value, and add a second *31bit* size in # XXX *bytes* at the end of the block shifted left with a final 1 added. # XXX # XXX I was wondering if they had simply made an error when implementing # XXX it, however, while standards can be hard to read and the reference # XXX implementation is needlessly complex - the part on padding with the # XXX size is pretty damn clear, and this is a little too bizzare to be by # XXX accident. Therefore, it appears they intentionally obfuscated it, for # XXX whatever pointless and misguided reason they thought they had. 
# XXX # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX if real_md5: message.extend(struct.pack('<Q', orig_len_bits)) else: message = message[:-56] + struct.pack('<I', orig_len_bits) + message[-56:] message.extend(struct.pack('<I', (orig_len_bytes << 1) | 1)) assert(len(message) % 64 == 0) # Process the message in successive 512-bit chunks: # for each 512-bit chunk of message while message: # break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 M = struct.unpack('<16I', message[:64]) message = message[64:] # Initialize hash value for this chunk: A = a0 B = b0 C = c0 D = d0 # Main loop: for i in range(64): if i < 16: F = (B & C) | (~B & D) g = i elif i < 32: F = (D & B) | (~D & C) g = np.uint32((5*i + 1) % 16) elif i < 48: F = B ^ C ^ D g = np.uint32((3*i + 5) % 16) else: F = C ^ (B | ~D) g = np.uint32((7*i) % 16) dTemp = D D = C C = B B = np.uint32(B + leftrotate(np.uint32(A + F + K[i] + M[g]), s[i])) A = dTemp # Add this chunk's hash to result so far: a0 = np.uint32(a0 + A) b0 = np.uint32(b0 + B) c0 = np.uint32(c0 + C) d0 = np.uint32(d0 + D) # var char digest[16] := a0 append b0 append c0 append d0 //(Output is in little-endian) return '%08x%08x%08x%08x' % struct.unpack('>4I', struct.pack('<4I', a0, b0, c0, d0)) [/code]
I looked at the hash function back in April and cracked it, but I took a different approach from you - it was quite obvious to me that it was an obfuscated MD5 hash (the constant values give it away), so I started by implementing MD5 in Python (following the algorithm as documented on Wikipedia), confirming that the result matched a reference implementation, then analysed their version (also using IDA) to work out what the difference was. Since I had already studied and implemented the official algorithm I could easily identify which parts of the disassembly followed the official algorithm and which part looked suspect (the padding), leading me to identify that the difference was in fact very small:

# append original length in bits mod (2 pow 64) to message

# XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# XXX
# XXX MS Implementation differs from RSA MD5 only in the way the size is
# XXX used to pad the final block.
# XXX
# XXX The Real MD5 Implementation would use:
# XXX message.extend(struct.pack('<Q', orig_len_bits)) # 64bit size
# XXX
# XXX But here they *insert* that at the *start* of the final 512bit block
# XXX as a *32bit* little-endian value, and add a second *31bit* size in
# XXX *bytes* at the end of the block shifted left with a final 1 added.
# XXX
# XXX I was wondering if they had simply made an error when implementing
# XXX it, however, while standards can be hard to read and the reference
# XXX implementation is needlessly complex - the part on padding with the
# XXX size is pretty damn clear, and this is a little too bizarre to be by
# XXX accident. Therefore, it appears they intentionally obfuscated it, for
# XXX whatever pointless and misguided reason they thought they had.
# XXX
# XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

# Excerpt from shader_hash(): `message` has already been padded with 0x80 and
# zeros so that exactly 56 bytes of the final block precede the size field(s).
if real_md5:
    # Standard MD5: 64-bit little-endian bit count closes the final block.
    message.extend(struct.pack('<Q', orig_len_bits))
else:
    # d3dcompiler variant: 32-bit bit count goes 56 bytes before the end
    # (i.e. at the start of the final block) ...
    message = message[:-56] + struct.pack('<I', orig_len_bits) + message[-56:]
    # ... and (byte count << 1) | 1 closes the block.
    message.extend(struct.pack('<I', (orig_len_bytes << 1) | 1))


Otherwise it's a bog standard MD5. The complete implementation is:

https://github.com/DarkStarSword/3d-fixes/blob/master/dx11shaderanalyse.py#L197

def shader_hash(message, real_md5=False):
    '''
    Follows the MD5 pseudocode from:
    https://en.wikipedia.org/wiki/Md5

    If real_md5=False, will use a slight modification to the padding method to
    generate the same obfuscated MD5 hashes as d3dcompiler.

    message  -- bytes-like object to hash (copied into a bytearray; the
                caller's buffer is never modified).
    real_md5 -- True for the standard MD5 padding, False (default) for the
                d3dcompiler padding variant.

    Returns the digest as a 32-character lowercase hexadecimal string.

    All arithmetic is done on plain Python ints masked to 32 bits, so no
    global numeric error state is touched while hashing.
    '''

    MASK32 = 0xFFFFFFFF
    MASK64 = 0xFFFFFFFFFFFFFFFF

    message = bytearray(message)

    # leftrotate function definition (x must already be reduced to 32 bits)
    def leftrotate(x, c):
        return ((x << c) | (x >> (32 - c))) & MASK32

    # Gotcha: length is in bits, not bytes:
    orig_len_bytes = len(message)
    orig_len_bits = orig_len_bytes * 8

    # Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating

    # s specifies the per-round shift amounts
    s = [7, 12, 17, 22]*4 + [5, 9, 14, 20]*4 + [4, 11, 16, 23]*4 + [6, 10, 15, 21]*4

    # Use binary integer part of the sines of integers (Radians) as constants:
    K = [int(math.floor(2**32 * abs(math.sin(i)))) for i in range(1, 65)]

    # Initialize variables:
    a0 = 0x67452301 # A
    b0 = 0xefcdab89 # B
    c0 = 0x98badcfe # C
    d0 = 0x10325476 # D

    # Pre-processing: adding a single 1 bit.
    # Notice: the input bytes are considered as bit strings, where the first
    # bit is the most significant bit of the byte, hence 0x80.
    message.append(0x80)

    # Pre-processing: padding with zeros
    # append "0" bit until message length in bits = 448 (mod 512)
    pad = 64 - (len(message) % 64)
    if pad < 8:
        # Not enough room left for the 8 size bytes: spill into another block.
        message.extend([0] * (64 + pad - 8))
    else:
        message.extend([0] * (pad - 8))

    # append original length in bits mod (2 pow 64) to message

    # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    # XXX
    # XXX MS Implementation differs from RSA MD5 only in the way the size is
    # XXX used to pad the final block.
    # XXX
    # XXX The Real MD5 Implementation would use:
    # XXX message.extend(struct.pack('<Q', orig_len_bits)) # 64bit size
    # XXX
    # XXX But here they *insert* that at the *start* of the final 512bit block
    # XXX as a *32bit* little-endian value, and add a second *31bit* size in
    # XXX *bytes* at the end of the block shifted left with a final 1 added.
    # XXX
    # XXX I was wondering if they had simply made an error when implementing
    # XXX it, however, while standards can be hard to read and the reference
    # XXX implementation is needlessly complex - the part on padding with the
    # XXX size is pretty damn clear, and this is a little too bizarre to be by
    # XXX accident. Therefore, it appears they intentionally obfuscated it, for
    # XXX whatever pointless and misguided reason they thought they had.
    # XXX
    # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

    # The size fields are masked to their packed width so that oversized inputs
    # wrap (as fixed-width unsigned arithmetic does) instead of making
    # struct.pack raise.
    if real_md5:
        message.extend(struct.pack('<Q', orig_len_bits & MASK64))
    else:
        message = message[:-56] + struct.pack('<I', orig_len_bits & MASK32) + message[-56:]
        message.extend(struct.pack('<I', ((orig_len_bytes << 1) | 1) & MASK32))

    assert(len(message) % 64 == 0)

    # Process the message in successive 512-bit chunks:
    # for each 512-bit chunk of message
    for offset in range(0, len(message), 64):
        # break chunk into sixteen 32-bit words M[j], 0 <= j <= 15
        # (unpack_from reads in place; no per-chunk copy of the remainder)
        M = struct.unpack_from('<16I', message, offset)

        # Initialize hash value for this chunk:
        A = a0
        B = b0
        C = c0
        D = d0

        # Main loop:
        for i in range(64):
            if i < 16:
                F = (B & C) | (~B & D)
                g = i
            elif i < 32:
                F = (D & B) | (~D & C)
                g = (5*i + 1) % 16
            elif i < 48:
                F = B ^ C ^ D
                g = (3*i + 5) % 16
            else:
                F = C ^ (B | ~D)
                g = (7*i) % 16
            # ~x is negative on Python ints; reduce F back to 32 bits.
            F &= MASK32
            dTemp = D
            D = C
            C = B
            B = (B + leftrotate((A + F + K[i] + M[g]) & MASK32, s[i])) & MASK32
            A = dTemp

        # Add this chunk's hash to result so far:
        a0 = (a0 + A) & MASK32
        b0 = (b0 + B) & MASK32
        c0 = (c0 + C) & MASK32
        d0 = (d0 + D) & MASK32

    # digest := a0 append b0 append c0 append d0, each word little-endian;
    # repack the bytes as big-endian words so '%08x' prints them in byte order.
    return '%08x%08x%08x%08x' % struct.unpack('>4I', struct.pack('<4I', a0, b0, c0, d0))

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

#2
Posted 07/05/2016 05:10 AM   
Great, now we have a python implementation of the hash function. To work within the wrapper it still has to be coded in c++. I really went to the source when perfectly porting the x86 assembler to c++. I don't even know how much I would have benefited by knowing it was a strange version of MD5. Most of the code would have to be written to do a reference implementation of MD5 in c++. And then figure out how to modify the code to produce the official result according to shader hash. As I did a perfect x86 ASM -> c++ translation I had a working c++ version as soon as I was done. The algorithm was cracked over two years ago in early january 2014 by yours truly. I created this thread to showcase my active contribution. This includes my "wrappers" as they were a big part of creating the assembler. By dumping all the DX10/11 games in my library I got a lot of shaders to work on. Constantly encountering shader ASM that breaks the assembler. I hope you have any use for the python version or maybe you were just curious. Call me ignorant but I had no knowledge of how MD5 is implemented when I recreated the algoritm. My method would have worked regardless of how the hash function works as the x86 assembler code does not lie and tells me all the details. I have only looked afterwards on MD5 and came to the conclusion that it looked very familiar but nailing down the differences straight from IDA Pro is still tricky but clearly possible as DarkStarSword demonstrated. I'm not sure why I feel that it is like a competition that I keep loosing. If you recognize the MD5 hash constants you can quickly do the massive inner loop. With no MD5 knowledge it was really hard but not that timeconsuming as a pattern emerged. I don't remember presicely but using a demo version of IDA Pro I was probably done within a long day. At the end of the day the hash function is really important unless you use the MS compiler to produce the hash like 3DMigoto does most of the time. 
My solution was building an assembler. A vital part of the puzzle is cracking the hash function as otherwise it's game over. As time goes by more and more people crack the hash function. The assembler is the big ugly beast. With the help of the little documentation available and some initial datatypes: [code] struct shader_ins { unsigned opcode : 11; unsigned _11_23 : 13; unsigned length : 7; unsigned extended : 1; }; struct token_operand { unsigned comps_enum : 2; /* sm4_operands_comps */ unsigned mode : 2; /* sm4_operand_mode */ unsigned sel : 8; unsigned file : 8; /* SM_FILE */ unsigned num_indices : 2; unsigned index0_repr : 3; /* sm4_operand_index_repr */ unsigned index1_repr : 3; /* sm4_operand_index_repr */ unsigned index2_repr : 3; /* sm4_operand_index_repr */ unsigned extended : 1; }; [/code] Beyond that it's usually a lot of hard work, guesswork, backtracking, validating. I can't really explain the process but wheen it works it's like magic. A large chunk of instructions gone from the error file. Keep on iterating until there are no errors. Hello Batman, 35k shaders, a few thousand entries in the error file. Time to work and after chunk by chunk has been fixed there are no more errors. As far as I know the assembler is unchanged since I did that massive dump after Arkham Knight's release. The complete list of games can be found in the first post. Previously I mentioned that the assembler is fragile. By going to the extreme I have managed to get it pretty stable. This requires dumping lots of games and at one point I bought almost all dx11 games regardless of 3D likelihood. This obviously cost a lot of money and I'm no longer pursuing that path. I'm the only one who has spent any significant amount of time with the assembler. During crunch time I probably clocked 12h a day for a duration of almost three weeks. This was both dumping games and fixing all the assembler errors. 
The assembler might still fail on certain shaders but unless we are actively fixing that shader we don't notice. I'm not sure why but people sometimes ignore my contributions to 3DMigoto. Guess I will make a new list of games using the assembler: Hand of Fate Street Fighter V Totalwar Attila Just Cause 3 Totalwar Shogun 2 Totalwar Rome 2 The Witcher 3: Wild Hunt Kholat Hatred The Park S.T.A.L.K.E.R. Call of Pripyat Dark Souls 3 The Flame in the Flood Assassin’s Creed Chronicles: Russia Assassin’s Creed Chronicles: India Far Cry Primal Rise of the Tomb Raider Mortal Kombat X Fallout 4 State of Decay Year One Crysis 2 Crysis 3 I'm not claiming it's a complete list but the assembler was up and running for Crysis 3. Due to history there have been a bit confusion on certain facts. The assembler in it's current form was made by me for 3DMigoto. It is not my personal assembler. It's 3DMigoto's assembler nothing more nothing less. A vital piece for many game fixes. Because if my important contribution I want to be counted along side Chiri, bo3b, DarkStarSword as a 3DMigoto developer. I got involved with 3D Migoto almost 2,5 years ago. Together we have made 3DMigoto great. The assembler has been tested in many fixes and so far it has been flawless if you write pretty code. I have trouble telling if an assembler bug will occur in the future or not. There is no real reason to compare who has done the most or the most important parts. If I remove any single developer from the list 3DMigoto wouldn't be what it is today. We tend to forget those who are not Active: HeliX Chiri Flugan and instead focus on the active part: bo3b DarkStarSword This thread is about increasing awareness of what I have done in the name of 3DMigoto. I accept donations as appreciation of my work that made many fixes possible at all. If you donate expect nothing in return as I'm no shader hacker.
Great, now we have a python implementation of the hash function.

To work within the wrapper it still has to be coded in c++.

I really went to the source when perfectly porting the x86 assembler to c++.

I don't even know how much I would have benefited by knowing it was a strange version of MD5.

Most of the code would have to be written to do a reference implementation of MD5 in c++.
And then figure out how to modify the code to produce the official result according to shader hash.

As I did a perfect x86 ASM -> c++ translation I had a working c++ version as soon as I was done.
The algorithm was cracked over two years ago, in early January 2014, by yours truly.

I created this thread to showcase my active contribution. This includes my "wrappers" as they were a big part of creating the assembler. By dumping all the DX10/11 games in my library I got a lot of shaders to work on. Constantly encountering shader ASM that breaks the assembler.

I hope you have some use for the Python version, or maybe you were just curious.
Call me ignorant, but I had no knowledge of how MD5 is implemented when I recreated the algorithm.
My method would have worked regardless of how the hash function works, as the x86 assembler code does not lie and tells me all the details. I only looked at MD5 afterwards and came to the conclusion that it looked very familiar, but nailing down the differences straight from IDA Pro is still tricky, though clearly possible as DarkStarSword demonstrated.

I'm not sure why I feel that it is like a competition that I keep losing.
If you recognize the MD5 hash constants you can quickly do the massive inner loop.
With no MD5 knowledge it was really hard but not that time-consuming, as a pattern emerged.
I don't remember precisely, but using a demo version of IDA Pro I was probably done within a long day.

At the end of the day the hash function is really important unless you use the MS compiler to produce the hash like 3DMigoto does most of the time.
My solution was building an assembler.
A vital part of the puzzle is cracking the hash function as otherwise it's game over.

As time goes by more and more people crack the hash function.

The assembler is the big ugly beast.
With the help of the little documentation available and some initial datatypes:
// Bit-field view of one instruction token; the four fields add up to
// 11 + 13 + 7 + 1 = 32 bits.
// NOTE(review): bit-field packing order is implementation-defined - confirm the
// target compiler lays the first field in the low bits before overlaying this
// on raw shader bytecode.
struct shader_ins {
    unsigned int opcode   : 11;
    unsigned int _11_23   : 13;  // presumably the bits 11..23 of the token - TODO confirm
    unsigned int length   : 7;
    unsigned int extended : 1;
};
// Bit-field view of one operand token; the nine fields add up to
// 2 + 2 + 8 + 8 + 2 + 3 + 3 + 3 + 1 = 32 bits.
// The trailing comments name the enumeration each field is expected to hold.
// NOTE(review): bit-field packing order is implementation-defined - confirm the
// target compiler lays the first field in the low bits before overlaying this
// on raw shader bytecode.
struct token_operand {
    unsigned int comps_enum  : 2;  /* sm4_operands_comps */
    unsigned int mode        : 2;  /* sm4_operand_mode */
    unsigned int sel         : 8;
    unsigned int file        : 8;  /* SM_FILE */
    unsigned int num_indices : 2;
    unsigned int index0_repr : 3;  /* sm4_operand_index_repr */
    unsigned int index1_repr : 3;  /* sm4_operand_index_repr */
    unsigned int index2_repr : 3;  /* sm4_operand_index_repr */
    unsigned int extended    : 1;
};

Beyond that it's usually a lot of hard work, guesswork, backtracking and validating.
I can't really explain the process, but when it works it's like magic.
A large chunk of instructions gone from the error file.
Keep on iterating until there are no errors.

Hello Batman, 35k shaders, a few thousand entries in the error file.
Time to work and after chunk by chunk has been fixed there are no more errors.

As far as I know the assembler is unchanged since I did that massive dump after Arkham Knight's release. The complete list of games can be found in the first post.

Previously I mentioned that the assembler is fragile. By going to the extreme I have managed to get it pretty stable. This requires dumping lots of games and at one point I bought almost all dx11 games regardless of 3D likelihood.
This obviously cost a lot of money and I'm no longer pursuing that path.

I'm the only one who has spent any significant amount of time with the assembler. During crunch time I probably clocked 12h a day for a duration of almost three weeks.
This was both dumping games and fixing all the assembler errors.
The assembler might still fail on certain shaders but unless we are actively fixing that shader we don't notice.

I'm not sure why but people sometimes ignore my contributions to 3DMigoto.
Guess I will make a new list of games using the assembler:
Hand of Fate
Street Fighter V
Totalwar Attila
Just Cause 3
Totalwar Shogun 2
Totalwar Rome 2
The Witcher 3: Wild Hunt
Kholat
Hatred
The Park
S.T.A.L.K.E.R. Call of Pripyat
Dark Souls 3
The Flame in the Flood
Assassin’s Creed Chronicles: Russia
Assassin’s Creed Chronicles: India
Far Cry Primal
Rise of the Tomb Raider
Mortal Kombat X
Fallout 4
State of Decay Year One
Crysis 2
Crysis 3

I'm not claiming it's a complete list but the assembler was up and running for Crysis 3.
Due to history there has been a bit of confusion about certain facts.
The assembler in its current form was made by me for 3DMigoto. It is not my personal assembler.
It's 3DMigoto's assembler nothing more nothing less. A vital piece for many game fixes.

Because of my important contribution I want to be counted alongside Chiri, bo3b and DarkStarSword as a 3DMigoto developer. I got involved with 3DMigoto almost 2.5 years ago. Together we have made 3DMigoto great.
The assembler has been tested in many fixes and so far it has been flawless if you write pretty code.
I have trouble telling if an assembler bug will occur in the future or not.
There is no real reason to compare who has done the most or the most important parts.
If I remove any single developer from the list 3DMigoto wouldn't be what it is today.

We tend to forget those who are not active:
HeliX
Chiri
Flugan
and instead focus on the active part:
bo3b
DarkStarSword

This thread is about increasing awareness of what I have done in the name of 3DMigoto.
I accept donations as appreciation of my work that made many fixes possible at all.
If you donate expect nothing in return as I'm no shader hacker.

Thanks to everybody using my assembler; it warms my heart.
To have a critical piece of code that everyone can enjoy!
What more can you ask for?

donations: ulfjalmbrant@hotmail.com

#3
Posted 07/05/2016 01:17 PM   
you have reason flugan, for my part i fix the credits, i was time out, and after the confusion, and as i, think a lot people dont know all the history. thanks
you have reason flugan, for my part i fix the credits, i was time out, and after the confusion,
and as i, think a lot people dont know all the history. thanks

Windows 7 64bit, i7, GTX680, 16GB, Benq 120hz

#4
Posted 07/05/2016 04:10 PM   
I did that mostly out of curiosity - I like to understand how something works, not to just implement it blindly, and it didn't sit right with me that I knew it was almost MD5 from googling some of the constants, but using a standard MD5 hash did not produce the same result. I used Python because it allows me to rapidly iterate the code for experimentation - I've used it in many reverse engineering projects. The language doesn't really matter though - once the algorithm is known it's trivial to translate into any language. BTW why do you keep mentioning Crysis 3 as the first game the assembler was used in (or am I misunderstanding you)? The Witcher 3 was the first game we used it in - before that the assembler wasn't even hooked up in 3DMigoto and no one could use it. I chose to use your assembler as I believed it would save us time (probably about a month as my 2nd choice would have been to write my own assembler) and would provide a superior solution to using the decompiler (which was another backup plan and Bo3b did do some work towards that end) - your help in that fix to add compute shader support to the assembler was absolutely invaluable! Also, Lichdom Battlemage was the first CryEngine 3 fix that used your assembler - the Crysis 3 fix was based on that one.
I did that mostly out of curiosity - I like to understand how something works, not to just implement it blindly, and it didn't sit right with me that I knew it was almost MD5 from googling some of the constants, but using a standard MD5 hash did not produce the same result. I used Python because it allows me to rapidly iterate the code for experimentation - I've used it in many reverse engineering projects. The language doesn't really matter though - once the algorithm is known it's trivial to translate into any language.

BTW why do you keep mentioning Crysis 3 as the first game the assembler was used in (or am I misunderstanding you)? The Witcher 3 was the first game we used it in - before that the assembler wasn't even hooked up in 3DMigoto and no one could use it. I chose to use your assembler as I believed it would save us time (probably about a month as my 2nd choice would have been to write my own assembler) and would provide a superior solution to using the decompiler (which was another backup plan and Bo3b did do some work towards that end) - your help in that fix to add compute shader support to the assembler was absolutely invaluable! Also, Lichdom Battlemage was the first CryEngine 3 fix that used your assembler - the Crysis 3 fix was based on that one.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

#5
Posted 07/05/2016 05:09 PM   
[b]S.T.A.L.K.E.R. Call of Pripyat[/b] The only way I was able to play in 3D back then was to use static lightning. I've played all 3 in 2D at max setting and full effects then when I bought my 3DVkit redid a few hours in 3D with static lightning since that's the only way it was looking fine. Now if I can play in 3D and full dyn lighning I will sure take another look one day. The Zone is alive and is always different. :-)
S.T.A.L.K.E.R. Call of Pripyat

The only way I was able to play in 3D back then was to use static lighting. I've played all 3 in 2D at max settings and full effects; then, when I bought my 3DVkit, I redid a few hours in 3D with static lighting since that's the only way it looked fine.

Now if I can play in 3D with full dynamic lighting I will surely take another look one day. The Zone is alive and is always different. :-)

3D Vision must live! NVIDIA, don't let us down!

#6
Posted 07/05/2016 05:32 PM   
I don't remember exactly how things began. All the dumping and dumped files are long gone (HDD raid 0 crash) The process of updating the assembler to support compute shaders etc is found in my mercurial repository. The actual addition of the assembler to 3DMigoto is on github. For the actual list that was just me Walking the blogspot backwards and I just stopped at crysis 3. You have a much better recollection of the early days as you were the first to use it. Finally would you have played with the hash algoritm if it was not a modified MD5 hash? Just curious :) I find it remarkable how I managed to write the assembler with minimal documentation. I never checked the WDK for ASM documentation just the MSDN which leaves massive gaps. They might tell you about an instruction but nothing about the binary layout. I know the assembler can be replaced which would make my 18 month calendar time development useless. As it stands now I can feel proud about major fixes relying on the 3DMigoto assembler. I know I've only contributed a tiny fraction of the code in 3DMigoto. I spent much time making my "wrappers" play the latest fixes. Because I don't wrap anything in the end I could not keep up and development stopped. At this time the assembler was solid and hooked up to 3DMigoto and the rest is history. I want you to embrace our assembler not replace it. I know that it's slow as hell especially when assembling >100 000 shaders whicch I had to do multiple times during development. The disassembler is way faster than the assembler. I'm glad DarkStarSword is recieving money as he keeps doing the almost impossible very efficiently. I just recovered from a hypomanic episode and it's true you no longer value money and go on buying Sprees. At the end of the three month period I had spent €900 on Counter-Strike: Global Offensive skins. It's pretty insane literally and don't wish my illness on anyone else. If you give Money to DarkStarSword you will contribute to 3DMigoto and fixes. 
If you give them to me who knows what I would buy. I'm fully capable of saving money but when I'm manic I deplete the savings so it's pretty bad. I've been stuck in 2D land known as CS:GO since christmas and I still havn't finished that game. I think this is as good Place as any to bring up my autism as I already feel like I'm jumping between subjects and confusing already confusing matters. You can't read this thread and in the end know what I want. I know that money money don't grow on trees as I'm already recieving the minimum pension available for my early retirement. I have to do my best with what I have. What I want is free. I want people to add me on steam: http://steamcommunity.com/id/Flugan I really need some good friends who knows what 3D Vision is. Nobody I know have even heard of it. If you ever get an assembler bug you can't solve send me a private message and I will fix it. Otherwise I assume 3DMigoto to be in the safe hands of bo3b and DarkStarSword. I hope to find people who's favorite game is unfixed. Like the situation with me and the Trials series of games. The difficulty level in those games are insane. I think the world would have been a lot different if 3DMigoto stayed closed source. I positioned myself as the only solution in town doing active development. It was a massive undertaking based oon The fact that you can change the vtable of COM objects. DirectX is all about COM objects. If MS didn't protect the vtables from tampering by rewriting the vtables regularly this naive solution would work. I ended up using Nektra in-proc hooks GPL v3 which which brought stable hooks. I struggled with a crash bug and solved it by not hooking certain deferredd context. Now I could plaly Assassin's Creed 4 fix. Afterwards I look back at how much time was wasted on a solution that has been retired for a long time. It had it's use while 3DMigoto was stuck in Windows 7. No point in using my "wrapper" if 3DMigoto is already working. 
Having serious problems with my dx9 and dx10 "wrappers" as they are almost working. I thought getting dx12 up and running on my framework would be simpler than 3DMigoto but I have yet to discover any dx12 games. Big Flashback I spent all my savings on CS:GO skins leaving Little over for buying games. I did buy Killer Instinct but 3D Vision never activates on my computer. Wonder if the VG278H is different in any way.
I don't remember exactly how things began. All the dumping and dumped files are long gone (HDD raid 0 crash)
The process of updating the assembler to support compute shaders etc is found in my mercurial repository.
The actual addition of the assembler to 3DMigoto is on github.
For the actual list, that was just me walking the blogspot backwards, and I just stopped at Crysis 3.
You have a much better recollection of the early days as you were the first to use it.

Finally, would you have played with the hash algorithm if it was not a modified MD5 hash?
Just curious :)

I find it remarkable how I managed to write the assembler with minimal documentation.
I never checked the WDK for ASM documentation just the MSDN which leaves massive gaps.
They might tell you about an instruction but nothing about the binary layout.

I know the assembler can be replaced which would make my 18 month calendar time development useless.
As it stands now I can feel proud about major fixes relying on the 3DMigoto assembler.

I know I've only contributed a tiny fraction of the code in 3DMigoto. I spent much time making my "wrappers" play the latest fixes. Because I don't wrap anything in the end I could not keep up and development stopped. At this time the assembler was solid and hooked up to 3DMigoto and the rest is history.

I want you to embrace our assembler, not replace it. I know that it's slow as hell, especially when assembling >100 000 shaders, which I had to do multiple times during development. The disassembler is way faster than the assembler. I'm glad DarkStarSword is receiving money as he keeps doing the almost impossible very efficiently.

I just recovered from a hypomanic episode and it's true you no longer value money and go on buying sprees. At the end of the three month period I had spent €900 on Counter-Strike: Global Offensive skins.
It's pretty insane, literally, and I don't wish my illness on anyone else.
If you give money to DarkStarSword you will contribute to 3DMigoto and fixes.

If you give it to me, who knows what I would buy. I'm fully capable of saving money but when I'm manic I deplete the savings so it's pretty bad.

I've been stuck in 2D land known as CS:GO since Christmas and I still haven't finished that game.

I think this is as good a place as any to bring up my autism as I already feel like I'm jumping between subjects and confusing already confusing matters. You can't read this thread and in the end know what I want.

I know that money doesn't grow on trees as I'm already receiving the minimum pension available for my early retirement. I have to do my best with what I have.

What I want is free. I want people to add me on steam:

http://steamcommunity.com/id/Flugan

I really need some good friends who know what 3D Vision is.
Nobody I know has even heard of it.

If you ever get an assembler bug you can't solve send me a private message and I will fix it.
Otherwise I assume 3DMigoto to be in the safe hands of bo3b and DarkStarSword.

I hope to find people whose favorite game is unfixed. Like the situation with me and the Trials series of games.
The difficulty level in those games is insane.

I think the world would have been a lot different if 3DMigoto stayed closed source. I positioned myself as the only solution in town doing active development. It was a massive undertaking based on the fact that you can change the vtable of COM objects. DirectX is all about COM objects. If MS didn't protect the vtables from tampering by rewriting the vtables regularly this naive solution would work. I ended up using Nektra in-proc hooks (GPL v3), which brought stable hooks. I struggled with a crash bug and solved it by not hooking certain deferred contexts.
Now I could play the Assassin's Creed 4 fix. Afterwards I look back at how much time was wasted on a solution that has been retired for a long time. It had its use while 3DMigoto was stuck in Windows 7.

No point in using my "wrapper" if 3DMigoto is already working.

Having serious problems with my dx9 and dx10 "wrappers" as they are almost working.
I thought getting dx12 up and running on my framework would be simpler than 3DMigoto but I have yet to discover any dx12 games. Big flashback: I spent all my savings on CS:GO skins, leaving little left over for buying games.

I did buy Killer Instinct but 3D Vision never activates on my computer. Wonder if the VG278H is different in any way.

Thanks to everybody using my assembler it warms my heart.
To have a critical piece of code that everyone can enjoy!
What more can you ask for?

donations: ulfjalmbrant@hotmail.com

#7
Posted 07/05/2016 08:26 PM   
I always like to emphasize the teamwork that is involved in any big software project. I think it's really interesting to watch the interplay between every member's strengths and weaknesses. Chiri kicked this off with an absolute ton of work to get 3Dmigoto even into it's limping state. He did not know how to calculate the needed checksum for the shader files, and his solution was to write the Decompiler. That was a bold approach that I was initially very skeptical of using, but it's proven to be worthwhile. Flugan's assembler is a valuable tool. It gives us a solid baseline for any shader that we can't otherwise touch with the Decompiler. It's not necessarily the first tool of choice, because ASM is always harder to work with than HLSL. But when it's necessary, like for Compute Shaders, it makes fixes possible that otherwise wouldn't happen. It was languishing in 3Dmigoto for more than a year, because the Decompiler was handling all the scenarios we needed, even being rough around the edges and having known bugs. However, when we ran into the newest gaming fad of tiled lighting where they use Compute Shaders, the Decompiler wasn't up to the job. I've put quite a bit of effort into the Decompiler, but Compute Shaders use nested struct definitions extensively, which I don't have a good answer for yet. Rather than wait to get the Decompiler fixed, and also I wanted to get out of the business of updating the Decompiler for every game that came out, it seemed like a good time to give ASM a try. So, I took it upon myself to hook up the Assembler and make it a viable part of 3Dmigoto. That was an OK start, but the Assembler was not ready for prime time at that point, because it had never been used for actual fixes. The reassembling of thousands of game files is a great validation step, but what really matters is being able to modify the ASM code and get fixes. 
This is where DarkStarSword stepped in to fix some key bugs in the Assembler, and make it possible to actually use the Assembler to make shader fixes. Lastly, with a working Assembler, Flugan came back to fill in the gaps for all the new Compute/Geometry/Hull shader instructions that were not implemented in the original Assembler. Going through numerous games at this point was very helpful to get confidence in the Assembler coverage. One other fix Flugan provided was the fix for floating point accuracy, where the Microsoft disassembler actually damages the output, introducing numeric inaccuracy and inevitable bugs. DarkStarSword found the problem, and Flugan came up with a good fix. So... All in all, I just think the teamwork itself is fascinating and how our different strengths and weaknesses made something awesome. It's always fun to be the superstar, but the best results still come from teamwork. Even the genius of Helix would have had much less impact if only he was fixing games, and there had not been people like eqzitara and the earlier modders picking up the tool to make hundreds of fixes.
I always like to emphasize the teamwork that is involved in any big software project. I think it's really interesting to watch the interplay between every member's strengths and weaknesses.


Chiri kicked this off with an absolute ton of work to get 3Dmigoto even into its limping state. He did not know how to calculate the needed checksum for the shader files, and his solution was to write the Decompiler. That was a bold approach that I was initially very skeptical of using, but it's proven to be worthwhile.


Flugan's assembler is a valuable tool. It gives us a solid baseline for any shader that we can't otherwise touch with the Decompiler. It's not necessarily the first tool of choice, because ASM is always harder to work with than HLSL. But when it's necessary, like for Compute Shaders, it makes fixes possible that otherwise wouldn't happen.


It was languishing in 3Dmigoto for more than a year, because the Decompiler was handling all the scenarios we needed, even being rough around the edges and having known bugs. However, when we ran into the newest gaming fad of tiled lighting where they use Compute Shaders, the Decompiler wasn't up to the job. I've put quite a bit of effort into the Decompiler, but Compute Shaders use nested struct definitions extensively, which I don't have a good answer for yet.

Rather than wait to get the Decompiler fixed, and also I wanted to get out of the business of updating the Decompiler for every game that came out, it seemed like a good time to give ASM a try. So, I took it upon myself to hook up the Assembler and make it a viable part of 3Dmigoto.


That was an OK start, but the Assembler was not ready for prime time at that point, because it had never been used for actual fixes. The reassembling of thousands of game files is a great validation step, but what really matters is being able to modify the ASM code and get fixes.

This is where DarkStarSword stepped in to fix some key bugs in the Assembler, and make it possible to actually use the Assembler to make shader fixes.


Lastly, with a working Assembler, Flugan came back to fill in the gaps for all the new Compute/Geometry/Hull shader instructions that were not implemented in the original Assembler. Going through numerous games at this point was very helpful to get confidence in the Assembler coverage.

One other fix Flugan provided was the fix for floating point accuracy, where the Microsoft disassembler actually damages the output, introducing numeric inaccuracy and inevitable bugs. DarkStarSword found the problem, and Flugan came up with a good fix.


So... All in all, I just think the teamwork itself is fascinating and how our different strengths and weaknesses made something awesome.

It's always fun to be the superstar, but the best results still come from teamwork. Even the genius of Helix would have had much less impact if only he was fixing games, and there had not been people like eqzitara and the earlier modders picking up the tool to make hundreds of fixes.

Acer H5360 (1280x720@120Hz) - ASUS VG248QE with GSync mod - 3D Vision 1&2 - Driver 372.54
GTX 970 - i5-4670K@4.2GHz - 12GB RAM - Win7x64+evilKB2670838 - 4 Disk X25 RAID
SAGER NP9870-S - GTX 980 - i7-6700K - Win10 Pro 1607
Latest 3Dmigoto Release
Bo3b's School for ShaderHackers

#8
Posted 07/06/2016 03:34 AM   
One example of teamwork was how DarkStarSword managed to modify input and output signatures without touching the assembler. It was high on his priority list and I was not up for the task at that point. He modified the signatures before handing it off to the assembler who has no idea that anything has been modified and just assembles the shader as normal and signs it with a hash including DarkStarSwords signature changes. If I remember correctly the signatures are specified on commented lines in the shader so modifying commented lines needs to be handled. They are also not part of the SHDR/SHEX chunks which is handled by the assembler. I know my "wrapper" has been useful but I don't know to what extent. It is pretty tiny being <10 000 lines of code with the assembler being a significant chunk. The workflow is clearly different as I dump binary and then later convert to ASM and HLSL. I use the latest HLSL decompiler from 3DMigoto. My "wrapper" borrowed from 3DMigoto in significant areas like getting good hooks. The vtable protection could have been the end of my "wrapper". The actual end of my "wrapper" was when 3DMigoto started hashing textures and used a HW accelerated crc32 library at which point I just gave up the pursuit. The code was already more advanced than I ever expected. I started to see how you can't compete with a stable wrapper as doing changes with loads of hooks became a bottleneck. I want to thank all the shaderhackers who made working on my "wrapper" a pleasant experience. When I finally got a stable fix running without crashes it was candy for my eyes. What motivated me was seeing these Amazing game fixes in action. I own a lot of games because they were fixed. I spent a lot of money on games as they are the tools of the trade. I no longer have that kind of money. Having spent significant time in 2D land makes me realize makes me realize how complex 3D Vision can be. Matching driver version, custom profiles, multiple convergence toggles. 
Almost a handful bound keys. Just like Chiri I had the intension to make some money by creating a wrapper in the only way I knew how. Hooks + ASM. I now know what Chiri realized which is that it's impossible to make money by giving something away for free. Creating any kind of DRM on the wrapper or fixes felt like a waste of time and resources. I did have some plan to checksum the exe to block pirated copies as I don't like piracy. What I didn't know when I started was that 3DMigoto was going to be released as open source. I got a few weeks notice in advance and my options were continue or give up. As fixes have been made in 3DMigoto the work there by default. The only exceptions I know of is when OS differences made 3DMigoto crash. I don't even know which wrapper is fastest and bo3b has been great at profiling and optimizing 3DMigoto. In all of this I really wanted to become a shader fixer but considering the main game I play which is counter-strike: global offensive I would never place a modified dll in that game due to a permanent ban due to cheating.
One example of teamwork was how DarkStarSword managed to modify input and output signatures without touching the assembler. It was high on his priority list and I was not up for the task at that point.

He modified the signatures before handing the shader off to the assembler, which has no idea that anything has been modified and just assembles the shader as normal and signs it with a hash including DarkStarSword's signature changes.

If I remember correctly the signatures are specified on commented lines in the shader so modifying commented lines needs to be handled. They are also not part of the SHDR/SHEX chunks which are handled by the assembler.

I know my "wrapper" has been useful but I don't know to what extent. It is pretty tiny being <10 000 lines of code with the assembler being a significant chunk. The workflow is clearly different as I dump binary and then later convert to ASM and HLSL. I use the latest HLSL decompiler from 3DMigoto. My "wrapper" borrowed from 3DMigoto in significant areas like getting good hooks. The vtable protection could have been the end of my "wrapper". The actual end of my "wrapper" was when 3DMigoto started hashing textures and used a HW accelerated crc32 library at which point I just gave up the pursuit. The code was already more advanced than I ever expected. I started to see how you can't compete with a stable wrapper as doing changes with loads of hooks became a bottleneck.

I want to thank all the shaderhackers who made working on my "wrapper" a pleasant experience.
When I finally got a stable fix running without crashes it was candy for my eyes.
What motivated me was seeing these amazing game fixes in action. I own a lot of games because they were fixed.
I spent a lot of money on games as they are the tools of the trade. I no longer have that kind of money.
Having spent significant time in 2D land makes me realize how complex 3D Vision can be.
Matching driver version, custom profiles, multiple convergence toggles. Almost a handful of bound keys.

Just like Chiri I had the intention to make some money by creating a wrapper in the only way I knew how. Hooks + ASM. I now know what Chiri realized which is that it's impossible to make money by giving something away for free. Creating any kind of DRM on the wrapper or fixes felt like a waste of time and resources. I did have some plan to checksum the exe to block pirated copies as I don't like piracy.

What I didn't know when I started was that 3DMigoto was going to be released as open source.
I got a few weeks notice in advance and my options were continue or give up.
As fixes have been made in 3DMigoto they work there by default.
The only exceptions I know of are when OS differences made 3DMigoto crash.
I don't even know which wrapper is fastest and bo3b has been great at profiling and optimizing 3DMigoto.

In all of this I really wanted to become a shader fixer, but considering that the main game I play is Counter-Strike: Global Offensive, I would never place a modified DLL in that game, as it risks a permanent ban for cheating.

Thanks to everybody using my assembler it warms my heart.
To have a critical piece of code that everyone can enjoy!
What more can you ask for?

donations: ulfjalmbrant@hotmail.com

#9
Posted 07/06/2016 07:40 AM   
[quote="Flugan"]Finally would you have played with the hash algoritm if it was not a modified MD5 hash? Just curious :)[/quote]It depends - if it had been a variation of some other identifiable hash function then yes, but if it had looked like an entirely custom or unidentifiable hash probably not.
Flugan said:Finally would you have played with the hash algoritm if it was not a modified MD5 hash?
Just curious :)
It depends - if it had been a variation of some other identifiable hash function then yes, but if it had looked like an entirely custom or unidentifiable hash probably not.

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

#10
Posted 07/06/2016 08:16 AM   
Yeah, I agree it was a stupid question as if it is custom there is nothing to compare it to. But if it is based on a standard hash function you can implement the reference hash and then identify the modifications. I had serious problems with HeliX hash function as I just couldn't reproduce the hash value for a long time. The crc is done on the binary code and you have to find the end token to specify the right binary blob. As HeliX mod comes without symbols reverse engineering the assembler would be much harder. At least we are not talking modified crc and I did eventually figure out how to apply the hash function. Solving the dx9 hash function was never a high priority but still I was stuck for a very long time. Just another example of problematic hashing as the hash is either correct or completely wrong.
Yeah, I agree it was a stupid question as if it is custom there is nothing to compare it to.
But if it is based on a standard hash function you can implement the reference hash and then identify the modifications.

I had serious problems with HeliX hash function as I just couldn't reproduce the hash value for a long time.
The crc is done on the binary code and you have to find the end token to specify the right binary blob.

As HeliX mod comes without symbols reverse engineering the assembler would be much harder.

At least we are not talking modified crc and I did eventually figure out how to apply the hash function.
Solving the dx9 hash function was never a high priority but still I was stuck for a very long time.
Just another example of problematic hashing as the hash is either correct or completely wrong.

Thanks to everybody using my assembler it warms my heart.
To have a critical piece of code that everyone can enjoy!
What more can you ask for?

donations: ulfjalmbrant@hotmail.com

#11
Posted 07/06/2016 09:50 AM   
Thanks for all your efforts Flugan and thank you for this thread. I know you have always been a significant contributor but sometimes it's hard to see what's actually involved. So this has given me a greater appreciation for what you have done, and I intend to express this in a more "tangible" way. I have been trying to give something back of late, and you are the next person on my list. [quote="Flugan"]Batman (Arkham?) Knight took a long time to fix mostly because the massive amount of shaders and the number of iteration. Sometimes you need to go through all shaders in all games to make sure a fix didn't cause something else to break. [/quote] Unless I am missing something here, this game has not been fixed, and seems like it may never be at this point. Which is a shame because it seems a lot of work has already gone into it.
Thanks for all your efforts Flugan and thank you for this thread. I know you have always been a significant contributor but sometimes it's hard to see what's actually involved. So this has given me a greater appreciation for what you have done, and I intend to express this in a more "tangible" way. I have been trying to give something back of late, and you are the next person on my list.

Flugan said:Batman (Arkham?) Knight took a long time to fix mostly because the massive amount of shaders and the number of iteration. Sometimes you need to go through all shaders in all games to make sure a fix didn't cause something else to break.


Unless I am missing something here, this game has not been fixed, and seems like it may never be at this point. Which is a shame because it seems a lot of work has already gone into it.

i7-4790K CPU 4.8Ghz stable overclock.
16 GB RAM Corsair
EVGA 1080TI SLI
Samsung SSD 840Pro
ASUS Z97-WS
3D Surround ASUS Rog Swift PG278Q(R), 2x PG278Q (yes it works)
Obutto R3volution.
Windows 10 pro 64x (Windows 7 Dual boot)

#12
Posted 07/08/2016 03:03 AM   
I think Batman has been studied so the shaderhackers know how to fix individual shaders. The main thing causing problems is that the game has 35 000 shaders which means it can't be fixed the normal way as that would be too time-consuming. There have been discussions regarding what scripting language should be used. HeliX would have used lua as that is integrated with the wrapper and meant for such situations. I integrated lua in my shader tools for a while until I realized only HeliX's Bioshock Infinite fix used lua. DarkStarSword would use python but integrating python with the wrapper would add another dependency on python being installed in all computers using 3DMigoto. I guess an offline python solution would be best as you can pretty much dump all the Batman shaders when loading the game. Given the situation I must assume that Tomb Raider was priority. I'm guessing that fixing the shaders even using python is pretty damn hard as you don't want to affect the wrong shaders breaking the fix. This is my understanding of the situation. If you ignore how hard it is to understand HeliX lua files the solution is pretty cool. So far we have managed without any online script solution. I think DarkStarSword has some nice offline tools for Unity engine.
I think Batman has been studied so the shaderhackers know how to fix individual shaders.
The main thing causing problems is that the game has 35 000 shaders which means it can't be fixed the normal way as that would be too time-consuming.

There have been discussions regarding what scripting language should be used.
HeliX would have used lua as that is integrated with the wrapper and meant for such situations.

I integrated lua in my shader tools for a while until I realized only HeliX's Bioshock Infinite fix used lua.

DarkStarSword would use python but integrating python with the wrapper would add another dependency on python being installed in all computers using 3DMigoto.

I guess an offline python solution would be best as you can pretty much dump all the Batman shaders when loading the game.

Given the situation I must assume that Tomb Raider was priority.
I'm guessing that fixing the shaders even using python is pretty damn hard as you don't want to affect the wrong shaders breaking the fix.

This is my understanding of the situation.
If you ignore how hard it is to understand HeliX lua files the solution is pretty cool.
So far we have managed without any online script solution. I think DarkStarSword has some nice offline tools for Unity engine.

Thanks to everybody using my assembler it warms my heart.
To have a critical piece of code that everyone can enjoy!
What more can you ask for?

donations: ulfjalmbrant@hotmail.com

#13
Posted 07/08/2016 03:44 AM   
Yeah, I keep looking at Batman still on my TODO list and intending to get back to it - Mike and I know how to fix it, we just have been busy with other things. I'm hopefully out of crunch time in my day job at last, so maybe soon...
Yeah, I keep looking at Batman still on my TODO list and intending to get back to it - Mike and I know how to fix it, we just have been busy with other things. I'm hopefully out of crunch time in my day job at last, so maybe soon...

2x Geforce GTX 980 in SLI provided by NVIDIA, i7 6700K 4GHz CPU, Asus 27" VG278HE 144Hz 3D Monitor, BenQ W1070 3D Projector, 120" Elite Screens YardMaster 2, 32GB Corsair DDR4 3200MHz RAM, Samsung 850 EVO 500G SSD, 4x750GB HDD in RAID5, Gigabyte Z170X-Gaming 7 Motherboard, Corsair Obsidian 750D Airflow Edition Case, Corsair RM850i PSU, HTC Vive, Win 10 64bit

Alienware M17x R4 w/ built in 3D, Intel i7 3740QM, GTX 680m 2GB, 16GB DDR3 1600MHz RAM, Win7 64bit, 1TB SSD, 1TB HDD, 750GB HDD

Pre-release 3D fixes, shadertool.py and other goodies: http://github.com/DarkStarSword/3d-fixes
Support me on Patreon: https://www.patreon.com/DarkStarSword or PayPal: https://www.paypal.me/DarkStarSword

#14
Posted 07/08/2016 04:37 AM   
Well that's very reassuring to hear. Obviously I don't expect anything but that might be the number one game on my wishlist still this long after release. So I'm happy it's still possibly on the table for sometime in the future.
Well that's very reassuring to hear. Obviously I don't expect anything but that might be the number one game on my wishlist still this long after release. So I'm happy it's still possibly on the table for sometime in the future.

i7-4790K CPU 4.8Ghz stable overclock.
16 GB RAM Corsair
EVGA 1080TI SLI
Samsung SSD 840Pro
ASUS Z97-WS
3D Surround ASUS Rog Swift PG278Q(R), 2x PG278Q (yes it works)
Obutto R3volution.
Windows 10 pro 64x (Windows 7 Dual boot)

#15
Posted 07/08/2016 06:12 AM   
Scroll To Top