Difference between revisions of "User:InsaneInGame"

From Bo3b's School for Shaderhackers
Jump to: navigation, search
(Lesson 4. Fix an entire game by disabling problematic effects)
Line 521: Line 521:
 
   mov oC0.xyzw, c200.wwww
 
   mov oC0.xyzw, c200.wwww
 
endif</nowiki>
 
endif</nowiki>
 +
 +
== Lesson 5. Experimentation ==
 +
Note to self : This was a very interesting and insightful lesson. Focusing on
 +
experimenting to find the right spot for the right fix. The actual fixing on the
 +
water was somehow problematic at first, but the later lesson 6 and its
 +
"Prime Directive" (Canonical Code; how the nvidia definition/formula translates to Helix/ASM),
 +
in addition to valuable input from Bo3b, Mike_ar69 and 4everAwake, made me find
 +
the correct spot for the code (this demonstrated that I also understand the mechanics
 +
of the essential components and parameters used). This is great progress!
 +
 +
<br>
 +
 +
Quiz : Go to the end of the demo, and figure out a way to fix the broken water.
 +
 +
Answer : I will copy/paste the actual code that I took great effort in understanding
 +
and applying (including comments on the essential sections of the code/input) :
 +
 +
<nowiki>// 4everAwake located this shader which is supposed to be the shader for the water glitch
 +
// He used a mass edit script to edit multiple shaders at a time.
 +
// The whole screen is flickering in pink through magenta.
 +
// I would never have thought that this shader would be responsible.
 +
//
 +
// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022
 +
//
 +
// Parameters:
 +
//
 +
//  float4 CameraPosition;
 +
//  float3 ExponentialFogColor;
 +
//  float3 ExponentialFogLightVector;
 +
//  float3 ExponentialFogParameters;
 +
//  float4 FogDistanceScale;
 +
//  float4 FogExtinctionDistance;
 +
//  float4 FogInScattering[4];
 +
//  float4 FogMaxHeight;
 +
//  float4 FogMinHeight;
 +
//  float4 FogStartDistance;
 +
//  float3 LightInscatteringColor;
 +
//  float4x4 LocalToWorld;
 +
//  float4x4 ViewProjectionMatrix;
 +
//  float3x3 WorldToLocal;
 +
//  float bUseExponentialHeightFog;
 +
//
 +
//
 +
// Registers:
 +
//
 +
//  Name                      Reg  Size
 +
//  ------------------------- ----- ----
 +
//  ViewProjectionMatrix      c0      4
 +
//  CameraPosition            c4      1
 +
//  LocalToWorld              c6      4
 +
//  FogInScattering          c10      4
 +
//  WorldToLocal              c14      3
 +
//  bUseExponentialHeightFog  c17      1
 +
//  ExponentialFogParameters  c18      1
 +
//  ExponentialFogColor      c19      1
 +
//  LightInscatteringColor    c20      1
 +
//  ExponentialFogLightVector c21      1
 +
//  FogDistanceScale          c22      1
 +
//  FogExtinctionDistance    c23      1
 +
//  FogStartDistance          c24      1
 +
//  FogMinHeight              c25      1
 +
//  FogMaxHeight              c26      1
 +
//
 +
 +
    vs_3_0
 +
// DefVSConst1 = 250, Const1, Const2, Const3, Const4
 +
def c201, 1.0, 600, 0.0625, 0 // 0.0625 prime directive reference value
 +
 +
    def c5, 0.00784313772, -1, 0.00100000005, 1
 +
    def c27, 0.5, 0, 1, 0
 +
dcl_2d s0 // declare sample register s0 (loading image, applying pixel to triangle/vertices)
 +
    dcl_position v0
 +
    dcl_tangent v1
 +
    dcl_normal v2
 +
    dcl_color1 v3
 +
    dcl_texcoord v4
 +
    dcl_color o0
 +
    dcl_texcoord o1
 +
    dcl_texcoord4 o2
 +
    dcl_texcoord5 o3 // texcoord5 (matching the texcoord5 in pixelshader causing problems)
 +
    dcl_texcoord6 o4
 +
    dcl_texcoord7 o5.xyz
 +
    dcl_position o6
 +
    mul r0, c7, v0.y
 +
    mad r0, c6, v0.x, r0
 +
    mad r0, c8, v0.z, r0
 +
    mad r0, c9, v0.w, r0
 +
    mad r1, v2, c5.x, c5.y
 +
    mad r2.xyz, v1.yzxw, c5.x, c5.y
 +
    mul r3.xyz, r1.zxyw, r2
 +
    mad r2.xyz, r1.yzxw, r2.yzxw, -r3
 +
    mul r2.xyz, r1.w, r2
 +
    mul r3.xyz, r1.yzxw, r2.zxyw
 +
    mad r3.xyz, r2.yzxw, r1.zxyw, -r3
 +
    mul r3.xyz, r1.w, r3
 +
    mul r4, r0.y, c1
 +
    mad r4, c0, r0.x, r4
 +
    mad r4, c2, r0.z, r4
 +
    mad r4, c3, r0.w, r4
 +
    mad r5.xyz, r0, -c4.w, c4
 +
    mul r6.xyz, r5.y, c15
 +
    mad r5.xyw, c14.xyzz, r5.x, r6.xyzz
 +
    mad r5.xyz, c16, r5.z, r5.xyww
 +
    dp3 o4.x, r3, r5
 +
    dp3 o4.y, r2, r5
 +
    dp3 o4.z, r1, r5
 +
    dp3 o5.x, r3, c16
 +
    dp3 o5.y, r2, c16
 +
    dp3 o5.z, r1, c16
 +
    abs r0.w, c17.x
 +
    if_lt -r0.w, r0.w
 +
      add r1.xyz, r0, -c4
 +
      dp3 r0.w, r1, r1
 +
      rsq r0.w, r0.w
 +
      rcp r1.w, r0.w
 +
      mul r1.w, r1.w, c18.x
 +
      slt r2.x, c5.z, r1_abs.z
 +
      mul r2.y, r1.z, -c18.y
 +
      exp r2.y, r2.y
 +
      add r2.y, -r2.y, c5.w
 +
      mul r2.z, r1.z, c18.y
 +
      rcp r2.z, r2.z
 +
      mul r2.y, r2.z, r2.y
 +
      mad r2.y, r1.w, r2.y, -r1.w
 +
      mad r1.w, r2.x, r2.y, r1.w
 +
      mul r1.xyz, r0.w, r1
 +
      dp3 r0.w, c21, r1
 +
      slt r1.x, r0.w, c18.z
 +
      mov r2.xyz, c19
 +
      add r3.xyz, r2, c20
 +
      mul r5.xyz, r3, c27.x
 +
      add r1.y, r0.w, c5.w
 +
      mov r2.w, c5.w
 +
      add r1.z, r2.w, c18.z
 +
      rcp r1.z, r1.z
 +
      mul_sat r1.y, r1.z, r1.y
 +
      mad r2.xyz, r3, c27.x, -r2
 +
      mad r2.xyz, r1.y, r2, c19
 +
      add r0.w, r0.w, -c18.z
 +
      add r1.y, r2.w, -c18.z
 +
      rcp r1.y, r1.y
 +
      mul_sat r0.w, r0.w, r1.y
 +
      mul r0.w, r0.w, r0.w
 +
      mov r6.x, c27.x
 +
      mad r3.xyz, r3, -r6.x, c20
 +
      mad r3.xyz, r0.w, r3, r5
 +
      lrp r5.xyz, r1.x, r2, r3
 +
      exp r0.w, -r1.w
 +
      min r0.w, r0.w, c5.w
 +
      add r1.x, -r0.w, c5.w
 +
      mul o2.xyz, r1.x, r5
 +
      mov o2.w, r0.w
 +
    else
 +
      add r0.xyz, r0, -c4
 +
      dp3 r0.x, r0, r0
 +
      rsq r0.x, r0.x
 +
      rcp r0.x, r0.x
 +
      mov r1.z, c4.z
 +
      add r2, -r1.z, c25
 +
      sge r0.y, c5.z, r0_abs.z
 +
      lrp r1.x, r0.y, c5.z, r0.z
 +
      rcp r0.y, r1.x
 +
      mul_sat r2, r0.y, r2
 +
      add r1, -r1.z, c26
 +
      mul_sat r1, r0.y, r1
 +
      add r0, r0.x, -c24
 +
      max r0, r0, c27.y
 +
      add r1, -r2, r1
 +
      mul r0, r0, r1_abs
 +
      slt r1, r0, c23
 +
      mul r0, r0, c22
 +
      exp r2.x, r0.x
 +
      exp r2.y, r0.y
 +
      exp r2.z, r0.z
 +
      exp r2.w, r0.w
 +
      mul r0, r1, r2
 +
      mad r1, r1, r2, c5.y
 +
      mul r2.xyz, r1.x, c10
 +
      mov r2.w, r0.x
 +
      mul r2, r0.y, r2
 +
      mad r2.xyz, r1.y, c11, r2
 +
      mul r2, r0.z, r2
 +
      mad r2.xyz, r1.z, c12, r2
 +
      mul r0, r0.w, r2
 +
      mad o2.xyz, r1.w, c13, r0
 +
      mov o2.w, r0.w
 +
    endif
 +
    mov o0, v3
 +
    mul o1, c27.zzyy, v4.xyxx
 +
 +
// mov o3, r4 // texcoord5 o3 (referring to texcoord5 input in pixelshader,
 +
// commenting out for applying prime directive code
 +
 +
    mov o4.w, c5.w
 +
    mov o6, r4 // output position (from dcl_position o6)
 +
 +
mov r0, r4 // move r4 to r0
 +
// At this point r0 is the output position, correctly
 +
// placed, but without stereo.
 +
 +
// To create stereo effects, we need to calculate:
 +
//  Xnew = Xold + Separation * (W - Convergence)
 +
 +
// Fetch the Separation (r30.x) and Convergence (r30.y) 
 +
// using the Helix NVapi trick
 +
texldl r30, c201.z, s0
 +
 +
// (W - Convergence)
 +
add r30.w, r0.w, -r30.y
 +
 +
// multiply that times Separation for:
 +
//  Separation * (W - Convergence)
 +
mul r30.z, r30.x, r30.w
 +
 +
// Add that to Xold for the complete:
 +
//  Xold + Separation * (W - Convergence)
 +
add r0.x, r0.x, r30.z
 +
 +
mov o3, r0 // move fixed r4 (r0) into o3
 +
   
 +
// approximately 111 instruction slots used
 +
</nowiki>
 +
 +
<br>
 +
 +
Screenshots of the fixed water (at the end of the demo/worm boss) :
 +
 +
[[File:TheBall06_50.jpg|700px]]
 +
 +
<br>
 +
 +
[[File:TheBall07_50.jpg|700px]]

Revision as of 11:13, 25 September 2014

Lesson 0. So far, so good. ;)

TheBall01 50 - Copy.jpg


Lesson 1. Getting a feel for the shader hunting. :)

This is the first shader file. Pixel shader of entire ball.

TheBall02 50 - Copy.jpg

//
// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022
//
// Parameters:
//
//   float4 AmbientColorAndSkyFactor;
//   float4 MinZ_MaxZRatio;
//   sampler2D Texture2D_0;
//   sampler2D Texture2D_1;
//   sampler2D Texture2D_2;
//   sampler2D Texture2D_3;
//   sampler2D Texture2D_4;
//   sampler2D Texture2D_5;
//   sampler2D Texture2D_6;
//   sampler2D Texture2D_7;
//   sampler2D Texture2D_8;
//   float UniformPixelScalar_23;
//   float UniformPixelScalar_24;
//   float UniformPixelScalar_5;
//   float UniformPixelScalar_6;
//   float4 UniformPixelVector_0;
//   float4 UniformPixelVector_1;
//   float4 UniformPixelVector_10;
//   float4 UniformPixelVector_11;
//   float4 UniformPixelVector_12;
//   float4 UniformPixelVector_13;
//   float4 UniformPixelVector_14;
//   float4 UniformPixelVector_15;
//   float4 UniformPixelVector_16;
//   float4 UniformPixelVector_17;
//   float4 UniformPixelVector_18;
//   float4 UniformPixelVector_2;
//   float4 UniformPixelVector_3;
//   float4 UniformPixelVector_4;
//   float4 UniformPixelVector_5;
//   float4 UniformPixelVector_6;
//   float4 UniformPixelVector_7;
//   float4 UniformPixelVector_8;
//   float4 UniformPixelVector_9;
//
//
// Registers:
//
//   Name                     Reg   Size
//   ------------------------ ----- ----
//   UniformPixelVector_0     c0       1
//   MinZ_MaxZRatio           c2       1
//   UniformPixelVector_1     c4       1
//   UniformPixelVector_2     c5       1
//   UniformPixelVector_3     c6       1
//   UniformPixelVector_4     c7       1
//   UniformPixelVector_5     c8       1
//   UniformPixelVector_6     c9       1
//   UniformPixelVector_7     c10      1
//   UniformPixelVector_8     c11      1
//   UniformPixelVector_9     c12      1
//   UniformPixelVector_10    c13      1
//   UniformPixelVector_11    c14      1
//   UniformPixelVector_12    c15      1
//   UniformPixelVector_13    c16      1
//   UniformPixelVector_14    c17      1
//   UniformPixelVector_15    c18      1
//   UniformPixelVector_16    c19      1
//   UniformPixelVector_17    c20      1
//   UniformPixelVector_18    c21      1
//   UniformPixelScalar_5     c22      1
//   UniformPixelScalar_6     c23      1
//   UniformPixelScalar_23    c24      1
//   UniformPixelScalar_24    c25      1
//   AmbientColorAndSkyFactor c26      1
//   Texture2D_0              s0       1
//   Texture2D_1              s1       1
//   Texture2D_2              s2       1
//   Texture2D_3              s3       1
//   Texture2D_4              s4       1
//   Texture2D_5              s5       1
//   Texture2D_6              s6       1
//   Texture2D_7              s7       1
//   Texture2D_8              s8       1
//

    ps_3_0
    def c1, 0.75, -9.99999997e-007, 6, -0.333299994
    def c3, 0.666700006, 1.5, 0.0250000004, 4
    def c27, 5, 0.5, 6.28318548, -3.14159274
    def c28, 0.200000003, 0.100000001, 10, 0.800000012
    def c29, 0.300000012, 0.589999974, 0.109999999, 0.600000024
    def c30, 1.5, 0.649999976, 0, 0
    def c31, 0.5, -0.5, 8, 17
    def c32, 2, -1, -0.5, 0
    def c33, 1.25, 1, 0, 0
    def c34, 9, 0.5, 0.200000003, 0.100000001
    def c35, 0.450000018, 100, 90, 0.100000001
    def c36, 0.816496611, 0.577350259, 0, 0
    def c37, -0.707106769, -0.408248305, 0.577350259, 0.707106769
    dcl_texcoord2_pp v0.xyz
    dcl_texcoord3_pp v1.xyz
    dcl_texcoord5 v2.w
    dcl_texcoord6 v3.xyz
    dcl_color v4.xyz
    dcl_color1 v5
    dcl_texcoord v6
    dcl_2d s0
    dcl_2d s1
    dcl_2d s2
    dcl_2d s3
    dcl_2d s4
    dcl_2d s5
    dcl_2d s6
    dcl_2d s7
    dcl_2d s8
    mul r0.xy, c1.x, v6
    texld r0, r0, s8
    add r0.y, r0_abs.x, c1.y
    pow r1.x, r0_abs.x, c25.x
    cmp r0.y, r0.y, r1.x, c32.w
    lrp r1.x, c24.x, r0.y, r0.x
    mul r0.x, r1.x, c24.x
    mul_sat r0.x, r0.x, c1.z
    add r0.x, r0.x, c1.w
    mov r1, c32
    add r0.y, r1.y, c24.x
    cmp r0, r0.y, c3.x, r0.x
    texkill r0
    mov r0.y, c3.y
    mad r0.xy, v6, r0.y, c11
    texld r0, r0, s2
    mad r0.xz, v6.xyyw, r1.x, c12.xyyw
    texld r2, r0.xzzw, s2
    mul r0.x, r2.y, c3.z
    mad r0.x, r0.y, r2.y, r0.x
    add r0.yz, c9.xxyw, v6.xxyw
    texld r2, r0.yzzw, s2
    mad r0.yz, v6.xxyw, -r1.z, c10.xxyw
    texld r3, r0.yzzw, s2
    mul r0.y, r2.x, r3.x
    mad r0.x, r0.y, c3.w, r0.x
    mul r0.xyz, r0.x, c8
    mov r1.x, c23.x
    mul r0.w, r1.x, c13.x
    mad r0.w, r0.w, c27.x, c27.y
    frc r0.w, r0.w
    mad r0.w, r0.w, c27.z, c27.w
    sincos r2.y, r0.w
    mul r2.xyz, r0, r2.y
    mad r0.xyz, r2, c32.z, r0
    nrm_pp r2.xyz, v4
    nrm_pp r3.xyz, v5
    mul_pp r4.xyz, r2.yzxw, r3.zxyw
    mad_pp r4.xyz, r3.yzxw, r2.zxyw, -r4
    mul r4.xyz, r4, v5.w
    mov r5.y, r4.z
    dp3 r4.y, r4, c19
    mov_pp r5.x, r2.z
    dp3 r4.x, r2, c19
    mov_pp r5.z, r3.z
    dp3 r0.w, r5, r5
    rsq r0.w, r0.w
    mul r0.w, r0.w, r5.z
    dp3 r4.z, r3, c19
    mad r0.w, r0.w, c29.w, c29.w
    mul r1.x, r0_abs.w, r0_abs.w
    add r0.w, r0_abs.w, c1.y
    mul r1.x, r1.x, r1.x
    mul r1.x, r1.x, r1.x
    mul r1.x, r1.x, c3.w
    cmp r0.w, r0.w, r1.x, c32.w
    min r1.x, r0.w, c28.w
    dp3 r0.w, r4, r4
    rsq r0.w, r0.w
    mul r0.w, r0.w, r4.z
    mad r0.w, r0.w, c31.y, c31.x
    add r2.x, r0_abs.w, c1.y
    mul r0.w, r0_abs.w, r0_abs.w
    mul r0.w, r0.w, r0.w
    mul r0.w, r0.w, r0.w
    mul r0.w, r0.w, c3.w
    cmp r0.w, r2.x, r0.w, c32.w
    min r2.x, r0.w, c28.w
    add r0.w, -r2.x, -c32.y
    mad r0.w, c20.x, r0.w, r2.x
    mad r2.xy, v6.wzzw, c31.x, c31.y
    dp2add r3.x, c17, r2, r1.w
    dp2add r3.y, c18, r2, r1.w
    add r2.xy, r3, -c32.z
    texld r2, r2, s6
    add r2.xyz, r2, c30
    mov r3.yzw, c32_abs.xyxw
    texld r4, v6, s3
    dp3 r2.w, r4, c29
    mul r3.x, r2.w, c28.z
    mul r2.xyz, r2, r3.xyxw
    mul r2.xyz, r2, r3.zxww
    mul r2.xyz, r0.w, r2
    add r3.xy, v6, v6
    nrm r5.xyz, v3
    mad r3.xy, r5, c35.x, r3
    texld r3, r3, s4
    mul r3.xyz, r3.y, c35.yzzw
    max r6.xyz, r4_abs, -c1.y
    log r7.x, r6.x
    log r7.y, r6.y
    log r7.z, r6.z
    mul r6.xyz, r7, c3.y
    exp r7.x, r6.x
    exp r7.y, r6.y
    exp r7.z, r6.z
    mad r3.xyz, r3, r7, r4
    texld r6, v6, s5
    mul r3.xyz, r3, r6.x
    mul r7.xyz, r4, c28.xyyw
    mad r3.xyz, r3, c35.w, r7
    mad r3.xyz, r6.z, c15, r3
    mad r2.xyz, c16.x, r2, r3
    mul r3.xyz, r4, c32_abs
    mad r2.xyz, r1.x, r3, r2
    mul r0.w, r4.w, c21.x
    mad r2.xyz, r0.w, c32_abs.xzww, r2
    mad r0.xyz, c14.x, r0, r2
    add_pp r0.xyz, r0, c0
    mad r0.w, r5.z, -c33.x, c33.y
    cmp r0.w, r5.z, r0.w, -c32.y
    mul r1.x, r0_abs.w, r0_abs.w
    mul r1.x, r1.x, r1.x
    mul r1.x, r0_abs.w, r1.x
    add r0.w, r0_abs.w, c1.y
    mad r2.xyz, r4, c34.x, c34.yzww
    mul r2.xyz, r1.x, r2
    cmp r2.xyz, r0.w, r2, c32.w
    add r2.xyz, r2, r4
    add r3.xyz, -r1.y, -c0
    mul_pp r2.xyz, r2, r3
    mul_pp r3.xyz, r2, v0
    mad r1.xy, c22.x, v6, r1.z
    dp2add r4.x, c5, r1, r1.w
    dp2add r4.y, c6, r1, r1.w
    add r1.xy, r4, -c32.z
    texld r1, r1, s1
    mad r1.xyz, r1, c32.x, c32.y
    mul r1.xyz, r1, c7
    texld r4, v6, s0
    mad r4.xyz, r4, c32.x, c32.y
    mad_pp r1.xyz, r4, c4, r1
    nrm_pp r4.xyz, r1
    dp3 r0.w, r4, r5
    mul r1.xyz, r0.w, r4
    mad r1.xyz, r1, c32.x, -r5
    dp2add_sat_pp r5.x, r1.yzzw, c36, c36.z
    dp3_sat_pp r5.y, r1, c37
    dp3_sat_pp r5.z, r1.yzxw, c37.yzww
    max r1.xyz, r5, -c1.y
    log r5.x, r1.x
    log r5.y, r1.y
    log r5.z, r1.z
    mad_pp r0.w, r6.x, c31.z, c31.w
    mul r1.xyz, r6.y, c32_abs.xzww
    mul r5.xyz, r5, r0.w
    exp_pp r6.x, r5.x
    exp_pp r6.y, r5.y
    exp_pp r6.z, r5.z
    dp3_pp r0.w, v1, r6
    texld r5, v6, s7
    mad_pp r1.xyz, r5, c3.w, r1
    mul_pp r1.xyz, r1, v0
    mul_pp r1.xyz, r0.w, r1
    dp2add_sat_pp r5.x, r4.yzzw, c36, c36.z
    dp3_sat_pp r5.y, r4, c37
    dp3_sat_pp r5.z, r4.yzxw, c37.yzww
    mul_pp r4.xyz, r5, r5
    max_pp r5.xyz, r4, -c1.y
    dp3_pp r0.w, v1, r5
    mad_pp r1.xyz, r3, r0.w, r1
    add_pp r0.xyz, r0, r1
    mad_pp oC0.xyz, r2, c26, r0
    rcp r0.x, v2.w
    mad_pp oC0.w, c2.x, r0.x, c2.y

// approximately 188 instruction slots used (12 texture, 176 arithmetic)
 

Lesson 2. Disable bloom effect on the ball (make transparent)

Note to self : This was remarkable! I also tried to disable the subtle glow on other light sources (pixel shader 85), just to see if I understood the code, instructions, and theory correctly. I also try to memorize codes and parameters. So far, so good.


Screenshot before disabling the bloom effect:

Before TheBall03 50.jpg

Screenshot after disabling the bloom effect:

After TheBall04 50.jpg

Shader file editing :

// Pixel Shader responsible for bloom effect on ball
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022
//
// Parameters:
//
//   sampler2D Texture2D_0;
//   float UniformPixelScalar_7;
//   float4 UniformPixelVector_0;
//   float4 UniformPixelVector_3;
//   float4 UniformPixelVector_4;
//   float4 UniformPixelVector_5;
//   float4 UniformPixelVector_6;
//
//
// Registers:
//
//   Name                 Reg   Size
//   -------------------- ----- ----
//   UniformPixelVector_0 c0       1
//   UniformPixelVector_3 c4       1
//   UniformPixelVector_4 c5       1
//   UniformPixelVector_5 c6       1
//   UniformPixelVector_6 c7       1
//   UniformPixelScalar_7 c8       1
//   Texture2D_0          s0       1
//

    // Pixel shader (shader model 3.0) that draws the bloom effect on the ball.
    // The compiler-generated body is unchanged; the only hand edit is the
    // final "mov oC0.xyzw, c1.wwww" which overwrites the computed colour.
    ps_3_0
    def c1, 0.5, 1, -0.5, 0          // c1.w = 0 is reused below as the override value
    def c2, 3, 0, 0, 0
    dcl_texcoord v0.xy
    dcl_texcoord1 v1
    dcl_texcoord2 v2.y
    dcl_texcoord4 v3.w
    dcl_2d s0
    // Texture coordinate: (v0.xy - 0.5) fed through two 2-component dot
    // products with c5 and c6, then + 0.5, then sampled from s0.
    add r0.xy, c1.z, v0
    mov r0.w, c1.w
    dp2add r1.x, c5, r0, r0.w
    dp2add r1.y, c6, r0, r0.w
    add r0.xy, r1, c1.x
    texld r0, r0, s0
    // Colour maths: sampled value combined with v1, v2.y and constants
    // c4, c7, c8, c0, finally scaled by v3.w.
    mad_sat r0.y, v2.y, -c1.x, c1.y
    mul r0.yzw, r0.y, v1.xxyz
    mul r0.yzw, r0, v1.w
    mul_sat r0.xyz, r0.yzww, r0.x
    mul r1.xyz, r0, c4
    mad r0.xyz, c4, -r0, r0
    mad r0.xyz, c8.x, r0, r1
    mul r0.xyz, r0, c4.w
    mul r0.xyz, r0, c7
    mov r1.x, c2.x
    mad_pp r0.xyz, r0, r1.x, c0
    mul_pp oC0.xyz, r0, v3.w
    mov oC0.w, c1.w

// Remove annoying bloom effect on ball (make all pixels invisible).
// c1.w is 0 (see "def c1" above), so this replaces the colour written just
// above with (0, 0, 0, 0) for every pixel drawn by this shader.
// NOTE(review): "invisible" assumes the effect is blended so that an all-zero
// output contributes nothing - confirm against the render state.
mov oC0.xyzw, c1.wwww
	
// approximately 21 instruction slots used (1 texture, 20 arithmetic)
// (count as emitted for the original shader; it does not include the mov added above)
 

Lesson 3. Create and use constants in dx9settings.ini

.: Part 1 : Change the key used to toggle the effect from Numpad 0 to any key of your choice. :.

// ASCII Table Chart 
// With the aid of the ASCII table chart, I changed the default "toggle on/off mechanism" 
// from numpad 0 (decimal value 96) to number 8 (decimal value 56)
[KEY1]
Key = 56
Presets = 1;2;
Type = 1

.: Part 2 : Change the mechanism to not toggle the effect, but only be active when held down. :.

// Change key type parameter so that the effect will only be active when key
// is held down. I've changed the toggle mechanism (on/off) from Type = 1, 
// to momentary "held down" Type = 2.
[KEY1]
Key = 56
Presets = 1;2;
Type = 2

.: Part 3a : When the effect is being displayed, change its color. :.

// When the effect is displayed, change its color.
// The override swizzle was changed from c200.wwww (all four channels take
// c200.w) to c200.wwyy (red/green take c200.w, blue/alpha take c200.y).
// Assuming "def c200, 0, 1, 0.0625, 0" as in the Lesson 4 shader, the output
// becomes (0, 0, 1, 1), i.e. blue with alpha 1, whenever c220.x equals c200.x.
mov r30.x, c220.x
if_eq r30.x, c200.x
   mov oC0.xyzw, c200.wwyy
endif

.: Part 3b : When the effect is being displayed change its color (screen capture) :.

Blue Activated TheBall05 50.jpg

Lesson 4. Fix an entire game by disabling problematic effects

Note to self : This was an interesting lesson. I localized six additional subtle and also quite prominent problematic pixel shaders, and disabled the corresponding effects. Also two vertex shaders responsible for static shadows on the monkeys and dynamic shadows on the ball itself were disabled. I did struggle a bit on the code/instruction section for defining a second key to toggle the bloom effect on and off separated from the other toggle key. I thought you could assign an additional "DefVSConst2" and "DefPSConst2" statement in the dx9settings.ini file, but that caused nothing but trouble.

After reading a little about the usage of these instructions in the "HelixMod Feature List", I understood that you can only use "DefVSConst1" and "DefPSConst1" as valid instructions assigned to a chosen c-register value. I did know that you refer back to the Const1,2,3,4 by the X,Y,Z,W parameters from the shader file itself, but I thought I had to assign an additional DefVS&PSConst2 instruction. So thanks to the Feature List, I figured it out pretty quickly. Two hours of improvising code to try to get it to work, then later reading that those instructions were invalid, was a little frustrating, but also very satisfying. I did solve it the first time, but due to the confusion about the "DefVS&PSConst1" being the only valid instruction, I didn't know where to go and just did a lot of random operations to get it to work even though I knew it had to be something regarding the Def instruction. Since I read that all the constants (const1-4) are being passed through the c-register value to the shader file, and referred back to the dx9settings.ini file with the X,Y,Z,W parameters, and using the instructions Const1, Const2, Const3, Const4 with the hex values (floating point 0 and 1 for comparison on the shader file level) in the respective presets for the different keys, it made more sense. Again, thanks to the "Feature List". :)


Quiz: Make a second key preset to toggle bloom independently from other effects.

Answer: In the dx9settings.ini I made the necessary adjustments to assign a second key with two presets for toggling the bloom effect on and off, by altering the PresetsKeysList. I added Key = 55 (decimal "7") under [KEY2], and two presets (for the toggle mechanics). Under [PRES3] and [PRES4] I assigned Const2 = "the toggle values in hex format" (referred to with the Y parameter from the shader file; in my case c220.y, i.e. Const2).

In the shader file itself, I just altered the comment section and swapped Const1 with Const2, just to make it more structured and organized. The instruction "mov r30.y, c220.y" moves the Const2 value of the c220 register (set from the dx9settings.ini file) into the temporary register r30.y in the shader file. The instruction "if_eq r30.y, c200.x" then compares that temporary register with the x=0 value of "def c200" (x=0 matches the Const2 hex value from [PRES4] in dx9settings.ini), and when they are equal the effect is disabled with the instruction "mov oC0.xyzw, c200.wwww" (for transparency). This accomplished the goal of separating the bloom effect from the other effects with an individual key preset.


CODE (dx9settings.ini) :

[General]

UseRenderedShaders = true
DumpAll = false
DefVSConst1 = 220
DefPSConst1 = 220

PresetsKeysList = 1;2;

[KEY1]
Key = 56
Presets = 1;2;
Type = 1

[PRES1]
Const1 = 0x3f800000

[PRES2]
Const1 = 0x00000000

[KEY2]
Key = 55
Presets = 3;4;
Type = 1

[PRES3]
Const2 = 0x3f800000

[PRES4]
Const2 = 0x00000000

CODE (shader file) :

    // Bloom pixel shader with a key-controlled on/off switch appended.
    ps_3_0
// c220 is not defined in this file. dx9settings.ini (DefPSConst1 = 220) names
// it as the register that carries the preset values, laid out as:
//def c220, Const1, Const2, Const3, Const4
def c200, 0, 1, 0.0625, 0	// c200.x = 0 is compared against Const2; c200.w = 0 is the override value

    def c1, 0.5, 1, -0.5, 0
    def c2, 3, 0, 0, 0
    dcl_texcoord v0.xy
    dcl_texcoord1 v1
    dcl_texcoord2 v2.y
    dcl_texcoord4 v3.w
    dcl_2d s0
    // Unmodified compiler output: computes the bloom colour into oC0.
    add r0.xy, c1.z, v0
    mov r0.w, c1.w
    dp2add r1.x, c5, r0, r0.w
    dp2add r1.y, c6, r0, r0.w
    add r0.xy, r1, c1.x
    texld r0, r0, s0
    mad_sat r0.y, v2.y, -c1.x, c1.y
    mul r0.yzw, r0.y, v1.xxyz
    mul r0.yzw, r0, v1.w
    mul_sat r0.xyz, r0.yzww, r0.x
    mul r1.xyz, r0, c4
    mad r0.xyz, c4, -r0, r0
    mad r0.xyz, c8.x, r0, r1
    mul r0.xyz, r0, c4.w
    mul r0.xyz, r0, c7
    mov r1.x, c2.x
    mad_pp r0.xyz, r0, r1.x, c0
    mul_pp oC0.xyz, r0, v3.w
    mov oC0.w, c1.w
 
// Bloom toggle. Const2 (c220.y) is set by the presets in dx9settings.ini:
// [PRES3] Const2 = 0x3f800000 (1.0) and [PRES4] Const2 = 0x00000000 (0.0).
// If Const2 == 0 the colour written above is replaced with
// c200.wwww = (0, 0, 0, 0); otherwise the original output is left in place.
// The value is staged in r30.y before the compare, the same way the compiler
// stages c2.x through r1.x above - presumably so that a single instruction
// does not read two different constant registers (TODO confirm).
mov r30.y, c220.y
if_eq r30.y, c200.x
   mov oC0.xyzw, c200.wwww
endif

Lesson 5. Experimentation

Note to self : This was a very interesting and insightful lesson. Focusing on experimenting to find the right spot for the right fix. The actual fixing on the water was somehow problematic at first, but the later lesson 6 and its "Prime Directive" (Canonical Code; how the nvidia definition/formula translates to Helix/ASM), in addition to valuable input from Bo3b, Mike_ar69 and 4everAwake, made me find the correct spot for the code (this demonstrated that I also understand the mechanics of the essential components and parameters used). This is great progress!


Quiz : Go to the end of the demo, and figure out a way to fix the broken water.

Answer : I will copy/paste the actual code that I took great effort in understanding and applying (including comments on the essential sections of the code/input) :

// 4everAwake located this shader which is supposed to be the shader for the water glitch
// He used a mass edit script to edit multiple shaders at a time. 
// The whole screen is flickering in pink through magenta. 
// I would never have thought that this shader would be responsible.
//
// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022
//
// Parameters:
//
//   float4 CameraPosition;
//   float3 ExponentialFogColor;
//   float3 ExponentialFogLightVector;
//   float3 ExponentialFogParameters;
//   float4 FogDistanceScale;
//   float4 FogExtinctionDistance;
//   float4 FogInScattering[4];
//   float4 FogMaxHeight;
//   float4 FogMinHeight;
//   float4 FogStartDistance;
//   float3 LightInscatteringColor;
//   float4x4 LocalToWorld;
//   float4x4 ViewProjectionMatrix;
//   float3x3 WorldToLocal;
//   float bUseExponentialHeightFog;
//
//
// Registers:
//
//   Name                      Reg   Size
//   ------------------------- ----- ----
//   ViewProjectionMatrix      c0       4
//   CameraPosition            c4       1
//   LocalToWorld              c6       4
//   FogInScattering           c10      4
//   WorldToLocal              c14      3
//   bUseExponentialHeightFog  c17      1
//   ExponentialFogParameters  c18      1
//   ExponentialFogColor       c19      1
//   LightInscatteringColor    c20      1
//   ExponentialFogLightVector c21      1
//   FogDistanceScale          c22      1
//   FogExtinctionDistance     c23      1
//   FogStartDistance          c24      1
//   FogMinHeight              c25      1
//   FogMaxHeight              c26      1
//

    // Vertex shader (shader model 3.0) for the broken water; hand-edited to
    // write a stereo-corrected position into texcoord5 (o3).
    vs_3_0
// DefVSConst1 = 250, Const1, Const2, Const3, Const4
// NOTE(review): c250 is never read by this shader, and the dx9settings.ini
// shown for Lesson 4 sets DefVSConst1 = 220 - confirm which value is current.
def c201, 1.0, 600, 0.0625, 0		// only c201.z (0.0625) is read: it is the coordinate passed to texldl at the end of the shader

    def c5, 0.00784313772, -1, 0.00100000005, 1
    def c27, 0.5, 0, 1, 0
dcl_2d s0							// sampler s0 (added by hand): read by the texldl at the end of the shader to fetch Separation/Convergence
    dcl_position v0
    dcl_tangent v1
    dcl_normal v2
    dcl_color1 v3
    dcl_texcoord v4
    dcl_color o0
    dcl_texcoord o1
    dcl_texcoord4 o2
    dcl_texcoord5 o3				// texcoord5 (matches the texcoord5 input of the pixel shader causing problems); receives the stereo-corrected position
    dcl_texcoord6 o4
    dcl_texcoord7 o5.xyz
    dcl_position o6
    // r0 = c6*v0.x + c7*v0.y + c8*v0.z + c9*v0.w
    // (c6..c9 is LocalToWorld in the register table).
    mul r0, c7, v0.y
    mad r0, c6, v0.x, r0
    mad r0, c8, v0.z, r0
    mad r0, c9, v0.w, r0
    // r1 = v2 * c5.x + c5.y and r2 = v1.yzx * c5.x + c5.y, with
    // c5.x = 0.00784313772 and c5.y = -1 (presumably unpacking byte-packed
    // normal/tangent values into the -1..1 range - TODO confirm).
    mad r1, v2, c5.x, c5.y
    mad r2.xyz, v1.yzxw, c5.x, c5.y
    // r2 = cross(r1, unpacked v1) * r1.w
    mul r3.xyz, r1.zxyw, r2
    mad r2.xyz, r1.yzxw, r2.yzxw, -r3
    mul r2.xyz, r1.w, r2
    // r3 = cross(r2, r1) * r1.w
    mul r3.xyz, r1.yzxw, r2.zxyw
    mad r3.xyz, r2.yzxw, r1.zxyw, -r3
    mul r3.xyz, r1.w, r3
    // r4 = c0*r0.x + c1*r0.y + c2*r0.z + c3*r0.w
    // (c0..c3 is ViewProjectionMatrix). r4 is kept untouched until the end
    // of the shader, where it is written to the position output o6.
    mul r4, r0.y, c1
    mad r4, c0, r0.x, r4
    mad r4, c2, r0.z, r4
    mad r4, c3, r0.w, r4
    // r5.xyz = c4.xyz - r0.xyz * c4.w (c4 is CameraPosition), then multiplied
    // through c14..c16 (WorldToLocal).
    mad r5.xyz, r0, -c4.w, c4
    mul r6.xyz, r5.y, c15
    mad r5.xyw, c14.xyzz, r5.x, r6.xyzz
    mad r5.xyz, c16, r5.z, r5.xyww
    // o4.xyz: r5 projected onto the three vectors r3, r2, r1.
    dp3 o4.x, r3, r5
    dp3 o4.y, r2, r5
    dp3 o4.z, r1, r5
    // o5.xyz: c16 projected onto the same three vectors.
    dp3 o5.x, r3, c16
    dp3 o5.y, r2, c16
    dp3 o5.z, r1, c16
    // Branch on c17.x (bUseExponentialHeightFog): -|x| < |x| holds exactly
    // when c17.x is non-zero. Both branches write the fog output o2.
    abs r0.w, c17.x
    if_lt -r0.w, r0.w
      // Non-zero flag: uses the ExponentialFog* constants c18..c21.
      // r1.xyz = r0 - CameraPosition, r1.w = its length * c18.x.
      add r1.xyz, r0, -c4
      dp3 r0.w, r1, r1
      rsq r0.w, r0.w
      rcp r1.w, r0.w
      mul r1.w, r1.w, c18.x
      slt r2.x, c5.z, r1_abs.z
      mul r2.y, r1.z, -c18.y
      exp r2.y, r2.y
      add r2.y, -r2.y, c5.w
      mul r2.z, r1.z, c18.y
      rcp r2.z, r2.z
      mul r2.y, r2.z, r2.y
      mad r2.y, r1.w, r2.y, -r1.w
      mad r1.w, r2.x, r2.y, r1.w
      mul r1.xyz, r0.w, r1
      dp3 r0.w, c21, r1
      slt r1.x, r0.w, c18.z
      mov r2.xyz, c19
      add r3.xyz, r2, c20
      mul r5.xyz, r3, c27.x
      add r1.y, r0.w, c5.w
      mov r2.w, c5.w
      add r1.z, r2.w, c18.z
      rcp r1.z, r1.z
      mul_sat r1.y, r1.z, r1.y
      mad r2.xyz, r3, c27.x, -r2
      mad r2.xyz, r1.y, r2, c19
      add r0.w, r0.w, -c18.z
      add r1.y, r2.w, -c18.z
      rcp r1.y, r1.y
      mul_sat r0.w, r0.w, r1.y
      mul r0.w, r0.w, r0.w
      mov r6.x, c27.x
      mad r3.xyz, r3, -r6.x, c20
      mad r3.xyz, r0.w, r3, r5
      lrp r5.xyz, r1.x, r2, r3
      // o2.w = min(exp(-r1.w), 1); o2.xyz = (1 - o2.w) * r5
      exp r0.w, -r1.w
      min r0.w, r0.w, c5.w
      add r1.x, -r0.w, c5.w
      mul o2.xyz, r1.x, r5
      mov o2.w, r0.w
    else
      // Zero flag: uses FogInScattering (c10..c13) and the Fog* constants
      // c22..c26, evaluated four-wide.
      add r0.xyz, r0, -c4
      dp3 r0.x, r0, r0
      rsq r0.x, r0.x
      rcp r0.x, r0.x
      mov r1.z, c4.z
      add r2, -r1.z, c25
      sge r0.y, c5.z, r0_abs.z
      lrp r1.x, r0.y, c5.z, r0.z
      rcp r0.y, r1.x
      mul_sat r2, r0.y, r2
      add r1, -r1.z, c26
      mul_sat r1, r0.y, r1
      add r0, r0.x, -c24
      max r0, r0, c27.y
      add r1, -r2, r1
      mul r0, r0, r1_abs
      slt r1, r0, c23
      mul r0, r0, c22
      exp r2.x, r0.x
      exp r2.y, r0.y
      exp r2.z, r0.z
      exp r2.w, r0.w
      mul r0, r1, r2
      mad r1, r1, r2, c5.y
      // Accumulate the four FogInScattering entries into r2 / r0.
      mul r2.xyz, r1.x, c10
      mov r2.w, r0.x
      mul r2, r0.y, r2
      mad r2.xyz, r1.y, c11, r2
      mul r2, r0.z, r2
      mad r2.xyz, r1.z, c12, r2
      mul r0, r0.w, r2
      mad o2.xyz, r1.w, c13, r0
      mov o2.w, r0.w
    endif
    // Pass-through outputs: o0 = v3; o1 = (v4.x, v4.y, 0, 0) because
    // c27.zzyy = (1, 1, 0, 0).
    mov o0, v3
    mul o1, c27.zzyy, v4.xyxx
	
// mov o3, r4			// original write to texcoord5 (o3), the texcoord5 input of the pixel shader.
						// Disabled: o3 is written at the end of the shader from the stereo-corrected copy instead.

    mov o4.w, c5.w			// c5.w = 1
    mov o6, r4			// output position (from dcl_position o6); left unmodified - only the copy sent to o3 is shifted
	
mov r0, r4				// work on a copy of r4 in r0 (r0 is not read again by the original code after the fog branch)
// At this point r0 is the output position, correctly
// placed, but without stereo.
 
// To create stereo effects, we need to calculate:
//  Xnew = Xold + Separation * (W - Convergence)
 
// Fetch the Separation (r30.x) and Convergence (r30.y)  
// using the Helix NVapi trick: sample s0 at c201.z (0.0625).
texldl r30, c201.z, s0
 
// (W - Convergence)
add r30.w, r0.w, -r30.y
 
// multiply that times Separation for:
//   Separation * (W - Convergence)
mul r30.z, r30.x, r30.w
 
// Add that to Xold for the complete:
//  Xold + Separation * (W - Convergence)
add r0.x, r0.x, r30.z
		
mov o3, r0				// write the stereo-corrected copy of r4 to texcoord5 (o3); y, z and w are unchanged
    
// approximately 111 instruction slots used
// NOTE(review): this count comes from the original compiler output and
// presumably does not include the instructions added by hand above.
 


Screenshots of the fixed water (at the end of the demo/worm boss) :

TheBall06 50.jpg


TheBall07 50.jpg