//////////////////// TEST.CPP ////////////////
void test(int mine[3][3])                // sets all nine elements of the caller's 3x3 array to 0xFF
{
    for (int i = 0; i < 3; i++)          // i = row index
        for (int j = 0; j < 3; j++)      // j = column index
            mine[i][j] = 0xFF;           // was "mine<i>[j]": the "[i]" subscript had been mangled into "<i>"
}
// Entry point: hands a local 3x3 int array to test() and exits with status 0.
int main()
{
    int grid[3][3];   // left uninitialised here; it is passed straight to test()

    test(grid);
    return 0;
}
//////////////////// TEST.ASM /////////////////
;-----------------------------------------------------------------------
; void test(int mine[3][3])   -- compiler-generated listing, 32-bit x86,
;                                MASM syntax (decorated name below)
; In:    _mine$[ebp] = pointer to the first row of the caller's 3x3 array
; Out:   nothing returned; every element of the array is set to 255 (0FFh)
; Saves: ebx, esi, edi (pushed in the prologue, popped in the epilogue)
; Uses:  eax, ecx, edx as scratch
; Stack: 72-byte frame that holds the locals _i$ and _j$221; the prologue
;        fills all 18 dwords of it with 0CCCCCCCCh before the loops start.
; Note:  the frame offsets _mine$, _i$ and _j$221 are defined elsewhere
;        in the full listing and are not part of this excerpt.
;-----------------------------------------------------------------------
?test@@YAXQAY02H@Z PROC NEAR                    ; test, COMDAT
; File d:\microsoft visual studio\myprojects\dsaff\sadsaf.cpp
; Line 2
        push    ebp
        mov     ebp, esp
        sub     esp, 72                         ; 00000048H
        push    ebx
        push    esi
        push    edi
        lea     edi, DWORD PTR [ebp-72]         ; edi = lowest address of the frame
        mov     ecx, 18                         ; 00000012H
        mov     eax, -858993460                 ; ccccccccH
        rep stosd                               ; store eax into 18 dwords (72 bytes) at [edi]
; Line 3
        mov     DWORD PTR _i$[ebp], 0           ; i = 0
        jmp     SHORT $L218                     ; skip the increment on first entry
$L219:
        mov     eax, DWORD PTR _i$[ebp]         ; i++
        add     eax, 1
        mov     DWORD PTR _i$[ebp], eax
$L218:
        cmp     DWORD PTR _i$[ebp], 3
        jge     SHORT $L220                     ; i >= 3: outer loop finished
; Line 4
        mov     DWORD PTR _j$221[ebp], 0        ; j = 0
        jmp     SHORT $L222                     ; skip the increment on first entry
$L223:
        mov     ecx, DWORD PTR _j$221[ebp]      ; j++
        add     ecx, 1
        mov     DWORD PTR _j$221[ebp], ecx
$L222:
        cmp     DWORD PTR _j$221[ebp], 3
        jge     SHORT $L224                     ; j >= 3: inner loop finished
; Line 5
        mov     edx, DWORD PTR _i$[ebp]
        imul    edx, 12                         ; 0000000cH  (byte offset of row i: 3 ints * 4 bytes)
        mov     eax, DWORD PTR _mine$[ebp]      ; eax = pointer argument (the array is not copied)
        add     eax, edx                        ; eax = address of row i
        mov     ecx, DWORD PTR _j$221[ebp]
        mov     DWORD PTR [eax+ecx*4], 255      ; 000000ffH  (mine[i][j] = 0xFF)
        jmp     SHORT $L223
$L224:
        jmp     SHORT $L219
$L220:
; Line 6
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp                        ; release the 72-byte frame
        pop     ebp
        ret     0                               ; pops no argument bytes; the caller cleans up
?test@@YAXQAY02H@Z ENDP                         ; test
Let's see what the code does...
;Line 2
...
sub esp, 72
...
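72 bytes? What the...? The above code allocates 72 bytes. Not only does it allocate unneeded memory (when it could simply pass a pointer to the array, which is A LOT better), but it allocates double the amount of memory needed. Let's see...
A 3x3 matrix of int takes 36 bytes: 3x3 = 9 elements, and an int is 4 bytes, so 9x4 = 36 bytes. Why does it allocate 72 bytes? Same as you, I have no idea... and that's the first problem, really: memory. Here comes the speed (optimization) part: I have the Win32 Release option on, and have compiled it with Optimize for Speed...
[Editor's note: the listing itself shows that the array is not copied; "mov eax, DWORD PTR _mine$[ebp]" loads a pointer to the caller's array. The 72 bytes are the function's own stack frame, which holds the locals i and j and which the prologue fills with 0xCCCCCCCC (18 dwords = 72 bytes). That fill pattern normally indicates an unoptimized debug build, so the optimization settings are worth double-checking.]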
; Line 3
...
add eax, 1
...
This should've been
inc eax
...
Also, the loop could have used a do/while type of structure in which the counter goes down to zero (zero being the start value in the for loop).
so..
mov ecx, 3
repeat:
... do whatever
dec ecx
jns short repeat
When I tried to use a register for the loop index values, it didn't do anything: it still used memory, and I'm sure it didn't even try to use the registers. C only looks good from the outside, but nobody knows what it does when it gets to the low-level stuff. With the above code, it's not going to need all those comparisons and branches like in:
...
cmp DWORD PTR _j$221[ebp], 3
jge SHORT $L224
...
And on top of that, it will make the code
smaller in size (well, by a couple of bytes) AND take a few cycles less (in 3D engines a single cycle matters).
...
; Line 5
mov edx, DWORD PTR _i$[ebp]
imul edx, 12
...
First of all, they should keep that "i" index value in a register all the time; that's very important!
Second, instead of using IMUL they can shift the value to the left, just like you did in your VGA PutPixel routine...
So:
"i*12" is the same as "(i<<3)+(i<<2)". Also, a lookup table holding all the indirect addressing values could be calculated before the program runs, which would make it faster since you don't have to calculate the offset for each item every time...
See? These are just a few things that a programmer can do that the compiler can't. The code will be much faster and much cleaner. Just my thoughts; any comments are welcome. Take care!
-------------------------------
That's just my 200 bucks' worth!
..-=gLaDiAtOr=-..