Pregunta

Modulus of short integers not correct. This is really weird, and already cost me two days. I've narrowed the problematic code as follows (simplified as far as possible):

#include <stdio.h>
#include <stdlib.h>

int foo(short Width, short Height, short MSize) 
{
    short i = 0, k = 0, pos = 0;
    short j = 0;

    for(j = 1; j < Width - 1; j = j + 1)
    {/* a blank loop */}

    for(i = 1; i < Height - 1; i = i + 1) {
        for(j = 1; j < Width - 1; j = j + 1) {
            if((j % MSize) == 0) {
                k = k + 1;
            }
            printf("i=%d, k=%d, j=%d, MSize=%d, j mod MSize=%d\n", (int)i, (int)k, (int)j, (int)MSize, (int)(j % MSize));
            if (pos >= 1024) {
                fprintf(stderr, "pos = %d, over 1024\n", (int)pos);
            }
            pos = pos + 1;
        }
    }
    return 0;
}

int main(int argc, char* argv[])
{
    foo(32, 32, 8);
    return 0;
}

When compiled in Debug mode, the above codes work fine, the result of j%MSize is correct, however, when compiled in Release mode, the result of j%MSize will always be 7, which is nonsense (tested under Visual Studio 2005/2012/2013). There is no memory operation, so it should not be caused by stack corruption. Anybody has a clue?

The output I'm seeing is (slightly edited):

j=10, MSize=8, j mod MSize=7
j=11, MSize=8, j mod MSize=7
j=12, MSize=8, j mod MSize=7
j=13, MSize=8, j mod MSize=7
j=14, MSize=8, j mod MSize=7
j=15, MSize=8, j mod MSize=7
j=16, MSize=8, j mod MSize=7
j=17, MSize=8, j mod MSize=7
j=18, MSize=8, j mod MSize=7
j=19, MSize=8, j mod MSize=7
j=20, MSize=8, j mod MSize=7
j=21, MSize=8, j mod MSize=7
j=22, MSize=8, j mod MSize=7
j=23, MSize=8, j mod MSize=7
j=24, MSize=8, j mod MSize=7
j=25, MSize=8, j mod MSize=7
j=26, MSize=8, j mod MSize=7
j=27, MSize=8, j mod MSize=7

The following is the build log:

 1>Project "E:\Code\workspace\C\GeneralC\SNDFeatureExtract\SNDFeatureExtract.vcxproj" on node 2 (Build target(s)).

 1>ClCompile:

     D:\Program Files\Microsoft Visual Studio 11.0\VC\bin\CL.exe /c /Zi /nologo /W3 /WX- /sdl /O2 /Oi /Oy- /GL /D WIN32 /D NDEBUG /D _CONSOLE /D _MBCS /Gm- /EHsc /MT /GS /Gy /fp:precise /Zc:wchar_t /Zc:forScope /Fo"Release\\" /Fd"Release\vc110.pdb" /Gd /TP /analyze- /errorReport:prompt WeirdBug.cpp

     WeirdBug.cpp

   Link:

     D:\Program Files\Microsoft Visual Studio 11.0\VC\bin\link.exe /ERRORREPORT:PROMPT /OUT:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.exe" /INCREMENTAL:NO /NOLOGO kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /MANIFEST /MANIFESTUAC:"level='asInvoker' uiAccess='false'" /manifest:embed /DEBUG /PDB:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.pdb" /SUBSYSTEM:CONSOLE /OPT:REF /OPT:ICF /LTCG /TLBID:1 /DYNAMICBASE /NXCOMPAT /IMPLIB:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.lib" /MACHINE:X86 /SAFESEH Release\WeirdBug.obj

     Generating code

     Finished generating code

     SNDFeatureExtract.vcxproj -> E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.exe

 1>Done Building Project "E:\Code\workspace\C\GeneralC\SNDFeatureExtract\SNDFeatureExtract.vcxproj" (Build target(s)).

The following is the Disassembly result from VS:

    short i = 0, k = 0, pos = 0;
    short j = 0;

    for(j = 1; j < Width - 1; j = j + 1)
00801014  mov         edi,1FF983C8h  
00801019  jl          foo+12h (0801012h)  
    {/* a blank loop */}

    for(i = 1; i < Height - 1; i = i + 1) {
0080101B  mov         edx,1  
00801020  mov         dword ptr [ebp-4],1  
00801027  mov         dword ptr [ebp-8],edx  
0080102A  and         ecx,80000007h  
00801030  jns         foo+37h (0801037h)  
00801032  dec         ecx  
00801033  or          ecx,0FFFFFFF8h  
00801036  inc         ecx  
00801037  mov         dword ptr [ebp-0Ch],ecx  
0080103A  lea         ebx,[ebx]  
00801040  mov         eax,1  
        for(j = 1; j < Width - 1; j = j + 1) {
00801045  mov         ebx,eax  
            if((j % MSize) == 0) {
00801047  test        ecx,ecx  
00801049  jne         foo+4Ch (080104Ch)  
                k = k + 1;
0080104B  inc         edi  
            }
            printf_s("i=%d, k=%d, j=%d, MSize=%d, j mod MSize=%d\n", (int)i, (int)k, (int)j, (int)MSize, (int)(j % MSize));
0080104C  push        ecx  
0080104D  push        8  
0080104F  push        eax  
00801050  movsx       eax,di  
00801053  push        eax  
00801054  push        edx  
00801055  push        80CD30h  
0080105A  call        printf_s (0801266h)  
            if (pos >= 1024) {
0080105F  mov         eax,400h  
00801064  add         esp,18h  
00801067  cmp         si,ax  
0080106A  jl          foo+86h (0801086h)  
                fprintf_s(stderr, "pos = %d, over 1024\n", (int)pos);
0080106C  movsx       eax,si  
                fprintf_s(stderr, "pos = %d, over 1024\n", (int)pos);
0080106F  push        eax  
00801070  push        80CD5Ch  
00801075  call        __iob_func (0801175h)  
0080107A  add         eax,40h  
0080107D  push        eax  
0080107E  call        fprintf_s (080127Ch)  
00801083  add         esp,0Ch  
        for(j = 1; j < Width - 1; j = j + 1) {
00801086  mov         ecx,dword ptr [ebp-0Ch]  
00801089  mov         edx,dword ptr [ebp-8]  
            }
            pos = pos + 1;
0080108C  inc         ebx  
0080108D  movsx       eax,bx  
00801090  inc         esi  
00801091  cmp         eax,1Fh  
00801094  jl          foo+47h (0801047h)  
    {/* a blank loop */}

    for(i = 1; i < Height - 1; i = i + 1) {
00801096  mov         eax,dword ptr [ebp-4]  
00801099  inc         eax  
0080109A  movsx       edx,ax  
0080109D  mov         dword ptr [ebp-4],eax  
008010A0  mov         dword ptr [ebp-8],edx  
008010A3  cmp         edx,1Fh  
008010A6  jl          foo+40h (0801040h)  
        }
    }
    return 0;
008010A8  pop         edi  
008010A9  pop         esi  
008010AA  xor         eax,eax  
008010AC  pop         ebx  
}
008010AD  mov         esp,ebp  
008010AF  pop         ebp  
008010B0  ret  
¿Fue útil?

Solución

it is because the compiler's optimization, and this is something todo with your blank loop. But I'm not quite sure where the problem is.

To simply solve the question, declare j as:

  volatile short j;

and it will works fine. Cause program will fetch j from memory instead of registers every time.

I debugged the assembly code, and find out the program calculate the j % MSize and stored it into memory just after the blank loop, and every time before doing printf, it just fetch the value from memory instead of re-calculating it.

mov         ecx,dword ptr [ebp-10h] // j % MSize    @ memory
push        ecx  // j % MSize
mov         ecx,dword ptr [ebp-0Ch]  
push        8  // MSize
push        eax  // j
movsx       eax,word ptr [IdxY]  
movsx       esi,di  
push        esi  // k
push        eax  // IdxY
push        ecx  // i
// push static string and calling printf

But adding a volatile, it will be act like:

mov         dx,word ptr [j]  
movsx       eax,dx  // j
and         eax,80000007h  // j % 8
push        eax 
// push other vars and calling printf

That's re-calculating the MOD, and push it into stack for printf. So it's likely a compiler's bug, because it should fetch j from memory even if there is no volatile add.

Since I can't add comments again now :(.. I found out it's the /Oxxx and /GL flag's fault. It will choose one from below:

/O1 /O2 /Ox

It has to choose one of the above options along with /GL to see the issue.

My IDE is Visual Studio 2010 10.0.40219.1 SP1Rel

Otros consejos

I don't see any issue

$ gcc modulus.c 
$ ./a.out 
Width = 32, Height = 32, MSize = 8, Dim =16, sizeof(short)=2
i=1, IdxY=0, k=0, j=1, MSize=8, j mod MSize=1
i=1, IdxY=0, k=0, j=2, MSize=8, j mod MSize=2
i=1, IdxY=0, k=0, j=3, MSize=8, j mod MSize=3
i=1, IdxY=0, k=0, j=4, MSize=8, j mod MSize=4
i=1, IdxY=0, k=0, j=5, MSize=8, j mod MSize=5
i=1, IdxY=0, k=0, j=6, MSize=8, j mod MSize=6
i=1, IdxY=0, k=0, j=7, MSize=8, j mod MSize=7
i=1, IdxY=0, k=1, j=8, MSize=8, j mod MSize=0

Am i missing something?

In addition to the other answers already provided I want to point out that you can prevent such erros with better scoping (although it's not your fault in this case, it's probably a compiler bug).

Prefer to declare the iteration variables inside the loop-scope. Or even more general: Declare variables only in the scope where they are used.

If you change the second for-loop to this:

for(short j = 1; j < Width - 1; j = j + 1) {

so that j is declared in the scope of the for-loop, the compiler must treat j as a new variable which has nothing to do with the previous empty loop. Therefore it is less likely to over-optimize by reusing previous memory locations. This small change fixes the bug in VS2013 and I consider it much cleaner than using volatile.

Licenciado bajo: CC-BY-SA con atribución
No afiliado a StackOverflow
scroll top