سؤال

My code initializes 6 structs and 2 unions, and uses the 'dump' function to display the address of each byte allocated for them, along with the value stored in that byte.

The short code:

#include <stdio.h>
// #DEFINE PD = padding

void dump (void *p, int n);

/* (...) */

/*
 * Overlays an int with a 5-byte char array.
 *
 * The dump in the post shows 8 bytes for this union (PD = padding):
 *   | i/c[0] | i/c[1] | i/c[2] | i/c[3] | PD/c[4] | PD | PD | PD |
 */
union U1 {
    int  i;
    char c[5];
};

/*
 * Overlays a short with a 5-byte char array.
 *
 * The dump in the post shows 6 bytes for this union (PD = padding):
 *   | s/c[0] | s/c[1] | PD/c[2] | PD/c[3] | PD/c[4] | PD |
 */
union U2 {
    short s;
    char  c[5];
};



/*
 * Excerpt of the full program: writes each member of two local unions in
 * turn and dumps the union's bytes after every write.
 *
 * u1 and u2 have no initializer; only the member stores below write to
 * them before each dump.
 */
int main (void)
{
    int idx;

    union U1 u1;
    union U2 u2;

    /* (...) */

    /* u1: store through the int member, then through the char array. */
    u1.i = 0x01020304;
    printf("\nu1 (int)\n");
    dump(&u1, sizeof u1);

    for (idx = 0; idx < 5; idx++) {
        u1.c[idx] = 0xcc;
    }
    printf("u1 (char)\n");
    dump(&u1, sizeof u1);

    /* u2: store through the short member, then through the char array. */
    u2.s = 0x0102;
    printf("\nu2 (short)\n");
    dump(&u2, sizeof u2);

    for (idx = 0; idx < 5; idx++) {
        u2.c[idx] = 0xcc;
    }
    printf("u2 (char)\n");
    dump(&u2, sizeof u2);

    return 0;
}

From all the structs and the first union I'm getting the expected number of bytes and values - 00 for all the padding bytes - but from the last union I'm getting this:

u2 (short)
0x7fff825a05a0 - 02
0x7fff825a05a1 - 01
0x7fff825a05a2 - 5A
0x7fff825a05a3 - 82
0x7fff825a05a4 - FF
0x7fff825a05a5 - 7F
u2 (char)
0x7fff825a05a0 - CC
0x7fff825a05a1 - CC
0x7fff825a05a2 - CC
0x7fff825a05a3 - CC
0x7fff825a05a4 - CC
0x7fff825a05a5 - 7F

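In the dump taken after writing the short, the 2 middle bytes (offsets 2 and 3) look random - probably leftover memory, since they change on every run - while the last 2 bytes of that dump, and the last byte of the dump taken after writing the char array, are the same on every run.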

Why am I getting these values? Shouldn't all the padding bytes be 0x00? The first union behaves as expected; does changing the member from int to short make a difference? And why does the union written through its short member contain partly memory garbage and partly fixed values?

I suppose the answer to this is a theoretical one; if so, can you please cite the reference? That would be great.

Excuse me in advance for the possible giant post, my first here. :)

The full code:

#include <stdio.h>
// #DEFINE PD = padding

/*
 * Print a memory region byte by byte, one line per byte, in the form
 * "<address> - <two-digit uppercase hex value>".
 *
 * p: start of the region to inspect; a null pointer prints nothing.
 * n: number of bytes to print; zero or a negative count prints nothing.
 */
void dump (void *p, int n)
{
     unsigned char *p1 = p;

     /* Without this guard a negative n would make the loop below run far
        past the end of the object. */
     if (p1 == NULL || n <= 0)
          return;

     while (n-- > 0)
     {
          /* %p is only defined for a void * argument, so convert explicitly. */
          printf("%p - %02X\n", (void *)p1, *p1);
          p1++;
     }
}

/*
 * A char, an int and another char, in that order.
 *
 * The dump in the post shows 12 bytes for x1 (PD = padding):
 *   | c1 | PD | PD | PD | i | i | i | i | c2 | PD | PD | PD |
 */
struct X1 {
    char c1;
    int  i;
    char c2;
};

struct X1 x1 = {
    .c1 = 0xc1,
    .i  = 0x01020304,
    .c2 = 0xc2,
};

/*
 * An int followed by a single char.
 *
 * The dump in the post shows 8 bytes for x2 (PD = padding):
 *   | i | i | i | i | c | PD | PD | PD |
 */
struct X2 {
    int  i;
    char c;
};

struct X2 x2 = {
    .i = 0x01020304,
    .c = 0xc2,
};

/*
 * An int followed by two chars.
 *
 * The dump in the post shows 8 bytes for x3 (PD = padding):
 *   | i | i | i | i | c1 | c2 | PD | PD |
 */
struct X3 {
    int  i;
    char c1;
    char c2;
};

struct X3 x3 = {
    .i  = 0x01020304,
    .c1 = 0xc1,
    .c2 = 0xc2,
};

/*
 * A nested struct X2 followed by a single char.
 *
 * The dump in the post shows 12 bytes for x4 (PD = padding):
 *   | X2.i | X2.i | X2.i | X2.i | X2.c | PD | PD | PD | c | PD | PD | PD |
 */
struct X4 {
    struct X2 x;
    char      c;
};

struct X4 x4 = {
    .x = { .i = 0x01020304, .c = 0xc1 },
    .c = 0xc2,
};

/*
 * Three consecutive chars.
 *
 * The dump in the post shows 3 bytes for x5, with no padding:
 *   | c1 | c2 | c3 |
 */
struct X5 {
    char c1;
    char c2;
    char c3;
};

struct X5 x5 = {
    .c1 = 0xc1,
    .c2 = 0xc2,
    .c3 = 0xc3,
};

/*
 * A short, an int, a 3-byte char array and another short.
 *
 * The dump in the post shows 16 bytes for x6 (PD = padding):
 *   | s1 | s1 | PD | PD | i | i | i | i | c[0] | c[1] | c[2] | PD | s2 | s2 | PD | PD |
 */
struct X6 {
    short s1;
    int   i;
    char  c[3];
    short s2;
};

struct X6 x6 = {
    .s1 = 0x0102,
    .i  = 0x01020304,
    .c  = { 0xc1, 0xc2, 0xc3 },
    .s2 = 0x0102,
};


/*
 * Overlays an int with a 5-byte char array.
 *
 * The dump in the post shows 8 bytes for this union (PD = padding):
 *   | i/c[0] | i/c[1] | i/c[2] | i/c[3] | PD/c[4] | PD | PD | PD |
 */
union U1 {
    int  i;
    char c[5];
};

/*
 * Overlays a short with a 5-byte char array.
 *
 * The dump in the post shows 6 bytes for this union (PD = padding):
 *   | s/c[0] | s/c[1] | PD/c[2] | PD/c[3] | PD/c[4] | PD |
 */
union U2 {
    short s;
    char  c[5];
};



/*
 * Dumps the raw bytes of the six file-scope structs x1..x6, then of two
 * local unions after writing each of their members in turn.
 *
 * u1 and u2 have no initializer; only the member stores below write to
 * them before each dump.
 */
int main (void)
{
    int idx;

    union U1 u1;
    union U2 u2;

    /* File-scope structs. */
    printf("\nx1: \n");
    dump(&x1, sizeof x1);
    printf("\nx2: \n");
    dump(&x2, sizeof x2);
    printf("\nx3: \n");
    dump(&x3, sizeof x3);
    printf("\nx4: \n");
    dump(&x4, sizeof x4);
    printf("\nx5: \n");
    dump(&x5, sizeof x5);
    printf("\nx6: \n");
    dump(&x6, sizeof x6);

    /* u1: store through the int member, then through the char array. */
    u1.i = 0x01020304;
    printf("\nu1 (int)\n");
    dump(&u1, sizeof u1);

    for (idx = 0; idx < 5; idx++) {
        u1.c[idx] = 0xcc;
    }
    printf("u1 (char)\n");
    dump(&u1, sizeof u1);

    /* u2: store through the short member, then through the char array. */
    u2.s = 0x0102;
    printf("\nu2 (short)\n");
    dump(&u2, sizeof u2);

    for (idx = 0; idx < 5; idx++) {
        u2.c[idx] = 0xcc;
    }
    printf("u2 (char)\n");
    dump(&u2, sizeof u2);

    return 0;
}

The full output:

x1: 
0x601030 - C1
0x601031 - 00
0x601032 - 00
0x601033 - 00
0x601034 - 04
0x601035 - 03
0x601036 - 02
0x601037 - 01
0x601038 - C2
0x601039 - 00
0x60103a - 00
0x60103b - 00

x2: 
0x60103c - 04
0x60103d - 03
0x60103e - 02
0x60103f - 01
0x601040 - C2
0x601041 - 00
0x601042 - 00
0x601043 - 00

x3: 
0x601044 - 04
0x601045 - 03
0x601046 - 02
0x601047 - 01
0x601048 - C1
0x601049 - C2
0x60104a - 00
0x60104b - 00

x4: 
0x60104c - 04
0x60104d - 03
0x60104e - 02
0x60104f - 01
0x601050 - C1
0x601051 - 00
0x601052 - 00
0x601053 - 00
0x601054 - C2
0x601055 - 00
0x601056 - 00
0x601057 - 00

x5: 
0x601058 - C1
0x601059 - C2
0x60105a - C3

x6: 
0x601060 - 02
0x601061 - 01
0x601062 - 00
0x601063 - 00
0x601064 - 04
0x601065 - 03
0x601066 - 02
0x601067 - 01
0x601068 - C1
0x601069 - C2
0x60106a - C3
0x60106b - 00
0x60106c - 02
0x60106d - 01
0x60106e - 00
0x60106f - 00

u1 (int)
0x7fff825a0590 - 04
0x7fff825a0591 - 03
0x7fff825a0592 - 02
0x7fff825a0593 - 01
0x7fff825a0594 - 00
0x7fff825a0595 - 00
0x7fff825a0596 - 00
0x7fff825a0597 - 00
u1 (char)
0x7fff825a0590 - CC
0x7fff825a0591 - CC
0x7fff825a0592 - CC
0x7fff825a0593 - CC
0x7fff825a0594 - CC
0x7fff825a0595 - 00
0x7fff825a0596 - 00
0x7fff825a0597 - 00

u2 (short)
0x7fff825a05a0 - 02
0x7fff825a05a1 - 01
0x7fff825a05a2 - 5A
0x7fff825a05a3 - 82
0x7fff825a05a4 - FF
0x7fff825a05a5 - 7F
u2 (char)
0x7fff825a05a0 - CC
0x7fff825a05a1 - CC
0x7fff825a05a2 - CC
0x7fff825a05a3 - CC
0x7fff825a05a4 - CC
0x7fff825a05a5 - 7F
هل كانت مفيدة؟

المحلول 3

Your unions u1 and u2 are automatic and stored on the stack. Automatic variables are not initialized by default in C, so they start out holding garbage. What you see in the dumps are the old values on the stack, which were there before main() was called. They are semi-random, depending on what the C runtime library does before calling your main().

The structures x1-x6 in your code are different, they are static and static variables get initialized to zeroes by the C-standard.

Variables defined inside functions are automatic (unless declared static), but variables outside of functions can only be static. To make the answer fuller, there is another storage class, the heap or dynamic memory, accessed with malloc()/free(). Only the static variables have a fixed location in memory during the entire program run time; the other locations lose their meaning when the respective function returns, or when the heap block is freed. These are both popular sources of errors in C programs.

نصائح أخرى

From the C99 standard, 6.2.6.1p7:

When a value is stored in an object of structure or union type, including in a member object, the bytes of the object representation that correspond to any padding bytes take unspecified values.

Your unions are on the stack and not global, so there is no expectation whatsoever as to what they contain before you start using them. Officially you should never read a union, or any variable for that matter, that you have not written first.

There really are no valid expectations about the amount or the contents of the padding, if any, in a structure, or in a union, whether or not that union contains structures.

If you had made them global then there would be a somewhat valid expectation for them to start off zeroed, but I personally don't subscribe to that expectation and try to always explicitly set (write) a variable before I read it the first time. Once you start messing with items in the union, though, all bets are off as to what affects what.

EDIT

Your unions are non-static local variables so they are on the stack and as such have no expectation for initialization by the C compiler before you start accessing them.

The only valid use of the items in a union is to read back the one you most recently wrote to.

so

union U1 u1;
union U2 u2;

/* (...) */

     u1.i = 0x01020304;      // initializes int

So the only valid use case here is to read back u1.i. You should not have any expectations at this point for any of the u1.c[] values, and you should not have any expectations for any of the other padding in the whole of the memory the union uses. The only thing the C compiler has to ensure is that if you set u1.i, then when you read it back (assuming you have not modified the union through u1.c in the meantime) you will get back what you wrote.

    for (i=0;i<5;i++)        // initializes char
        u1.c[i] = 0xcc;

Same answer: the only valid read of the union at this point is of the u1.c[] items; reading u1.i has no valid expectations. Likewise, a dump of the whole union may show padding anywhere; the only guarantee is that the array is laid out so that reading back the same items (u1.c[n]) gives what you wrote.

u2.s = 0x0102;           // initializes short

Then accessing u2.c[] makes no sense and there are no valid expectations. When dumping the union, the only expectation is that two consecutive bytes somewhere in the union are 0x01 and 0x02, in the endian order for that architecture.

for (i=0;i<5;i++)        // initializes char
u2.c[i] = 0xcc;

Then accessing u2.s makes no sense and there are no valid expectations. When dumping the union, the only expectation is that somewhere in the union there are five consecutive 0xcc bytes (the loop stores 0xcc into every element of c); you should have no expectations about where in the union they are or about the values of the padding.

That said, I have often been able to use unions improperly, to poke at floating point numbers for example, and it tends to (so far always) work. But I am aware that such code may just stop working some day when it is compiled by a different version of the same compiler, a different compiler, or the same compiler with different compile options. I expect it may fail some day.

مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top