Efficiently accessing the individual bytes in a long in C (on an 8-bit platform)

StackOverflow https://stackoverflow.com/questions/17227285

  •  01-06-2022
  •  | 
  •  

Question

I have an application where I am receiving a binary serial data stream, and I need to split this data stream up into separate variables of various lengths (uint16_t and uint32_t).

Right now, I'm doing the ultra-simple:

/* Index into temp[] of the first (least-significant) byte of the iTOW field. */
#define OFFSET_iTOW 0

/* Buffer holding the received bytes; volatile so accesses are not optimized away. */
volatile uint8_t    temp[128];
/* Destination for the assembled 32-bit value; volatile, so every statement
 * that touches it performs a real load and/or store of all four bytes. */
volatile uint32_t   recBytes;

/* Repeatedly assembles a 32-bit value from four consecutive bytes of temp[]. */
void main()
{
    while (1)
    {
        /* Start with the byte at offset +3, then shift left by 8 and OR in
         * the next lower byte, ending with offset +0 in the low 8 bits
         * (i.e. the input is treated as little-endian). Each of these seven
         * statements reads and/or writes the volatile recBytes in full. */
        recBytes    = temp[OFFSET_iTOW+3];
        recBytes    <<= 8;
        recBytes    |= temp[OFFSET_iTOW+2];
        recBytes    <<= 8;
        recBytes    |= temp[OFFSET_iTOW+1];
        recBytes    <<= 8;
        recBytes    |= temp[OFFSET_iTOW+0];

    }
}

(Data is sent little-endian. The OFFSET_iTOW is one of about three dozen offsets (defined in a header file normally) for the various sections of a data packet)

However, this results in a rather enormous assembly output (slightly truncated to section of interest):

void main()
{

    recBytes = 0;
 12e:   10 92 04 02     sts 0x0204, r1
 132:   10 92 05 02     sts 0x0205, r1
 136:   10 92 06 02     sts 0x0206, r1
 13a:   10 92 07 02     sts 0x0207, r1
    while (1)
    {



        recBytes    = temp[OFFSET_iTOW+3];
 13e:   80 91 03 02     lds r24, 0x0203
 142:   90 e0           ldi r25, 0x00   ; 0
 144:   a0 e0           ldi r26, 0x00   ; 0
 146:   b0 e0           ldi r27, 0x00   ; 0
 148:   80 93 04 02     sts 0x0204, r24
 14c:   90 93 05 02     sts 0x0205, r25
 150:   a0 93 06 02     sts 0x0206, r26
 154:   b0 93 07 02     sts 0x0207, r27
        recBytes    <<= 8;
 158:   80 91 04 02     lds r24, 0x0204
 15c:   90 91 05 02     lds r25, 0x0205
 160:   a0 91 06 02     lds r26, 0x0206
 164:   b0 91 07 02     lds r27, 0x0207
 168:   ba 2f           mov r27, r26
 16a:   a9 2f           mov r26, r25
 16c:   98 2f           mov r25, r24
 16e:   88 27           eor r24, r24
 170:   80 93 04 02     sts 0x0204, r24
 174:   90 93 05 02     sts 0x0205, r25
 178:   a0 93 06 02     sts 0x0206, r26
 17c:   b0 93 07 02     sts 0x0207, r27
        recBytes    |= temp[OFFSET_iTOW+2];
 180:   20 91 04 02     lds r18, 0x0204
 184:   30 91 05 02     lds r19, 0x0205
 188:   40 91 06 02     lds r20, 0x0206
 18c:   50 91 07 02     lds r21, 0x0207
 190:   80 91 02 02     lds r24, 0x0202
 194:   90 e0           ldi r25, 0x00   ; 0
 196:   a0 e0           ldi r26, 0x00   ; 0
 198:   b0 e0           ldi r27, 0x00   ; 0
 19a:   82 2b           or  r24, r18
 19c:   93 2b           or  r25, r19
 19e:   a4 2b           or  r26, r20
 1a0:   b5 2b           or  r27, r21
 1a2:   80 93 04 02     sts 0x0204, r24
 1a6:   90 93 05 02     sts 0x0205, r25
 1aa:   a0 93 06 02     sts 0x0206, r26
 1ae:   b0 93 07 02     sts 0x0207, r27
        recBytes    <<= 8;
 1b2:   80 91 04 02     lds r24, 0x0204
 1b6:   90 91 05 02     lds r25, 0x0205
 1ba:   a0 91 06 02     lds r26, 0x0206
 1be:   b0 91 07 02     lds r27, 0x0207
 1c2:   ba 2f           mov r27, r26
 1c4:   a9 2f           mov r26, r25
 1c6:   98 2f           mov r25, r24
 1c8:   88 27           eor r24, r24
 1ca:   80 93 04 02     sts 0x0204, r24
 1ce:   90 93 05 02     sts 0x0205, r25
 1d2:   a0 93 06 02     sts 0x0206, r26
 1d6:   b0 93 07 02     sts 0x0207, r27
        recBytes    |= temp[OFFSET_iTOW+1];
 1da:   20 91 04 02     lds r18, 0x0204
 1de:   30 91 05 02     lds r19, 0x0205
 1e2:   40 91 06 02     lds r20, 0x0206
 1e6:   50 91 07 02     lds r21, 0x0207
 1ea:   80 91 01 02     lds r24, 0x0201
 1ee:   90 e0           ldi r25, 0x00   ; 0
 1f0:   a0 e0           ldi r26, 0x00   ; 0
 1f2:   b0 e0           ldi r27, 0x00   ; 0
 1f4:   82 2b           or  r24, r18
 1f6:   93 2b           or  r25, r19
 1f8:   a4 2b           or  r26, r20
 1fa:   b5 2b           or  r27, r21
 1fc:   80 93 04 02     sts 0x0204, r24
 200:   90 93 05 02     sts 0x0205, r25
 204:   a0 93 06 02     sts 0x0206, r26
 208:   b0 93 07 02     sts 0x0207, r27
        recBytes    <<= 8;
 20c:   80 91 04 02     lds r24, 0x0204
 210:   90 91 05 02     lds r25, 0x0205
 214:   a0 91 06 02     lds r26, 0x0206
 218:   b0 91 07 02     lds r27, 0x0207
 21c:   ba 2f           mov r27, r26
 21e:   a9 2f           mov r26, r25
 220:   98 2f           mov r25, r24
 222:   88 27           eor r24, r24
 224:   80 93 04 02     sts 0x0204, r24
 228:   90 93 05 02     sts 0x0205, r25
 22c:   a0 93 06 02     sts 0x0206, r26
 230:   b0 93 07 02     sts 0x0207, r27
        recBytes    |= temp[OFFSET_iTOW+0];
 234:   20 91 04 02     lds r18, 0x0204
 238:   30 91 05 02     lds r19, 0x0205
 23c:   40 91 06 02     lds r20, 0x0206
 240:   50 91 07 02     lds r21, 0x0207
 244:   80 91 00 02     lds r24, 0x0200
 248:   90 e0           ldi r25, 0x00   ; 0
 24a:   a0 e0           ldi r26, 0x00   ; 0
 24c:   b0 e0           ldi r27, 0x00   ; 0
 24e:   82 2b           or  r24, r18
 250:   93 2b           or  r25, r19
 252:   a4 2b           or  r26, r20
 254:   b5 2b           or  r27, r21
 256:   80 93 04 02     sts 0x0204, r24
 25a:   90 93 05 02     sts 0x0205, r25
 25e:   a0 93 06 02     sts 0x0206, r26
 262:   b0 93 07 02     sts 0x0207, r27
 266:   6b cf           rjmp    .-298       ; 0x13e <loop+0x10>

This is part of a GPS data parser in the interrupt-service routine living on an 8-bit microprocessor running at 16 MHz, and I need to do a lot of these conversions, so the result above is a bit excessive.

Since this is in an ISR, I can be confident that the various data will not change during the interrupt. Basically, I'd like to be able to address the individual bytes in the long. Since this is an 8-bit architecture, it seems like the compiler should be able to optimize down to just a few operations (maybe 3-4 per line of C, as the bytes in the long are directly addressable from an assembly perspective).

The variables are declared volatile so they're not optimized away to a loop that does nothing. In the actual application, they're externed structs that are written to from the ISR, but read from the idle loop (with the appropriate ISR guarding to prevent reads being interrupted). I'm not sure how to produce a compact snippet that demonstrates that exact behavior, though.

Était-ce utile?

La solution

If you use a union, you can get to the individual bytes of the long.

/* Overlays a 32-bit value with its four constituent bytes so that each byte
 * can be read or written individually. Which index of bytes[] corresponds to
 * the least-significant byte of value depends on the target's byte order. */
union Data
{
   uint8_t  bytes[4];  /* byte-wise view of the same storage */
   uint32_t value;     /* the same four bytes viewed as one 32-bit integer */
} recBytes;

then

/* Copy the four field bytes into the union in wire order. The incoming data
 * is little-endian and the target stores the least-significant byte of a
 * uint32_t at the lowest address, so bytes[0] receives offset +0 and
 * bytes[3] receives offset +3. (On a big-endian target the indices on one
 * side would have to be reversed.) */
recBytes.bytes[0] = temp[OFFSET_iTOW+0];
recBytes.bytes[1] = temp[OFFSET_iTOW+1];
recBytes.bytes[2] = temp[OFFSET_iTOW+2];
recBytes.bytes[3] = temp[OFFSET_iTOW+3];

Then recBytes.value will be what you want (though I'm not 100% sure about the byte ordering; you may have to reverse it).

Autres conseils

Your compiler is generating a 32-bit store for every single input byte and shift instruction - it has to, since recBytes is volatile. Use a temporary to build the 32-bit value and then store it into the volatile, or just do it all in one assignment:

/* Index into temp[] of the first (least-significant) byte of the iTOW field. */
#define OFFSET_iTOW 0

/* Buffer holding the received bytes. */
volatile uint8_t    temp[128];
/* Destination for the assembled 32-bit value. */
volatile uint32_t   recBytes;

/* Repeatedly assembles a 32-bit value from four consecutive bytes of temp[]
 * using a single assignment, so the volatile recBytes is written only once
 * per iteration instead of once per shift/OR step. */
void main()
{
    while (1)
    {
        /* Each byte is cast to uint32_t before shifting so the shift is done
         * at 32-bit width (NOTE(review): int is presumably narrower than 32
         * bits on this target - confirm). Offset +3 becomes the top byte and
         * offset +0 the bottom byte, i.e. little-endian input. */
        recBytes    = (uint32_t)temp[OFFSET_iTOW+3] << 24 |
                      (uint32_t)temp[OFFSET_iTOW+2] << 16 |
                      (uint32_t)temp[OFFSET_iTOW+1] << 8  |
                      (uint32_t)temp[OFFSET_iTOW+0];
    }
}

If your temp buffer can be filled in the same endian order as your processor, you can form a union of the 128-byte temp buffer and an array of 128/4 32-bit values. No movement needed.

/* Size of the receive buffer in bytes. */
#define N (128) 
/* Overlays the raw receive buffer with an array of 32-bit words occupying the
 * same storage, so a 4-byte field can be read as one uint32_t without any
 * copying. This only yields the intended value when the bytes in temp[] are
 * already in the processor's own byte order. */
union Data {
  uint8_t  temp[N];                         /* byte-wise view of the buffer */
  uint32_t value_u32[N/sizeof(uint32_t)];   /* same storage as 32-bit words */
  } recBytes;

/* Read the 32-bit word that starts at byte offset OFFSET_iTOW. The member is
 * named value_u32 (not value). The integer division truncates, so the offset
 * must be a multiple of 4 for this to address the intended field. */
recBytes.value_u32[OFFSET_iTOW/4];

[edit expanded to meet additional OP's concerns]

/* Field-by-field layout of one received packet. The member sizes add up to
 * 52 bytes; the order and widths of the members define how the raw bytes are
 * interpreted, so they must not be rearranged. */
typedef struct {
      uint32_t field1;
      int32_t field2; 
      int16_t field3;  // This and next 2 pack nicely into 4 bytes
      uint8_t field4;
      uint8_t field5;
      int32_t field6;
      int32_t field7;
      int32_t field8;
      uint32_t field9;
      int32_t field10;
      int32_t field11;
      int32_t field12;
      uint32_t field13;
      uint16_t field14;  // This and next 2 pack nicely into 4 bytes
      uint8_t field15;
      uint8_t field16;
      uint32_t field17;
} packet_t;

/* Overlays the raw 128-byte receive buffer with the structured packet view,
 * so bytes written into temp[] can be read back through Packet's fields. */
union Data {
    uint8_t  temp[128];  /* raw bytes as received */
   packet_t Packet;      /* same storage interpreted as a packet */
} recBytes;

The union could consist of all the fields of the packet structure. After checksum verification, simply copy the structure, rather than field by field.

Working_var = recBytes.Packet;  // or memcpy(&Working_var, &recBytes.Packet, sizeof(Working_var));

Note: your supplied packet defines 52 bytes.

Licencié sous: CC-BY-SA avec attribution
Non affilié à StackOverflow
scroll top