Question

I have a number of sample processing primitives like:

function Add8(A, B: Byte): Byte; {$IFDEF CODEINLINING}inline;{$ENDIF}
begin
  Result := A + B;
end;

function Sub16(A, B: Word): Word; {$IFDEF CODEINLINING}inline;{$ENDIF}
begin
  Result := A - B;
end;

{ et cetera }

These functions are workhorses of the data processing and to be invoked for every input sample (millions of them). By design Result type has to be the same size as arguments (operands).

The problem arises then the result of operation exceed the defined range of Low(Result)..High(Result), truncating most significant bits and effectively making result incorrect. For example: Adding low value to the peak value Add8(240, 22) gives eliminates the peak, I'd better have 255. For subtracting two values of near baseline level Sub16(32000, 33000) I'd better have 0.

My questions are: how to have such operations to clamp a result values to the range in the performance-wise manner? Is there a generalized solution for all arithmetics and all base types (8 bit, 16 bit, unsigned, signed)?

Was it helpful?

Solution

Because you deals with large-size data processing, I'd recommend to try some assembler - MMX, SSE2 commands are specially intended for such tasks. For example, paddusb instruction can add 16 pairs of bytes at once with saturation (clamps results in byte range). (Don't forget about proper aligning of data chunks)

Example (not thoroughly tested) for 32-bit compiler. It works 9x faster than pascal-version for treatment of 256M-arrays (604 vs 5100 ms with 10 repeats). Note that for reasonable data size pascal version is pretty fast as well.

program Project1;
{$APPTYPE CONSOLE}
uses SysUtils;

procedure AddBytesSat(const A, B, Res: PByteArray; Len: Integer);
//adds byte arrays Res[i] := A[i] + B[i] with saturation
//arrays should be aligned to 16-byte border, length divisible by 16
//three parameters in eax, edx, ecx registers, fourth on the stack
asm
  push esi
  mov esi, ecx // save Res pointer
  mov ecx, Len
  shr ecx, 4   // Len div 16
@@start:
  movdqa xmm0, [eax]  //copies 16 bytes (aligned) to sse register

  paddusb xmm0, [edx] // adds 16 unsigned values with saturation

  movdqa [esi], xmm0  // move result bytes back to memory
  add eax, 16  // move array pointers
  add edx, 16
  add esi, 16
  loop @@start  //go to next iteration 
  pop esi
end;

var
  A, B, C: PByteArray;
  i: integer;
begin
  //ensure that memory manager returns properly aligned blocks
  SetMinimumBlockAlignment(System.mba16Byte);

  GetMem(A, 32);
  GetMem(B, 32);
  GetMem(C, 32);

  for i := 0 to 31 do begin
    A[i] := 8 * i;
    B[i] := 200;
  end;

  AddBytesSat(A, B, C, 32);

  //clamping demonstration
  for i := 0 to 15 do
    Writeln(C[i]);

  Readln;
end.

OTHER TIPS

If the values to be clamped are just above 255 or below zero, you can use a lookup table. Such lookup table approaches are very fast as long as the tables fit inside L1 cache for commonly occuring values. And the byte_clamp_lut+256 is translated by compiler into single memory reference into the middle of the table. I don't know if Pascal/Delphi allows macros, but in C/C++ that is done like that:

#define byte_clamp(v) ((uint8_t*)(byte_clamp_lut+256))[v]

static uint8_t byte_clamp768[] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
  256,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};

If you'd care for a Generics-based comparer (probably not optimal since it resorts to using TComparer)

unit Zoomicon.Generics.Functors;

type
  TF = class
    class function Clamp<T>(const Value: T; const MinValue, MaxValue: T): T; inline; //EnsureRange
    //...
  end;

implementation
  uses System.Generics.Defaults; //for TComparer

class function TF.Clamp<T>(const Value: T; const MinValue, MaxValue: T): T;
begin
  result := Value;

  var Comparer := TComparer<T>.Default; //TODO: document if throws exception if T is something not comparable and/or for "nil" value or bounds
  if Comparer.Compare(result, MinValue) < 0 then //if (Value < MinValue)
    result := MinValue
  else if Comparer.Compare(result, MaxValue) > 0 then //if (Value > MaxValue)
    result := MaxValue;
end;


//...

end.

How to use:

Value := TF.Clamp(Value, Min, Max);
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top