Neighbor index computation for diagonally flattened matrix

Question 1

I actually already had the elements to solve it somewhere else in my code. As BLUEPIXY's solution hinted, I am using scatter/gather operations, which I had already implemented for layout transformation.

This solution basically rebuilds the original (x,y) index of the given element in the matrix, applies the index offset and translates the result back to the transformed layout. It splits the square in 2 triangles and adjust the computation depending on which triangle it belongs to.

It is an almost entirely algebraic transformation: it uses no loop and no table lookup, has a small memory footprint and little branching. The code can probably be optimized further.

Here is the draft of the code:

#include <stdio.h>
#include <math.h>

// size of the matrix
#define SIZE 4

// triangle number of X
#define TRIG(X) (((X) * ((X) + 1)) >> 1)
// triangle root of X
#define TRIROOT(X) ((int)(sqrt(8*(X)+1)-1)>>1);

// return the index of a neighbor given an offset
int getNGonalNeighbor(const size_t index,
                      const int x_offset,
                      const int y_offset){
    // compute largest upper triangle index
    const size_t upper_triangle = TRIG(SIZE);

    // position of the actual element of index
    unsigned int x = 0,y = 0;

    // adjust the index depending of upper/lower triangle.
    const size_t adjusted_index = index < upper_triangle ?
                index :
                SIZE * SIZE - index - 1;

    // compute triangular root
    const size_t triroot = TRIROOT(adjusted_index);
    const size_t trig = TRIG(triroot);
    const size_t offset = adjusted_index - trig;

    // upper triangle
    if(index < upper_triangle){
        x = offset;
        y = triroot-offset;
    }
    // lower triangle
    else {
        x = SIZE - offset - 1;
        y = SIZE - (trig + triroot + 1 - adjusted_index);
    }

    // adjust the offset
    x += x_offset;
    y += y_offset;

    // manhattan distance
    const size_t man_dist = x+y;

    // calculate index using triangular number
    return TRIG(man_dist) +
            (man_dist >= SIZE ? x - (man_dist - SIZE + 1) : x) -
            (man_dist > SIZE ? 2* TRIG(man_dist - SIZE) : 0);
}

int main(){
    printf("%d\n", getNGonalNeighbor(15,-1,-1)); // should return 11
    printf("%d\n", getNGonalNeighbor(15, 0,-1)); // should return 14
    printf("%d\n", getNGonalNeighbor(15,-1, 0)); // should return 13
    printf("%d\n", getNGonalNeighbor(11,-2,-1)); // should return 1
}

And the output is indeed:

If you think this solution looks over complicated and inefficient, I remind you that the target here is GPU, where computation costs virtually nothing compared to memory accesses, and all index computations are computed at the same time using massively parallel architectures.

Question 2

Table lookup

#include <stdio.h>

#define SIZE 16
#define SIDE  4  //sqrt(SIZE)

int table[SIZE];
int rtable[100];// {x,y| x<=99, y<=99 }

void setup(){
    int i, x, y, xy, index;//xy = x + y

    x=y=xy=0;
    for(i=0;i<SIZE;++i){
        table[i]= index= x*10 + y;
        rtable[x*10+y]=i;
        x = x + 1; y = y - 1;//right up
        if(y < 0 || x >= SIDE){
            ++xy;
            x = 0;
            y = xy;;
            while(y>=SIDE){
                ++x;
                --y;
            }
        }
    }
}
int getNGonalNeighbor(int index, int offsetX, int offsetY){
    int x,y;
    x=table[index] / 10 + offsetX;
    y=table[index] % 10 + offsetY;
    if(x < 0 || x >= SIDE || y < 0 || y >= SIDE) return -1; //ERROR
    return rtable[ x*10+y ];
}

int main() {
    int i;
    setup();
    printf("%d\n", getNGonalNeighbor(15,-1,-1));
    printf("%d\n", getNGonalNeighbor(15, 0,-1));
    printf("%d\n", getNGonalNeighbor(15,-1, 0));
    printf("%d\n", getNGonalNeighbor(11,-2,-1));
    printf("%d\n", getNGonalNeighbor(0, -1,-1));

    return 0;
}

don't use table version.

#include <stdio.h>

#define SIZE 16
#define SIDE  4

void num2xy(int index, int *offsetX, int *offsetY){
    int i, x, y, xy;//xy = x + y

    x=y=xy=0;
    for(i=0;i<SIZE;++i){
        if(i == index){
            *offsetX = x;
            *offsetY = y;
            return;
        }
        x = x + 1; y = y - 1;//right up
        if(y < 0 || x >= SIDE){
            ++xy;
            x = 0;
            y = xy;;
            while(y>=SIDE){
                ++x;
                --y;
            }
        }
    }
}
int xy2num(int offsetX, int offsetY){
    int i, x, y, xy, index;//xy = x + y

    x=y=xy=0;
    for(i=0;i<SIZE;++i){
        if(offsetX == x && offsetY == y) return i;
        x = x + 1; y = y - 1;//right up
        if(y < 0 || x >= SIDE){
            ++xy;
            x = 0;
            y = xy;;
            while(y>=SIDE){
                ++x;
                --y;
            }
        }
    }
    return -1;
}
int getNGonalNeighbor(int index, int offsetX, int offsetY){
    int x,y;

    num2xy(index, &x, &y);

    return xy2num(x + offsetX, y + offsetY);
}

int main() {
    printf("%d\n", getNGonalNeighbor(15,-1,-1));
    printf("%d\n", getNGonalNeighbor(15, 0,-1));
    printf("%d\n", getNGonalNeighbor(15,-1, 0));
    printf("%d\n", getNGonalNeighbor(11,-2,-1));
    printf("%d\n", getNGonalNeighbor(0, -1,-1));

    return 0;
}