The different addressing modes of CUDA textures

Question 1

As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.

The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.

To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k], with k=0,...,M-1.

cudaAddressModeClamp

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

cudaAddressModeBorder

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M.

Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

cudaAddressModeWrap

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

cudaAddressModeMirror

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2. In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.

The following code illustrates all the four available address modes

#include <stdio.h>

texture<float, 1, cudaReadModeElementType> texture_clamp;
texture<float, 1, cudaReadModeElementType> texture_border;
texture<float, 1, cudaReadModeElementType> texture_wrap;
texture<float, 1, cudaReadModeElementType> texture_mirror;

/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
    if (code != cudaSuccess) 
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

/******************************/
/* CUDA ADDRESS MODE CLAMPING */
/******************************/
__global__ void Test_texture_clamping(const int M) {

    printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x));
    printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x)));

}

/****************************/
/* CUDA ADDRESS MODE BORDER */
/****************************/
__global__ void Test_texture_border(const int M) {

    printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x));
    printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x)));

}

/**************************/
/* CUDA ADDRESS MODE WRAP */
/**************************/
__global__ void Test_texture_wrap(const int M) {

    printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M));
    printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M));

}

/****************************/
/* CUDA ADDRESS MODE MIRROR */
/****************************/
__global__ void Test_texture_mirror(const int M) {

    printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M));
    printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M));

}

/********/
/* MAIN */
/********/
void main(){

    const int M = 4;

    // --- Host side memory allocation and initialization
    float *h_data = (float*)malloc(M * sizeof(float));

    for (int i=0; i<M; i++) h_data[i] = (float)i;

    // --- Texture clamping
    cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_clamp, d_data_clamping); 
    texture_clamp.normalized = false; 
    texture_clamp.addressMode[0] = cudaAddressModeClamp;

    dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1);
    Test_texture_clamping<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");

    // --- Texture border
    cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_border, d_data_border); 
    texture_border.normalized = false; 
    texture_border.addressMode[0] = cudaAddressModeBorder;

    Test_texture_border<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");

    // --- Texture wrap
    cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_wrap, d_data_wrap); 
    texture_wrap.normalized = true; 
    texture_wrap.addressMode[0] = cudaAddressModeWrap;

    Test_texture_wrap<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");

    // --- Texture mirror
    cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_mirror, d_data_mirror); 
    texture_mirror.normalized = true ; 
    texture_mirror.addressMode[0] = cudaAddressModeMirror;

    Test_texture_mirror<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");
}

Those are the outputs

index                  -7  -6  -5  -4  -3  -2  -1  0  1  2  3  4  5  6  7  8  9  10  11
clamp                   0   0   0   0   0   0   0  0  1  2  3  3  3  3  3  3  3   3   3
border                  0   0   0   0   0   0   0  0  1  2  3  0  0  0  0  0  0   0   0
wrap                    1   2   3   0   1   2   3  0  1  2  3  0  1  2  3  0  1   2   3
mirror                  1   2   3   3   2   1   0  0  1  2  3  3  2  1  0  0  1   2   3

Question 2

As of now (CUDA 5.5), the CUDA texture fetch behavior is not customizable. Only 1 of the 4 automatic built-in modes (i.e. Border, Clamp, Wrap and Mirror) can be utilized for out of range texture fetch.