CUDA - Texture


// include system
#include <stdio.h>

// include CUDA
#include <cuda_runtime.h>
#include <helper_cuda.h>

// Number of threads per block used for the Blending_Texture launch in Blen_Device.
#define BLOCK_DIM 512

// File-scope 1D texture references over unsigned char elements, read back
// unmodified (cudaReadModeElementType). Blen_Device binds rT1 to the device
// copy of the first image and rT2 to the device copy of the second image;
// Blending_Texture reads both through tex1Dfetch.
// NOTE(review): texture references are the legacy texture API (deprecated in
// CUDA 11, removed in CUDA 12, as far as I know) - confirm the target toolkit;
// newer toolkits need cudaTextureObject_t instead.
texture<unsigned char, 1 , cudaReadModeElementType>rT1;
texture<unsigned char, 1 , cudaReadModeElementType>rT2;
// End of texture declarations.

/*
 * Blending_Texture - average two images byte by byte, reading both inputs
 * through the 1D texture references rT1 and rT2.
 *
 * Launch layout: only the x dimension of the grid and block is used; each
 * thread handles the single element at
 *     index = blockIdx.x * blockDim.x + threadIdx.x
 * and threads with index >= size do nothing, so the grid may overshoot size.
 *
 * aRS  : device buffer that receives the blended bytes (written at [0, size)).
 * size : number of elements to blend.
 *
 * Precondition: rT1 and rT2 must be bound to device buffers before the launch
 * (Blen_Device does this with cudaBindTexture).
 */
__global__ void Blending_Texture(unsigned char *aRS, int size){
    int index = blockIdx.x * blockDim.x + threadIdx.x;

    if(index < size){
        // Single-precision literals keep the blend out of double arithmetic.
        // Halves of 8-bit values are exactly representable in float, so the
        // result is the same as with double weights.
        float blended = 0.5f * tex1Dfetch(rT1, index) + 0.5f * tex1Dfetch(rT2, index);

        // Conversion to unsigned char drops the fractional part (x.5 -> x).
        aRS[index] = (unsigned char)blended;
    }
}

// Prints a diagnostic for a failed CUDA runtime call; returns true when status is cudaSuccess.
static bool Blen_CudaOk(cudaError_t status, const char *what){
    if(status != cudaSuccess){
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        return false;
    }
    return true;
}

/*
 * Blen_Device - blend two host images on the GPU using texture fetches.
 *
 * Copies aImg1 and aImg2 to device memory, binds them to the texture
 * references rT1 and rT2, launches Blending_Texture over
 * width * height * chanel elements and copies the result back into aRS.
 *
 * aImg1, aImg2 : host input buffers of width * height * chanel bytes.
 * aRS          : host output buffer of the same size.
 * width, height, chanel : image dimensions; their product is the element count.
 *
 * On any CUDA failure a message is printed to stderr, the remaining steps are
 * skipped and every resource acquired so far is released; aRS may then not
 * have been written. A non-positive element count returns immediately.
 */
void Blen_Device(unsigned char* aImg1, unsigned char* aImg2, unsigned char* aRS, int width, int height, int chanel){
    int size = width * height * chanel;
    if(size <= 0){
        // Nothing to blend, and a launch with zero blocks is not a valid configuration.
        return;
    }
    size_t data_size = (size_t)size * sizeof(unsigned char);

    unsigned char *dev_A = NULL, *dev_B = NULL, *dev_C = NULL;
    bool boundT1 = false;
    bool boundT2 = false;

    // part1, allocate data on device
    bool ok = Blen_CudaOk(cudaMalloc((void**)&dev_A, data_size), "cudaMalloc(dev_A)")
           && Blen_CudaOk(cudaMalloc((void**)&dev_B, data_size), "cudaMalloc(dev_B)")
           && Blen_CudaOk(cudaMalloc((void**)&dev_C, data_size), "cudaMalloc(dev_C)");

    // part2, copy data from cpu to device
    ok = ok
      && Blen_CudaOk(cudaMemcpy(dev_A, aImg1, data_size, cudaMemcpyHostToDevice), "cudaMemcpy(aImg1 -> dev_A)")
      && Blen_CudaOk(cudaMemcpy(dev_B, aImg2, data_size, cudaMemcpyHostToDevice), "cudaMemcpy(aImg2 -> dev_B)");

    // part2a, bind textures; the explicit size limits each texture to its allocation
    if(ok){
        boundT1 = Blen_CudaOk(cudaBindTexture(0, rT1, dev_A, data_size), "cudaBindTexture(rT1)");
        ok = boundT1;
    }
    if(ok){
        boundT2 = Blen_CudaOk(cudaBindTexture(0, rT2, dev_B, data_size), "cudaBindTexture(rT2)");
        ok = boundT2;
    }

    // part3, run kernel
    if(ok){
        // Integer ceiling division: a float-based ceil() loses precision once
        // size exceeds 2^24 and could leave the last elements without a thread.
        int blocks = (size - 1) / BLOCK_DIM + 1;
        Blending_Texture<<< blocks, BLOCK_DIM >>>(dev_C, size);
        // Launch errors are not returned by the launch itself.
        ok = Blen_CudaOk(cudaGetLastError(), "Blending_Texture launch");
    }

    // part4, copy data from device (blocking copy, so kernel execution errors surface here)
    if(ok){
        ok = Blen_CudaOk(cudaMemcpy(aRS, dev_C, data_size, cudaMemcpyDeviceToHost), "cudaMemcpy(dev_C -> aRS)");
    }

    // part5, release data; runs on both the success and the failure path
    if(boundT1){
        cudaUnbindTexture(rT1);
    }
    if(boundT2){
        cudaUnbindTexture(rT2);
    }
    // cudaFree on a null pointer performs no operation, so unallocated buffers are safe here.
    cudaFree(dev_A);
    cudaFree(dev_B);
    cudaFree(dev_C);
}

// Host
/*
 * Blen_Host - CPU reference for the image blend.
 *
 * Writes the truncated average of aImg1[i] and aImg2[i] into aRS[i] for every
 * one of the width * height * chanel bytes. The 0.5 / 0.5 weights are the same
 * ones the device kernel Blending_Texture uses, so the two outputs can be
 * compared byte for byte.
 *
 * aImg1, aImg2 : input buffers of width * height * chanel bytes.
 * aRS          : output buffer of the same size.
 */
void Blen_Host( unsigned char* aImg1, unsigned char* aImg2, unsigned char* aRS, int width, int height, int chanel){
    const int size = width * height * chanel;   // hoisted out of the loop condition
    for(int i = 0; i < size; i++){
        // The cast drops the fractional part (x.5 -> x).
        aRS[i] = (unsigned char)(0.5 * aImg1[i] + 0.5 * aImg2[i]);
    }
}

// correctResult function
/*
 * correctResult - compare two byte buffers element by element.
 *
 * img1, img2 : buffers to compare.
 * size       : number of bytes to compare in each buffer.
 *
 * Returns true when the first `size` bytes are identical (including size <= 0).
 * On the first mismatch it prints the index and both values, then returns false.
 */
bool correctResult(unsigned char *img1, unsigned char *img2, int size){
    for(int i = 0; i < size; i++){
        if(img1[i] != img2[i]){
            printf("Error [%d] : %d--%d\n",i, img1[i], img2[i]);
            return false;
        }
    }
    return true;
}

/*
 * initial - fill the first `size` bytes of img with one value.
 *
 * img    : buffer to fill.
 * size   : number of bytes to write; nothing is written when size <= 0.
 * number : fill value, converted to unsigned char before being stored.
 */
void initial(unsigned char *img, int size, int number){
    const unsigned char fill = (unsigned char)number;
    unsigned char *cursor = img;

    for(int remaining = size; remaining > 0; --remaining){
        *cursor++ = fill;
    }
}
//====================  MAIN  PROGRAM =========================================
/*
 * Test driver: blends two constant 640x480x3 images once on the device
 * (Blen_Device) and once on the host (Blen_Host), then prints whether the two
 * outputs match byte for byte.
 *
 * Returns 0 after the comparison has run, 1 if a host buffer cannot be allocated.
 */
int main(int argc, char *argv[]){
    int width = 640;
    int height = 480;
    int chanel = 3;

    int size = width*height*chanel;
    int nbytes = size*sizeof(unsigned char);

    //Setup test data
    unsigned char *img1 = (unsigned char*)malloc(nbytes);
    unsigned char *img2 = (unsigned char*)malloc(nbytes);
    unsigned char *outDevice = (unsigned char*)malloc(nbytes);
    unsigned char *outHost = (unsigned char*)malloc(nbytes);

    if(img1 == NULL || img2 == NULL || outDevice == NULL || outHost == NULL){
        fprintf(stderr, "Host memory allocation failed\n");
        // free(NULL) is a no-op, so the buffers that did get allocated are released safely.
        free(img1);
        free(img2);
        free(outDevice);
        free(outHost);
        return 1;
    }

    initial(img1, size, 25);
    initial(img2, size, 255);
    // Give the device output a defined value so the comparison below never
    // reads uninitialised memory if the device path does not write it.
    initial(outDevice, size, 0);

    // call Device function
    Blen_Device(img1, img2, outDevice, width, height, chanel);

    // call Host function
    Blen_Host(img1, img2, outHost, width, height, chanel);

    // check error
    bool result = correctResult(outDevice, outHost, size);
    if(result){
        printf("Result is true !\n");
    }else{
        printf("Result is false !\n");
    }

    // free memory
    free(img1);
    free(img2);
    free(outDevice);
    free(outHost);

    return 0;
}


Comments

Popular posts from this blog

Bài 1 - OpenMP Lập Trình Xử Lý Song Song - Giới Thiệu