// CUDA - Texture
// include system
#include <stdio.h>
// include CUDA
#include <cuda_runtime.h>
#include <helper_cuda.h>
#define BLOCK_DIM 512
// use texture memory
// 1D texture references through which the kernel reads the two input images.
// Blen_Device binds them to linear device memory before the kernel launch and
// unbinds them afterwards.
// NOTE: the texture reference API is deprecated in CUDA 11 and removed in
// CUDA 12; building with a newer toolkit requires migrating to texture objects.
typedef texture<unsigned char, 1, cudaReadModeElementType> ByteTexture1D;
ByteTexture1D rT1;  // bound to the first input image
ByteTexture1D rT2;  // bound to the second input image
// Blends two images element-wise with equal weights.
// Each thread handles one element: it fetches element `index` from the
// textures rT1 and rT2 and stores their average (truncated) in aRS[index].
//   aRS  - output buffer written by the kernel; must hold at least `size` bytes
//   size - number of elements to process
// Expects a 1D grid of 1D blocks providing at least `size` threads; threads
// whose index is >= size do nothing.
__global__ void Blending_Texture(unsigned char *aRS, int size){
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if(index < size){
        // Single-precision weights avoid promoting the arithmetic to double.
        // Half-integers up to 255 are exactly representable in float, so the
        // truncated result is the same as with double weights.
        float blended = 0.5f * tex1Dfetch(rT1, index) + 0.5f * tex1Dfetch(rT2, index);
        aRS[index] = (unsigned char)blended;
    }
}
// Prints a diagnostic for a failed CUDA runtime call.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool blenCudaOk(cudaError_t status, const char *what){
    if(status == cudaSuccess){
        return true;
    }
    fprintf(stderr, "Blen_Device: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Blends two host images on the GPU and copies the result back to the host.
//   aImg1, aImg2 - host input buffers of width * height * chanel bytes each
//   aRS          - host output buffer of the same size
//   width, height, chanel - image dimensions; a non-positive value means there
//                           is nothing to blend and the function returns at once
// Every CUDA call is checked. On the first failure a message is written to
// stderr, the remaining steps are skipped, all device resources acquired so
// far are released, and aRS is left untouched.
void Blen_Device(unsigned char* aImg1, unsigned char* aImg2, unsigned char* aRS, int width, int height, int chanel){
    if(width <= 0 || height <= 0 || chanel <= 0){
        return;  // empty image; also avoids an invalid zero-block launch
    }

    // Compute the element count in size_t so the product cannot overflow int.
    const size_t data_size = (size_t)width * (size_t)height * (size_t)chanel * sizeof(unsigned char);
    if(data_size > (size_t)0x7fffffff){
        // The kernel indexes elements with an int.
        fprintf(stderr, "Blen_Device: image has too many elements for an int-indexed kernel\n");
        return;
    }
    const int size = (int)data_size;

    // Integer ceil-division: exact for every size, unlike float ceil().
    const unsigned int blocks = (unsigned int)((data_size + BLOCK_DIM - 1) / BLOCK_DIM);

    unsigned char *dev_A = NULL, *dev_B = NULL, *dev_C = NULL;
    bool boundA = false;
    bool boundB = false;

    // part1, allocate data on device
    bool ok = blenCudaOk(cudaMalloc((void**)&dev_A, data_size), "cudaMalloc(dev_A)")
           && blenCudaOk(cudaMalloc((void**)&dev_B, data_size), "cudaMalloc(dev_B)")
           && blenCudaOk(cudaMalloc((void**)&dev_C, data_size), "cudaMalloc(dev_C)");

    // part2, copy data from cpu to device
    ok = ok
      && blenCudaOk(cudaMemcpy(dev_A, aImg1, data_size, cudaMemcpyHostToDevice), "cudaMemcpy(dev_A)")
      && blenCudaOk(cudaMemcpy(dev_B, aImg2, data_size, cudaMemcpyHostToDevice), "cudaMemcpy(dev_B)");

    // part2a, bind textures to the input buffers (explicit size instead of the default)
    if(ok){
        boundA = blenCudaOk(cudaBindTexture(0, rT1, dev_A, data_size), "cudaBindTexture(rT1)");
        ok = boundA;
    }
    if(ok){
        boundB = blenCudaOk(cudaBindTexture(0, rT2, dev_B, data_size), "cudaBindTexture(rT2)");
        ok = boundB;
    }

    // part3, run kernel; launch-configuration errors only show up via cudaGetLastError
    if(ok){
        Blending_Texture<<<blocks, BLOCK_DIM>>>(dev_C, size);
        ok = blenCudaOk(cudaGetLastError(), "Blending_Texture launch");
    }

    // part4, copy data from device (blocking copy; also reports kernel execution errors)
    if(ok){
        blenCudaOk(cudaMemcpy(aRS, dev_C, data_size, cudaMemcpyDeviceToHost), "cudaMemcpy(aRS)");
    }

    // part5, release data; runs on both the success and the failure path
    if(boundA){
        cudaUnbindTexture(rT1);
    }
    if(boundB){
        cudaUnbindTexture(rT2);
    }
    cudaFree(dev_A);  // cudaFree(NULL) is a no-op
    cudaFree(dev_B);
    cudaFree(dev_C);
}
// Host
// CPU reference for the GPU blend: averages the two images element-wise with
// equal weights (0.5 each, matching the Blending_Texture kernel) and truncates
// the result to unsigned char.
//   aImg1, aImg2 - input buffers of width * height * chanel bytes each
//   aRS          - output buffer of the same size
// Does nothing when the element count is zero or negative.
void Blen_Host( unsigned char* aImg1, unsigned char* aImg2, unsigned char* aRS, int width, int height, int chanel){
    // Hoisted out of the loop and widened so the product cannot overflow int.
    const long long total = (long long)width * height * chanel;
    for(long long i = 0; i < total; i++){
        aRS[i] = (unsigned char)(0.5 * aImg1[i] + 0.5 * aImg2[i]);
    }
}
// correctResult function
// Compares two byte buffers element by element.
//   img1, img2 - buffers to compare
//   size       - number of elements to compare
// Returns true when the first `size` elements are equal (including size <= 0).
// On the first mismatch, prints its index and both values, then returns false.
bool correctResult(unsigned char *img1, unsigned char *img2, int size){
    for(int i = 0; i < size; i++){
        if(img1[i] != img2[i]){
            printf("Error [%d] : %d--%d\n",i, img1[i], img2[i]);
            return false;
        }
    }
    return true;
}
// Fills the first `size` bytes of `img` with `number` (converted to
// unsigned char). Does nothing when `size` is zero or negative.
void initial(unsigned char *img, int size, int number){
    const unsigned char fill = (unsigned char)number;
    unsigned char *cursor = img;
    int remaining = size;
    while(remaining-- > 0){
        *cursor++ = fill;
    }
}
//==================== MAIN PROGRAM =========================================
// Builds two constant-valued 640x480x3 test images, blends them on the device
// and on the host, and prints whether the two results match.
// Returns 1 if a host buffer cannot be allocated, 0 otherwise.
int main(int argc, char *argv[]){
    int width = 640;
    int height = 480;
    int chanel = 3;
    int size = width*height*chanel;
    int nbytes = size*sizeof(unsigned char);

    // Setup test data
    unsigned char *img1      = (unsigned char*)malloc(nbytes);
    unsigned char *img2      = (unsigned char*)malloc(nbytes);
    unsigned char *outDevice = (unsigned char*)malloc(nbytes);
    unsigned char *outHost   = (unsigned char*)malloc(nbytes);
    if(img1 == NULL || img2 == NULL || outDevice == NULL || outHost == NULL){
        fprintf(stderr, "Host memory allocation failed\n");
        // free(NULL) is a no-op, so releasing every pointer is safe
        free(img1);
        free(img2);
        free(outDevice);
        free(outHost);
        return 1;
    }
    initial(img1, size, 25);
    initial(img2, size, 255);

    // call Device function
    Blen_Device(img1, img2, outDevice, width, height, chanel);
    // call Host function
    Blen_Host(img1, img2, outHost, width, height, chanel);

    // check error
    bool result = correctResult(outDevice, outHost, size);
    if(result){
        printf("Result is true !\n");
    }else{
        printf("Result is false !\n");
    }

    // free memory
    free(img1);
    free(img2);
    free(outDevice);
    free(outHost);
    return 0;
}
// Comments
// Post a Comment