太原网站排名系统,西安专业网站建设服务,wordpress utc时间慢8小时,公司设计图片跟1D一样，2D的代码也没有运行过。旧的方法看看就好。
声明二维Texture
// 2D texture references with float texels (legacy texture-reference API).
// They are attached to device buffers with cudaBindTexture2D and read in
// kernels with tex2D().
texture<float, 2> texConstSrc;
texture<float, 2> texIn;
texture<float, 2> texOut;
访问二维Texture
使用2D的Texture的便利性体现在blend_kernel函数里。不再需要通…跟1D一样，2D的代码也没有运行过。旧的方法看看就好。
声明二维Texture
// 2D texture references with float texels (legacy texture-reference API).
// They are attached to device buffers with cudaBindTexture2D and read in
// kernels with tex2D().
texture<float, 2> texConstSrc;
texture<float, 2> texIn;
texture<float, 2> texOut;
访问二维Texture
使用2D的Texture的便利性体现在blend_kernel函数里。不再需要通过xy去计算一维索引。二维texture使用tex2D()去读取数据。
// One relaxation step of the heat simulation: each thread updates one cell
// from its own value and its four direct neighbours.
//
//   dst    - output buffer; one float is written at the thread's flat offset.
//   dstOut - selects the texture to read from: texIn when true, texOut when
//            false (the textures are global, so a flag picks the source).
//
// Launch layout: 2D grid of 2D blocks. The row stride used for the flat
// offset is blockDim.x * gridDim.x, and there is no bounds check, so the
// launch is assumed to cover the image exactly.
// NOTE(review): reads at x-1 / y-1 / x+1 / y+1 go out of range on the border;
// this relies on the texture addressing mode (presumably the default clamp
// behaviour) -- confirm.
__global__ void blend_kernel( float *dst, bool dstOut ) {
    // map from threadIdx/BlockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // top, left, center, right, bottom
    float t, l, c, r, b;
    if (dstOut) {
        t = tex2D(texIn, x, y - 1);
        l = tex2D(texIn, x - 1, y);
        c = tex2D(texIn, x, y);
        r = tex2D(texIn, x + 1, y);
        b = tex2D(texIn, x, y + 1);
    } else {
        t = tex2D(texOut, x, y - 1);
        l = tex2D(texOut, x - 1, y);
        c = tex2D(texOut, x, y);
        r = tex2D(texOut, x + 1, y);
        b = tex2D(texOut, x, y + 1);
    }
    // move the cell toward the average of its neighbours, scaled by SPEED
    dst[offset] = c + SPEED * (t + b + r + l - 4 * c);
}
然后拷贝热源数据
// Stamps the constant heat sources into a working buffer: every non-zero
// texel of texConstSrc overwrites the matching cell of iptr; zero texels
// leave iptr untouched.
//
//   iptr - buffer that receives the heater values at each thread's flat offset.
//
// Launch layout: 2D grid of 2D blocks; row stride is blockDim.x * gridDim.x
// and there is no bounds check, so the launch is assumed to cover the image
// exactly.
__global__ void copy_const_kernel( float *iptr ) {
    // map from threadIdx/BlockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    float c = tex2D(texConstSrc, x, y);
    if (c != 0)
        iptr[offset] = c;
}
二维Texture绑定
使用二维Texture去绑定一维数组稍微复杂一些：
// Channel descriptor for single-float texels, reused by every bind call.
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
// Bind each linear device buffer as a DIM x DIM 2D texture; the last argument
// is the row pitch in bytes.
HANDLE_ERROR( cudaBindTexture2D( NULL, texConstSrc, data.dev_constSrc,
                                 desc, DIM, DIM, sizeof(float) * DIM ) );
HANDLE_ERROR( cudaBindTexture2D( NULL, texIn, data.dev_inSrc,
                                 desc, DIM, DIM, sizeof(float) * DIM ) );
HANDLE_ERROR( cudaBindTexture2D( NULL, texOut, data.dev_outSrc,desc, DIM, DIM, sizeof(float)*DIM));
解除绑定
解除绑定的方式跟1D相同：
// Unbind the three texture references (texIn, texOut, texConstSrc).
cudaUnbindTexture(texIn);
cudaUnbindTexture(texOut);
cudaUnbindTexture(texConstSrc);
完整代码
#include "../common/book.h"
#include "../common/cpu_anim.h"

// Side length of the simulation grid and bitmap, in cells/pixels.
#define DIM 1024
#define PI 3.1415926535897932f
// Values written into the constant heat-source and initial-state buffers.
#define MAX_TEMP 1.0f
#define MIN_TEMP 0.0001f
// Coefficient applied to the neighbour difference in blend_kernel.
#define SPEED 0.25f

// these exist on the GPU side
// 2D texture references with float texels (legacy texture-reference API).
// Bound to the device buffers in main() and read in the kernels with tex2D().
texture<float, 2> texConstSrc;
texture<float, 2> texIn;
texture<float, 2> texOut;

// One relaxation step of the heat simulation: each thread updates one cell
// from its own value and its four direct neighbours.
//
//   dst    - output buffer; one float is written at the thread's flat offset.
//   dstOut - selects the texture to read from: texIn when true, texOut when
//            false.
//
// Launch layout: 2D grid of 2D blocks. The row stride used for the flat
// offset is blockDim.x * gridDim.x, and there is no bounds check, so the
// launch is assumed to cover the image exactly.
// NOTE(review): reads at x-1 / y-1 / x+1 / y+1 go out of range on the border;
// this relies on the texture addressing mode (presumably the default clamp
// behaviour) -- confirm.
__global__ void blend_kernel( float *dst, bool dstOut ) {
    // map from threadIdx/BlockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // top, left, center, right, bottom
    float t, l, c, r, b;
    if (dstOut) {
        t = tex2D(texIn, x, y - 1);
        l = tex2D(texIn, x - 1, y);
        c = tex2D(texIn, x, y);
        r = tex2D(texIn, x + 1, y);
        b = tex2D(texIn, x, y + 1);
    } else {
        t = tex2D(texOut, x, y - 1);
        l = tex2D(texOut, x - 1, y);
        c = tex2D(texOut, x, y);
        r = tex2D(texOut, x + 1, y);
        b = tex2D(texOut, x, y + 1);
    }
    // move the cell toward the average of its neighbours, scaled by SPEED
    dst[offset] = c + SPEED * (t + b + r + l - 4 * c);
}

// Stamps the constant heat sources into a working buffer: every non-zero
// texel of texConstSrc overwrites the matching cell of iptr; zero texels
// leave iptr untouched. Same launch-layout assumptions as blend_kernel.
__global__ void copy_const_kernel( float *iptr ) {
    // map from threadIdx/BlockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    float c = tex2D(texConstSrc, x, y);
    if (c != 0)
        iptr[offset] = c;
}

// globals needed by the update routine
struct DataBlock {
    unsigned char *output_bitmap;   // device buffer holding the rendered frame
    float         *dev_inSrc;       // device buffer bound to texIn
    float         *dev_outSrc;      // device buffer bound to texOut
    float         *dev_constSrc;    // device buffer bound to texConstSrc
    CPUAnimBitmap *bitmap;          // host-side bitmap that receives each frame

    cudaEvent_t    start, stop;     // events bracketing one anim_gpu call
    float          totalTime;       // accumulated elapsed time in ms
    float          frames;          // number of frames rendered so far
};

// Per-frame callback: runs 90 simulation steps, converts the result to
// colours, copies the frame into the host bitmap and prints the running
// average frame time.
//
//   d     - simulation state (buffers, bitmap, timing events).
//   ticks - unused here.
void anim_gpu( DataBlock *d, int ticks ) {
    HANDLE_ERROR( cudaEventRecord( d->start, 0 ) );
    dim3 blocks(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    CPUAnimBitmap *bitmap = d->bitmap;

    // since tex is global and bound, we have to use a flag to
    // select which is in/out per iteration
    volatile bool dstOut = true;
    for (int i = 0; i < 90; i++) {
        float *in, *out;
        if (dstOut) {
            in  = d->dev_inSrc;
            out = d->dev_outSrc;
        } else {
            out = d->dev_inSrc;
            in  = d->dev_outSrc;
        }
        copy_const_kernel<<<blocks, threads>>>( in );
        blend_kernel<<<blocks, threads>>>( out, dstOut );
        dstOut = !dstOut;
    }
    // 90 is even, so the latest state ends up back in dev_inSrc
    float_to_color<<<blocks, threads>>>( d->output_bitmap,
                                         d->dev_inSrc );

    HANDLE_ERROR( cudaMemcpy( bitmap->get_ptr(),
                              d->output_bitmap,
                              bitmap->image_size(),
                              cudaMemcpyDeviceToHost ) );

    HANDLE_ERROR( cudaEventRecord( d->stop, 0 ) );
    HANDLE_ERROR( cudaEventSynchronize( d->stop ) );
    float elapsedTime;
    HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime,
                                        d->start, d->stop ) );
    d->totalTime += elapsedTime;
    ++d->frames;
    printf( "Average Time per frame: %3.1f ms\n",
            d->totalTime / d->frames );
}

// clean up memory allocated on the GPU
void anim_exit( DataBlock *d ) {
    cudaUnbindTexture( texIn );
    cudaUnbindTexture( texOut );
    cudaUnbindTexture( texConstSrc );
    HANDLE_ERROR( cudaFree( d->dev_inSrc ) );
    HANDLE_ERROR( cudaFree( d->dev_outSrc ) );
    HANDLE_ERROR( cudaFree( d->dev_constSrc ) );

    HANDLE_ERROR( cudaEventDestroy( d->start ) );
    HANDLE_ERROR( cudaEventDestroy( d->stop ) );
}

// Allocates the device buffers, binds them as 2D textures, uploads the
// constant heat sources and the initial temperature field, then hands
// control to the animation loop.
int main( void ) {
    DataBlock data;
    CPUAnimBitmap bitmap( DIM, DIM, &data );
    data.bitmap = &bitmap;
    data.totalTime = 0;
    data.frames = 0;
    HANDLE_ERROR( cudaEventCreate( &data.start ) );
    HANDLE_ERROR( cudaEventCreate( &data.stop ) );

    int imageSize = bitmap.image_size();

    HANDLE_ERROR( cudaMalloc( (void**)&data.output_bitmap,
                              imageSize ) );

    // assume float == 4 chars in size (ie rgba)
    HANDLE_ERROR( cudaMalloc( (void**)&data.dev_inSrc,
                              imageSize ) );
    HANDLE_ERROR( cudaMalloc( (void**)&data.dev_outSrc,
                              imageSize ) );
    HANDLE_ERROR( cudaMalloc( (void**)&data.dev_constSrc,
                              imageSize ) );

    // bind each linear buffer as a DIM x DIM float texture; the last
    // argument is the row pitch in bytes
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
    HANDLE_ERROR( cudaBindTexture2D( NULL, texConstSrc,
                                     data.dev_constSrc,
                                     desc, DIM, DIM,
                                     sizeof(float) * DIM ) );
    HANDLE_ERROR( cudaBindTexture2D( NULL, texIn,
                                     data.dev_inSrc,
                                     desc, DIM, DIM,
                                     sizeof(float) * DIM ) );
    HANDLE_ERROR( cudaBindTexture2D( NULL, texOut,
                                     data.dev_outSrc,
                                     desc, DIM, DIM,
                                     sizeof(float) * DIM ) );

    // initialize the constant data
    float *temp = (float*)malloc( imageSize );
    for (int i = 0; i < DIM * DIM; i++) {
        temp[i] = 0;
        int x = i % DIM;
        int y = i / DIM;
        if ((x > 300) && (x < 600) && (y > 310) && (y < 601))
            temp[i] = MAX_TEMP;
    }
    temp[DIM * 100 + 100] = (MAX_TEMP + MIN_TEMP) / 2;
    temp[DIM * 700 + 100] = MIN_TEMP;
    temp[DIM * 300 + 300] = MIN_TEMP;
    temp[DIM * 200 + 700] = MIN_TEMP;
    for (int y = 800; y < 900; y++) {
        for (int x = 400; x < 500; x++) {
            temp[x + y * DIM] = MIN_TEMP;
        }
    }
    HANDLE_ERROR( cudaMemcpy( data.dev_constSrc, temp,
                              imageSize,
                              cudaMemcpyHostToDevice ) );

    // initialize the input data
    for (int y = 800; y < DIM; y++) {
        for (int x = 0; x < 200; x++) {
            temp[x + y * DIM] = MAX_TEMP;
        }
    }
    HANDLE_ERROR( cudaMemcpy( data.dev_inSrc, temp,
                              imageSize,
                              cudaMemcpyHostToDevice ) );
    free( temp );

    bitmap.anim_and_exit( (void (*)(void*,int))anim_gpu,
                          (void (*)(void*))anim_exit );
}