"C" void convolutionRowCPU(float *h_Dst, float *h_Src, float *h_Kernel, int imageW, int imageH, int kernelR) { for (int y = 0; y < imageH; y++) for (int x = 0; x < imageW; x++) { float sum = 0; for (int k = -kernelR; k <= kernelR; k++) { int d = x + k; if (d >= 0 && d < imageW) sum += h_Src[y * imageW + d] * h_Kernel[kernelR - k]; } h_Dst[y * imageW + x] = sum; } } PTXプログラムはCUDAよりも低レベルなコードになる CUDA PTX .version 6.5 .target sm_75 .address_size 64 .entry my_kernel (.param .u64 data) { .reg .b32 r1, r2; ld.param.u64 r1, [data]; add.u32 r2, r1, 10; st.global.u32 [r1], r2; } https://acoustype.com/?p=2434