cuda做卷積運算最終版本

2021-10-01 16:58:07 字數 3297 閱讀 5113

#include

"device_functions.h"

#include

"cuda_runtime.h"

#include

"device_launch_parameters.h"

#include

"stdlib.h"

#include

//#include

using

namespace std;

//返回thread和block

intgetthreadnum()

__global__ void

conv

(float

*imggpu,

float

*kernelgpu,

float

*resultgpu,

int width,

int height,

int kernelsize)

int row = id / width;

//獲取img 的行和列

int clo = id % width;

//每一個執行緒處理一次卷積計算

//resultgpu[id] = 0;

for(

int i =

0; i < kernelsize;

++i)

else

resultgpu[id]

+= kernelgpu[i*kernelsize + j]

* im**alue;}}

}//形參:列舉型別

void

getcudacalerror

(cudaerror err)

return;}

intmain()

}//宣告卷積核大小,大小為3*3

const

int kernelsize =3;

//float*kernel = (float*)calloc(kernelsize*kernelsize, sizeof(float));

float

*kernel =

newfloat

[kernelsize*kernelsize]

;//卷積核賦值

//第一種方法

for(

int i =

0; i < kernelsize;

++i)

}//第二種

/*for (int i = 0; i < kernelsize*kernelsize; ++i)

*///輸出img的左上角

for(

int row =

0; row <10;

++row)

std::cout <<

'\n';}

cout <<

"kernel\n"

;for

(int i =

0; i < kernelsize;

++i)

cout << endl;

}float

*imggpu =0;

//將host值複製到device上面

float

*kernelgpu =0;

//將kernel也複製到device上

float

*resultgpu =0;

//卷積結果

//為device分配記憶體

getcudacalerror

(cudamalloc

(&imggpu, height*width *

sizeof

(float))

);getcudacalerror

(cudamalloc

(&kernelgpu, kernelsize*kernelsize *

sizeof

(float))

);getcudacalerror

(cudamalloc

(&resultgpu, height*width *

sizeof

(float))

);//這個地方捕捉錯誤,明天改

cudamemcpy

(imggpu, img, width*height *

sizeof

(float

), cudamemcpyhosttodevice)

;cudamemcpy

(kernelgpu, kernel, kernelsize*kernelsize *

sizeof

(float

), cudamemcpyhosttodevice)

;//獲取gpu資訊

const

int threadnum =

getthreadnum()

;const

int blocknum =

(width*height + threadnum -1)

/ threadnum;

//這裡block使用一維

conv <<

>

>

(imggpu, kernelgpu, resultgpu, width, height, kernelsize)

;//接受device上resultgpu裡面的資料

float

*showimg =

newfloat

[height*width]

;cudamemcpy

(showimg, resultgpu, width*height *

sizeof

(float

), cudamemcpydevicetohost)

;for

(int row =

0; row <10;

++row)

std::cout <<

'\n';}

//沒有釋放記憶體

cudafree

(imggpu)

;cudafree

(kernelgpu)

;cudafree

(resultgpu)

;/*free(img);

free(kernel);*/

delete

img;

delete

kernel;

delete

showimg;

system

("pause");

return0;

}

這裡面還有好多錯誤沒有捕捉,等能力變強後再重新改改吧!

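這個地方有點小疑惑:為 device 分配完記憶體後,裡面的數是否預設為 0?實際上 cudaMalloc 並不會把分配到的記憶體清零,其初始內容是未定義的;由於核函式是用 += 累加到 resultgpu,應在啟動核函式之前用 cudaMemset 將 resultgpu 清零(或在核函式開頭先令 resultgpu[id] = 0),否則結果可能包含垃圾值。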

解析器最終版本

include include define size 512 define cr n char file 300 input.txt 用於接收輸入輸出檔名 定義分界符 char singleword 50 file fin 指向輸入檔案的指標 void main int returnvalue 0...

cuda做卷積計算初稿

include cuda runtime.h include device launch parameters.h include using namespace std 返回thread和block intgetthreadnum global void conv float imggpu,flo...

setTimeout 最終版本 物件導向可擴充套件

object.extend function destination,source return destination 返回擴充套件後的destination extend方法的所用是 destination目標物件的property屬性繼承source物件的property屬性 function...