1、CUDA+OPENCV
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
//======================== Laplacian edge-detection kernel ========================
// Convolves an 8-bit single-channel image with the 3x3 Laplacian mask
//
//       1   1   1
//       1  -8   1
//       1   1   1
//
// and stores the response, saturated to [0, 255], in dataOut.
//
// Launch layout: a 2D grid of 2D blocks in which thread (x, y) handles the
// pixel at column x, row y. The grid may be larger than the image; surplus
// threads return without touching memory.
//
// Parameters:
//   dataIn    - device pointer to the input image, row-major, imgWidth bytes per row
//   dataOut   - device pointer to the output image, same layout as dataIn
//   imgHeight - number of rows
//   imgWidth  - number of columns
//
// Pixels on the outermost rows/columns have no complete 3x3 neighbourhood and
// are written as 0, so every pixel of dataOut is defined after the kernel runs.
__global__ void laplaceInCuda(unsigned char* dataIn, unsigned char* dataOut, int imgHeight, int imgWidth)
{
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;  // column
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;  // row

    // Threads in the grid tail that fall outside the image have nothing to do.
    if (xIndex >= imgWidth || yIndex >= imgHeight) {
        return;
    }

    // Widen before multiplying so the flat offset cannot overflow int on large images.
    const size_t index = static_cast<size_t>(yIndex) * imgWidth + xIndex;

    // Border pixels: emit a defined value instead of leaving device memory uninitialised.
    if (xIndex == 0 || xIndex == imgWidth - 1 || yIndex == 0 || yIndex == imgHeight - 1) {
        dataOut[index] = 0;
        return;
    }

    // Row pointers centred on the current column.
    const unsigned char* rowAbove = dataIn + index - imgWidth;
    const unsigned char* rowHere  = dataIn + index;
    const unsigned char* rowBelow = dataIn + index + imgWidth;

    // The response is an exact integer in [-2040, 2040]; no floating point is needed.
    int G = rowAbove[-1] + rowAbove[0] + rowAbove[1]
          + rowHere[-1]  - 8 * rowHere[0] + rowHere[1]
          + rowBelow[-1] + rowBelow[0] + rowBelow[1];

    // Saturate rather than letting an out-of-range value wrap when narrowed to 8 bits.
    G = G < 0 ? 0 : (G > 255 ? 255 : G);
    dataOut[index] = static_cast<unsigned char>(G);
}
// Prints a diagnostic and terminates the program when a CUDA runtime call fails.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        std::fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        std::exit(EXIT_FAILURE);
    }
}

// Loads a grayscale image, computes its Laplacian once with OpenCV's filter2D
// and once with the laplaceInCuda kernel, and shows both results in windows.
// Returns 0 on success and 1 if the input image cannot be loaded; any CUDA
// failure terminates the program with a diagnostic on stderr.
int main()
{
    //================== Load the image ==================
    // Forward slashes are accepted on Windows and avoid the invalid escape
    // sequences (\o, \i, \h) that single backslashes produce in a string literal.
    Mat grayImg = imread("H:/opencv_project/opencv_cuda学习/image/hist_02.jpg", cv::IMREAD_GRAYSCALE);
    if (grayImg.empty()) {
        std::fprintf(stderr, "Failed to load the input image\n");
        return 1;
    }
    // The flat host->device copy below requires tightly packed rows.
    if (!grayImg.isContinuous()) {
        grayImg = grayImg.clone();
    }
    const int imgHeight = grayImg.rows;
    const int imgWidth = grayImg.cols;
    // Widen before multiplying so the byte count cannot overflow int.
    const size_t numBytes = static_cast<size_t>(imgHeight) * imgWidth * sizeof(unsigned char);

    //============== Laplacian convolution with OpenCV (reference) ==========
    Mat laplaceImg;
    cv::Mat kernel = (Mat_<float>(3, 3) <<
        1, 1, 1,
        1, -8, 1,
        1, 1, 1);
    filter2D(grayImg, laplaceImg, CV_32F, kernel);
    laplaceImg.convertTo(laplaceImg, CV_8U);
    cv::namedWindow("laplace-opencv", cv::WINDOW_NORMAL);
    cv::imshow("laplace-opencv", laplaceImg);

    //=============== Host image that receives the CUDA result =============
    Mat dstImg(imgHeight, imgWidth, CV_8UC1, Scalar(0));

    //================== Allocate GPU memory ===================
    unsigned char* d_in = nullptr;
    unsigned char* d_out = nullptr;
    checkCuda(cudaMalloc((void**)&d_in, numBytes), "cudaMalloc(d_in)");
    checkCuda(cudaMalloc((void**)&d_out, numBytes), "cudaMalloc(d_out)");
    // Zero the output buffer so that any pixel the kernel does not write is
    // black rather than uninitialised device memory.
    checkCuda(cudaMemset(d_out, 0, numBytes), "cudaMemset(d_out)");

    //================ Copy the input image from CPU to GPU =================
    checkCuda(cudaMemcpy(d_in, grayImg.data, numBytes, cudaMemcpyHostToDevice), "copy host->device");

    // One thread per pixel; the grid is rounded up so it covers the whole image.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    //======================== Launch the kernel =======================
    laplaceInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out, imgHeight, imgWidth);
    // A launch does not return a status itself; configuration errors surface here.
    checkCuda(cudaGetLastError(), "laplaceInCuda launch");

    //======================= Copy the result back to the CPU =====================
    // This blocking copy also waits for the kernel and reports any execution error.
    checkCuda(cudaMemcpy(dstImg.data, d_out, numBytes, cudaMemcpyDeviceToHost), "copy device->host");

    //========================= Release GPU memory ====================
    checkCuda(cudaFree(d_in), "cudaFree(d_in)");
    checkCuda(cudaFree(d_out), "cudaFree(d_out)");

    // Window title kept as in the original; the image shown is the Laplacian result.
    cv::namedWindow("cuda-sobel", cv::WINDOW_NORMAL);
    cv::imshow("cuda-sobel", dstImg);
    cv::waitKey(0);
    return 0;
}
2、结果