主要目的是学习算法和 CUDA。源码地址：
// NOTE(review): the template arguments and the kernel launch configuration
// were missing from the reviewed text. `vector<vector<int> >`,
// `cuVector2D<int>` and `<<<rows, threads>>>` are reconstructed from how the
// values are used below -- confirm `cuVector2D<int>` against the cuVector2D
// declaration.

/**
 * 3x3 box-average ("image smoother") kernel driven by a 2D prefix-sum table.
 *
 * pre   : prefix-sum table with one extra leading row and column of zeros,
 *         i.e. (image rows + 1) x (image cols + 1).
 * cuimg : output; cuimg[i][j] receives the integer (truncating) average of
 *         the input pixels in the window centred on (i, j), clipped to the
 *         image bounds.
 *
 * Launch layout: 1D grid, 1D block. blockIdx.x walks image rows and
 * threadIdx.x walks image columns, each with a stride loop, so any
 * gridDim.x >= 1 and blockDim.x >= 1 covers the whole image. A launch of
 * <<<rows, cols>>> assigns exactly one thread per pixel.
 * No shared memory is used.
 */
__global__ void calsum_kernel(cuVector2D<int> pre, cuVector2D<int> cuimg)
{
    // Image dimensions are one less than the prefix table's dimensions.
    const int m = static_cast<int>(pre.rows) - 1;
    const int n = static_cast<int>(pre.cols) - 1;

    for (int i = blockIdx.x; i < m; i += gridDim.x)
    {
        // Clip the window vertically to the image.
        const int top = max(i - 1, 0);
        const int bottom = min(i + 1, m - 1);

        for (int j = threadIdx.x; j < n; j += blockDim.x)
        {
            // Clip the window horizontally to the image.
            const int left = max(j - 1, 0);
            const int right = min(j + 1, n - 1);

            const int cnt = (bottom - top + 1) * (right - left + 1);
            // Inclusion-exclusion on the prefix table gives the window sum.
            const int sum = pre[bottom + 1][right + 1] - pre[top][right + 1] -
                            pre[bottom + 1][left] + pre[top][left];
            cuimg[i][j] = sum / cnt;
        }
    }
}

using namespace std;

/**
 * Image smoother implemented with a 2D prefix sum, once entirely on the CPU
 * and once with the per-pixel averaging step done by calsum_kernel.
 */
class prefixsum
{
public:
    /**
     * CPU reference implementation.
     *
     * img : rectangular matrix of pixel values (row 0's length is used as
     *       the width for every row).
     * Returns a matrix of the same shape where each cell is the truncating
     * integer average of the up-to-3x3 neighbourhood around it. An empty
     * input yields an empty result.
     */
    vector<vector<int> > imageSmoother(vector<vector<int> > &img)
    {
        const int m = static_cast<int>(img.size());
        if (m == 0)
        {
            // img[0] would be out of bounds; nothing to smooth.
            return vector<vector<int> >();
        }
        const int n = static_cast<int>(img[0].size());

        // pre[i][j] = sum of img[0..i-1][0..j-1]; row 0 / column 0 stay zero.
        vector<vector<int> > pre(m + 1, vector<int>(n + 1, 0));
        for (int i = 1; i <= m; i++)
        {
            for (int j = 1; j <= n; j++)
            {
                pre[i][j] = pre[i - 1][j] + pre[i][j - 1] - pre[i - 1][j - 1] + img[i - 1][j - 1];
            }
        }

        vector<vector<int> > ans(m, vector<int>(n, 0));
        for (int i = 0; i < m; i++)
        {
            const int top = max(i - 1, 0);
            const int bottom = min(i + 1, m - 1);
            for (int j = 0; j < n; j++)
            {
                const int left = max(j - 1, 0);
                const int right = min(j + 1, n - 1);
                const int cnt = (bottom - top + 1) * (right - left + 1);
                const int sum = pre[bottom + 1][right + 1] - pre[top][right + 1] -
                                pre[bottom + 1][left] + pre[top][left];
                ans[i][j] = sum / cnt;
            }
        }
        return ans;
    }

    /**
     * GPU variant: the prefix table is built on the host, the per-pixel
     * window average is computed by calsum_kernel, and the result is copied
     * back into a vector-of-vectors.
     *
     * img : rectangular matrix of pixel values.
     * Returns the same result as imageSmoother. An empty input yields an
     * empty result without launching a kernel.
     *
     * NOTE(review): cuVector2D is indexed from both host code and the kernel
     * here, so it is presumably backed by memory visible to both sides --
     * confirm against its definition.
     */
    vector<vector<int> > imageSmootherGPU(vector<vector<int> > &img)
    {
        const int m = static_cast<int>(img.size());
        if (m == 0)
        {
            return vector<vector<int> >();
        }
        const int n = static_cast<int>(img[0].size());
        if (n == 0)
        {
            // A zero-width image has no pixels; a 0-thread launch is invalid.
            return vector<vector<int> >(m);
        }

        cuVector2D<int> cupre(m + 1, n + 1, 0);
        for (int i = 1; i <= m; i++)
        {
            for (int j = 1; j <= n; j++)
            {
                cupre[i][j] = cupre[i - 1][j] + cupre[i][j - 1] - cupre[i - 1][j - 1] + img[i - 1][j - 1];
            }
        }

        cuVector2D<int> cuimg(m, n, 0);

        // One block per row; cap the block size so wide images still launch.
        // The kernel's stride loop covers columns beyond the block size.
        const int kMaxThreadsPerBlock = 256;
        const int threads = n < kMaxThreadsPerBlock ? n : kMaxThreadsPerBlock;
        calsum_kernel<<<m, threads>>>(cupre, cuimg);
        CK(cudaGetLastError());      // surfaces an invalid launch configuration
        CK(cudaDeviceSynchronize()); // results must be complete before the host reads cuimg

        vector<vector<int> > ans(m, vector<int>(n, 0));
        for (int i = 0; i < m; i++)
        {
            for (int j = 0; j < n; j++)
            {
                ans[i][j] = cuimg[i][j];
            }
        }
        return ans;
    }
};
图片平滑器



