可以对多个不同长度数组进行reduce求和,所有数组都放在一个大数组里,用offsets描述每个数组的偏移和大小信息
源码
#include#include #include #include #include #include #include #include #include using offset_t = int; using float_t = float; const unsigned int nArray = 4; float_t my_rand(void) { static thrust::default_random_engine rng; //static thrust::uniform_int_distribution dist(0, 9999); static thrust::uniform_real_distribution dist(-100.0f, 100.0f); return dist(rng); } struct MyFunctor { thrust::device_ptr idata_; thrust::device_ptr offsets_; thrust::device_ptr odata_; __host__ __device__ void operator()(int idx) { offset_t offset = offsets_[idx]; int n = offsets_[idx + 1] - offsets_[idx]; odata_[idx] = thrust::reduce(thrust::cuda::par, idata_ + offset, idata_ + offset + n, float_t(0), thrust::plus ()); } }; template void print_array(T* data, int n, std::string name = "") { std::cout << name << "n"; thrust::copy(data, data + n, std::ostream_iterator (std::cout, "n")); return; } template void print_array(const thrust::host_vector & v, std::string name = "") { std::cout << name << "n"; thrust::copy(v.begin(), v.end(), std::ostream_iterator (std::cout, "n")); return; } void test() { thrust::host_vector h_offsets(nArray+1); h_offsets[0] = 4; //#0 h_offsets[1] = 5; //#1 h_offsets[2] = 6; //#2 h_offsets[3] = 7; //#3 thrust::exclusive_scan(h_offsets.begin(), h_offsets.end(),h_offsets.begin(),0); print_array(h_offsets,"h_offsets"); thrust::device_vector d_offsets = h_offsets; const unsigned int nData = *(h_offsets.end() - 1); std::cout << "nData=" << nData << "n"; thrust::host_vector h_idata(nData); thrust::generate(h_idata.begin(), h_idata.end(), my_rand); print_array(h_idata,"h_idata"); thrust::device_vector d_idata = h_idata; thrust::host_vector h_index(nArray); thrust::sequence(h_index.begin(), h_index.end()); print_array(h_index, "h_index"); thrust::device_vector d_index = h_index; thrust::host_vector h_odata(nArray); thrust::device_vector d_odata(nArray); MyFunctor f{ d_idata.data(),d_offsets.data(),d_odata.data() }; thrust::for_each(d_index.begin(), d_index.end(), f); h_odata = d_odata; std::cout << "GPU: reduce sumn"; print_array(h_odata, "h_odata"); } int main(int argc, char **argv) { test(); return 0; }
资料
- how-to-use-thrust-to-sort-the-rows-of-a-matrix
- device_vector获取底层指针
- thrust in kernel
- using-thrust-functions-within-device-code-cudalaunchcooperativekernel
- Add thrust::cuda::par_nosync execution policy
- tutorial_gpu_thrust_intero



