本代码基于Barrett算法,加速32比特以内数据的模乘运算,以及64比特以内数据的取模运算。
- 小工具:
#pragma once #includetypedef char int8; typedef int int32; typedef long long int64; typedef unsigned char uint8; typedef unsigned int uint32; typedef unsigned long long uint64; // 异常 #define ErrorInfo(format, ...) { printf("File:%s, Line:%d, Function:%s, ", __FILE__, __LINE__ , __FUNCTION__); printf(format, ##__VA_ARGS__);} // 计时器 typedef std::chrono::steady_clock clock_type;//标准时钟:steady_clock。高分辨率时钟:high_resolution_clock extern std::chrono::time_point TM_start, TM_end; #define Clock() clock_type::now() #define Time(t_start,t_end) std::chrono::duration >(t_end - t_start).count() //时间差,类型 double, s #define Timer(code) TM_start = Clock(); code; TM_end = Clock(); std::cout << Time(TM_start,TM_end) << " sn"; //对code部分计时 // 循环测试 #define Loop(loop, code) Timer(for(int64 i=0;i
- Barrett算法实现:
#define WordLen 32 //Word = 2^WordLen,用于Barrett模乘 #define WordMod (((uint64)1L<> WordLen) * p) #define Mod(x, one_pre, p) ((x) - ((x >> WordLen) * (one_pre >> WordLen) + (((x >> WordLen)*(one_pre & WordMod) + (one_pre >> WordLen) * (x & WordMod)) >> WordLen)) * p)
- 测试:
int main() { uint64 pp = 8404993; uint64 ww = 12345; uint64 ww_pre = PreCompute(ww, pp); uint64 one_pre = PreComputeMod(pp); uint64 x = (uint64)1 << 20; uint64 y = (uint64)1 << 50; uint64 a=0, b=0; cout << x * ww%pp << " -> " << MulMod(x, ww, ww_pre, pp) << endl; cout << y%pp << " -> " << Mod(y, one_pre, pp) << endl; //Barrett算法,约比"%"算符快4倍! Loop(10000000, a=MulMod(x, ww, ww_pre, pp)); Loop(10000000, b=x*ww%pp); Loop(10000000, a=Mod(y, one_pre, pp)); Loop(10000000, b=y%pp); return 0; }
- 测试结果
981500 -> 981500 540177 -> 540177 0.0511806 s 0.215115 s 0.05608 s 0.189529 s



