- 前言
- 实践
多进程能够在不同的CPU核心上并行运行任务,可以绕过用于线程的GIL。
多进程:更适合密集计算相关任务(cpu使用率高)。
多线程:更好地处理与IO相关的任务。
我们来实现一个数据求和的功能,例如计算从0到99999999的整数之和(即 range(0, 100000000),不包含100000000本身)。首先我们使用单个进程进行计算:
from multiprocessing import Process, cpu_count
import time
# Module-level store for the computed sums, keyed by worker name.
# NOTE: a worker started via Process operates on its own copy of this dict,
# so an entry written inside a worker is not visible in the parent process.
result = {}


def counter(num_1, num_2, name):
    """Sum the integers in ``range(num_1, num_2)`` and record the total.

    Args:
        num_1: First integer included in the sum.
        num_2: Upper bound of the sum; it is not included itself.
        name: Key under which the total is stored in ``result``.

    The total is written to ``result[name]`` and ``result`` is then printed.
    Nothing is returned.
    """
    count = 0
    # Explicit loop kept on purpose: it is the CPU-bound workload that the
    # surrounding example is timing.
    for i in range(num_1, num_2):
        count += i
    result[name] = count
    print(result)
def main():
    """Run the whole sum in one worker process and print the elapsed time."""
    # perf_counter() has an undefined reference point, so only the difference
    # between two readings is a meaningful duration.
    start = time.perf_counter()
    a = Process(target=counter, args=(0, 100000000, '进程1'))
    a.start()
    a.join()  # wait for the worker to finish before stopping the clock
    print('运行耗时: {} 秒'.format(time.perf_counter() - start))


# Entry guard: required so that worker processes which re-import this module
# do not run main() again.
if __name__ == '__main__':
    main()
>>> {'进程1': 4999999950000000}
>>> 运行耗时: 13.5747503 秒
接下来我们使用两个进程进行计算:
from multiprocessing import Process, cpu_count
import time
# Module-level store for the computed sums, keyed by worker name.
# NOTE: a worker started via Process operates on its own copy of this dict,
# so an entry written inside a worker is not visible in the parent process
# or in the other workers.
result = {}


def counter(num_1, num_2, name):
    """Sum the integers in ``range(num_1, num_2)`` and record the total.

    Args:
        num_1: First integer included in the sum.
        num_2: Upper bound of the sum; it is not included itself.
        name: Key under which the total is stored in ``result``.

    The total is written to ``result[name]`` and ``result`` is then printed.
    Nothing is returned.
    """
    count = 0
    # Explicit loop kept on purpose: it is the CPU-bound workload that the
    # surrounding example is timing.
    for i in range(num_1, num_2):
        count += i
    result[name] = count
    print(result)
def main():
    """Split the sum across two worker processes and print the elapsed time."""
    # perf_counter() has an undefined reference point, so only the difference
    # between two readings is a meaningful duration.
    start = time.perf_counter()
    # Start both workers before joining either one so that they run at the
    # same time; each handles one half of the overall range.
    a = Process(target=counter, args=(0, 50000000, '进程1'))
    a.start()
    b = Process(target=counter, args=(50000000, 100000000, '进程2'))
    b.start()
    a.join()
    b.join()
    print('运行耗时: {} 秒'.format(time.perf_counter() - start))


# Entry guard: required so that worker processes which re-import this module
# do not run main() again.
if __name__ == '__main__':
    main()
>>> {'进程1': 1249999975000000}
>>> {'进程2': 3749999975000000}
>>> 运行耗时: 9.9812741 秒
接下来我们尝试跑满所有的CPU核心:
from multiprocessing import Process, cpu_count
import time
# Module-level store for the computed sums, keyed by worker name.
# NOTE: a worker started via Process operates on its own copy of this dict,
# so an entry written inside a worker is not visible in the parent process
# or in the other workers.
result = {}


def counter(num_1, num_2, name):
    """Sum the integers in ``range(num_1, num_2)`` and record the total.

    Args:
        num_1: First integer included in the sum.
        num_2: Upper bound of the sum; it is not included itself.
        name: Key under which the total is stored in ``result``.

    The total is written to ``result[name]`` and ``result`` is then printed.
    Nothing is returned.
    """
    count = 0
    # Explicit loop kept on purpose: it is the CPU-bound workload that the
    # surrounding example is timing.
    for i in range(num_1, num_2):
        count += i
    result[name] = count
    print(result)
def main():
    """Split the sum across four worker processes and print the elapsed time."""
    print(cpu_count())  # number of CPU cores reported for this machine
    # perf_counter() has an undefined reference point, so only the difference
    # between two readings is a meaningful duration.
    start = time.perf_counter()
    # Start all four workers before joining any of them so that they run at
    # the same time; each handles one quarter of the overall range.
    a = Process(target=counter, args=(0, 25000000, '进程1'))
    a.start()
    b = Process(target=counter, args=(25000000, 50000000, '进程2'))
    b.start()
    c = Process(target=counter, args=(50000000, 75000000, '进程3'))
    c.start()
    d = Process(target=counter, args=(75000000, 100000000, '进程4'))
    d.start()
    a.join()
    b.join()
    c.join()
    d.join()
    print('运行耗时: {} 秒'.format(time.perf_counter() - start))


# Entry guard: required so that worker processes which re-import this module
# do not run main() again.
if __name__ == '__main__':
    main()
>>> 4
>>> {'进程3': 1562499987500000}
>>> {'进程1': 312499987500000}
>>> {'进程2': 937499987500000}
>>> {'进程4': 2187499987500000}
>>> 运行耗时: 5.9548173 秒
发现当跑满所有的CPU核心时速度是最快的(进程数并不是越多越好哦,再多的话速度反而会变慢),所以我们一般将多进程的进程数目设定为CPU的核心数目。



