主要参考:
【Python】Python进程池multiprocessing.Pool八个函数对比:apply、apply_async、map、map_async、imap、starmap..._随波一落叶-CSDN博客_starmap_async
Pool对象包括 apply、apply_async、map、map_async、imap、imap_unordered、starmap、starmap_async等方法,它们的区别可以从 输入对象、执行状态、返回结果等方面考虑
| 方法 | 输入对象 | 执行状态 | 返回结果 | 本文测试用时/s |
| --- | --- | --- | --- | --- |
| apply | 单个或多个对象参数 | 同步 | 单个,顺序结果 | 9.10 |
| apply_async | 单个或多个对象参数 | 异步 | 单个,顺序结果 | 9.22 |
| map | 单个可迭代对象参数 | 同步 | 顺序list | 5.17 |
| map_async | 单个可迭代对象参数 | 异步 | 顺序list | 4.71 |
| imap | 单个可迭代对象参数 | 同步 | 顺序Iterable | 3.74 |
| imap_unordered | 单个可迭代对象参数 | 同步 | 乱序Iterable | 3.81 |
| starmap | 单个可迭代对象(每个元素为一组参数的元组) | 同步 | 顺序list | 5.34 |
| starmap_async | 单个可迭代对象(每个元素为一组参数的元组) | 异步 | 顺序list | 4.72 |
测试代码示例
import os
import time
import cv2
import pandas as pd
from multiprocessing import Pool
from tqdm import *
def _func(file_path):
    """Read one image and report its shape.

    Worker task that the benchmark hands to the process pool
    (single-argument form, used with ``map``/``imap``/``apply``).

    :param file_path: Path of the image file to read.
    :return: ``(shape, file_name)`` where ``shape`` is the ``shape`` attribute
        of the decoded image, or ``None`` when the file could not be read
        as an image, and ``file_name`` is the base name of ``file_path``.
    """
    img = cv2.imread(file_path)
    file_name = os.path.basename(file_path)
    # cv2.imread signals an unreadable file by returning None rather than
    # raising; report a None shape instead of crashing the worker.
    img_shape = None if img is None else img.shape
    return img_shape, file_name
def _func1(file_name, file_dir):
    """Read one image, given its name and directory, and report its shape.

    Worker task that the benchmark hands to the process pool
    (two-argument form, used with ``starmap``/``apply``).

    :param file_name: Name of the image file inside ``file_dir``.
    :param file_dir: Directory that contains the image file.
    :return: ``(shape, file_name)`` where ``shape`` is the ``shape`` attribute
        of the decoded image, or ``None`` when the file could not be read
        as an image.
    """
    img = cv2.imread(os.path.join(file_dir, file_name))
    # cv2.imread signals an unreadable file by returning None rather than
    # raising; report a None shape instead of crashing the worker.
    img_shape = None if img is None else img.shape
    return img_shape, file_name
def _rows_to_frame(results):
    """Build the result table from an iterable of ``(shape, file_name)`` pairs."""
    # Collect first and construct the frame once; appending row by row with
    # ``df.loc[len(df)]`` reallocates on every insert.
    rows = [(file_name, img_shape) for img_shape, file_name in results]
    return pd.DataFrame(rows, columns=['file_name', 'shape'])


def mlt_compare2(img_dir=None):
    """Time the eight ``multiprocessing.Pool`` dispatch methods on one task.

    Reads the shape of up to the first 1000 files listed in ``img_dir`` with
    a pool of 4 worker processes, once per Pool method (``imap``,
    ``imap_unordered``, ``map``, ``map_async``, ``starmap``,
    ``starmap_async``, ``apply``, ``apply_async``). Each run prints its
    result table; the elapsed wall-clock time of every run is printed at
    the end.

    :param img_dir: Directory containing the image files. Defaults to the
        dataset path hard-coded in the original benchmark.
    :return: None. Results and timings are printed.
    """
    if img_dir is None:
        # NOTE(review): this literal contains no path separators; presumably
        # the backslashes were lost when the snippet was copied. Confirm the
        # real location before relying on the default.
        img_dir = r'E:1_datasetOtherDataimagecaption_aichallengerai_challenger_caption_validation_20170910ai_challenger_caption_validation_20170910caption_validation_images_20170910'

    workers = 4
    # List the directory once so every method processes the same files.
    file_names = os.listdir(img_dir)[:1000]
    file_paths = [os.path.join(img_dir, name) for name in file_names]
    name_dir_pairs = [(name, img_dir) for name in file_names]
    timings = {}

    # imap: lazy iterator yielding results in input order. The progress bar
    # wraps the results so it advances on completion, not on dispatch.
    start = time.time()
    with Pool(workers) as pool:
        results = pool.imap(_func, file_paths)
        frame = _rows_to_frame(tqdm(results, total=len(file_paths)))
    print(frame)
    timings['imap'] = time.time() - start

    # imap_unordered: lazy iterator yielding results in completion order.
    start = time.time()
    with Pool(workers) as pool:
        results = pool.imap_unordered(_func, file_paths)
        frame = _rows_to_frame(tqdm(results, total=len(file_paths)))
    print(frame)
    timings['imap_unordered'] = time.time() - start

    # map: blocks until every task is done, returns an ordered list.
    # Only dispatch is observable here, so the bar wraps the input.
    start = time.time()
    with Pool(workers) as pool, tqdm(file_paths) as pbar:
        frame = _rows_to_frame(pool.map(_func, pbar))
    print(frame)
    timings['map'] = time.time() - start

    # map_async: returns immediately; get() blocks until the whole ordered
    # list is available.
    start = time.time()
    with Pool(workers) as pool, tqdm(file_paths) as pbar:
        async_result = pool.map_async(_func, pbar)
        frame = _rows_to_frame(async_result.get())
    print(frame)
    timings['map_async'] = time.time() - start

    # starmap: like map, but each element is unpacked into several arguments.
    start = time.time()
    with Pool(workers) as pool, tqdm(name_dir_pairs) as pbar:
        frame = _rows_to_frame(pool.starmap(_func1, pbar))
    print(frame)
    timings['starmap'] = time.time() - start

    # starmap_async: asynchronous variant of starmap.
    start = time.time()
    with Pool(workers) as pool, tqdm(name_dir_pairs) as pbar:
        async_result = pool.starmap_async(_func1, pbar)
        frame = _rows_to_frame(async_result.get())
    print(frame)
    timings['starmap_async'] = time.time() - start

    # apply: one blocking call per task; accepts one or several arguments.
    # The workers return (shape, file_name), which _rows_to_frame unpacks in
    # that order.
    start = time.time()
    with Pool(workers) as pool:
        frame = _rows_to_frame(
            pool.apply(_func1, (name, img_dir)) for name in tqdm(file_names)
        )
    print(frame)
    timings['apply'] = time.time() - start

    # apply_async: submit every task first, then collect. Calling get()
    # right after each submit would serialize the run and hide the
    # benefit of the asynchronous API.
    start = time.time()
    with Pool(workers) as pool:
        pending = [pool.apply_async(_func, (path,)) for path in file_paths]
        frame = _rows_to_frame(result.get() for result in tqdm(pending))
    print(frame)
    timings['apply_async'] = time.time() - start

    for label, elapsed in timings.items():
        print(label + ' time', elapsed)
# Script entry point. The guard keeps worker processes that re-import this
# module (spawn start method) from launching the benchmark again.
if __name__ == "__main__":
    mlt_compare2()



