当使用具有1000多个对象的存储桶时,实现一个解决方案(使用
NextContinuationToken最多1000个键的顺序集)是必要的。该解决方案首先编译对象列表,然后迭代创建指定目录并下载现有对象。
import boto3import oss3_client = boto3.client('s3')def download_dir(prefix, local, bucket, client=s3_client): """ params: - prefix: pattern to match in s3 - local: local path to folder in which to place files - bucket: s3 bucket with target contents - client: initialized s3 client object """ keys = [] dirs = [] next_token = '' base_kwargs = { 'Bucket':bucket, 'Prefix':prefix, } while next_token is not None: kwargs = base_kwargs.copy() if next_token != '': kwargs.update({'ContinuationToken': next_token}) results = client.list_objects_v2(**kwargs) contents = results.get('Contents') for i in contents: k = i.get('Key') if k[-1] != '/': keys.append(k) else: dirs.append(k) next_token = results.get('NextContinuationToken') for d in dirs: dest_pathname = os.path.join(local, d) if not os.path.exists(os.path.dirname(dest_pathname)): os.makedirs(os.path.dirname(dest_pathname)) for k in keys: dest_pathname = os.path.join(local, k) if not os.path.exists(os.path.dirname(dest_pathname)): os.makedirs(os.path.dirname(dest_pathname)) client.download_file(bucket, k, dest_pathname)


