# S3 handling
# Reading files from AWS S3
def listfile(s3, bucket, path, prefix):
    """Return the keys of all objects under a prefix, most recently modified first.

    Args:
        s3: Client object exposing ``list_objects_v2`` (presumably a boto3
            S3 client -- confirm against the caller).
        bucket: Name of the bucket to list.
        path: Folder-like part of the key. When non-empty, surrounding
            slashes are stripped and a single trailing ``/`` is appended
            before it is prepended to ``prefix``.
        prefix: Key prefix to match, relative to ``path``.

    Returns:
        A list of object keys sorted by ``LastModified`` in descending
        order; an empty list when nothing matches.
    """
    if path != "":
        prefix = path.strip("/") + "/" + prefix

    request = {"Bucket": bucket, "Prefix": prefix}
    objects = []
    while True:
        page = s3.list_objects_v2(**request)
        # "Contents" is absent from the response when no key matches.
        objects.extend(page.get("Contents", []))
        if not page.get("IsTruncated"):
            break
        # Results are paginated; ask for the next page on the following call.
        request["ContinuationToken"] = page["NextContinuationToken"]

    objects.sort(key=lambda obj: obj["LastModified"], reverse=True)
    return [obj["Key"] for obj in objects]
# Install awswrangler at runtime (reads the GLUE_INSTALLATION environment variable)
import os
import pip  # no longer called below; retained in case other parts of the file use it
import logging
import subprocess
import sys

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Directory pip installs into; raises KeyError if the variable is not set.
install_path = os.environ['GLUE_INSTALLATION']
index_url = 'https://pypi.tuna.tsinghua.edu.cn/simple'
# Alternate location of the same wheel file name, kept for reference:
# https://files.pythonhosted.org/packages/65/43/3c1b5fced4d3caa31a17f4cae5ec6bc656becbcf492041c5caa809825ec1/awswrangler-2.9.0-py3-none-any.whl
wheel_file = 'https://ln-solutions.s3.cn-northwest-1.amazonaws.com.cn/awswrangler-2.9.0-py3-none-any.whl'

# pip offers no supported in-process API (pip.main is internal), so run it
# as a module of the current interpreter and log its exit code.
install_cmd = [
    sys.executable, '-m', 'pip', 'install',
    '--index-url', index_url,
    '--target', install_path,
    wheel_file,
]
logger.info(subprocess.run(install_cmd).returncode)

# Imported only after the install step above has run.
import awswrangler as wr
print("Installed awswrangler version: " + wr.__version__)



