业务场景:本地数据库(Oracle)有200张表需要同步到odps,需要先在odps建表,再进行表数据同步。如果一张张的去建表,很浪费时间。如果后续还有这种业务,则考虑用Python写脚本,读取本地数据库这些表的表结构,在odps上进行批量建表,字段类型统一用string。
import cx_Oracle
from odps import ODPS, options
def create_table(table_name, table_comments, owner):
#执行查询表结构语句,获取表结构信息
col_sql = '''select COLUMN_NAME,COMMENTS from all_col_comments where table_name = '{}' and owner = '{}' '''.format(
table_name, owner)
#print(col_sql) #打印SQL
col_data = cursor.execute(col_sql).fetchall()
# print(type(col_data)) #list
readcolumnlist = [] #存放字段
commentslist = [] #存放字段注释
for col in col_data:
col_name = col[0] # 字段
comments = col[1] # 字段注释
readcolumnlist.append(col_name)
commentslist.append(comments)
# print(comments)
# print(readcolumnlist)
# print(commentslist)
create_table_sql = "CREATE TABLE IF NOT EXISTS 项目空间名称." + table_name + "(n"
column_comment_sql = ""
#生成建表语句
for idx in range(len(readcolumnlist)):
if idx != len(readcolumnlist) - 1:
create_table_sql += readcolumnlist[idx] + " STRING COMMENT '" + (commentslist[idx] if commentslist[idx] else "") + "'," + 'n'
else:
create_table_sql += readcolumnlist[idx] + " STRING COMMENT '" + (commentslist[idx] if commentslist[idx] else "") + "'" + 'n'
create_table_sql += ")" + "nCOMMENT '{}';n".format(table_comments)
print(create_table_sql)
odps.execute_sql(create_table_sql) #在odps建表
if __name__ == '__main__':
accessId = "填写用户id"
accessKey = "填写用户key"
project = "填写项目空间名称"
endpoint = "填写odps访问地址"
odps = ODPS(accessId, accessKey, project, endpoint=endpoint)
'''
本地哪些库的表要同步到odps,就把库名OWNER ,表名TABLE_NAME,
表备注COMMENTS 等信息存到一张表里TABLE_DATA。
'''
select_sql = "SELECT TABLE_NAME,COMMENTS ,OWNER FROM DMP.TABLE_DATA"
db_conn = cx_Oracle.connect("user", "password", "10.0.0.0:15215/orcl")
cursor = db_conn.cursor()
cursor.execute(select_sql)
results = cursor.fetchall()
for result in results:
table_name = result[0] # 表名
table_comments = result[1] #表备注
owner = result[2] #用户,即表在哪个数据库
# print(table_name) #str类型
# print(table_comments) #str类型
create_table(table_name, table_comments, owner)#调用建表方法



