题记:自己定义excel分表里的数量,大表分解成若干小表。例如:
| 大表数据量 | 小表数据量 | 小表个数 |
|---|---|---|
| 100 条 | 30条 | 4张 |
| 500条 | 50条 | 10张 |
| 1650条 | 1000条 | 2张 |
import pandas as pd
from time import *
begin_time = time() #程序运行时间
dirPath = r'C:UsersuserDesktop2020-04-30 epc生成 0820909030065\'
dirPath2 = r'C:UsersuserDesktop2020-04-30 epc生成 0820909030065表格拆分\'
path = dirPath + '00820909030065-1-392500.xlsx' #excel的路径
orgName = pd.read_excel(path)
SerialNumber_list = list(orgName['SerialNumber'].drop_duplicates())
BarCode_list = list(orgName['BarCode'].drop_duplicates())
BarCode = BarCode_list[0]
print(SerialNumber_list)
print(SerialNumber_list[0])
excel_alldata_num = SerialNumber_list.__len__() #excel的总数据量
excel_data_num = 2000 #拆分每个excel里的数据量
excel_num = excel_alldata_num // excel_data_num
if excel_alldata_num % excel_data_num != 0:
excel_num += 1
print('表格总数量:'+str(excel_num))
#pd.read_excel(path, skiprows = 0, nrows=80)表示不跳过任何行,取前80行的数据;
#pd.read_excel(path, skiprows = 80, nrows=20)表示跳过前80行,取剩下20行的数据。
#pd.read_excel(path, skiprows = 0, nrows=100)表示不跳过,读取全部数据。
def SequenceNums(sta,end):
numbers = []
for i in range(sta,end+1):
numbers.append(i)
return numbers
for i in range(excel_num):
#for j in range(excel_data_num*i,excel_data_num*(i+1)-1):
print(i)
if i == 0:
tempdata = pd.read_excel(path, skiprows=0, nrows=excel_data_num)
writer = pd.ExcelWriter(dirPath2 + str(BarCode) + '-' + str(SerialNumber_list[0]) + '-' + str(SerialNumber_list[excel_data_num]-1) + '.xlsx')
elif i != excel_num-1:
tempdata = pd.read_excel(path,skiprows=SequenceNums(1,excel_data_num*i),nrows=excel_data_num)
writer = pd.ExcelWriter(dirPath2 + str(BarCode) + '-' + str(SerialNumber_list[excel_data_num*i]) + '-' + str(SerialNumber_list[excel_data_num*i+excel_data_num-1])+ '.xlsx')
else:
tempdata = pd.read_excel(path,skiprows=SequenceNums(1,excel_data_num*i), nrows=excel_alldata_num-1)
writer = pd.ExcelWriter(dirPath2 + str(BarCode) + '-'+ str(SerialNumber_list[excel_data_num*i]) + '-' + str(SerialNumber_list[excel_alldata_num-1]) + '.xlsx')
tempdata.to_excel(writer, index=False)
writer.save()
writer.close()
end_time = time() #程序运行结束时间
run_time = end_time - begin_time
print('程序运行总时间:',run_time)
# 步骤二:完成表格的拆分
'''
for i in org_list:
if orgName['upc'][i] >5:
writer = pd.ExcelWriter(dirPath + str(i) + '指标体系.xlsx')
tempdata = orgName[orgName['upc'] <5]
tempdata.to_excel(writer, index=False)
writer.save()
writer.close()
'''



