栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

【办公自动化】Python抓取PPT的值到excel-20220506

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

【办公自动化】Python抓取PPT的值到excel-20220506

效果:有60个结构类似的PPT,每个PPT里面有5页,需抓取特定标题页面下特定位置的PPT内容。 思路:①遍历PPT文件夹,收集文件名;②定义ppt读取函数;③逐个读取PPT文件,取出目标值;④将值存放进excel。

https://pythonbrief.blog.csdn.net/article/details/109089030

read_ppt.py
import imp
from pptx import Presentation
import os
import shutil
from openpyxl import load_workbook
from datetime import datetime

# Inputs: the folder that holds the decks and the two workbooks used later on.
file_address = r'Total'
talent_excel = r"人員名單 .xlsx"
wrong_excel = r"读取记录.xlsx"

# Collect the PowerPoint file names (bare names, in os.listdir() order).
file_address_list = [
    entry
    for entry in os.listdir(file_address)
    if entry.endswith(('.ppt', '.pptx'))
]




def _clean_cell_text(text):
    """Return *text* with newlines and vertical tabs (soft line breaks) removed."""
    return text.replace('\n', '').replace('\x0b', '')


# PPT reader: pulls the employee id and the two goal cells out of one deck.
def read_ppt_goal(filename):
    """Extract the employee id and the two goal texts from one presentation.

    The employee id is the second ``-``-separated field of the file's base
    name. Shapes are visited slide by slide; the text of the most recent
    shape that has a text frame is remembered, and each table shape is
    matched against that remembered text to decide which cell to read.
    The remembered text is not reset between slides.

    NOTE(review): both keyword checks use the literal '一些字眼', which looks
    like a placeholder; a commented-out earlier version of this code matched
    '任務2 : 團體目標9宮格' instead. Confirm the real keywords before use.

    Args:
        filename: Path of the presentation, passed to ``Presentation``.
            Its base name must contain at least one ``-``.

    Returns:
        A tuple ``(employee_id, person_goal, team_goal)``. A goal is ``''``
        when no matching table was found.

    Raises:
        IndexError: If the base name contains no ``-``.
        Exception: Anything raised by ``Presentation`` or by the table cell
            lookups (for example a table smaller than the cell being read)
            is propagated unchanged.
    """
    prs = Presentation(filename)
    person_goal = ''
    team_goal = ''

    # Use the base name so a '-' in a directory name cannot shift the field.
    employee_id = os.path.basename(filename).split('-')[1]

    # Text of the last text-bearing shape seen so far. Starting from '' keeps
    # a table that precedes every text shape from raising a NameError.
    last_text = ''

    for slide in prs.slides:
        for shape in slide.shapes:
            if shape.has_text_frame:
                last_text = shape.text_frame.text

            # Only graphic frames that really hold a table are of interest;
            # chart frames and every other shape are skipped.
            if not getattr(shape, 'has_table', False):
                continue

            table = shape.table

            # Personal goal: keyword present and not the "延伸" variant.
            if '一些字眼' in last_text and '延伸' not in last_text:
                person_goal = _clean_cell_text(table.cell(1, 1).text)

            # Team goal: keyword present (no exclusion applied here).
            if '一些字眼' in last_text:
                team_goal = _clean_cell_text(table.cell(4, 4).text)

    return employee_id, person_goal, team_goal



# Log workbook: 'sheet1' receives the extracted values, 'sheet2' the failures.
wb_ppt = load_workbook(wrong_excel)
sheet_ppt = wb_ppt['sheet1']
sheet_wrong = wb_ppt['sheet2']

# Walk over every collected file and read its goal values.
talent_data = []
print(len(file_address_list))

# Last row written on each sheet; row 1 is left untouched on both
# (presumably a header row -- confirm against the workbook).
ok_row = 1
failed_row = 1

for filename in file_address_list:
    try:
        # os.listdir() returned bare names, so resolve each one against the
        # folder it was listed from before opening it.
        employee_id, person_goal, team_goal = read_ppt_goal(
            os.path.join(file_address, filename)
        )
    except Exception as exc:
        # Best effort: one unreadable deck must not stop the batch. Record
        # the file and the reason, then carry on with the next one.
        failed_row += 1
        print(filename, "读取失败", repr(exc))
        sheet_wrong.cell(row=failed_row, column=1).value = filename + "读取失败"
        sheet_wrong.cell(row=failed_row, column=2).value = repr(exc)
    else:
        # Advance the row only on success so failures leave no blank rows.
        ok_row += 1
        talent_data.append([employee_id, person_goal, team_goal])
        print(employee_id, person_goal, team_goal)
        sheet_ppt.cell(row=ok_row, column=1).value = employee_id
        sheet_ppt.cell(row=ok_row, column=2).value = person_goal
        sheet_ppt.cell(row=ok_row, column=3).value = team_goal

    # Save after every file so progress survives an interrupted run.
    wb_ppt.save(wrong_excel)

print(talent_data)
print('len of talent data', len(talent_data))

# Write the extracted goals back into the roster workbook.
talent_wb = load_workbook(talent_excel)
talent_sheet = talent_wb.active
max_row = talent_sheet.max_row

# 1-based columns of the roster sheet touched by this step.
EMPLOYEE_ID_COL = 5     # compared against the employee id from the file name
PERSON_FLAG_COL = 19    # 'Y' / 'N': was a personal goal found
PERSON_GOAL_COL = 20    # personal goal text
TEAM_FLAG_COL = 22      # 'Y' / 'N': was a team goal found
TEAM_GOAL_COL = 23      # team goal text

# Index the extracted rows by employee id for a direct lookup per roster row.
# When an id occurs more than once, the last extracted entry is the one kept.
goals_by_id = {str(entry[0]): entry for entry in talent_data}

# max_row is the index of the last row holding data, so the range has to
# include it; row 1 is skipped (presumably the header -- confirm).
for i in range(2, max_row + 1):
    roster_id = str(talent_sheet.cell(row=i, column=EMPLOYEE_ID_COL).value)
    entry = goals_by_id.get(roster_id)
    if entry is None:
        continue

    person_goal = str(entry[1])
    team_goal = str(entry[2])

    talent_sheet.cell(row=i, column=PERSON_GOAL_COL).value = person_goal
    talent_sheet.cell(row=i, column=PERSON_FLAG_COL).value = 'Y' if person_goal else 'N'

    talent_sheet.cell(row=i, column=TEAM_GOAL_COL).value = team_goal
    talent_sheet.cell(row=i, column=TEAM_FLAG_COL).value = 'Y' if team_goal else 'N'

talent_wb.save(talent_excel)



转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/861416.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号