Python操作excel和pdf截图功能

# Module author metadata.
__author__ = 'Jeff.xie'
# NOTE(review): Python only honours a source-encoding declaration on the first
# or second line of a file (PEP 263); confirm this one is placed accordingly.
# _*_ encoding:utf-8 _*_

import os,time,sys
import fitz
import xlsxwriter
import openpyxl
import shutil


class Ref:
    """Plain record bundling a source file, a customer ID and a reference list.

    The three constructor arguments are stored unchanged on the instance as
    ``source_file``, ``Customer_ID`` and ``Ref_list``.
    """

    def __init__(self, source_file, Customer_ID, Ref_list):
        # Tuple assignment: each argument is bound to the attribute of the same name.
        (self.source_file,
         self.Customer_ID,
         self.Ref_list) = (source_file, Customer_ID, Ref_list)


class Shot():
    """Capture PDF snippets around reference strings and collect them in a workbook.

    Workflow (see ``main_shot``): read the ``Result`` sheet of a workbook,
    render one PNG per reference found in the listed PDF files, then write the
    PNGs into ``picture_export.xlsx`` with one worksheet per customer.
    """

    # Class-level defaults are kept so existing reads such as
    # ``Shot.ref_summary`` keep working; ``__init__`` gives every instance its
    # own lists so state is not shared between instances.
    ref_summary = []
    sheet_list = []
    Image_path = ""

    def __init__(self):
        self.ref_summary = []  # references already rendered by this instance
        self.sheet_list = []   # customer IDs collected by generate_picture

    def get_file_shot(self, sourcefile, customer_id, ref_list, image_dir):
        """Render a PNG for every not-yet-seen reference of one PDF.

        Args:
            sourcefile: path of the document, opened with ``fitz.open``.
            customer_id: prefix used for the generated image file names.
            ref_list: reference strings to look for in the document.
            image_dir: directory that receives the PNG files.

        References containing ``"eive money"`` are skipped, and so are
        references already recorded in ``self.ref_summary``.
        """
        doc = fitz.open(sourcefile)
        try:
            for ref in ref_list:
                if "eive money" in ref or ref in self.ref_summary:
                    continue
                self.ref_summary.append(ref)
                self.get_ref_shot(doc, customer_id, ref, image_dir)
        finally:
            # Release the document even if rendering one reference fails.
            doc.close()

    def get_ref_shot(self, doc, customer_id, ref_text, image_dir):
        """Highlight each hit of ``ref_text`` in ``doc`` and save a cropped PNG.

        The crop spans the page width minus fixed margins (60 on the left,
        50 on the right) and extends 2% of the page height above and 1.5%
        below the hit, clamped to the page. The image is written to
        ``<image_dir>/<customer_id>_<ref_text stripped>.png``; because the
        name does not depend on the page or hit, a later hit overwrites the
        image of an earlier one.
        """
        # NOTE(review): pageCount / searchFor / addHighlightAnnot / getPixmap /
        # writePNG are the camelCase PyMuPDF names; newer PyMuPDF releases use
        # snake_case names instead - confirm against the installed version.
        zoom_mat = fitz.Matrix(3.2, 3.2)  # larger factor -> sharper image
        file_name = customer_id + "_" + ref_text.strip() + ".png"
        # os.path.join works whether or not image_dir ends with a separator.
        out_path = os.path.join(image_dir, file_name)

        for pi in range(doc.pageCount):
            page = doc[pi]
            page_height = page.rect.br.y - page.rect.tl.y
            # Smaller factors give a shorter crop.
            pad_above = page_height * 0.02
            pad_below = page_height * 0.015
            for inst in page.searchFor(ref_text):
                # The annotation is added so the hit shows up highlighted in the render.
                page.addHighlightAnnot(inst)
                tl_pt = fitz.Point(page.rect.tl.x + 60,
                                   max(page.rect.tl.y, inst.tl.y - pad_above))
                br_pt = fitz.Point(page.rect.br.x - 50,
                                   min(page.rect.br.y, inst.br.y + pad_below))
                pix = page.getPixmap(matrix=zoom_mat, clip=fitz.Rect(tl_pt, br_pt))
                pix.writePNG(out_path)

    def write_picture(self, excel_dir, image_dir, customer_list):
        """Write ``picture_export.xlsx`` into ``excel_dir``.

        The workbook gets an empty ``Test Result`` sheet followed by one sheet
        per customer; each customer sheet receives every file in ``image_dir``
        whose name starts with that customer value.
        """
        # os.path.join keeps the output next to the input even when excel_dir is "".
        book = xlsxwriter.Workbook(os.path.join(excel_dir, "picture_export.xlsx"))
        # Sorted so the order of images inside a sheet is deterministic.
        picture_files = sorted(os.listdir(image_dir))
        book.add_worksheet("Test Result")
        # Drop empty cells and repeated customers (order preserved): a repeated
        # sheet name is rejected by xlsxwriter and None cannot be a name prefix.
        customers = [c for c in dict.fromkeys(customer_list) if c]
        for c in customers:
            sheet_pic = book.add_worksheet(c)
            matching = [f for f in picture_files if f.startswith(c)]
            for index, f in enumerate(matching):
                self.write_picture_to_excel(sheet_pic, os.path.join(image_dir, f), index)
        book.close()

    @staticmethod
    def _parse_result_row(row):
        """Turn one row of cell values into ``(source_file, customer_id, ref_list)``.

        Columns read: 0 = customer ID, 1 = file name, 2 = directory,
        3 = comma-separated references wrapped in one leading and one
        trailing character (presumably brackets - verify against the sheet).
        """
        refs = row[3].strip()
        ref_list = refs[1:-1].split(",")
        # Forward slashes in the directory are converted to backslashes.
        directory = row[2].replace("/", "\\")
        source_file = os.path.join(directory, row[1]).strip()
        return source_file, row[0].strip(), ref_list

    def generate_picture(self, sheet, refs_list, wk, image_dir):
        """Parse the data rows of ``sheet`` and render images for each of them.

        Every row after the first is appended to ``refs_list`` as a ``Ref`` and
        its customer ID to ``self.sheet_list``; ``wk`` is closed once the rows
        are read, then ``get_file_shot`` runs for every entry of ``refs_list``.
        """
        # Materialise the rows once; the first row is skipped.
        for cells in list(sheet.rows)[1:]:
            source_file, customer_id, ref_list = self._parse_result_row(
                [cell.value for cell in cells])
            refs_list.append(Ref(source_file, customer_id, ref_list))
            self.sheet_list.append(customer_id)
        wk.close()
        for refs in refs_list:
            # Use this instance rather than a module-level global.
            self.get_file_shot(refs.source_file, refs.Customer_ID, refs.Ref_list, image_dir)

    def write_picture_to_excel(self, sheet, picture_file, index):
        """Insert ``picture_file`` into column A of ``sheet``, ten rows per image."""
        # Cell rows are 1-based (there is no A0): index 0 -> A1, index 1 -> A11, ...
        sheet.insert_image('A{}'.format(index * 10 + 1), picture_file)

    def main_shot(self, path):
        """Run the whole export for the workbook at ``path``.

        Reads the ``Result`` sheet, recreates an ``Image`` directory next to
        the workbook, renders the reference images into it and writes
        ``picture_export.xlsx`` beside the workbook.
        """
        refs_list = []
        wk = openpyxl.load_workbook(path)
        # Index access is used because get_sheet_by_name emits a warning.
        sheet = wk['Result']

        # First column without its first cell: one customer value per data row.
        col1 = [item.value for item in list(sheet.columns)[0]]
        customer_list = col1[1:]

        excel_dir = os.path.split(path)[0]
        # The trailing "" keeps a trailing separator; joining also avoids
        # resolving to the filesystem root when path has no directory part.
        image_dir = os.path.join(excel_dir, "Image", "")
        if not os.path.exists(image_dir):
            os.mkdir(image_dir)
        else:
            # Start from an empty directory; the short pauses are kept from the
            # original around the delete/recreate sequence.
            shutil.rmtree(image_dir)
            time.sleep(0.2)
            os.mkdir(image_dir)
            print("image_dir is exist")
            time.sleep(0.2)

        self.generate_picture(sheet, refs_list, wk, image_dir)
        self.write_picture(excel_dir, image_dir, customer_list)


if __name__ == '__main__':
    # Script entry point: argv[1] is the path of the workbook that holds the
    # 'Result' sheet. Elapsed wall-clock time is printed at the end.
    start_time = time.time()
    shot = Shot()
    if len(sys.argv) < 2:
        # No workbook path given: say so instead of failing on sys.argv[1].
        print("did not execute")
        print("usage: {} <path to result workbook>".format(sys.argv[0]))
    else:
        ref_file_path = sys.argv[1]
        try:
            shot.main_shot(ref_file_path)
        except Exception as exc:
            # Still best-effort (no crash), but report why the run failed and
            # let KeyboardInterrupt / SystemExit propagate.
            print("did not execute")
            print("{}: {}".format(type(exc).__name__, exc))

    end_time = time.time()
    print("cost time:  {}".format(end_time-start_time))
Python操作excel和pdf截图功能

Python相关栏目本月热门文章