- 前言
- 对于docx文档
- 1.获取word文本
- 2、修改文档
- 3、删除空行
- 对于doc文档
- 1.修改为docx文档进行处理
- 总结
前言
对于.docx后缀的文档,需要依赖python-docx,使用pip命令安装pip install python-docx
对于.doc后缀的文档,需要依赖pypiwin32(仅适用于已安装Microsoft Word的Windows环境),使用pip命令安装:pip install pypiwin32
对于docx文档
1.获取word文本
代码如下(示例):
# -*- coding: utf-8 -*-
import docx
# Extract and print the contents of a Word (.docx) document.
def fetch_doc(doc_name="lr.docx"):
    """Print the runs of the first paragraph, then the text of every paragraph.

    Args:
        doc_name: Path of the .docx file to read. Defaults to "lr.docx",
            the file name this example has always used.
    """
    doc = docx.Document(doc_name)
    # Read the paragraph list once and reuse it for both loops below.
    paragraphs = doc.paragraphs

    # The first paragraph is treated as the title. Guard the lookup so a
    # document without any paragraph does not raise IndexError.
    if paragraphs:
        title = paragraphs[0]
        for run in title.runs:
            print(run)
            print(run.text)

    # Dump the text of every paragraph (the title included).
    for paragraph in paragraphs:
        print(paragraph.text)


if __name__ == '__main__':
    fetch_doc()
2、修改文档
# -*- coding: utf-8 -*-
import docx
# Append an answer to the paragraph that follows each answer-marker paragraph.
def update_doc(doc_name="lr.docx", new_doc_name="lr_new.docx"):
    """Append 'yes' after every "正确答案:" marker and save a copy.

    Each paragraph's text is printed as it is visited. When a paragraph's
    text is exactly the marker, 'yes' is appended to the paragraph that
    follows it. A paragraph that has just been appended to is never itself
    treated as a marker.

    Args:
        doc_name: Path of the .docx file to read. Defaults to "lr.docx".
        new_doc_name: Path the modified document is saved to. Defaults to
            "lr_new.docx", so the input file is left untouched.
    """
    # 1. Load the original document.
    doc = docx.Document(doc_name)

    # 2. Modify the content.
    previous_was_marker = False
    for paragraph in doc.paragraphs:
        print(paragraph.text)
        if previous_was_marker:
            # add_run appends a new run and leaves the existing runs alone;
            # assigning to paragraph.text would rebuild the paragraph as a
            # single run (see the python-docx Paragraph.text documentation).
            paragraph.add_run('yes')
            previous_was_marker = False
        else:
            previous_was_marker = paragraph.text == "正确答案:"

    # 3. Save the result as a new Word document.
    doc.save(new_doc_name)


if __name__ == '__main__':
    update_doc()
3、删除空行
# Remove every paragraph whose text is empty from `doc` (a document that was
# opened earlier with docx.Document).
# NOTE(review): a paragraph that contains only non-text content (e.g. an
# inline picture) presumably also reports empty text and would be removed
# too - confirm before running this on documents with images.
# NOTE(review): removing while iterating relies on `doc.paragraphs` returning
# a list built up front rather than a live view - verify against the
# python-docx version in use.
for paragraph in doc.paragraphs:
    if paragraph.text:
        continue
    element = paragraph._element
    # Detach the underlying XML element from its parent in the document tree.
    element.getparent().remove(element)
    # Clear the references held by the Paragraph wrapper itself (not by the
    # XML element), so the stale wrapper no longer points at the removed node.
    paragraph._p = paragraph._element = None
对于doc文档
1.修改为docx文档进行处理
from win32com import client as wc
# Convert a legacy .doc file to .docx by driving Microsoft Word through COM.
# Replace both placeholder paths before running.
# NOTE(review): Word resolves relative paths against its own working
# directory, so absolute paths are presumably required - confirm.
file_name = r"路径"
file_new_name = r"路径"

# WdSaveFormat value for the Open XML (.docx) format.
WD_FORMAT_XML_DOCUMENT = 12

word = wc.Dispatch("Word.Application")
try:
    doc = word.Documents.Open(file_name)
    try:
        print("before:", file_name)
        print("after:", file_new_name)
        # Save the content of file_name as a .docx file at file_new_name.
        doc.SaveAs(file_new_name, WD_FORMAT_XML_DOCUMENT)
    finally:
        # Close the document even if saving failed.
        doc.Close()
finally:
    # Always quit Word so a failed conversion does not leave the
    # application process running in the background.
    word.Quit()
总结



