先看下效果
文档内容如下:
查看解析内容
上传doc文档
查看解析内容
导出我的测试文档!
正文部分
吧啦吧啦啦啦啦阿拉啦啦
利用刚刚解析出来的HTML导出为Word
访问导出接口即可下载文档
导出效果
注意target/classes/word/image目录和content.docx模版文档要存在
content.docx中的内容就是content:
思路
Maven依赖(按 groupId:artifactId:version 列出):cn.afterturn:easypoi-spring-boot-starter:4.2.0、org.jsoup:jsoup:1.13.1、org.docx4j:docx4j:3.3.6、org.slf4j:slf4j-log4j12(原文未给出版本)、com.deepoove:poi-tl:1.6.0-beta1、org.apache.poi:poi:3.17、org.apache.poi:poi-scratchpad:3.17、org.apache.poi:poi-ooxml:3.17、fr.opensagres.xdocreport:xdocreport:2.0.1、org.apache.poi:poi-ooxml-schemas:3.17、org.apache.poi:ooxml-schemas:1.4、org.apache.commons:commons-lang3:3.7
上传word文件,解析出HTML内容,图片存储到定义好的静态资源目录,主体HTML文档中img的src存储的是相对路径
当导出word文件时,先对HTML文档img的src进行解析,增加服务器的静态资源访问位置路径,这样就能访问到图片输出为word
可以根据需求替换成图片资源服务器~
用于定义图片存储和模版文件等
application.yml
spring:
  application:
    name: hotevent-service
  undertow:
    buffer-size: 1024
    direct-buffers: true
  profiles:
    active: dev
  resources:
    static-locations: ${res.src}
  servlet:
    multipart:
      enabled: true #是否启用http上传处理
      max-request-size: 100MB #最大请求文件的大小
      max-file-size: 20MB #设置单个文件最大长度
      file-size-threshold: 20MB #当文件达到多少时进行磁盘写入
#当前应用相关的配置请在app下添加
app:
  resource-img-path: ${app.resource-img-path.val}
  upload-img-path: ${app.upload-img-path.val}
application-dev.properties
# 应用名称
spring.application.name=html-to-word
# 应用服务 WEB 访问端口
server.port=9967
app.upload-img-path.val=/Users/cat/Documents/image/
app.resource-img-path.val=http://127.0.0.1:9967/
#富文本内图片url前缀
app.html.url.head=http://127.0.0.1:9967/image/
# 富文本导出 word 模版路径
word.src=word/content.docx
#静态资源映射路径, 用于存储解析的图片
res.src=file:/Users/cat/Desktop/html-to-word/target/classes/word
res.word=/Users/cat/Desktop/html-to-word/target/classes/word/
代码实现
HtmlToWordConStants 常量类
package com.hexuan.htmltoword.constants;
/**
 * String constants shared by the HTML/Word conversion code.
 *
 * <p>This is a pure constant holder: it is final and has a private constructor
 * so it can be neither instantiated nor subclassed.
 */
public final class HtmlToWordConStants {
    /** Attribute name {@code data-class}. */
    public static final String COMMONATTR = "data-class";
    /** Empty string. NOTE(review): may have lost its content when the article was extracted - confirm against the project. */
    public static final String HTML_ELEMENT = "";
    /** File name of the HTML file produced when an uploaded Word document is parsed. */
    public static final String DEMO_HTML = "demo.html";
    /** File-name suffix of legacy Word documents. */
    public static final String DOC = ".doc";
    /** File-name suffix of OOXML Word documents. */
    public static final String DOCX = ".docx";
    /** Image width, kept as a string. */
    public static final String IMG_WIDTH = "400";

    private HtmlToWordConStants() {
        // constant holder - no instances
    }
}
TestController 接口测试案例
package com.hexuan.htmltoword.controller;
import com.hexuan.htmltoword.constants.HtmlToWordConStants;
import com.hexuan.htmltoword.util.word.ExcelUtils;
import com.hexuan.htmltoword.util.word.XWPFDocumentUtil;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@RestController
public class TestController {
@Value("${word.src}")
private String exportWordSrc;
@Value("${res.word}")
private String resWord;
@Value("${app.html.url.head}")
private String htmlUrlHead;
/**
 * Receives an uploaded Word file, converts it to HTML and prints the generated HTML.
 *
 * <p>Files whose original name ends with {@code .doc} go through {@link #docToHtml},
 * names ending with {@code .docx} go through {@link #docxToHtml}. Any other name,
 * including a missing one, is rejected with an error message.
 *
 * @param file the uploaded multipart file (request parameter {@code file})
 * @return {@code "ok"} on success or for an empty upload, otherwise an error message
 * @throws Exception any conversion failure other than {@link FileNotFoundException}
 */
@PostMapping(value = "/upload")
public String upload(@RequestParam("file") MultipartFile file) throws Exception {
    // An empty upload is a no-op that still reports "ok", as before.
    if (file.isEmpty()) {
        return "ok";
    }
    // The original file name may be null; treat that as an unsupported format
    // instead of failing with a NullPointerException.
    String originalName = file.getOriginalFilename();
    try {
        if (originalName != null && originalName.endsWith(HtmlToWordConStants.DOC)) {
            docToHtml(file);
        } else if (originalName != null && originalName.endsWith(HtmlToWordConStants.DOCX)) {
            docxToHtml(file);
        } else {
            return "格式有误,只允许doc,docx的word文件";
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return "操作失败";
    }
    // Both converters write their result to the same HTML file; read it back and print it.
    String content = readFileByLines(resWord + HtmlToWordConStants.DEMO_HTML);
    System.out.println(content);
    return "ok";
}
/**
 * Reads a UTF-8 text file and returns its lines concatenated without line separators.
 *
 * @param fileName path of the file to read
 * @return the concatenated lines (empty string for an empty file), or {@code null}
 *         if the file cannot be opened or read
 */
public static String readFileByLines(String fileName) {
    StringBuilder content = new StringBuilder();
    // try-with-resources closes the reader exactly once, on success and on failure.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new FileInputStream(fileName), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        // readLine() returns null at end of file
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
    return content.toString();
}
/**
 * Converts an uploaded legacy {@code .doc} file to HTML.
 *
 * <p>Embedded pictures are written to {@code resWord + "image/"}, and the HTML is written to
 * {@code resWord + HtmlToWordConStants.DEMO_HTML}. The value returned by the pictures manager
 * is the bare file name, so the generated HTML references pictures by relative path.
 *
 * @param file the uploaded {@code .doc} file
 * @throws TransformerException if the DOM cannot be serialized to HTML
 * @throws IOException if the upload cannot be read
 * @throws ParserConfigurationException if no DOM document builder can be created
 */
void docToHtml(MultipartFile file) throws TransformerException, IOException, ParserConfigurationException {
    // Close the upload stream once conversion is finished.
    try (InputStream in = file.getInputStream()) {
        HWPFDocument hwpfDocument = new HWPFDocument(in);
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Picture manager that stores each embedded picture on disk.
        converter.setPicturesManager(new PicturesManager() {
            // Invoked during processDocument(...) below for the pictures found in the document.
            @Override
            public String savePicture(byte[] bytes, PictureType pictureType, String name, float width, float height) {
                // A timestamp alone is not unique: several pictures can be saved within the
                // same millisecond and would overwrite each other. Add a random UUID.
                String imgName = System.currentTimeMillis() + "-" + java.util.UUID.randomUUID() + ".jpg";
                // The stream is local and closed by try-with-resources (it used to be a field).
                try (FileOutputStream out = new FileOutputStream(resWord + "image/" + imgName)) {
                    out.write(bytes);
                } catch (IOException exception) {
                    // Best effort, as before: a picture that cannot be stored does not abort the conversion.
                    exception.printStackTrace();
                }
                // Return the relative path the converter puts into the generated HTML.
                return imgName;
            }
        });
        // Walk the Word document and build the HTML DOM.
        converter.processDocument(hwpfDocument);
        Document htmlDocument = converter.getDocument();
        DOMSource domSource = new DOMSource(htmlDocument);
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        // Serialize as HTML, encoded as UTF-8.
        transformer.setOutputProperty(OutputKeys.METHOD, "html");
        transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        transformer.transform(domSource, new StreamResult(new File(resWord + HtmlToWordConStants.DEMO_HTML)));
    }
}
/**
 * Converts an uploaded {@code .docx} file to XHTML.
 *
 * <p>Pictures are extracted below {@code resWord + "image/" + <timestamp>}, and the HTML is
 * written to {@code resWord + HtmlToWordConStants.DEMO_HTML} with picture URIs prefixed by
 * {@code <timestamp>/}.
 *
 * @param file the uploaded {@code .docx} file
 * @throws IOException if the upload cannot be read or the HTML cannot be written
 */
void docxToHtml(MultipartFile file) throws IOException {
    // Take the timestamp once: the extraction directory and the URI prefix must be the same
    // value, and two separate currentTimeMillis() calls can straddle a millisecond boundary.
    String imageDirName = String.valueOf(System.currentTimeMillis());
    XHTMLOptions options = XHTMLOptions.create();
    // Directory the pictures are extracted to.
    options.setExtractor(new FileImageExtractor(new File(resWord + "image/" + imageDirName)));
    // Prefix of the picture paths written into the HTML.
    options.URIResolver(new BasicURIResolver(imageDirName + "/"));
    // All three resources are closed even if the conversion throws.
    try (InputStream in = file.getInputStream();
         XWPFDocument document = new XWPFDocument(in);
         OutputStreamWriter outputStreamWriter = new OutputStreamWriter(
                 new FileOutputStream(resWord + HtmlToWordConStants.DEMO_HTML), "utf-8")) {
        XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
        xhtmlConverter.convert(document, outputStreamWriter, options);
    }
}
@GetMapping(value = "/exportWord")
public String exportWord(HttpServletRequest request, HttpServletResponse response) throws
Exception {
String content = "我的测试文档!
正文部分
吧啦吧啦啦啦啦阿拉啦啦
n";
//由于刚刚导入解析存储的是相对路径,所以导出时要加上图片资源的前缀,我这里直接存储在了自定义的静态资源目录中
content = replaceImgSrc(content);
InputStream in = null;
XWPFDocument doc = null;
in = Thread.currentThread().getContextClassLoader().getResourceAsStream(exportWordSrc);
OPCPackage srcPackage = OPCPackage.open(in);
doc = new XWPFDocument(srcPackage);
List
ElementEnum HTML元素映射到word的枚举
package com.hexuan.htmltoword.enums;
/**
 * Maps HTML element names to the keys used when writing Word content.
 *
 * <p>Each constant carries the HTML tag name ({@code code}), the mapped key ({@code value})
 * and a human-readable description ({@code desc}).
 */
public enum ElementEnum {
    H1("h1", "h1", "一级标题"),
    H2("h2", "h2", "二级标题"),
    H3("h3", "h3", "三级标题"),
    H7("h7", "h7", "小标题"),
    P("p", "paragraph", "段落"),
    STRONG("strong", "", "加粗"),
    I("i", "", "斜体"),
    U("u", "", "字体下划线"),
    IMG("img", "imgurl", "base64图片"),
    TABLE("table", "table", "表格"),
    BR("br", "br", "换行");

    private final String code;
    private final String value;
    private final String desc;

    ElementEnum(String code, String value, String desc) {
        this.code = code;
        this.value = value;
        this.desc = desc;
    }

    /** @return the HTML tag name */
    public String getCode() {
        return this.code;
    }

    /** @return the key this tag is mapped to (may be empty) */
    public String getValue() {
        return this.value;
    }

    /** @return the human-readable description */
    public String getDesc() {
        return this.desc;
    }

    /**
     * Looks up the mapped key for an HTML tag name, ignoring case.
     *
     * @param code the HTML tag name; may be {@code null}
     * @return the mapped key of the first matching constant, or {@code null} if none matches
     */
    public static String getValueByCode(String code) {
        ElementEnum[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            ElementEnum candidate = candidates[i];
            if (!candidate.code.equalsIgnoreCase(code)) {
                continue;
            }
            return candidate.value;
        }
        return null;
    }
}
TitleFontEnum 标题字体样式
package com.hexuan.htmltoword.enums;
/**
 * Font size associated with each heading tag.
 */
public enum TitleFontEnum {
    H1("h1", 24),
    H2("h2", 22),
    H3("h3", 12),
    H7("h7", 12);

    private final String title;
    private final Integer font;

    TitleFontEnum(String title, Integer font) {
        this.title = title;
        this.font = font;
    }

    /** @return the heading tag name */
    public String getTitle() {
        return title;
    }

    /** @return the font size for this heading */
    public Integer getFont() {
        return font;
    }

    /**
     * Looks up the font size for a heading tag name (case-sensitive).
     *
     * @param title the heading tag name; may be {@code null}
     * @return the font size, or {@code null} if {@code title} is {@code null} or unknown
     */
    public static Integer getFontByTitle(String title) {
        for (TitleFontEnum e : TitleFontEnum.values()) {
            // Compare from the constant's side so a null title yields null, not an NPE.
            if (e.getTitle().equals(title)) {
                return e.getFont();
            }
        }
        return null;
    }
}
WordUtils 导出 word 工具类
package com.hexuan.htmltoword.util.word;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.util.Assert;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.OutputStream;
import java.net.URLEncoder;
/**
 * Helper for sending a Word document to the browser as a download.
 */
public final class WordUtils {

    private WordUtils() {
        // static helper - no instances
    }

    /**
     * Writes {@code doc} to the HTTP response as an attachment named {@code fileName}.
     *
     * <p>The file name is URL-encoded when the user agent contains {@code msie} or
     * {@code like gecko}, and re-encoded from UTF-8 bytes as ISO-8859-1 otherwise.
     * A request without a {@code user-agent} header takes the second path.
     * Failures while writing are printed and not propagated.
     *
     * @param doc the document to write
     * @param fileName the download file name; must end with {@code .docx}
     * @param request the current request, used only for its {@code user-agent} header
     * @param response the response the document is written to
     * @throws IllegalArgumentException if {@code fileName} is {@code null} or does not end with {@code .docx}
     */
    public static void exportWordList(XWPFDocument doc, String fileName, HttpServletRequest request, HttpServletResponse response) {
        Assert.notNull(fileName, "导出文件名不能为空");
        Assert.isTrue(fileName.endsWith(".docx"), "word导出请使用docx格式");
        try {
            // The header may be absent; previously that caused a swallowed NullPointerException
            // and no download. Locale.ROOT keeps the lower-casing independent of the server locale.
            String header = request.getHeader("user-agent");
            String userAgent = header == null ? "" : header.toLowerCase(java.util.Locale.ROOT);
            if (userAgent.contains("msie") || userAgent.contains("like gecko")) {
                fileName = URLEncoder.encode(fileName, "UTF-8");
            } else {
                fileName = new String(fileName.getBytes("utf-8"), "ISO-8859-1");
            }
            // Force a download instead of inline display.
            response.setContentType("application/force-download");
            response.addHeader("Content-Disposition", "attachment;fileName=" + fileName);
            // Close the stream even when writing the document fails.
            try (OutputStream out = response.getOutputStream()) {
                doc.write(out);
            }
        } catch (Exception e) {
            // Best effort, as before: the failure is reported but not propagated.
            e.printStackTrace();
        }
    }
}
XWPFDocumentUtil
package com.hexuan.htmltoword.util.word;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.util.List;
import java.util.Map;
public class XWPFDocumentUtil {
public static void wordInsertRitchText(XWPFDocument doc, List
项目源码:https://gitee.com/pikachu2333/html-to-word




