java生成docx文件、pdf文件、docx转pdf、docx转图片 pdf转图片工具

docx4j生成docx文件、pdf文件、docx转pdf、docx转图片 pdf转图片工具

最近写项目时遇到一些操作数据填充word、pdf以及word转pdf、word转图片的需求。网络搜索资料经整理如下
操作office文档、pdf一般来说有好几种实现方式

1、docx4j+apache.pdfbox+xdocreport

1.1 引入maven依赖

   
        
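        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.25</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
            <version>1.0.4</version>
        </dependency>

        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-JAXB-Internal</artifactId>
            <version>8.2.4</version>
        </dependency>
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-export-fo</artifactId>
            <version>8.2.4</version>
        </dependency>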

1.2 工具类

package com.sl.utils.office.word;

import com.sl.utils.id.IDUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.pdfbox.pdmodel.PDdocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.docx4j.Docx4J;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.finders.RangeFinder;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.fonts.PhysicalFonts;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.docx4j.openpackaging.parts.WordprocessingML.MaindocumentPart;
import org.docx4j.wml.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 *
 * docx4j生成docx文件、pdf文件、docx转pdf、docx转图片 pdf转图片工具
 *
 * 通过docx文件的书签、占位符替换变量
 *
 * 通过占位符替换注意
 *  -----------坑坑坑坑 直接在docx文件中修改，占位符不一定会连续！！！-----------
 *  ${var}必须是连续的，否则取不到变量。有时候取不到变量的时候可以转换为xml然后查看你的变量是否是连续的
 *  可以通过如下方式解决：先在docx文件中写入占位符，然后
 *  把当前docx文件用rar或zip打开，找到其中的 word/document.xml文件，修改占位符连续
 *
 *  比如把
 *  <w:r>
 *      <w:t>${na</w:t>
 *  </w:r>
 *  <w:r>
 *      <w:t>me}</w:t>
 *  </w:r>
 * 修改为
 *  <w:r>
 *      <w:t>${name}</w:t>
 *  </w:r>
 *
 * @author gaoxueyong
 * @create at: 2021/12/28 下午15:02
 */
public class DocxAndPdfAndImgUtils {
    private static final Logger log = LoggerFactory.getLogger(DocxAndPdfAndImgUtils.class);
    private static WordprocessingMLPackage wordMLPackage;
    private static ObjectFactory factory;

    /**
     * Produces images from a docx template by going through an intermediate PDF.
     *
     * <p>The template is first turned into PDF bytes via {@code getPdfFile}, and those bytes are
     * then handed to {@code pdfToImg}. Whatever {@code pdfToImg} returns is returned unchanged.
     *
     * @param templatePath path of the docx template file
     * @param mappings     placeholder data to substitute into the template
     * @param fileMapping  files keyed by bookmark name
     * @return the result of {@code pdfToImg}; NOTE(review): {@code getPdfFile} returns
     *         {@code null} on failure and that value is forwarded as-is, so the outcome in that
     *         case depends on how {@code pdfToImg} treats {@code null} - confirm
     */
    public static List getPngByDocxTemplate(String templatePath, Map mappings, Map fileMapping) {
        // Step 1: fill the template and render it to PDF.
        byte[] pdfBytes = getPdfFile(templatePath, mappings, fileMapping);
        // Step 2: rasterize the PDF.
        return pdfToImg(pdfBytes);
    }

    /**
     * Fills a docx template and returns the resulting document as a byte array.
     *
     * <p>The file produced by {@code getDocxFile} is read fully into memory and is always deleted
     * afterwards, whether or not the read succeeded.
     *
     * @param templatePath path of the docx template file
     * @param mappings     placeholder data to substitute into the template
     * @param fileMapping  files keyed by bookmark name
     * @return the bytes of the generated docx, or {@code null} if {@code getDocxFile} returned
     *         {@code null} or the generated file could not be read
     */
    public static byte[] getDocxByTemplate(String templatePath, Map mappings, Map fileMapping) {
        File docxFile = getDocxFile(templatePath, mappings, fileMapping);
        if (null == docxFile) {
            return null;
        }
        try {
            return Files.readAllBytes(docxFile.toPath());
        } catch (IOException e) {
            // Keep the cause in the log; the caller only ever sees null.
            log.error("获取文件失败", e);
            return null;
        } finally {
            // Single cleanup point for both the success and the failure path.
            if (docxFile.exists() && !docxFile.delete()) {
                log.warn("Failed to delete generated docx file: {}", docxFile.getAbsolutePath());
            }
        }
    }

    /**
     * Fills a docx template and returns the result converted to PDF, as a byte array.
     *
     * <p>The docx produced by {@code getDocxFile} is loaded with docx4j and exported with
     * {@code Docx4J.toPDF} into a temporary file, which is then read back into memory. Both the
     * generated docx and the temporary PDF are deleted before this method returns, including when
     * the conversion fails.
     *
     * @param templatePath path of the docx template file
     * @param mappings     placeholder data to substitute into the template
     * @param fileMapping  files keyed by bookmark name
     * @return the PDF bytes, or {@code null} if {@code getDocxFile} returned {@code null} or any
     *         step of the conversion threw an exception
     */
    public static byte[] getPdfFile(String templatePath, Map mappings, Map fileMapping){
        File docxFile = null;
        Path pdf = null;
        try {
            docxFile = getDocxFile(templatePath, mappings, fileMapping);
            if (null == docxFile) {
                return null;
            }
            // Local package on purpose: the conversion must not touch the shared static field.
            WordprocessingMLPackage filledPackage = WordprocessingMLPackage.load(docxFile);
            pdf = Files.createTempFile(IDUtils.getPrimaryId(), ".pdf");
            // Close the stream before reading the file back so the PDF is fully flushed
            // and the file handle is released (required for deletion on Windows).
            try (FileOutputStream pdfOut = new FileOutputStream(pdf.toFile())) {
                Docx4J.toPDF(filledPackage, pdfOut);
            }
            return Files.readAllBytes(pdf);
        } catch (Exception e) {
            // Covers Docx4JException as well as I/O and runtime failures; contract is "null on error".
            log.error("getPdfFile error: ", e);
            return null;
        } finally {
            // Clean up intermediates on every path, not only on success.
            if (null != docxFile && docxFile.exists() && !docxFile.delete()) {
                log.warn("Failed to delete generated docx file: {}", docxFile.getAbsolutePath());
            }
            if (null != pdf) {
                File pdfFile = pdf.toFile();
                if (pdfFile.exists() && !pdfFile.delete()) {
                    log.warn("Failed to delete temporary pdf file: {}", pdfFile.getAbsolutePath());
                }
            }
        }
    }
    /**
     * Converts a docx document supplied as a stream into PDF bytes.
     *
     * <p>The stream is loaded with docx4j and exported with {@code Docx4J.toPDF} into a temporary
     * file, which is read back into memory and then deleted, including when the conversion fails.
     * This method does not itself close {@code docxInputstream}.
     *
     * @param docxInputstream stream containing the docx document; may be {@code null}
     * @return the PDF bytes, or {@code null} if the stream is {@code null} or any step of the
     *         conversion threw an exception
     */
    public static byte[] getPdfByte(InputStream docxInputstream){
        if (null == docxInputstream) {
            return null;
        }
        Path pdf = null;
        try {
            // Local package on purpose: the conversion must not touch the shared static field.
            WordprocessingMLPackage sourcePackage = WordprocessingMLPackage.load(docxInputstream);
            pdf = Files.createTempFile(IDUtils.getPrimaryId(), ".pdf");
            // Close the stream before reading the file back so the PDF is fully flushed
            // and the file handle is released (required for deletion on Windows).
            try (FileOutputStream pdfOut = new FileOutputStream(pdf.toFile())) {
                Docx4J.toPDF(sourcePackage, pdfOut);
            }
            return Files.readAllBytes(pdf);
        } catch (Exception e) {
            // Covers Docx4JException as well as I/O and runtime failures; contract is "null on error".
            log.error("getPdfByte error: ", e);
            return null;
        } finally {
            // Remove the temporary PDF on every path, not only on success.
            if (null != pdf) {
                File pdfFile = pdf.toFile();
                if (pdfFile.exists() && !pdfFile.delete()) {
                    log.warn("Failed to delete temporary pdf file: {}", pdfFile.getAbsolutePath());
                }
            }
        }
    }
    /**
     * 通过模板获取转换后docx文件
     * @param templatePath 模板文件
     * @param mappings 要匹配的占位符数据
     * @param fileMapping 书签名称对于的文件
     * @return
     */
    public static File getDocxFile(String templatePath, Map mappings, Map fileMapping){
        try {
            wordMLPackage = WordprocessingMLPackage.load(new File(templatePath));
            MaindocumentPart maindocumentPart = wordMLPackage.getMaindocumentPart();
            if(MapUtils.isNotEmpty(mappings)){
                maindocumentPart.variableReplace(mappings);
            }

            factory = Context.getWmlObjectFactory();
            document wmlDoc = (document) maindocumentPart.getJaxbElement();
            Body body = wmlDoc.getBody();
            // 提取正文中所有段落
            List

java生成docx文件、pdf文件、docx转pdf、docx转图片 pdf转图片工具

Java相关栏目本月热门文章