前导:
开发过程中经常需要用 Java 将 Office 系列文档转换为 PDF,一般使用 Apache OpenOffice(最初由 Sun 开发,并非微软产品)配合 jodconverter 实现文档转换。
openoffice既有windows版本也有linux版。不用担心生产环境是linux系统。
1、openoffice依赖jar,以maven为例:
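<dependency>
    <groupId>com.artofsolving</groupId>
    <artifactId>jodconverter</artifactId>
    <version>2.2.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>jurt</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>ridl</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>juh</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>unoil</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-jdk14</artifactId>
    <version>1.4.3</version>
</dependency>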
2、直接上转换代码。运行前需先启动 OpenOffice 服务,并让其监听 8100 端口,代码通过该端口连接 OpenOffice。
/**
 * Converts {@code sourceFile} into {@code targetFile} through an OpenOffice
 * service listening on local port 8100.
 *
 * <p>The input and output formats are looked up from the file extensions of
 * the two paths. A failure to connect to OpenOffice is logged together with
 * its cause and is not rethrown, so the method returns normally in that case.
 *
 * @param sourceFile the document to convert
 * @param targetFile the file to write; its extension selects the output format
 */
public void convert(File sourceFile, File targetFile) {
    // 1: open the connection (creating the object does not connect yet)
    OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
    try {
        connection.connect();
        DocumentConverter converter = new OpenOfficeDocumentConverter(connection);

        // 2: resolve the formats. BasicDocumentFormatRegistry starts out empty,
        // so every lookup on a fresh instance returns null; the default registry
        // is the one that registers pdf/doc/xls/ppt and the other formats.
        // Fully qualified because this snippet's import list is not visible.
        DocumentFormatRegistry factory =
                new com.artofsolving.jodconverter.DefaultDocumentFormatRegistry();
        // getExtensionName is a helper defined elsewhere; presumably it returns
        // the extension without the leading dot -- verify against its definition.
        DocumentFormat inputDocumentFormat = factory
                .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
        DocumentFormat outputDocumentFormat = factory
                .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));

        // 3: run the conversion
        converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
    } catch (ConnectException e) {
        // Keep the cause so that the failure can be diagnosed from the log.
        log.error("文档转换PDF失败", e);
    } finally {
        // Always release the socket, including when the conversion throws.
        if (connection.isConnected()) {
            connection.disconnect();
        }
    }
}
3、需注意:jodconverter 2.2.1 在转换 Office 2007 及以后版本的 xxx.docx 文档时会报错。原因大家都明白:Office 2003 的后缀名是 xxx.doc,2007 及以后版本的后缀名是 xxx.docx。
查看 jodconverter 源码发现,已注册的 DocumentFormat 中没有 docx 格式:BasicDocumentFormatRegistry 的 public DocumentFormat getFormatByFileExtension(String extension) 方法按后缀名精确匹配,Word 文档默认只注册了 doc 格式,传入 docx 时返回 null。
BasicDocumentFormatRegistry 类源码
// // JODConverter - Java Opendocument Converter // Copyright (C) 2004-2007 - Mirko Nasato// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // http://www.gnu.org/copyleft/lesser.html // package com.artofsolving.jodconverter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class BasicdocumentFormatRegistry implements documentFormatRegistry { private List documentFormats = new ArrayList(); public void adddocumentFormat(documentFormat documentFormat) { documentFormats.add(documentFormat); } protected List getdocumentFormats() { return documentFormats; } public documentFormat getFormatByFileExtension(String extension) { if (extension == null) { return null; } String lowerExtension = extension.toLowerCase(); for (Iterator it = documentFormats.iterator(); it.hasNext();) { documentFormat format = (documentFormat) it.next(); if (format.getFileExtension().equals(lowerExtension)) { return format; } } return null; } public documentFormat getFormatByMimeType(String mimeType) { for (Iterator it = documentFormats.iterator(); it.hasNext();) { documentFormat format = (documentFormat) it.next(); if (format.getMimeType().equals(mimeType)) { return format; } } return null; } }
BasicDocumentFormatRegistry 的默认子类 DefaultDocumentFormatRegistry 中注册的文件格式如下
// // JODConverter - Java Opendocument Converter // Copyright (C) 2004-2007 - Mirko Nasato// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // http://www.gnu.org/copyleft/lesser.html // package com.artofsolving.jodconverter; public class DefaultdocumentFormatRegistry extends BasicdocumentFormatRegistry { public DefaultdocumentFormatRegistry() { final documentFormat pdf = new documentFormat("Portable document Format", "application/pdf", "pdf"); pdf.setExportFilter(documentFamily.DRAWING, "draw_pdf_Export"); pdf.setExportFilter(documentFamily.PRESENTATION, "impress_pdf_Export"); pdf.setExportFilter(documentFamily.SPREADSHEET, "calc_pdf_Export"); pdf.setExportFilter(documentFamily.TEXT, "writer_pdf_Export"); adddocumentFormat(pdf); final documentFormat swf = new documentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf"); swf.setExportFilter(documentFamily.DRAWING, "draw_flash_Export"); swf.setExportFilter(documentFamily.PRESENTATION, "impress_flash_Export"); adddocumentFormat(swf); final documentFormat xhtml = new documentFormat("XHTML", "application/xhtml+xml", "xhtml"); xhtml.setExportFilter(documentFamily.PRESENTATION, "XHTML Impress File"); xhtml.setExportFilter(documentFamily.SPREADSHEET, "XHTML Calc File"); xhtml.setExportFilter(documentFamily.TEXT, "XHTML Writer File"); adddocumentFormat(xhtml); // HTML is treated as Text when supplied as input, but as an output it is also // available for exporting Spreadsheet and Presentation formats final documentFormat html = new 
documentFormat("HTML", documentFamily.TEXT, "text/html", "html"); html.setExportFilter(documentFamily.PRESENTATION, "impress_html_Export"); html.setExportFilter(documentFamily.SPREADSHEET, "HTML (StarCalc)"); html.setExportFilter(documentFamily.TEXT, "HTML (StarWriter)"); adddocumentFormat(html); final documentFormat odt = new documentFormat("Opendocument Text", documentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt"); odt.setExportFilter(documentFamily.TEXT, "writer8"); adddocumentFormat(odt); final documentFormat sxw = new documentFormat("OpenOffice.org 1.0 Text document", documentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw"); sxw.setExportFilter(documentFamily.TEXT, "StarOffice XML (Writer)"); adddocumentFormat(sxw); final documentFormat doc = new documentFormat("Microsoft Word", documentFamily.TEXT, "application/msword", "doc"); doc.setExportFilter(documentFamily.TEXT, "MS Word 97"); adddocumentFormat(doc); final documentFormat rtf = new documentFormat("Rich Text Format", documentFamily.TEXT, "text/rtf", "rtf"); rtf.setExportFilter(documentFamily.TEXT, "Rich Text Format"); adddocumentFormat(rtf); final documentFormat wpd = new documentFormat("WordPerfect", documentFamily.TEXT, "application/wordperfect", "wpd"); adddocumentFormat(wpd); final documentFormat txt = new documentFormat("Plain Text", documentFamily.TEXT, "text/plain", "txt"); // set FilterName to "Text" to prevent OOo from tryign to display the "ASCII Filter Options" dialog // alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed txt.setimportOption("FilterName", "Text"); txt.setExportFilter(documentFamily.TEXT, "Text"); adddocumentFormat(txt); final documentFormat wikitext = new documentFormat("MediaWiki wikitext", "text/x-wiki", "wiki"); wikitext.setExportFilter(documentFamily.TEXT, "MediaWiki"); adddocumentFormat(wikitext); final documentFormat ods = new documentFormat("Opendocument Spreadsheet", documentFamily.SPREADSHEET, 
"application/vnd.oasis.opendocument.spreadsheet", "ods"); ods.setExportFilter(documentFamily.SPREADSHEET, "calc8"); adddocumentFormat(ods); final documentFormat sxc = new documentFormat("OpenOffice.org 1.0 Spreadsheet", documentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc"); sxc.setExportFilter(documentFamily.SPREADSHEET, "StarOffice XML (Calc)"); adddocumentFormat(sxc); final documentFormat xls = new documentFormat("Microsoft Excel", documentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls"); xls.setExportFilter(documentFamily.SPREADSHEET, "MS Excel 97"); adddocumentFormat(xls); final documentFormat csv = new documentFormat("CSV", documentFamily.SPREADSHEET, "text/csv", "csv"); csv.setimportOption("FilterName", "Text - txt - csv (StarCalc)"); csv.setimportOption("FilterOptions", "44,34,0"); // Field Separator: ','; Text Delimiter: '"' csv.setExportFilter(documentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)"); csv.setExportOption(documentFamily.SPREADSHEET, "FilterOptions", "44,34,0"); adddocumentFormat(csv); final documentFormat tsv = new documentFormat("Tab-separated Values", documentFamily.SPREADSHEET, "text/tab-separated-values", "tsv"); tsv.setimportOption("FilterName", "Text - txt - csv (StarCalc)"); tsv.setimportOption("FilterOptions", "9,34,0"); // Field Separator: 't'; Text Delimiter: '"' tsv.setExportFilter(documentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)"); tsv.setExportOption(documentFamily.SPREADSHEET, "FilterOptions", "9,34,0"); adddocumentFormat(tsv); final documentFormat odp = new documentFormat("Opendocument Presentation", documentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp"); odp.setExportFilter(documentFamily.PRESENTATION, "impress8"); adddocumentFormat(odp); final documentFormat sxi = new documentFormat("OpenOffice.org 1.0 Presentation", documentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi"); sxi.setExportFilter(documentFamily.PRESENTATION, "StarOffice XML (Impress)"); 
adddocumentFormat(sxi); final documentFormat ppt = new documentFormat("Microsoft PowerPoint", documentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt"); ppt.setExportFilter(documentFamily.PRESENTATION, "MS PowerPoint 97"); adddocumentFormat(ppt); final documentFormat odg = new documentFormat("Opendocument Drawing", documentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg"); odg.setExportFilter(documentFamily.DRAWING, "draw8"); adddocumentFormat(odg); final documentFormat svg = new documentFormat("Scalable Vector Graphics", "image/svg+xml", "svg"); svg.setExportFilter(documentFamily.DRAWING, "draw_svg_Export"); adddocumentFormat(svg); } }
解决方法:在自己的工程中按相同的包名(com.artofsolving.jodconverter)重写 BasicDocumentFormatRegistry 类的 public DocumentFormat getFormatByFileExtension(String extension) 方法,只要后缀名包含 doc 就使用 doc 的 DocumentFormat 文档格式(ppt、xls 同理)。
// // JODConverter - Java Opendocument Converter // Copyright (C) 2004-2007 - Mirko Nasato// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // http://www.gnu.org/copyleft/lesser.html // package com.artofsolving.jodconverter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class BasicdocumentFormatRegistry implements documentFormatRegistry { private List documentFormats = new ArrayList(); public void adddocumentFormat(documentFormat documentFormat) { documentFormats.add(documentFormat); } protected List getdocumentFormats() { return documentFormats; } public documentFormat getFormatByFileExtension(String extension) { if (extension == null) { return null; } //将文件名后缀统一转化 if (extension.indexOf("doc") >= 0) { extension = "doc"; } if (extension.indexOf("ppt") >= 0) { extension = "ppt"; } if (extension.indexOf("xls") >= 0) { extension = "xls"; } String lowerExtension = extension.toLowerCase(); for (Iterator it = documentFormats.iterator(); it.hasNext();) { documentFormat format = (documentFormat) it.next(); if (format.getFileExtension().equals(lowerExtension)) { return format; } } return null; } public documentFormat getFormatByMimeType(String mimeType) { for (Iterator it = documentFormats.iterator(); it.hasNext();) { documentFormat format = (documentFormat) it.next(); if (format.getMimeType().equals(mimeType)) { return format; } } return null; } }
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持考高分网。



