使用java将word文档docx,doc(包含图形,文本框)完美转换成所有格式图片(pdf,png,gif,jpeg等等)

使用 Java 将 Word 文档(docx、doc,可包含图形、文本框、图片等)完美转换成 PDF 或各种格式的图片(png、gif、jpeg 等)。下文附带实现代码和效果图。

思路

先使用 jacob 将 docx 转换成 doc,再用 OpenOffice 将 doc 转成 pdf,最后用 pdfbox 将 pdf 转成任意格式的图片。

使用到的包

https://download.csdn.net/download/weixin_44396516/11393966

实现代码

把上面的包下载并导入工程;代码中的文件路径、OpenOffice 安装目录等内容请按实际环境自行修改。有问题请留言,看到会回复。



import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.model.PAPX;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.w3c.dom.Document;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
import com.artofsolving.jodconverter.openoffice.converter.StreamOpenOfficeDocumentConverter;
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.ComThread;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;

/**
 * Converts a Word document into a single image by chaining three tools:
 * <ol>
 * <li>jacob (COM automation of Microsoft Word) re-saves the document in another Word format,</li>
 * <li>an OpenOffice service (through JODConverter) converts the .doc file to PDF,</li>
 * <li>PDFBox renders the PDF pages, which are then stitched into one image.</li>
 * </ol>
 */
public class POIWordToPng {
	/** Format code passed to Word's {@code SaveAs} for the .doc format. */
	private static final int DOC_FMT = 0;
	/** Format code passed to Word's {@code SaveAs} for the .docx format. */
	private static final int DOCX_FMT = 12;

	/** OpenOffice installation directory used when the {@code openoffice.home} system property is not set. */
	private static final String DEFAULT_OPENOFFICE_HOME = "C:\\Program Files (x86)\\OpenOffice 4";
	/** Host and port the OpenOffice service is told to listen on, and that we connect to. */
	private static final String OPENOFFICE_HOST = "127.0.0.1";
	private static final int OPENOFFICE_PORT = 8100;
	/** How often, and at which interval, to try connecting while the freshly started service comes up. */
	private static final int CONNECT_ATTEMPTS = 20;
	private static final long CONNECT_RETRY_MILLIS = 500L;

	/**
	 * Relative path ({@code <output directory name>/<file name>}) of the PDF produced by the
	 * most recent {@link #convert(File, String)} call; {@code null} before the first call.
	 */
	static String picp=null;
	/** Name (with a leading "/") of the intermediate .doc file, fixed once when the class is loaded. */
	static String docn="/"+new Date().getTime()+".doc";

	/**
	 * Returns the JVM's default file encoding.
	 *
	 * @return the value of the {@code file.encoding} system property
	 */
	public static String getSystemFileCharset() {
		return System.getProperty("file.encoding");
	}

	/**
	 * Runs the whole pipeline: docx -> doc -> pdf -> gif.
	 *
	 * @param args optional: {@code args[0]} is the source document, {@code args[1]} the output
	 *             directory; the built-in sample paths are used for whatever is missing
	 * @throws IOException if any conversion step fails
	 */
	public static void main(String[] args) throws IOException {
		String sourcePath = (args != null && args.length > 0) ? args[0]
				: "D:\\软件\\Tomcat\\Tomcat7.075\\webapps\\jhwx\\file\\2019-07-22\\8_20190722111135.docx";
		String picturesPath = (args != null && args.length > 1) ? args[1]
				: "D:\\软件\\Tomcat\\Tomcat7.075\\webapps\\jhwx\\file\\2019-07-22\\image";

		File docFile;
		try {
			docFile = convertDocFmt(sourcePath, picturesPath + docn, DOC_FMT);
		} catch (IOException e) {
			throw e;
		} catch (Exception e) {
			// Without the intermediate .doc file the remaining steps cannot succeed, so stop here.
			throw new IOException("Failed to convert " + sourcePath + " to .doc", e);
		}
		File pdfFile = convert(docFile, picturesPath);
		pdf2Gif(pdfFile.getPath());
	}

	/**
	 * Converts a Word document to PDF through a locally started OpenOffice service.
	 * <p>
	 * The service is started for this call and destroyed afterwards, even when the conversion
	 * fails. The installation directory can be overridden with the {@code openoffice.home}
	 * system property.
	 *
	 * @param docFile  the Word document to convert
	 * @param filepath directory in which the PDF is created
	 * @return the generated PDF file, named after the current time in milliseconds
	 * @throws IOException if the service cannot be started or reached
	 */
	public static File convert(File docFile, String filepath) throws IOException {
		String pdfName = new Date().getTime() + ".pdf";
		File pdfFile = new File(filepath + "/" + pdfName);
		// Derived from the names rather than from a fixed character offset, so any directory works.
		picp = new File(filepath).getName() + "/" + pdfName;

		Process office = startOpenOffice();
		try {
			OpenOfficeConnection connection = connectWithRetry();
			try {
				DocumentConverter converter = new StreamOpenOfficeDocumentConverter(connection);
				converter.convert(docFile, pdfFile);
			} finally {
				connection.disconnect();
			}
		} finally {
			office.destroy();
		}
		return pdfFile;
	}

	/** Starts a headless OpenOffice process that accepts socket connections on the configured port. */
	private static Process startOpenOffice() throws IOException {
		String home = System.getProperty("openoffice.home", DEFAULT_OPENOFFICE_HOME);
		File soffice = new File(new File(home, "program"), "soffice");
		// An argument list keeps the executable path intact even though it contains spaces.
		return new ProcessBuilder(
				soffice.getPath(),
				"-headless",
				"-accept=socket,host=" + OPENOFFICE_HOST + ",port=" + OPENOFFICE_PORT + ";urp;",
				"-nofirststartwizard").start();
	}

	/** Connects to the OpenOffice service, retrying while the freshly started process is not yet listening. */
	private static OpenOfficeConnection connectWithRetry() throws IOException {
		ConnectException lastFailure = null;
		for (int attempt = 1; attempt <= CONNECT_ATTEMPTS; attempt++) {
			if (attempt > 1) {
				try {
					Thread.sleep(CONNECT_RETRY_MILLIS);
				} catch (InterruptedException e) {
					Thread.currentThread().interrupt();
					throw new IOException("Interrupted while waiting for OpenOffice to start", e);
				}
			}
			OpenOfficeConnection connection = new SocketOpenOfficeConnection(OPENOFFICE_HOST, OPENOFFICE_PORT);
			try {
				connection.connect();
				return connection;
			} catch (ConnectException e) {
				lastFailure = e;
			}
		}
		throw lastFailure;
	}

	/**
	 * Re-saves a Word document in another Word format (e.g. docx to doc) by automating
	 * Microsoft Word through jacob.
	 * <p>
	 * Original author: Harley Hong (2017/08/09).
	 *
	 * @param srcPath  path of the source document
	 * @param descPath path of the document to create
	 * @param fmt      format code handed to Word's {@code SaveAs}
	 * @return the target file
	 * @throws Exception if Word cannot be started or the document cannot be opened or saved
	 */
	public static File convertDocFmt(String srcPath, String descPath, int fmt) throws Exception {
		ComThread.InitSTA();
		try {
			ActiveXComponent app = new ActiveXComponent("Word.Application");
			try {
				// Keep Word hidden while it is being automated.
				app.setProperty("Visible", new Variant(false));
				Dispatch documents = app.getProperty("Documents").toDispatch();
				// Open the source document, then save it under the new name and format.
				Dispatch doc = Dispatch.invoke(documents, "Open", Dispatch.Method,
						new Object[] { srcPath, new Variant(true), new Variant(true) }, new int[1]).toDispatch();
				try {
					Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
							new Object[] { descPath, new Variant(fmt) }, new int[1]);
				} finally {
					Dispatch.call(doc, "Close", new Variant(false));
				}
				return new File(descPath);
			} finally {
				app.invoke("Quit", new Variant[] {});
			}
		} finally {
			// Must run even when Word could not be started or failed to quit.
			ComThread.Release();
		}
	}

	/**
	 * Renders every page of a PDF at 100 DPI, stacks the pages vertically and writes the result
	 * as one GIF next to the PDF. The file is named
	 * {@code <pdfFilename>-<number of pages>.gif}.
	 *
	 * @param pdfFilename path of the PDF to render
	 * @throws InvalidPasswordException if the PDF is password protected
	 * @throws IOException              if the PDF cannot be read, has no pages, or the image cannot be written
	 */
	public static void pdf2Gif(String pdfFilename) throws InvalidPasswordException, IOException {
		PDDocument document = PDDocument.load(new File(pdfFilename));
		try {
			int pageCount = document.getNumberOfPages();
			if (pageCount == 0) {
				throw new IOException("PDF has no pages: " + pdfFilename);
			}
			PDFRenderer renderer = new PDFRenderer(document);
			List<BufferedImage> pages = new ArrayList<BufferedImage>(pageCount);
			for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
				// The page index is zero based.
				pages.add(renderer.renderImageWithDPI(pageIndex, 100, ImageType.RGB));
			}

			BufferedImage merged = mergeImage(false, pages.toArray(new BufferedImage[0]));
			String gifName = pdfFilename + "-" + pageCount + ".gif";
			if (!ImageIOUtil.writeImage(merged, gifName, 100)) {
				throw new IOException("No image writer available for " + gifName);
			}
		} finally {
			document.close();
		}
	}

	/**
	 * Merges any number of images into one image.
	 * <p>
	 * Images are placed one after another starting at the top-left corner; areas not covered
	 * by any image keep the default colour of a new RGB image.
	 *
	 * @param isHorizontal {@code true} to place the images side by side, {@code false} to stack
	 *                     them vertically
	 * @param imgs         the images to merge, in order; at least one is required
	 * @return the merged image
	 * @throws IllegalArgumentException if {@code imgs} is {@code null} or empty
	 * @throws IOException              declared for compatibility with existing callers
	 */
	public static BufferedImage mergeImage(boolean isHorizontal, BufferedImage... imgs) throws IOException {
		if (imgs == null || imgs.length == 0) {
			throw new IllegalArgumentException("At least one image is required");
		}

		// Size of the merged image: sum along the merge axis, maximum along the other one.
		int totalWidth = 0, totalHeight = 0, maxWidth = 0, maxHeight = 0;
		for (BufferedImage img : imgs) {
			totalWidth += img.getWidth();
			totalHeight += img.getHeight();
			maxWidth = Math.max(maxWidth, img.getWidth());
			maxHeight = Math.max(maxHeight, img.getHeight());
		}
		BufferedImage destImage = isHorizontal
				? new BufferedImage(totalWidth, maxHeight, BufferedImage.TYPE_INT_RGB)
				: new BufferedImage(maxWidth, totalHeight, BufferedImage.TYPE_INT_RGB);

		// Copy each image's pixels into the merged image at the running offset.
		int offset = 0;
		for (BufferedImage img : imgs) {
			int width = img.getWidth();
			int height = img.getHeight();
			int[] pixels = img.getRGB(0, 0, width, height, null, 0, width);
			if (isHorizontal) {
				destImage.setRGB(offset, 0, width, height, pixels, 0, width);
				offset += width;
			} else {
				destImage.setRGB(0, offset, width, height, pixels, 0, width);
				offset += height;
			}
		}
		return destImage;
	}
}

效果图:

转换前的docx文档(包含图形,文本框)

《使用java将word文档docx,doc(包含图形,文本框)完美转换成所有格式图片(pdf,png,gif,jpeg等等)》
效果图:转换后的png文件
《使用java将word文档docx,doc(包含图形,文本框)完美转换成所有格式图片(pdf,png,gif,jpeg等等)》

    原文作者:右拐~
    原文地址: https://blog.csdn.net/weixin_44396516/article/details/96836400
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞