pdf分页分片预览，pdf截取，pdf转图片

2024年2月23日 68次阅读来源: toforu

分页预览主要解决两个问题

(1)前端下载整个pdf比较慢。

(2) 前端可以下载整个文件流，不安全。

若要分页预览，也有两种方案：

（1）把pdf 转成一张一张的图片。

（2）把pdf 再次截取from-to页，然后生成一个新的pdf文件。

pdf 分页截取生成新的pdf、pdf转图片的代码

<dependencies>
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>2.0.16</version>
    </dependency>
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>fontbox</artifactId>
        <version>2.0.16</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itextpdf</artifactId>
        <version>5.5.13</version>
    </dependency>
</dependencies>

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

import com.itextpdf.text.Document;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;

/**
 * Helpers for paged PDF preview: render PDF pages to images (PDFBox) and cut a
 * page range out of a PDF into a new PDF file (iText).
 */
public class PdfToImageUtil {
	/**
	 * Rendering resolution in dots per inch. A larger value gives a sharper
	 * image but makes the conversion slower.
	 */
	private static final int DPI = 100;

	/**
	 * Image format of the converted pages.
	 */
	private static final String IMG_TYPE = "png";

	/**
	 * Directory prefix (including the trailing separator) under which
	 * {@link #pdfToImage(InputStream)} writes the page images.
	 */
	private static final String OUTPUT_DIR = "C:\\Users\\xxx\\Desktop\\temp\\zhuanhuan\\";

	/**
	 * Demo entry point: converts every page of a sample PDF into image files.
	 */
	public static void main1(String[] args) throws FileNotFoundException, IOException {
		// This method opens the stream, so it is also responsible for closing it.
		try (InputStream in = new FileInputStream(
				new File("C:\\Users\\xxx\\Desktop\\temp\\zhuanhuan\\360Teams移动端使用简介.pdf"))) {
			pdfToImage(in);
		}
	}

	/**
	 * Demo entry point: extracts page 1 of a sample PDF into a new PDF file.
	 */
	public static void main(String[] args) throws FileNotFoundException, IOException {
		partitionPdfFile("C:\\Users\\xxx\\Desktop\\temp\\zhuanhuan\\360Teams移动端使用简介.pdf", "C:\\Users\\xxx\\Desktop\\temp\\zhuanhuan\\360Teams移动端使用简介2.pdf", 1, 1);
	}

	/**
	 * Renders every page of a PDF to an image file and returns the keys of the
	 * images that were written.
	 * <p>
	 * Each image is stored as {@code <key>.png} under a fixed local directory.
	 * Ideally neither the input nor the output should touch the local disk;
	 * store the images directly in an object store such as S3 instead.
	 * <p>
	 * Writing is best-effort per page: if one page cannot be written, the
	 * failure is reported on standard error, that page's key is left out of the
	 * result, and conversion continues with the next page.
	 *
	 * @param inputStream the PDF content; this method does not explicitly close it
	 * @return the keys (file names without extension) of the pages that were
	 *         successfully written, in page order
	 * @throws IOException if the PDF cannot be loaded or a page cannot be rendered
	 */
	public static List<String> pdfToImage(InputStream inputStream) throws IOException {
		List<String> imgKeys = new ArrayList<>();
		try (PDDocument document = PDDocument.load(inputStream)) {
			PDFRenderer renderer = new PDFRenderer(document);
			int pageCount = document.getNumberOfPages();
			for (int i = 0; i < pageCount; ++i) {
				BufferedImage pageImage = renderer.renderImageWithDPI(i, DPI);
				// Key format: <zero-based page index>_<uuid without dashes>; adapt as needed.
				String imageKey = i + "_" + UUID.randomUUID().toString().replace("-", "");
				// Only the pages that are actually needed have to be stored.
				try (OutputStream outputStream = new FileOutputStream(OUTPUT_DIR + imageKey + "." + IMG_TYPE)) {
					if (ImageIO.write(pageImage, IMG_TYPE, outputStream)) {
						// Record the key only after the image has really been written,
						// so callers never receive a key that points at a missing image.
						imgKeys.add(imageKey);
					} else {
						System.err.println("No ImageIO writer for format '" + IMG_TYPE + "'; skipped page " + i);
					}
				} catch (Exception e) {
					// Best-effort: report the failed page and carry on with the rest.
					e.printStackTrace();
				}
			}
		}
		return imgKeys;
	}

	/**
	 * Renders every page of a PDF to an in-memory image.
	 *
	 * @param fileContent the PDF file content as bytes
	 * @return one encoded image per page, in page order
	 * @throws IOException if the PDF cannot be loaded, a page cannot be rendered,
	 *                     or no image writer exists for the target format
	 */
	public static List<byte[]> pdfToImage(byte[] fileContent) throws IOException {
		List<byte[]> result = new ArrayList<>();
		try (PDDocument document = PDDocument.load(fileContent)) {
			PDFRenderer renderer = new PDFRenderer(document);
			int pageCount = document.getNumberOfPages();
			for (int i = 0; i < pageCount; ++i) {
				BufferedImage pageImage = renderer.renderImageWithDPI(i, DPI);
				ByteArrayOutputStream out = new ByteArrayOutputStream();
				// ImageIO.write signals "no suitable writer" by returning false;
				// without this check an empty array would be returned as a page image.
				if (!ImageIO.write(pageImage, IMG_TYPE, out)) {
					throw new IOException("No ImageIO writer available for format: " + IMG_TYPE);
				}
				result.add(out.toByteArray());
			}
		}
		return result;
	}

	/**
	 * Copies a contiguous range of pages from a PDF into a new PDF file.
	 * <p>
	 * This method is best-effort: any failure (unreadable source, invalid page
	 * range, write error) is reported on standard error and the method returns
	 * normally. The page range is validated before the output file is opened,
	 * so an invalid range does not create an output file.
	 *
	 * @param pdfFile path of the source PDF
	 * @param newFile path of the PDF to create
	 * @param from    first page to copy, 1-based, inclusive
	 * @param end     last page to copy, 1-based, inclusive; {@code 0} means
	 *                "up to the last page of the source"
	 */
	public static void partitionPdfFile(String pdfFile, String newFile, int from, int end) {
		PdfReader reader = null;
		try {
			reader = new PdfReader(pdfFile);
			int pageCount = reader.getNumberOfPages();
			int lastPage = (end == 0) ? pageCount : end;
			if (from < 1 || lastPage > pageCount || from > lastPage) {
				throw new IllegalArgumentException("Invalid page range " + from + "-" + lastPage
						+ " for a document with " + pageCount + " page(s): " + pdfFile);
			}

			// try-with-resources guarantees the file handle is released even when
			// copying fails before the document is closed.
			try (OutputStream out = new FileOutputStream(newFile)) {
				Document document = new Document(reader.getPageSize(1));
				PdfCopy copy = new PdfCopy(document, out);
				document.open();
				for (int j = from; j <= lastPage; j++) {
					document.newPage();
					PdfImportedPage page = copy.getImportedPage(reader, j);
					copy.addPage(page);
				}
				document.close();
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Release the source file whether or not the copy succeeded.
			if (reader != null) {
				reader.close();
			}
		}
	}
}

    原文作者：toforu
    原文地址: https://blog.csdn.net/u013378306/article/details/122839704
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。

分页预览主要解决两个问题

若要分页预览，也有两种方案：

pdf 分页截取生成新的pdf、pdf转图片的代码

pdf 分页截取生成新的pdf、pdf转图片的代码