一、前言
通过apache的pdfbox、poi(示例代码中还引用了icepdf、jpedal)定义PdfToImg工具类(依赖包下载),实现将pdf文件转为图片、以及将多张图片合并为一张图片等逻辑处理,详情参见下面的示例说明。
二、示例说明package pdf;@b@@b@import java.awt.image.BufferedImage;@b@import java.io.ByteArrayInputStream;@b@import java.io.ByteArrayOutputStream;@b@import java.io.File;@b@import java.io.FileInputStream;@b@import java.io.FileNotFoundException;@b@import java.io.FileOutputStream;@b@import java.io.IOException;@b@import java.io.InputStream;@b@import java.util.ArrayList;@b@import java.util.Date;@b@import java.util.List;@b@@b@import javax.imageio.ImageIO;@b@import org.apache.commons.lang3.StringUtils;@b@import org.apache.commons.logging.Log;@b@import org.apache.commons.logging.LogFactory;@b@import org.apache.log4j.Logger;@b@import org.apache.pdfbox.cos.COSBase;@b@import org.apache.pdfbox.cos.COSDocument;@b@import org.apache.pdfbox.cos.COSName;@b@import org.apache.pdfbox.cos.COSObject;@b@import org.apache.pdfbox.io.IOUtils;@b@import org.apache.pdfbox.pdmodel.PDDocument;@b@import org.icepdf.core.pobjects.Document;@b@import org.icepdf.core.pobjects.Page;@b@import org.icepdf.core.util.GraphicsRenderingHints;@b@import org.jpedal.PdfDecoder;@b@import org.jpedal.exception.PdfException;@b@@b@public class PdfToImg {@b@@b@private static String defaut_outpet_filepath="C:/temp/";@b@@b@private static final int SEPARATE_DISTANCE = 10;@b@@b@static Logger log = Logger.getLogger(PdfToImg.class);@b@@b@/**@b@ * 判断PDF文件的压缩算法是否为JBIG2Decode@b@ * @param@b@ */@b@private static boolean isJbig2(File file) throws IOException {@b@ //返回标志,默认为否@b@boolean isJbig2 = false;@b@//读pdf文件@b@ PDDocument pdfDoc = PDDocument.load(file); @b@ //生成COSDocument@b@ COSDocument doc = pdfDoc.getDocument();@b@ COSObject obj =doc.getObjectByType(COSName.XOBJECT); @b@ if(obj != null)@b@ {@b@ COSBase base = obj.getDictionaryObject(COSName.FILTER);@b@ String decode = base.toString();@b@ if(decode.equals("COSName{JBIG2Decode}"))@b@ {@b@ isJbig2 = true;@b@ }@b@ }@b@ doc.close();@b@ pdfDoc.close();@b@@b@ return isJbig2;@b@}@b@@b@@b@private static List toImageProcess(File file) throws IOException{@b@@b@//定义Document,用于转换图片@b@ Document 
document = new Document();@b@ List filePathList = new ArrayList();@b@ try {@b@ document.setFile(file.getAbsolutePath());@b@ } catch (Exception ex) {@b@ ex.printStackTrace();@b@ }@b@ // save page caputres to file.@b@ float rotation = 0f;@b@ // 循环把每页的数据转换成对应的图片@b@ for (int i = 0; i toImageOfJbig2(String file) throws IOException, PdfException@b@{@b@PdfDecoder decodePdf = new PdfDecoder(true);@b@List bufImages = new ArrayList();@b@try@b@{@b@decodePdf.openPdfFile(file);@b@//PdfDecoder.setFontReplacements(decodePdf);@b@for(int i = 1; i <= decodePdf.getPageCount(); i++){@b@ //设置转换参数@b@ decodePdf.setPageParameters(1.8f, i);@b@ //进行转换@b@ BufferedImage img=decodePdf.getPageAsImage(i);@b@ bufImages.add(img);@b@}@b@decodePdf.closePdfFile();@b@}@b@catch (PdfException e)@b@{@b@e.printStackTrace();@b@}@b@return bufImages;@b@}@b@@b@/**@b@ * 将pdf文件转为图片@b@ * @param pdffile@b@ * @param outputFile@b@ * @throws Exception@b@ */@b@public static void generatorPdf2Img(File pdffile,String outputFile) throws Exception{@b@List imagesCache=toImageBuffer(pdffile);@b@outputCombinedImg(imagesCache,outputFile);@b@}@b@@b@@b@/**@b@ * 将指定图片合并为一张图片@b@ * @param files@b@ * @param outputFile@b@ * @throws Exception@b@ */@b@public static void generatorImg(String[] files,String outputFile) throws Exception{@b@List bufImages = new ArrayList();@b@for(String file:files){@b@bufImages.add(getImageBuffer(file));@b@}@b@outputCombinedImg(bufImages,outputFile);@b@}@b@@b@@b@/**@b@ * PDF转换图片@b@ * @param pdffile@b@ * @return@b@ * @throws Exception@b@ */@b@private static List toImageBuffer(File pdffile) throws Exception{@b@return toImageBuffer(new FileInputStream(pdffile));@b@}@b@@b@@b@/**@b@ * PDF转换成图片@b@ * @param pdfStream PDF文件流@b@ * @return List@b@ */@b@private static List toImageBuffer(InputStream pdfStream) throws IOException, PdfException, SecurityException{ @b@Date start= new Date();@b@//接收PDF流,生成临时文件@b@File tempFile = File.createTempFile("pdf2image", null);@b@String strFile = tempFile.toString();@b@byte[] data 
= IOUtils.toByteArray(pdfStream);@b@FileOutputStream fo = new FileOutputStream(tempFile);@b@fo.write(data);@b@fo.flush();@b@fo.close();@b@List bufImages = new ArrayList();@b@//判断PDF算法格式@b@if(isJbig2(tempFile)) {@b@bufImages = toImageOfJbig2(strFile);@b@} else {@b@bufImages = toImageProcess(tempFile);@b@}@b@//删除临时文件@b@if(!tempFile.delete())@b@{@b@Log logger = LogFactory.getLog(PdfToImg.class);@b@logger.warn(tempFile.toString() + " -- This file is not deleted.");@b@}@b@long cost = new Date().getTime()-start.getTime();@b@log.info("生成图片耗时耗时"+cost);@b@return bufImages;@b@}@b@ @b@/**@b@ * 获取图片缓存数据@b@ * @param path@b@ * @return@b@ * @throws IOException@b@ */@b@private static BufferedImage getImageBuffer(String path) throws IOException{@b@BufferedImage image =null;@b@ByteArrayInputStream in = null;@b@byte[] buffer = null;@b@FileInputStream fis = null;@b@ByteArrayOutputStream bos = null;@b@ try { @b@ File file = new File(path); @b@ fis = new FileInputStream(file); @b@ bos = new ByteArrayOutputStream(1000); @b@ byte[] b = new byte[1000]; @b@ int n; @b@ while ((n = fis.read(b)) != -1) { @b@ bos.write(b, 0, n); @b@ } @b@ fis.close(); @b@ bos.close(); @b@ buffer = bos.toByteArray(); @b@ in = new ByteArrayInputStream(buffer);//将b作为输入流;@b@ image = ImageIO.read(in); @b@ } catch (FileNotFoundException e) { @b@ e.printStackTrace(); @b@ } catch (IOException e) { @b@ e.printStackTrace(); @b@ }finally{@b@ if(in != null){@b@ in.close();@b@ in.reset();@b@ }@b@if(bos != null) bos.close();@b@if(fis != null) fis.close();@b@ }@b@return image;@b@}@b@@b@private static void outputCombinedImg(ListbufImages,String outpetFile){@b@@b@String imgSuffix="jpg";@b@if(StringUtils.isNotEmpty(outpetFile)&&outpetFile.contains(".")){@b@imgSuffix=outpetFile.substring(outpetFile.indexOf(".")+1);@b@}@b@@b@//默认输出图片路径@b@if(StringUtils.isEmpty(outpetFile)){@b@outpetFile=defaut_outpet_filepath+"temp.jpg";@b@}@b@@b@ //输出图片@b@BufferedImage imageNew = null;@b@for(int i = 0 ; i
控制台执行结果如下图所示:
1. generatorImg - 将txmn1027_1.jpg、txmn1027_2.jpg两张图片合并为一张图片txmn1027_0.jpg
2. generatorPdf2Img - 将pdf文件转为jpg图片