自己瞎整的,供参考
网上的文档太少了,只能一边自己琢磨、一边借助AI来整,最后整出了一个还算比较满意的版本,尤其是对Word中表格的处理这块。
代码
直接贴代码
pom.xml(注意:下面的代码实际使用的是 docx4j、hutool 和 fastjson2,同样需要引入对应的依赖)
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
package com.ruoyi.project.project.util;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.RandomUtil;
import com.alibaba.fastjson2.JSONArray;
import com.ruoyi.framework.config.RuoYiConfig;
import com.ruoyi.framework.config.ServerConfig;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.wml.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.xml.bind.JAXBElement;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Component
public class DocUtil {
// Chinese numeral characters (one through ten) accepted inside a heading ordinal.
private static final String CHINESE_NUMBERS = "一二三四五六七八九十";
// Regex sources for heading detection: the text must start with "第", followed by one or
// more of the numerals above, then "章" (PATTERN_STRING) or "节" (PATTERN_STRING1).
private static final String PATTERN_STRING = "^第[" + CHINESE_NUMBERS + "]+章";
private static final String PATTERN_STRING1 = "^第[" + CHINESE_NUMBERS + "]+节";
// Static handle to the server configuration; assigned in init() from the injected field below.
static ServerConfig serverConfig;
// Spring-injected instance that init() copies into the static serverConfig field.
@Autowired
ServerConfig tempServerConfig;
// Compiled once instead of on every call. PATTERN_STRING is a compile-time constant,
// so the position of this declaration inside the class does not matter.
private static final Pattern CHAPTER_HEADING_PATTERN = Pattern.compile(PATTERN_STRING);

/**
 * Tells whether the given text starts with a first-level heading marker, i.e. "第",
 * one or more of the supported Chinese numerals, then "章" (for example "第一章").
 *
 * @param input text to inspect; may be {@code null}
 * @return {@code true} if the text begins with such a marker, {@code false} otherwise
 *         (including when {@code input} is {@code null})
 */
public static boolean isPara(String input) {
    if (input == null) {
        return false;
    }
    return CHAPTER_HEADING_PATTERN.matcher(input).find();
}
// Compiled once instead of on every call. PATTERN_STRING1 is a compile-time constant,
// so the position of this declaration inside the class does not matter.
private static final Pattern SECTION_HEADING_PATTERN = Pattern.compile(PATTERN_STRING1);

/**
 * Tells whether the given text starts with a second-level heading marker, i.e. "第",
 * one or more of the supported Chinese numerals, then "节" (for example "第一节").
 *
 * @param input text to inspect; may be {@code null}
 * @return {@code true} if the text begins with such a marker, {@code false} otherwise
 *         (including when {@code input} is {@code null})
 */
public static boolean isPara1(String input) {
    if (input == null) {
        return false;
    }
    return SECTION_HEADING_PATTERN.matcher(input).find();
}
/**
 * Manual smoke test: loads a .docx from a hard-coded local path, runs the three
 * processing stages (parse, flatten, table post-processing) and prints every
 * resulting node to standard output.
 *
 * @param args unused
 * @throws Docx4JException if the document cannot be loaded
 * @throws IOException     declared for callers; not thrown directly by this method
 */
public static void main(String[] args) throws Docx4JException, IOException {
    // The document is loaded outside the try block, so load failures propagate to the caller.
    File sampleFile = new File("/Users/xxxx/Desktop/workspace/表格测试.docx");
    WordprocessingMLPackage docxPackage = WordprocessingMLPackage.load(sampleFile);
    try {
        MainDocumentPart mainPart = docxPackage.getMainDocumentPart();
        List<Object> bodyContent = mainPart.getContent();
        // The relationships part is needed to resolve embedded images by relationship id.
        List<Map<String, Object>> parsed = parseObject(bodyContent, mainPart.getRelationshipsPart());
        List<Map<String, Object>> flattened = DocUtil.formatDocumentObject(parsed, null);
        List<Map<String, Object>> finished = DocUtil.formatDocumentTbl(flattened);
        System.out.println("最后的结果:---------------------");
        finished.forEach(System.out::println);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Post-processes every table node ({@code type == "TBL"}) in the given list, in place.
 *
 * <p>For each table it:
 * <ol>
 *   <li>turns a cell's {@code vMerge == "restart"} into the integer 1;</li>
 *   <li>for each {@code vMerge == "continue"} cell, asks
 *       {@code searchAndChangePreviousData} to increment the counter of the cell above it
 *       in the same column, then removes the {@code vMerge} key from the continued cell;</li>
 *   <li>removes cells that are empty or carry nothing but an {@code hMerge} key;</li>
 *   <li>replaces the table's {@code content} with its JSON string representation.</li>
 * </ol>
 *
 * <p>A table whose {@code content} is not an {@code ArrayList} (for instance one that a
 * previous call has already serialised to a string) is left untouched instead of failing
 * with a {@code ClassCastException}.
 *
 * @param maps document nodes as produced by {@code formatDocumentObject}
 * @return the same list instance, with table nodes rewritten
 */
@SuppressWarnings("unchecked")
public static List<Map<String, Object>> formatDocumentTbl(List<Map<String, Object>> maps) {
    for (Map<String, Object> map : maps) {
        // Only table nodes are processed; a missing type no longer causes an NPE.
        if (!"TBL".equals(map.get("type"))) {
            continue;
        }
        Object rawContent = map.get("content");
        if (!(rawContent instanceof ArrayList)) {
            continue;
        }
        ArrayList<Map<String, Object>> tblContent = (ArrayList<Map<String, Object>>) rawContent;

        // Pass 1: resolve vertical merges, tracking the logical column of every cell.
        for (int rowIndex = 0; rowIndex < tblContent.size(); rowIndex++) {
            Object rawCells = tblContent.get(rowIndex).get("content");
            if (!(rawCells instanceof List)) {
                continue;
            }
            int column = 0;
            for (Map<String, Object> cell : (List<Map<String, Object>>) rawCells) {
                Object vMerge = cell.get("vMerge");
                if ("restart".equals(vMerge)) {
                    // First cell of a vertical merge: start counting at one row.
                    cell.put("vMerge", 1);
                } else if ("continue".equals(vMerge)) {
                    // Continuation cell: credit one more row to the cell above in this column.
                    searchAndChangePreviousData(rowIndex, column, tblContent);
                    cell.remove("vMerge");
                }
                // A horizontally merged cell occupies hMerge columns, any other cell one.
                Object hMerge = cell.get("hMerge");
                column += hMerge instanceof Number ? ((Number) hMerge).intValue() : 1;
            }
        }

        // Pass 2: drop cells that are empty or hold nothing but an hMerge key.
        for (Map<String, Object> row : tblContent) {
            Object rawCells = row.get("content");
            if (rawCells instanceof List) {
                ((List<Map<String, Object>>) rawCells).removeIf(
                        cell -> cell.isEmpty() || (cell.size() == 1 && cell.containsKey("hMerge")));
            }
        }

        map.put("content", JSONArray.toJSONString(tblContent));
    }
    return maps;
}
/**
 * Walks upward from the row above {@code rowIndex} looking for the cell that sits at logical
 * column {@code index} and holds an integer {@code vMerge} counter, and increments that
 * counter by one. The search stops at the first such cell; if none is found nothing changes.
 *
 * <p>Logical columns are computed by summing the {@code hMerge} span of the preceding cells
 * in the row (a cell without {@code hMerge} counts as one column).
 *
 * <p>Cells whose {@code vMerge} is not an {@code Integer} (for example a still un-normalised
 * {@code "restart"} string) are skipped rather than cast, which previously raised a
 * {@code ClassCastException}.
 *
 * @param rowIndex   index of the row containing the continuation cell
 * @param index      logical column of the continuation cell
 * @param tblContent table rows; each row map holds its cells under {@code "content"}
 */
@SuppressWarnings("unchecked")
private static void searchAndChangePreviousData(int rowIndex, int index, ArrayList<Map<String, Object>> tblContent) {
    for (int row = rowIndex - 1; row >= 0; row--) {
        Object rawCells = tblContent.get(row).get("content");
        if (!(rawCells instanceof List)) {
            continue;
        }
        int column = 0;
        for (Map<String, Object> cell : (List<Map<String, Object>>) rawCells) {
            if (column > index) {
                // Already past the target column; nothing further in this row can match.
                break;
            }
            if (column == index) {
                Object counter = cell.get("vMerge");
                if (counter instanceof Integer) {
                    cell.put("vMerge", (Integer) counter + 1);
                    return;
                }
            }
            Object hMerge = cell.get("hMerge");
            column += hMerge instanceof Number ? ((Number) hMerge).intValue() : 1;
        }
    }
}
/**
 * Recursively converts docx4j content objects into a tree of plain maps.
 *
 * <p>Produced node shapes (every node is a {@code HashMap} with a {@code "type"} key):
 * <ul>
 *   <li>{@code P} / {@code R}: {@code content} is the parsed child list;</li>
 *   <li>{@code TBL}: {@code content} is a list with one parsed cell list per {@code Tr};</li>
 *   <li>{@code tc}: {@code content} is the parsed child list, plus optional {@code hMerge}
 *       (grid span) and {@code vMerge} (the declared value, or {@code "continue"} when the
 *       element is present without a value);</li>
 *   <li>{@code IMG}: {@code width}/{@code height} in pixels at 96 DPI and {@code content}
 *       holding the path returned by {@code saveFile} (or {@code "1"} when no binary part
 *       could be resolved);</li>
 *   <li>{@code TEXT}: {@code content} is the text value.</li>
 * </ul>
 * Objects of any other kind are ignored. Only {@code Tbl}, {@code Tc}, {@code Drawing} and
 * {@code Text} values wrapped in a {@code JAXBElement} are recognised.
 *
 * @param list docx4j content objects; {@code null} yields an empty result
 * @param part relationships part used to resolve embedded images by relationship id
 * @return the parsed nodes, in document order
 */
public static List<Map<String, Object>> parseObject(List<Object> list, RelationshipsPart part) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    if (list == null) {
        return resultList;
    }
    for (Object obj1 : list) {
        if (obj1 instanceof P) {
            resultList.add(newNode("P", parseObject(((P) obj1).getContent(), part)));
        } else if (obj1 instanceof R) {
            resultList.add(newNode("R", parseObject(((R) obj1).getContent(), part)));
        } else if (obj1 instanceof JAXBElement) {
            Object value = ((JAXBElement<?>) obj1).getValue();
            if (value instanceof Tbl) {
                resultList.add(parseTable((Tbl) value, part));
            } else if (value instanceof Tc) {
                resultList.add(parseCell((Tc) value, part));
            } else if (value instanceof Drawing) {
                Map<String, Object> image = parseInlineImage((Drawing) value, part);
                if (image != null) {
                    resultList.add(image);
                }
            } else if (value instanceof Text) {
                String text = ((Text) value).getValue();
                // Empty runs are dropped, as is text starting with "HYPERLINK"
                // (presumably field-instruction text rather than visible content).
                if (text != null && !text.isEmpty() && !text.startsWith("HYPERLINK")) {
                    resultList.add(newNode("TEXT", text));
                }
            }
        }
    }
    return resultList;
}

/** Creates a node map holding the given type and content. */
private static Map<String, Object> newNode(String type, Object content) {
    Map<String, Object> node = new HashMap<>();
    node.put("type", type);
    node.put("content", content);
    return node;
}

/** Parses a table into a TBL node whose content holds one parsed cell list per row. */
private static Map<String, Object> parseTable(Tbl table, RelationshipsPart part) {
    List<Object> rows = new ArrayList<>();
    for (Object candidate : table.getContent()) {
        if (candidate instanceof Tr) {
            rows.add(parseObject(((Tr) candidate).getContent(), part));
        }
    }
    return newNode("TBL", rows);
}

/** Parses a table cell into a tc node, copying its merge settings when they are declared. */
private static Map<String, Object> parseCell(Tc tc, RelationshipsPart part) {
    Map<String, Object> cell = newNode("tc", parseObject(tc.getContent(), part));
    // Cell properties are optional; a cell without them simply has no merge information.
    TcPr properties = tc.getTcPr();
    if (properties != null) {
        if (properties.getGridSpan() != null && properties.getGridSpan().getVal() != null) {
            cell.put("hMerge", properties.getGridSpan().getVal());
        }
        if (properties.getVMerge() != null) {
            String declared = properties.getVMerge().getVal();
            // A vMerge element without a value marks a continuation of the merge above.
            cell.put("vMerge", declared == null ? "continue" : declared);
        }
    }
    return cell;
}

/**
 * Parses the first entry of a drawing into an IMG node. Returns {@code null} when the
 * drawing is empty, is not an inline drawing, or does not contain a picture with a blip.
 */
private static Map<String, Object> parseInlineImage(Drawing drawing, RelationshipsPart part) {
    List<Object> entries = drawing.getAnchorOrInline();
    if (entries == null || entries.isEmpty() || !(entries.get(0) instanceof Inline)) {
        return null;
    }
    Inline inline = (Inline) entries.get(0);
    if (inline.getGraphic() == null || inline.getGraphic().getGraphicData() == null) {
        return null;
    }
    org.docx4j.dml.picture.Pic pic = inline.getGraphic().getGraphicData().getPic();
    if (pic == null || pic.getBlipFill() == null || pic.getBlipFill().getBlip() == null) {
        return null;
    }
    String relId = pic.getBlipFill().getBlip().getEmbed();

    // Size in EMU: prefer the picture's own transform, fall back to the inline extent.
    long cx = 0;
    long cy = 0;
    if (pic.getSpPr() != null && pic.getSpPr().getXfrm() != null && pic.getSpPr().getXfrm().getExt() != null) {
        cx = pic.getSpPr().getXfrm().getExt().getCx();
        cy = pic.getSpPr().getXfrm().getExt().getCy();
    } else if (inline.getExtent() != null) {
        cx = inline.getExtent().getCx();
        cy = inline.getExtent().getCy();
    }
    int widthPx = emuToPixels(cx, 96);
    int heightPx = emuToPixels(cy, 96);

    // Resolve the image part behind the relationship id and persist its bytes.
    Part imagePart = (part == null || relId == null) ? null : part.getPart(relId);
    String fileurl = "1";
    // NOTE(review): the file is always named *.png regardless of the embedded image format.
    String filename = "image_" + System.currentTimeMillis() + RandomUtil.randomNumbers(6) + ".png";
    if (imagePart instanceof BinaryPart) {
        fileurl = saveFile(((BinaryPart) imagePart).getBytes(), filename);
    }

    Map<String, Object> image = newNode("IMG", fileurl);
    image.put("width", widthPx);
    image.put("height", heightPx);
    return image;
}
/**
 * Converts a length in EMU to inches by dividing by 914400.
 *
 * @param emu length in EMU
 * @return the same length in inches
 */
private static double emuToInches(long emu) {
    final double emuPerInch = 914400.0;
    return emu / emuPerInch;
}
/**
 * Converts a length in EMU to pixels at the given resolution.
 *
 * <p>The result is rounded to the nearest pixel. Plain truncation could report one pixel
 * too few whenever the floating-point product landed just below a whole number.
 *
 * @param emu length in EMU
 * @param dpi target resolution in dots per inch
 * @return the length in whole pixels, rounded to nearest
 */
private static int emuToPixels(long emu, int dpi) {
    return (int) Math.round(emuToInches(emu) * dpi);
}
/**
 * Writes the image bytes to {@code <filePath>/newFolder/<filename>} and returns the path
 * under which the file is reported to callers.
 *
 * <p>Adapt this to your own storage: replace the placeholder base path, or delegate to an
 * existing upload helper, and return the path of the stored file.
 * NOTE(review): the returned prefix {@code /profile/upload/newFolder/} is fixed and is not
 * derived from {@code filePath}; confirm that the two agree in your deployment.
 *
 * @param imageData raw image bytes
 * @param filename  target file name
 * @return {@code "/profile/upload/newFolder/" + filename}
 * @throws RuntimeException wrapping the {@link IOException} if the file cannot be written
 */
private static String saveFile(byte[] imageData, String filename) {
    // Local base directory for saved files (placeholder; set to your own path).
    String filePath = "xxxxxxxxxxx";
    File targetDir = new File(filePath + "/newFolder/");
    if (!targetDir.exists()) {
        // If creation fails, opening the stream below fails too and is reported there.
        targetDir.mkdirs();
    }
    // try-with-resources closes the stream on success and on failure alike.
    try (FileOutputStream fos = new FileOutputStream(filePath + "/newFolder/" + filename)) {
        fos.write(imageData);
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so a single handler covers both cases.
        System.out.println("Image saved error.");
        throw new RuntimeException(e);
    }
    System.out.println("Image saved successfully.");
    return "/profile/upload/newFolder/" + filename;
}
/**
 * Flattens the raw node tree produced by {@code parseObject} into a simpler structure.
 *
 * <ul>
 *   <li>{@code R} nodes are replaced by their flattened children.</li>
 *   <li>{@code P} nodes collapse to a single {@code TEXT} node when all children are text,
 *       pass a single child through unchanged, or become an {@code ARRAY} node for mixed
 *       content. An empty paragraph yields an empty {@code TEXT} node, except inside a cell
 *       that continues a vertical merge, where it yields nothing.</li>
 *   <li>{@code tc} nodes join consecutive text children with newlines, receive the cell's
 *       {@code hMerge}/{@code vMerge} on every resulting item, and are wrapped in an
 *       {@code ARRAY} node when more than one item remains. The wrapper carries the merge
 *       attributes as well, so row-level consumers see the correct span for such cells.</li>
 *   <li>{@code TBL} nodes are rebuilt via {@code formatDocumentObjectList}.</li>
 *   <li>Any other node is passed through unchanged.</li>
 * </ul>
 *
 * @param maps      nodes to flatten; {@code null} yields an empty result
 * @param parentMap node whose content is being flattened, or {@code null} at the top level
 * @return the flattened nodes
 */
@SuppressWarnings("unchecked")
public static List<Map<String, Object>> formatDocumentObject(List<Map<String, Object>> maps, Map<String, Object> parentMap) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    if (maps == null) {
        return resultList;
    }
    for (Map<String, Object> map : maps) {
        Object type = map.get("type");
        if ("R".equals(type)) {
            resultList.addAll(formatDocumentObject((List<Map<String, Object>>) map.get("content"), map));
        } else if ("tc".equals(type)) {
            resultList.addAll(formatTableCell(map));
        } else if ("P".equals(type)) {
            resultList.addAll(formatParagraph(map, parentMap));
        } else if ("TBL".equals(type)) {
            Map<String, Object> table = new HashMap<>();
            table.put("type", "TBL");
            table.put("content", formatDocumentObjectList((List<Object>) map.get("content")));
            resultList.add(table);
        } else {
            resultList.add(map);
        }
    }
    return resultList;
}

/** Flattens one table cell into zero or one result node (see formatDocumentObject). */
@SuppressWarnings("unchecked")
private static List<Map<String, Object>> formatTableCell(Map<String, Object> cell) {
    List<Map<String, Object>> children = formatDocumentObject((List<Map<String, Object>>) cell.get("content"), cell);

    // Join runs of consecutive TEXT children with '\n'; keep other children in place.
    List<Map<String, Object>> content = new ArrayList<>();
    StringBuilder pendingText = new StringBuilder();
    for (Map<String, Object> child : children) {
        if ("TEXT".equals(child.get("type"))) {
            pendingText.append(child.get("content")).append('\n');
        } else {
            flushPendingText(pendingText, content);
            content.add(child);
        }
    }
    flushPendingText(pendingText, content);

    if (cell.containsKey("hMerge") || cell.containsKey("vMerge")) {
        if (content.isEmpty() && "continue".equals(cell.get("vMerge"))) {
            // An empty continuation cell still needs a placeholder so that the column
            // position and span are preserved for the table post-processing step.
            Map<String, Object> placeholder = new HashMap<>();
            copyMergeAttributes(cell, placeholder);
            content.add(placeholder);
        } else {
            for (Map<String, Object> item : content) {
                copyMergeAttributes(cell, item);
            }
        }
    }

    if (content.size() > 1) {
        Map<String, Object> wrapper = new HashMap<>();
        wrapper.put("type", "ARRAY");
        wrapper.put("content", content);
        // The wrapper is what ends up in the row, so it must carry the span information too.
        copyMergeAttributes(cell, wrapper);
        List<Map<String, Object>> wrapped = new ArrayList<>();
        wrapped.add(wrapper);
        return wrapped;
    }
    return content;
}

/** Flattens one paragraph into zero or one result node (see formatDocumentObject). */
@SuppressWarnings("unchecked")
private static List<Map<String, Object>> formatParagraph(Map<String, Object> paragraph, Map<String, Object> parentMap) {
    List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) paragraph.get("content"), paragraph);
    if (content.size() == 1) {
        return content;
    }

    List<Map<String, Object>> result = new ArrayList<>();
    if (content.isEmpty()) {
        // Empty paragraphs are kept as empty text, except inside a cell that continues a
        // vertical merge, where the cell must stay empty.
        boolean insideContinuedCell = parentMap != null
                && "tc".equals(parentMap.get("type"))
                && "continue".equals(parentMap.get("vMerge"));
        if (!insideContinuedCell) {
            result.add(newTextNode(""));
        }
        return result;
    }

    boolean allText = true;
    StringBuilder joined = new StringBuilder();
    for (Map<String, Object> item : content) {
        if (item.containsKey("type") && !"TEXT".equals(item.get("type"))) {
            allText = false;
        }
        joined.append(item.get("content"));
    }
    if (allText) {
        result.add(newTextNode(joined.toString()));
    } else {
        Map<String, Object> array = new HashMap<>();
        array.put("type", "ARRAY");
        array.put("content", content);
        result.add(array);
    }
    return result;
}

/** Emits the buffered text (minus its trailing newline) as a TEXT node and clears the buffer. */
private static void flushPendingText(StringBuilder pendingText, List<Map<String, Object>> target) {
    if (pendingText.length() > 0) {
        target.add(newTextNode(pendingText.substring(0, pendingText.length() - 1)));
        pendingText.setLength(0);
    }
}

/** Copies whichever of hMerge / vMerge the source declares onto the target. */
private static void copyMergeAttributes(Map<String, Object> source, Map<String, Object> target) {
    if (source.containsKey("hMerge")) {
        target.put("hMerge", source.get("hMerge"));
    }
    if (source.containsKey("vMerge")) {
        target.put("vMerge", source.get("vMerge"));
    }
}

/** Creates a TEXT node with the given content. */
private static Map<String, Object> newTextNode(String text) {
    Map<String, Object> node = new HashMap<>();
    node.put("type", "TEXT");
    node.put("content", text);
    return node;
}
/**
 * Converts the raw row lists of a table into {@code TD} row nodes.
 *
 * <p>Each element of {@code content} is handled as follows:
 * <ul>
 *   <li>a list of maps (the cells of one row) becomes a {@code TD} node whose content is the
 *       result of {@code formatDocumentObject} on those cells;</li>
 *   <li>a list of lists is processed recursively and its rows are appended;</li>
 *   <li>an empty list becomes a {@code TD} node with empty content, instead of failing
 *       with an {@code IndexOutOfBoundsException};</li>
 *   <li>a map is appended unchanged;</li>
 *   <li>anything else is ignored.</li>
 * </ul>
 *
 * @param content raw table content; {@code null} yields an empty result
 * @return the row nodes
 */
@SuppressWarnings("unchecked")
private static List<Map<String, Object>> formatDocumentObjectList(List<Object> content) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    if (content == null) {
        return resultList;
    }
    for (Object o : content) {
        if (o instanceof List) {
            List<?> row = (List<?>) o;
            if (!row.isEmpty() && row.get(0) instanceof List) {
                resultList.addAll(formatDocumentObjectList((List<Object>) row));
            } else if (row.isEmpty() || row.get(0) instanceof Map) {
                Map<String, Object> newMap = new HashMap<>();
                newMap.put("type", "TD");
                // The row node is handed over as the parent before its content is set.
                newMap.put("content", formatDocumentObject((List<Map<String, Object>>) row, newMap));
                resultList.add(newMap);
            }
        } else if (o instanceof Map) {
            resultList.add((Map<String, Object>) o);
        }
    }
    return resultList;
}
/**
 * Lifecycle callback (annotated with {@code @PostConstruct}) that copies the injected
 * {@code tempServerConfig} instance into the static {@code serverConfig} field.
 */
@PostConstruct
public void init() {
    DocUtil.serverConfig = tempServerConfig;
}
}
历时一个星期搞出来的,总算是符合了客户的要求。给自己记录一下。
项目是基于ruoyi做的,里面很多用的都是ruoyi的东西,感谢若依大佬。