|
@@ -1,8 +1,11 @@
|
|
|
package com.yaoxiang.diagnosis.word;
|
|
|
|
|
|
+import com.yaoxiang.diagnosis.config.Constants;
|
|
|
+import com.yaoxiang.diagnosis.util.CommonUtil;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
|
|
+import org.apache.poi.xwpf.usermodel.XWPFPicture;
|
|
|
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
|
|
import org.apache.xmlbeans.XmlCursor;
|
|
|
import org.apache.xmlbeans.XmlObject;
|
|
@@ -10,16 +13,32 @@ import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObject;
|
|
|
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
|
|
|
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
|
|
|
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
|
|
|
+import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMath;
|
|
|
+import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMathPara;
|
|
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
|
|
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
|
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.core.io.ClassPathResource;
|
|
|
+import org.w3c.dom.Node;
|
|
|
|
|
|
+import javax.imageio.ImageIO;
|
|
|
+import javax.xml.transform.Transformer;
|
|
|
+import javax.xml.transform.TransformerFactory;
|
|
|
+import javax.xml.transform.dom.DOMSource;
|
|
|
+import javax.xml.transform.stream.StreamResult;
|
|
|
+import javax.xml.transform.stream.StreamSource;
|
|
|
+import java.awt.image.BufferedImage;
|
|
|
import java.io.*;
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
+import java.util.function.BiFunction;
|
|
|
import java.util.function.Consumer;
|
|
|
-import java.util.stream.Collectors;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
|
* @author feick
|
|
@@ -34,6 +53,8 @@ public class WordUtil {
|
|
|
|
|
|
public static final String DIR_CONTRACT_TPL = DIRECTORY_SEPARATOR + "contract" + DIRECTORY_SEPARATOR + "tpl" + DIRECTORY_SEPARATOR;
|
|
|
|
|
|
+ public static final Logger logger = LoggerFactory.getLogger(WordUtil.class);
|
|
|
+
|
|
|
|
|
|
|
|
|
|
|
@@ -68,6 +89,213 @@ public class WordUtil {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+ public static boolean parseFormula(XWPFParagraph p) {
|
|
|
+ List<CTOMath> maths = p.getCTP().getOMathList();
|
|
|
+ List<CTOMathPara> paras = p.getCTP().getOMathParaList();
|
|
|
+ if (CommonUtil.isEmpty(maths) && CommonUtil.isEmpty(paras)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ XmlCursor c = p.getCTP().newCursor();
|
|
|
+ c.selectPath("./*");
|
|
|
+ if (CommonUtil.notEmpty(paras)) {
|
|
|
+ c.toNextSibling();
|
|
|
+ }
|
|
|
+ int i = 0;
|
|
|
+ Map<Integer, String> insert = new HashMap<>();
|
|
|
+ while (c.toNextSelection()) {
|
|
|
+ XmlObject o = c.getObject();
|
|
|
+ logger.debug("i = " + i + " " + o.getClass());
|
|
|
+ if (o instanceof CTR) {
|
|
|
+ i++;
|
|
|
+ } else if (o instanceof CTOMath) {
|
|
|
+ CTOMath m = (CTOMath) o;
|
|
|
+ try {
|
|
|
+ String mml = getMathML(m);
|
|
|
+ insert.put(i, mml);
|
|
|
+ i++;
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ logger.debug("CTOMath i = " + i + " " + o.getClass());
|
|
|
+ } else if (o instanceof CTOMathPara) {
|
|
|
+
|
|
|
+ c.toLastChild();
|
|
|
+ XmlObject xmlObject = c.getObject();
|
|
|
+ CTOMath m = (CTOMath) xmlObject;
|
|
|
+ try {
|
|
|
+ String mml = getMathML(m);
|
|
|
+ insert.put(i, mml);
|
|
|
+ i++;
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ insert.forEach((k, v) -> {
|
|
|
+ XWPFRun run = p.insertNewRun(k);
|
|
|
+ run.setText(v);
|
|
|
+ });
|
|
|
+ return true;
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String getMathML(CTOMath ctomath) throws Exception {
|
|
|
+
|
|
|
+ Node node = ctomath.getDomNode();
|
|
|
+
|
|
|
+ DOMSource source = new DOMSource(node);
|
|
|
+ StringWriter stringwriter = new StringWriter();
|
|
|
+ StreamResult result = new StreamResult(stringwriter);
|
|
|
+
|
|
|
+ StreamSource styleSource = new StreamSource(new ClassPathResource("OMML2MML.XSL").getInputStream());
|
|
|
+ Transformer transformer = TransformerFactory.newInstance().newTransformer(styleSource);
|
|
|
+
|
|
|
+ transformer.setOutputProperty("omit-xml-declaration", "yes");
|
|
|
+ transformer.transform(source, result);
|
|
|
+
|
|
|
+ String mathML = stringwriter.toString();
|
|
|
+ stringwriter.close();
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ mathML = mathML.replaceAll("xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\"", "");
|
|
|
+ mathML = mathML.replaceAll("xmlns:mml", "xmlns");
|
|
|
+ mathML = mathML.replaceAll("mml:", "");
|
|
|
+
|
|
|
+ return mathML;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void parsePicture(XWPFParagraph p, String picturePattern, BiFunction<byte[], String, String> saver) {
|
|
|
+ String text = p.getText();
|
|
|
+ Matcher matcher = Pattern.compile(picturePattern).matcher(text);
|
|
|
+
|
|
|
+ boolean hasPicture = WordUtil.hasPicture(p);
|
|
|
+ if (!hasPicture && !matcher.find()) {
|
|
|
+ logger.info("parsePicture,before parse run ,no picture: {}", text);
|
|
|
+ return;
|
|
|
+ } else if (!hasPicture && matcher.find()) {
|
|
|
+ String msg = "parsePicture error,find tag but not picture: " + text;
|
|
|
+ logger.error(msg, new RuntimeException(msg));
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ String size = Constants.PICTURE_SIZE_LARGE;
|
|
|
+ if (matcher.find()) {
|
|
|
+ size = matcher.group().replaceAll("#", "");
|
|
|
+ }
|
|
|
+ logger.info("parsePicture,before parse run: " + text);
|
|
|
+ List<XWPFRun> runs = p.getRuns();
|
|
|
+ int i = 0;
|
|
|
+ Map<Integer, String> insert = new HashMap<>();
|
|
|
+ for (XWPFRun run : runs) {
|
|
|
+ List<XWPFPicture> pictures = run.getEmbeddedPictures();
|
|
|
+ logger.debug("run pos " + (i++) + " embed picture size " + pictures.size());
|
|
|
+
|
|
|
+
|
|
|
+ String img = "<p style=\"text-align:center\"><img src=\"%s\" style=\"max-width:100%%; width:%spx; center\" /></p>";
|
|
|
+ if (CommonUtil.notEmpty(pictures)) {
|
|
|
+
|
|
|
+ XWPFPicture pp = pictures.get(0);
|
|
|
+ String pName = pp.getPictureData().getFileName();
|
|
|
+
|
|
|
+ String suffix = pName.substring(pName.lastIndexOf("."));
|
|
|
+ if (".emf".equals(suffix)) {
|
|
|
+ logger.error("检测到emf文件,段落内容为{}", p.getText());
|
|
|
+ }
|
|
|
+ String saveName = CommonUtil.randomUUID() + suffix;
|
|
|
+ byte[] pData = pp.getPictureData().getData();
|
|
|
+ int width = getWidth(pData);
|
|
|
+ width = setWidth(size, width);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ String url = saver.apply(pData, saveName);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ insert.put(i, String.format(img, url, width));
|
|
|
+ logger.info("picture url is {}, des is {} ", url, pp.getDescription());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ insert.forEach((k, v) -> {
|
|
|
+ XWPFRun r = p.insertNewRun(k);
|
|
|
+ r.setText(v);
|
|
|
+ });
|
|
|
+
|
|
|
+ logger.info("after parse run: " + p.getText());
|
|
|
+ }
|
|
|
+
|
|
|
+ private static int setWidth(String size, int width) {
|
|
|
+ switch (size) {
|
|
|
+ case Constants.PICTURE_SIZE_SMALL:
|
|
|
+ width = setBound(width, 0, Constants.PICTURE_SMALL_SIZE);
|
|
|
+ break;
|
|
|
+ case Constants.PICTURE_SIZE_MIDDLE:
|
|
|
+ width = setBound(width, 0, Constants.PICTURE_MIDDLE_SIZE);
|
|
|
+ break;
|
|
|
+ case Constants.PICTURE_SIZE_LARGE:
|
|
|
+ width = setBound(width, 0, Constants.PICTURE_LARGE_SIZE);
|
|
|
+ }
|
|
|
+ return width;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static int getWidth(byte[] pData) {
|
|
|
+
|
|
|
+ int width = Constants.DEFAULT_WIDTH, height = Constants.DEFAULT_HEIGHT;
|
|
|
+ try {
|
|
|
+ BufferedImage image = ImageIO.read(new ByteArrayInputStream(pData));
|
|
|
+ width = Math.min(image.getWidth(), Constants.MAX_WIDTH);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ logger.info("picture width is {},height is {}", width, height);
|
|
|
+ } catch (Exception e) {
|
|
|
+ logger.error("can't get width and height from picture, picture error message is {}", e.getMessage());
|
|
|
+ }
|
|
|
+ return width;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static int setBound(int bound, int min, int max) {
|
|
|
+ return bound < min ? min : (Math.min(bound, max));
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void parseSubScript(XWPFParagraph paragraph) {
|
|
|
+ Map<Integer, String> insert = new HashMap<>();
|
|
|
+ List<XWPFRun> runs = paragraph.getRuns();
|
|
|
+ for (int i = 0; i < runs.size(); i++) {
|
|
|
+ switch (runs.get(i).getSubscript()) {
|
|
|
+ case BASELINE:
|
|
|
+ break;
|
|
|
+ case SUBSCRIPT:
|
|
|
+ insert.put(i, "<sub>");
|
|
|
+ insert.put(i + 1, "</sub>");
|
|
|
+ break;
|
|
|
+ case SUPERSCRIPT:
|
|
|
+ insert.put(i, "<sup>");
|
|
|
+ insert.put(i + 1, "</sup>");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ int i = 0;
|
|
|
+ for (Map.Entry<Integer, String> entry : insert.entrySet()) {
|
|
|
+ XWPFRun run = paragraph.insertNewRun(entry.getKey() + i++);
|
|
|
+ run.setText(entry.getValue());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
|
|
|
|
|
@@ -220,7 +448,6 @@ public class WordUtil {
|
|
|
|
|
|
|
|
|
* 将word2007转换成html
|
|
|
- *
|
|
|
*/
|
|
|
|
|
|
|
|
@@ -240,7 +467,6 @@ public class WordUtil {
|
|
|
|
|
|
|
|
|
|
|
|
-
|
|
|
public static void main(String[] args) throws IOException {
|
|
|
|
|
|
String pathname = "test.docx";
|