浏览代码

更新化学试题,测试table识别

zengxj 5 年之前
父节点
当前提交
1e1063a8cb
共有 3 个文件被更改,包括 34 次插入和 0 次删除
  1. 二进制
      doc/table.docx
  2. 二进制
      doc/化学中考专项诊断.docx
  3. 34 0
      src/test/java/com/yaoxiang/diagnosis/service/WordTest.java

二进制
doc/table.docx


二进制
doc/化学中考专项诊断.docx


+ 34 - 0
src/test/java/com/yaoxiang/diagnosis/service/WordTest.java

@@ -0,0 +1,34 @@
+package com.yaoxiang.diagnosis.service;
+
+import com.yaoxiang.diagnosis.word.WordUtil;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.junit.Test;
+import org.springframework.util.FileCopyUtils;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
+
+/** Exploratory test: prints the paragraphs of doc/table.docx for manual inspection. */
+public class WordTest {
+
+    /** Loads the sample file, opens it with WordUtil and prints each paragraph; makes no assertions. */
+    @Test
+    public void test() throws Exception {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        try (InputStream is = new FileInputStream(new File("doc/table.docx"))) {
+            FileCopyUtils.copy(is, baos);
+        }
+        // XWPFDocument is Closeable; try-with-resources releases it once printing is done.
+        try (XWPFDocument document = WordUtil.open(baos.toByteArray())) {
+            for (XWPFParagraph paragraph : document.getParagraphs()) {
+                System.out.println(paragraph.getCTP().xmlText());
+                System.out.println(paragraph.getCTP().isImmutable());
+                System.out.println(paragraph.toString());
+            }
+        }
+    }
+}