配置

4bd7b536 · 任建彩 · fc80d1ca · 4bd7b536
--- a/szzy-provider/szzy-provider-file/src/main/java/com/zrqx/file/controller/backend/WordController.java
+++ b/szzy-provider/szzy-provider-file/src/main/java/com/zrqx/file/controller/backend/WordController.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
@@ -41,8 +42,8 @@ public class WordController {
     * @return
     */

-    @ApiOperation(value = "解析文件目录")
-    @RequestMapping(value = "/upload", method= RequestMethod.POST)
+    @ApiOperation(value = "解析文件目录1")
+    @RequestMapping(value = "/upload1", method= RequestMethod.POST)
    public CallBack<List<String>> searchWordDocX(@RequestParam("fileFolder") MultipartFile file) throws IOException {
        FileInfo entity = service.uploadFile(file);
        String fileName = entity.getOriginalFileName();
@@ -56,40 +57,57 @@ public class WordController {
        }
        return CallBack.success(list);
    }
-    /*public List<String> fileWord(String filePath){
+    @ApiOperation(value = "解析文件目录2")
+    @RequestMapping(value = "/upload2", method= RequestMethod.POST)
+    public CallBack<List<String>> searchWordDoc2(@RequestParam("fileFolder") MultipartFile file) throws IOException {
+        FileInfo entity = service.uploadFile(file);
+        String fileName = entity.getOriginalFileName();
+        String suff = fileName.substring(fileName.lastIndexOf(".") + 1); // text after the last '.' (whole name if there is none)
        String content = "";
+        String filePath = null;
+        filePath = rootPath+entity.getPath()+"/"+entity.getFileName()+entity.getSuffixName();
        List<String> list = new ArrayList<>();
-        Document doc = new Document();
-        doc.loadFromFile(filePath);
-        //获取段落数量
-        int count = doc.getSections().get(0).getParagraphs().getCount();
-        System.out.println("总共含有段落数:" + count);
-        //获取段落
-        int i=0;
-        int j=0;
-        for (int z = 0; z < count; z++) {
-            Paragraph paragraph = doc.getSections().get(0).getParagraphs().get(z);
-            if(StringUtils.isNotBlank(paragraph.getText())){
-                //获取子段落
-                String text = paragraph.getText();
-                String substring = text.substring(0, 1);
-                if(substring.contains("（")){
-                    int mid = z - 1;
-                    System.out.println("获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>");
-                    content="获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>";
-                    list.add(content);
-                }else{
-                    i++;
-                    System.out.println("获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>");
-                    content="获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>";
-                    list.add(content);
+        if (suff.equals("html")) {
+            File file1 = new File(filePath);
+            org.jsoup.nodes.Document doc = Jsoup.parse(file1, "UTF-8");
+            // Select the elements with class "msgdet_left_con" from the parsed .html document
+            Elements es12 = doc.getElementsByClass("msgdet_left_con");
+            for (Element d:es12) {
+                Elements p1 =d.getElementsByTag("p");
+                if(p1.size()>0){
+                    for (Element  el1:p1) {
+                        // Text content of the <p> element
+                        String text = el1.text();
+                        if(StringUtils.isNotBlank(text)) {
+                            String regEx = "[^0-9]";
+                            Pattern p = Pattern.compile(regEx);
+                            Matcher m = p.matcher(text);
+                            String result = m.replaceAll("").trim(); // strip every non-digit character
+                            int length = result.length(); // how many digits the text contains selects the heading level below
+                            if(length==3){
+                                System.out.println("获取三级段落<p class='c03' >" + text+"</p>");
+                                content="获取三级段落<p class='c03'>" + text+"</p>"; // label fixed: was the level-two label despite class c03
+                                list.add(content);
+                            }
+                            if(length==2){
+                                System.out.println("获取二级段落<p class='c02' >" + text+"</p>");
+                                content="获取二级段落<p class='c02'>" + text+"</p>";
+                                list.add(content);
+                            }
+                            if(length==1){
+                                System.out.println("获取一级段落<p class='c01' >" + text+"</p>");
+                                content="获取一级段落<p class='c01'>" + text+"</p>";
+                                list.add(content);
+                            }
+                        }
+                    }
                }
            }
        }
-        return list;
-    }*/
+        return CallBack.success(list);
+    }
    public List<String> fileHtml(String filePath) throws IOException {
-       // filePath = "D:/opt/upload/rsgw/2023/03/21/28622e72956e49a4ae2b6b79ea871c6a.html";
+        // filePath = "D:/opt/upload/rsgw/2023/03/21/28622e72956e49a4ae2b6b79ea871c6a.html";
        String content = "";
        List<String> list = new ArrayList<>();
        File file = new File(filePath);
@@ -127,7 +145,7 @@ public class WordController {
                            boolean hanzi = Pattern.matches(ze, substring);
                            if(substring.contains("（")){
                                j++;
-                                if(text .contains("。")){//是否包含 ?
+                                if(text .contains("。")){
                                    text = text.substring(0,text.indexOf("。"));//拿取字符串 从第一位开始到问号前结束
                                }
                                System.out.println("获取二级段落:<p class='c02'>" + text+"</p>");
@@ -142,4 +160,38 @@ public class WordController {
        return list;
    }

+    /*public List<String> fileWord(String filePath){
+        String content = "";
+        List<String> list = new ArrayList<>();
+        Document doc = new Document();
+        doc.loadFromFile(filePath);
+        //获取段落数量
+        int count = doc.getSections().get(0).getParagraphs().getCount();
+        System.out.println("总共含有段落数:" + count);
+        //获取段落
+        int i=0;
+        int j=0;
+        for (int z = 0; z < count; z++) {
+            Paragraph paragraph = doc.getSections().get(0).getParagraphs().get(z);
+            if(StringUtils.isNotBlank(paragraph.getText())){
+                //获取子段落
+                String text = paragraph.getText();
+                String substring = text.substring(0, 1);
+                if(substring.contains("（")){
+                    int mid = z - 1;
+                    System.out.println("获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>");
+                    content="获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>";
+                    list.add(content);
+                }else{
+                    i++;
+                    System.out.println("获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>");
+                    content="获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>";
+                    list.add(content);
+                }
+            }
+        }
+        return list;
+    }*/
+
+
 }