提交 4bd7b536 authored 作者: 任建彩's avatar 任建彩

配置

上级 fc80d1ca
...@@ -21,6 +21,7 @@ import java.io.File; ...@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
...@@ -41,8 +42,8 @@ public class WordController { ...@@ -41,8 +42,8 @@ public class WordController {
* @return * @return
*/ */
@ApiOperation(value = "解析文件目录") @ApiOperation(value = "解析文件目录1")
@RequestMapping(value = "/upload", method= RequestMethod.POST) @RequestMapping(value = "/upload1", method= RequestMethod.POST)
public CallBack<List<String>> searchWordDocX(@RequestParam("fileFolder") MultipartFile file) throws IOException { public CallBack<List<String>> searchWordDocX(@RequestParam("fileFolder") MultipartFile file) throws IOException {
FileInfo entity = service.uploadFile(file); FileInfo entity = service.uploadFile(file);
String fileName = entity.getOriginalFileName(); String fileName = entity.getOriginalFileName();
...@@ -56,40 +57,57 @@ public class WordController { ...@@ -56,40 +57,57 @@ public class WordController {
} }
return CallBack.success(list); return CallBack.success(list);
} }
/*public List<String> fileWord(String filePath){ @ApiOperation(value = "解析文件目录2")
@RequestMapping(value = "/upload2", method= RequestMethod.POST)
public CallBack<List<String>> searchWordDoc2(@RequestParam("fileFolder") MultipartFile file) throws IOException {
FileInfo entity = service.uploadFile(file);
String fileName = entity.getOriginalFileName();
String suff = fileName.substring(fileName.lastIndexOf(".") + 1);
String content = ""; String content = "";
String filePath = null;
filePath = rootPath+entity.getPath()+"/"+entity.getFileName()+entity.getSuffixName();
List<String> list = new ArrayList<>(); List<String> list = new ArrayList<>();
Document doc = new Document(); if (suff.equals("html")) {
doc.loadFromFile(filePath); File file1 = new File(filePath);
//获取段落数量 org.jsoup.nodes.Document doc = Jsoup.parse(file1, "UTF-8");
int count = doc.getSections().get(0).getParagraphs().getCount(); //读取.html文件为字符串
System.out.println("总共含有段落数:" + count); Elements es12 = doc.getElementsByClass("msgdet_left_con");
//获取段落 for (Element d:es12) {
int i=0; Elements p1 =d.getElementsByTag("p");
int j=0; if(p1.size()>0){
for (int z = 0; z < count; z++) { for (Element el1:p1) {
Paragraph paragraph = doc.getSections().get(0).getParagraphs().get(z); //获取a标签下的文本内容
if(StringUtils.isNotBlank(paragraph.getText())){ String text = el1.text();
//获取子段落 if(StringUtils.isNotBlank(text)) {
String text = paragraph.getText(); String regEx = "[^0-9]";
String substring = text.substring(0, 1); Pattern p = Pattern.compile(regEx);
if(substring.contains("(")){ Matcher m = p.matcher(text);
int mid = z - 1; String result = m.replaceAll("").trim();
System.out.println("获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>"); int length = result.length();
content="获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>"; if(length==3){
list.add(content); System.out.println("获取三级段落<p class='c03' >" + text+"</p>");
}else{ content="获取二级段落<p class='c03'>" + text+"</p>";
i++; list.add(content);
System.out.println("获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>"); }
content="获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>"; if(length==2){
list.add(content); System.out.println("获取二级段落<p class='c02' >" + text+"</p>");
content="获取二级段落<p class='c02'>" + text+"</p>";
list.add(content);
}
if(length==1){
System.out.println("获取一级段落<p class='c01' >" + text+"</p>");
content="获取一级段落<p class='c01'>" + text+"</p>";
list.add(content);
}
}
}
} }
} }
} }
return list; return CallBack.success(list);
}*/ }
public List<String> fileHtml(String filePath) throws IOException { public List<String> fileHtml(String filePath) throws IOException {
// filePath = "D:/opt/upload/rsgw/2023/03/21/28622e72956e49a4ae2b6b79ea871c6a.html"; // filePath = "D:/opt/upload/rsgw/2023/03/21/28622e72956e49a4ae2b6b79ea871c6a.html";
String content = ""; String content = "";
List<String> list = new ArrayList<>(); List<String> list = new ArrayList<>();
File file = new File(filePath); File file = new File(filePath);
...@@ -127,7 +145,7 @@ public class WordController { ...@@ -127,7 +145,7 @@ public class WordController {
boolean hanzi = Pattern.matches(ze, substring); boolean hanzi = Pattern.matches(ze, substring);
if(substring.contains("(")){ if(substring.contains("(")){
j++; j++;
if(text .contains("。")){//是否包含 ? if(text .contains("。")){
text = text.substring(0,text.indexOf("。"));//拿取字符串 从第一位开始到问号前结束 text = text.substring(0,text.indexOf("。"));//拿取字符串 从第一位开始到问号前结束
} }
System.out.println("获取二级段落:<p class='c02'>" + text+"</p>"); System.out.println("获取二级段落:<p class='c02'>" + text+"</p>");
...@@ -142,4 +160,38 @@ public class WordController { ...@@ -142,4 +160,38 @@ public class WordController {
return list; return list;
} }
/*public List<String> fileWord(String filePath){
String content = "";
List<String> list = new ArrayList<>();
Document doc = new Document();
doc.loadFromFile(filePath);
//获取段落数量
int count = doc.getSections().get(0).getParagraphs().getCount();
System.out.println("总共含有段落数:" + count);
//获取段落
int i=0;
int j=0;
for (int z = 0; z < count; z++) {
Paragraph paragraph = doc.getSections().get(0).getParagraphs().get(z);
if(StringUtils.isNotBlank(paragraph.getText())){
//获取子段落
String text = paragraph.getText();
String substring = text.substring(0, 1);
if(substring.contains("(")){
int mid = z - 1;
System.out.println("获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>");
content="获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>";
list.add(content);
}else{
i++;
System.out.println("获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>");
content="获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>";
list.add(content);
}
}
}
return list;
}*/
} }
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论