提交 4bd7b536 作者: 任建彩

配置

上级 fc80d1ca
......@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
......@@ -41,8 +42,8 @@ public class WordController {
* @return
*/
@ApiOperation(value = "解析文件目录")
@RequestMapping(value = "/upload", method= RequestMethod.POST)
@ApiOperation(value = "解析文件目录1")
@RequestMapping(value = "/upload1", method= RequestMethod.POST)
public CallBack<List<String>> searchWordDocX(@RequestParam("fileFolder") MultipartFile file) throws IOException {
FileInfo entity = service.uploadFile(file);
String fileName = entity.getOriginalFileName();
......@@ -56,38 +57,55 @@ public class WordController {
}
return CallBack.success(list);
}
/*public List<String> fileWord(String filePath){
@ApiOperation(value = "解析文件目录2")
@RequestMapping(value = "/upload2", method= RequestMethod.POST)
public CallBack<List<String>> searchWordDoc2(@RequestParam("fileFolder") MultipartFile file) throws IOException {
FileInfo entity = service.uploadFile(file);
String fileName = entity.getOriginalFileName();
String suff = fileName.substring(fileName.lastIndexOf(".") + 1);
String content = "";
String filePath = null;
filePath = rootPath+entity.getPath()+"/"+entity.getFileName()+entity.getSuffixName();
List<String> list = new ArrayList<>();
Document doc = new Document();
doc.loadFromFile(filePath);
//获取段落数量
int count = doc.getSections().get(0).getParagraphs().getCount();
System.out.println("总共含有段落数:" + count);
//获取段落
int i=0;
int j=0;
for (int z = 0; z < count; z++) {
Paragraph paragraph = doc.getSections().get(0).getParagraphs().get(z);
if(StringUtils.isNotBlank(paragraph.getText())){
//获取子段落
String text = paragraph.getText();
String substring = text.substring(0, 1);
if(substring.contains("(")){
int mid = z - 1;
System.out.println("获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>");
content="获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>";
if (suff.equals("html")) {
File file1 = new File(filePath);
org.jsoup.nodes.Document doc = Jsoup.parse(file1, "UTF-8");
//读取.html文件为字符串
Elements es12 = doc.getElementsByClass("msgdet_left_con");
for (Element d:es12) {
Elements p1 =d.getElementsByTag("p");
if(p1.size()>0){
for (Element el1:p1) {
//获取a标签下的文本内容
String text = el1.text();
if(StringUtils.isNotBlank(text)) {
String regEx = "[^0-9]";
Pattern p = Pattern.compile(regEx);
Matcher m = p.matcher(text);
String result = m.replaceAll("").trim();
int length = result.length();
if(length==3){
System.out.println("获取三级段落<p class='c03' >" + text+"</p>");
content="获取二级段落<p class='c03'>" + text+"</p>";
list.add(content);
}else{
i++;
System.out.println("获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>");
content="获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>";
}
if(length==2){
System.out.println("获取二级段落<p class='c02' >" + text+"</p>");
content="获取二级段落<p class='c02'>" + text+"</p>";
list.add(content);
}
if(length==1){
System.out.println("获取一级段落<p class='c01' >" + text+"</p>");
content="获取一级段落<p class='c01'>" + text+"</p>";
list.add(content);
}
}
return list;
}*/
}
}
}
}
return CallBack.success(list);
}
public List<String> fileHtml(String filePath) throws IOException {
// filePath = "D:/opt/upload/rsgw/2023/03/21/28622e72956e49a4ae2b6b79ea871c6a.html";
String content = "";
......@@ -127,7 +145,7 @@ public class WordController {
boolean hanzi = Pattern.matches(ze, substring);
if(substring.contains("(")){
j++;
if(text .contains("。")){//是否包含 ?
if(text .contains("。")){
text = text.substring(0,text.indexOf("。"));//Keep only the text from the first character up to (not including) the first "。"
}
System.out.println("获取二级段落:<p class='c02'>" + text+"</p>");
......@@ -142,4 +160,38 @@ public class WordController {
return list;
}
// Disabled (commented-out) fileWord(String filePath) helper; nothing in this block is compiled.
// As written it loads the document at filePath, walks the paragraphs of the first section,
// and adds one labelled <p> string per non-blank paragraph to the returned list:
// a paragraph whose first character is an opening parenthesis is treated as second-level,
// every other paragraph as first-level.
// NOTE(review): Document and Paragraph look like types from a third-party Word library;
// confirm which one before re-enabling this method.
/*public List<String> fileWord(String filePath){
String content = "";
List<String> list = new ArrayList<>();
Document doc = new Document();
doc.loadFromFile(filePath);
//Get the number of paragraphs in the first section
int count = doc.getSections().get(0).getParagraphs().getCount();
System.out.println("总共含有段落数:" + count);
//Walk the paragraphs
//i counts first-level paragraphs seen so far; it is also written as the pid of second-level ones
int i=0;
//NOTE(review): j is never incremented in this method, so every second-level id comes out as p0
int j=0;
for (int z = 0; z < count; z++) {
Paragraph paragraph = doc.getSections().get(0).getParagraphs().get(z);
if(StringUtils.isNotBlank(paragraph.getText())){
//Classify the paragraph by its first character
String text = paragraph.getText();
String substring = text.substring(0, 1);
if(substring.contains("(")){
//NOTE(review): mid is computed but never read
int mid = z - 1;
System.out.println("获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>");
content="获取二级段落:<p id='p"+j+"' pid='"+i+"'>"+paragraph.getText()+"</p>";
list.add(content);
}else{
//First-level paragraph: advance the counter before using it, so ids start at 1
i++;
System.out.println("获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>");
content="获取一级段落:<p id='"+i+"'>"+paragraph.getText()+"</p>";
list.add(content);
}
}
}
return list;
}*/
}
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论