提交 e10e488a authored 作者: xinjunguo's avatar xinjunguo

--no commit message

上级 9598aa76
package com.zrqx.resource.commons.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.zip.ZipInputStream;
import nl.siegmann.epublib.domain.Author;
import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.Date;
import nl.siegmann.epublib.domain.Identifier;
import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Metadata;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.Resources;
import nl.siegmann.epublib.domain.SpineReference;
import nl.siegmann.epublib.domain.TOCReference;
import nl.siegmann.epublib.domain.TableOfContents;
import nl.siegmann.epublib.epub.EpubReader;
/**
* epub 数字书籍格式 数据工具包
* @author Administrator
*
*/
public class EpubUtil {

    /** Message of the {@link NullPointerException} thrown when no book has been loaded yet. */
    private static final String BOOK_NOT_LOADED = "图书Book属性为null";

    /** Resource ids probed, in this order, when the book declares no cover image. */
    private static final String[] COVER_RESOURCE_IDS = {"cover", "cover-image", "Cover"};

    /** The book loaded by the most recent successful {@code setEpubFile} call; null until then. */
    private Book book = null;

    /** Lazily created reader, reused across {@code setEpubFile} calls. */
    private EpubReader epubReader = null;

    /**
     * Loads the book from an EPUB file on disk.
     *
     * <p>The file stream is opened and closed by this method. If the file cannot be
     * opened or read, the stack trace is printed and the previously loaded book (if any)
     * is left unchanged.
     *
     * @param file the EPUB file to read
     * @return this instance, for chaining
     */
    public EpubUtil setEpubFile(File file) {
        // try-with-resources: the original never closed this stream.
        // NOTE(review): assumes the Book returned by readEpub(InputStream) no longer
        // needs the stream once parsing has finished - confirm for the epublib version in use.
        try (FileInputStream in = new FileInputStream(file)) {
            return setEpubFile(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return this;
    }

    /**
     * Loads the book from a stream. The stream is not closed by this method.
     *
     * <p>On {@link IOException} the stack trace is printed and the previously loaded
     * book (if any) is left unchanged.
     *
     * @param in stream positioned at the start of the EPUB data
     * @return this instance, for chaining
     */
    public EpubUtil setEpubFile(InputStream in) {
        try {
            book = getEpubReader().readEpub(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return this;
    }

    /**
     * Loads the book from a stream, passing {@code encoding} through to the reader.
     * The stream is not closed by this method.
     *
     * <p>On {@link IOException} the stack trace is printed and the previously loaded
     * book (if any) is left unchanged.
     *
     * @param in       stream positioned at the start of the EPUB data
     * @param encoding encoding name handed to {@code EpubReader.readEpub}
     * @return this instance, for chaining
     */
    public EpubUtil setEpubFile(InputStream in, String encoding) {
        try {
            book = getEpubReader().readEpub(in, encoding);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return this;
    }

    /**
     * Loads the book from a zip stream. The stream is not closed by this method.
     *
     * <p>On {@link IOException} the stack trace is printed and the previously loaded
     * book (if any) is left unchanged.
     *
     * @param in zip stream over the EPUB container
     * @return this instance, for chaining
     */
    public EpubUtil setEpubFile(ZipInputStream in) {
        try {
            book = getEpubReader().readEpub(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return this;
    }

    /**
     * Loads the book from a zip stream, passing {@code encoding} through to the reader.
     * The stream is not closed by this method.
     *
     * <p>On {@link IOException} the stack trace is printed and the previously loaded
     * book (if any) is left unchanged.
     *
     * @param in       zip stream over the EPUB container
     * @param encoding encoding name handed to {@code EpubReader.readEpub}
     * @return this instance, for chaining
     */
    public EpubUtil setEpubFile(ZipInputStream in, String encoding) {
        try {
            book = getEpubReader().readEpub(in, encoding);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return this;
    }

    /**
     * Returns all resources of the loaded book.
     *
     * @return the book's resources
     * @throws NullPointerException if no book has been loaded
     */
    public Resources getResources() {
        return requireBook().getResources();
    }

    /**
     * Returns the cover image resource.
     *
     * <p>Uses the cover image declared by the book when there is one. Otherwise falls
     * back to looking the resource up by the ids {@code cover}, {@code cover-image} and
     * {@code Cover}, in that order, because books frequently do not declare the cover
     * in their metadata.
     *
     * @return the cover resource, or null if none of the lookups succeeds
     * @throws NullPointerException if no book has been loaded
     */
    public Resource getCover() {
        Book loaded = requireBook();
        Resource cover = loaded.getCoverImage();
        if (cover != null) {
            return cover;
        }
        Resources resources = loaded.getResources();
        for (String id : COVER_RESOURCE_IDS) {
            cover = resources.getById(id);
            if (cover != null) {
                return cover;
            }
        }
        return null;
    }

    /**
     * Returns the resources whose media type is one of {@code mediaTypes}.
     *
     * @param mediaTypes media types to select
     * @return the matching resources
     * @throws NullPointerException if no book has been loaded
     */
    public List<Resource> getResources(MediaType... mediaTypes) {
        return getResources().getResourcesByMediaTypes(mediaTypes);
    }

    /**
     * Returns the first CSS resource of the book.
     *
     * @return the first {@code text/css} resource, or null if the book has none
     * @throws NullPointerException if no book has been loaded
     */
    public Resource getCssResource() {
        List<Resource> stylesheets = getResources(new MediaType("text/css", "css"));
        if (stylesheets == null || stylesheets.isEmpty()) {
            return null;
        }
        return stylesheets.get(0);
    }

    /**
     * Returns the JPEG, GIF and PNG image resources of the book.
     *
     * @return the matching image resources
     * @throws NullPointerException if no book has been loaded
     */
    public List<Resource> getImagesResources() {
        return getResources(
                new MediaType("image/jpeg", "jpg"),
                new MediaType("image/gif", "gif"),
                new MediaType("image/png", "png"));
    }

    /**
     * Returns the spine references of the book, i.e. its content documents in reading order.
     *
     * @return the spine references
     * @throws NullPointerException if no book has been loaded
     */
    public List<SpineReference> getSpineReferences() {
        return requireBook().getSpine().getSpineReferences();
    }

    /**
     * Returns the top-level table-of-contents entries (titles and their resources).
     *
     * @return the table-of-contents references
     * @throws NullPointerException if no book has been loaded
     */
    public List<TOCReference> getTocReferences() {
        return requireBook().getTableOfContents().getTocReferences();
    }

    /**
     * Returns the loaded book.
     *
     * @return the book, or null if none has been loaded
     */
    public Book getBook() {
        return book;
    }

    /**
     * Returns the metadata of the loaded book.
     *
     * @return the book metadata
     * @throws NullPointerException if no book has been loaded
     * @throws Exception declared for backward compatibility with existing callers
     */
    public Metadata getMetadata() throws Exception {
        return requireBook().getMetadata();
    }

    /**
     * Returns the shared reader, creating it on first use.
     *
     * @return the reader instance
     */
    private EpubReader getEpubReader() {
        if (epubReader == null) {
            epubReader = new EpubReader();
        }
        return epubReader;
    }

    /**
     * Returns the loaded book or fails with the same exception type and message that
     * {@link #getMetadata()} has always used.
     *
     * @return the loaded book, never null
     * @throws NullPointerException if no book has been loaded
     */
    private Book requireBook() {
        if (book == null) {
            throw new NullPointerException(BOOK_NOT_LOADED);
        }
        return book;
    }

    /**
     * Returns the ISBN of the book.
     *
     * <p>The first identifier whose value contains a hyphen is taken to be the ISBN and
     * is returned with all hyphens removed. Identifiers without a value are skipped.
     *
     * @return the ISBN without hyphens, or null if no identifier contains a hyphen
     * @throws Exception if the metadata cannot be obtained (no book loaded)
     */
    public String getISBN() throws Exception {
        for (Identifier identifier : getMetadata().getIdentifiers()) {
            String value = identifier.getValue();
            // Guard against identifiers without a value instead of failing with an NPE.
            if (value != null && value.contains("-")) {
                return value.replace("-", "");
            }
        }
        return null;
    }

    /**
     * Returns the first author of the book as first name followed by last name, with
     * every {@code [}, {@code ]} and {@code ,} replaced by a space.
     *
     * @return the author text, or an empty string if the metadata is unavailable or
     *         lists no author
     */
    public String getAuthor() {
        List<Author> authors = null;
        try {
            authors = getMetadata().getAuthors();
        } catch (Exception e) {
            e.printStackTrace();
        }
        // authors stays null when the metadata lookup failed; the original dereferenced it anyway.
        if (authors == null || authors.isEmpty()) {
            return "";
        }
        Author first = authors.get(0);
        String author = first.getFirstname() + first.getLastname();
        return author.replaceAll("[\\[\\],]", " ");
    }

    /**
     * Returns the publication date parsed from the first date entry of the metadata.
     *
     * <p>The text form of the entry is parsed with the pattern {@code yyy}, or
     * {@code yyy.MM} when it contains a dot, or {@code yyy-MM} when it contains a hyphen
     * (the hyphen takes precedence when both occur).
     *
     * @return the parsed date, or null if the metadata is unavailable, has no date,
     *         or the date text cannot be parsed
     */
    public java.util.Date getPublishDate() {
        List<Date> dates = null;
        try {
            dates = getMetadata().getDates();
        } catch (Exception e) {
            e.printStackTrace();
        }
        // dates stays null when the metadata lookup failed; the original dereferenced it anyway.
        if (dates == null || dates.isEmpty() || dates.get(0) == null) {
            return null;
        }
        String dateText = dates.get(0).toString();
        if (dateText == null || dateText.equals("")) {
            return null;
        }
        String pattern = "yyy";
        if (dateText.contains(".")) {
            pattern = "yyy.MM";
        }
        if (dateText.contains("-")) {
            pattern = "yyy-MM";
        }
        try {
            // A fresh formatter per call: SimpleDateFormat must not be shared between threads.
            return new SimpleDateFormat(pattern).parse(dateText);
        } catch (ParseException e) {
            System.out.println("日期转换错误!");
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the publisher of the book.
     *
     * <p>The first publisher entry containing a hyphen is returned, prefixed with
     * {@code 、}. NOTE(review): both the hyphen filter and the leading separator look
     * unintended (the filter mirrors the ISBN lookup); the behaviour is kept unchanged
     * here - confirm against callers before altering it.
     *
     * @return the prefixed publisher, or an empty string if there is no such entry or
     *         the metadata is unavailable
     */
    public String getPublisher() {
        try {
            for (String candidate : getMetadata().getPublishers()) {
                if (candidate.contains("-")) {
                    return "、" + candidate;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Returns the first title of the book.
     *
     * @return the value of {@code Metadata.getFirstTitle()}, or an empty string if the
     *         metadata is unavailable
     */
    public String getBookTitle() {
        try {
            return getMetadata().getFirstTitle();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }
}
package com.zrqx.resource.commons.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import org.springframework.stereotype.Component;
@Component
public class ParseFile {

    /**
     * Prefix that {@link #readHtml(File)} inserts in front of image paths. It is
     * inserted verbatim; no path separator is added between prefix and path.
     */
    private static final String DEFAULT_IMAGE_PATH_PREFIX = "D:/test";

    /** Matches the start of an image tag: {@code <img} followed by whitespace (case-sensitive). */
    private static final java.util.regex.Pattern IMG_TAG_START =
            java.util.regex.Pattern.compile("<img\\s+");

    /**
     * Number of characters skipped before the prefix is inserted, i.e. the length of
     * {@code src="}. The rewrite therefore expects {@code src}, {@code =} and the
     * opening quote to be adjacent.
     */
    private static final int SRC_VALUE_OFFSET = 5;

    /**
     * Reads the body of an HTML file and prefixes image paths with {@code D:/test}.
     *
     * <p>Equivalent to {@code readHtml(file, "D:/test")}.
     *
     * @param file the HTML file to read
     * @return the rewritten body text; see {@link #readHtml(File, String)}
     */
    public static String readHtml(File file) {
        return readHtml(file, DEFAULT_IMAGE_PATH_PREFIX);
    }

    /**
     * Reads the body of an HTML file and prefixes image paths with {@code imagePathPrefix}.
     *
     * <p>Every line up to and including the first one containing {@code <body}
     * (case-insensitive) is skipped. The following lines are returned, each terminated
     * by {@code \n}, until a line containing {@code </body} or the end of the file is
     * reached. Processing is line based, so content sharing a line with the opening or
     * closing body tag is not returned.
     *
     * <p>The file is decoded with the platform default charset.
     * NOTE(review): confirm this matches the encoding of the files being parsed.
     *
     * <p>If reading fails, the stack trace is printed and the text collected so far is
     * returned. The reader is closed in every case.
     *
     * @param file            the HTML file to read
     * @param imagePathPrefix text inserted verbatim in front of each image path; null
     *                        is treated as an empty prefix
     * @return the rewritten body text, possibly empty
     */
    public static String readHtml(File file, String imagePathPrefix) {
        String prefix = imagePathPrefix == null ? "" : imagePathPrefix;
        // StringBuilder instead of repeated String concatenation, which is quadratic in file size.
        StringBuilder body = new StringBuilder();
        try (BufferedReader htmlReader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            boolean insideBody = false;
            String line;
            while ((line = htmlReader.readLine()) != null) {
                String lowerCaseLine = line.toLowerCase();
                if (!insideBody) {
                    // The line carrying the opening tag itself is dropped.
                    insideBody = lowerCaseLine.contains("<body");
                    continue;
                }
                if (lowerCaseLine.contains("</body")) {
                    break;
                }
                if (lowerCaseLine.contains("src")) {
                    line = prefixImageSources(line, prefix);
                }
                body.append(line).append("\n");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return body.toString();
    }

    /**
     * Inserts {@code prefix} after the first five characters of every piece of
     * {@code line} that starts with {@code src} (case-insensitive), where the pieces
     * are the parts of the line separated by {@code <img} plus whitespace. The pieces
     * are re-joined with {@code "<img "}.
     *
     * @param line   one line of HTML
     * @param prefix text to insert, not null
     * @return the rewritten line
     */
    private static String prefixImageSources(String line, String prefix) {
        // Limit -1 keeps trailing empty pieces, so a line ending in "<img " keeps that tag.
        String[] pieces = IMG_TAG_START.split(line, -1);
        StringBuilder rebuilt = new StringBuilder(line.length() + prefix.length() * pieces.length);
        for (int i = 0; i < pieces.length; i++) {
            if (i > 0) {
                rebuilt.append("<img ");
            }
            String piece = pieces[i];
            // The length guard keeps a truncated attribute from aborting the whole parse.
            if (piece.length() >= SRC_VALUE_OFFSET && piece.toLowerCase().startsWith("src")) {
                rebuilt.append(piece, 0, SRC_VALUE_OFFSET)
                        .append(prefix)
                        .append(piece, SRC_VALUE_OFFSET, piece.length());
            } else {
                rebuilt.append(piece);
            }
        }
        return rebuilt.toString();
    }

    /**
     * Extracts the file name from the first {@code src=} attribute of an HTML fragment.
     *
     * <p>The attribute value may be enclosed in double or single quotes, optionally
     * preceded by whitespace, or be unquoted (ending at whitespace or {@code >}). The
     * file name is the last segment of the value when split on {@code /} and {@code \}.
     *
     * @param htmlLine one line of HTML, expected to contain an {@code <img>} element
     * @return the file name, or an empty string if there is no {@code src=} attribute,
     *         the attribute has no value, or the value holds no file name
     */
    public static String extractFilename(String htmlLine) {
        int srcIndex = htmlLine.toLowerCase().indexOf("src=");
        if (srcIndex == -1) {
            return "";
        }
        String afterSrc = htmlLine.substring(srcIndex + 4);
        int start = 0;
        while (start < afterSrc.length() && Character.isWhitespace(afterSrc.charAt(start))) {
            start++;
        }
        if (start == afterSrc.length()) {
            // "src=" was the last thing on the line.
            return "";
        }
        char first = afterSrc.charAt(start);
        String path;
        if (first == '"' || first == '\'') {
            int closing = afterSrc.indexOf(first, start + 1);
            // A missing closing quote means the value runs to the end of the line.
            path = closing == -1
                    ? afterSrc.substring(start + 1)
                    : afterSrc.substring(start + 1, closing);
        } else {
            int end = start;
            while (end < afterSrc.length()
                    && !Character.isWhitespace(afterSrc.charAt(end))
                    && afterSrc.charAt(end) != '>') {
                end++;
            }
            path = afterSrc.substring(start, end);
        }
        String[] segments = path.split("[/\\\\]");
        // split() yields no segments for a path made up solely of separators.
        return segments.length == 0 ? "" : segments[segments.length - 1];
    }
}
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论