parent
947a87bf25
commit
be25d5569b
@ -0,0 +1,151 @@ |
||||
package xyz.fycz.myreader.webapi.crawler.read; |
||||
|
||||
import android.text.Html; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
|
||||
import xyz.fycz.myreader.entity.SearchBookBean; |
||||
import xyz.fycz.myreader.enums.BookSource; |
||||
import xyz.fycz.myreader.greendao.entity.Book; |
||||
import xyz.fycz.myreader.greendao.entity.Chapter; |
||||
import xyz.fycz.myreader.model.mulvalmap.ConcurrentMultiValueMap; |
||||
import xyz.fycz.myreader.util.StringHelper; |
||||
import xyz.fycz.myreader.webapi.crawler.base.BookInfoCrawler; |
||||
import xyz.fycz.myreader.webapi.crawler.base.ReadCrawler; |
||||
|
||||
|
||||
public class EWenXueReadCrawler implements ReadCrawler, BookInfoCrawler { |
||||
public static final String NAME_SPACE = "http://ewenxue.org"; |
||||
public static final String NOVEL_SEARCH = "http://ewenxue.org/search.htm?keyword={key}"; |
||||
public static final String CHARSET = "GBK"; |
||||
public static final String SEARCH_CHARSET = "utf-8"; |
||||
|
||||
@Override |
||||
public String getSearchLink() { |
||||
return NOVEL_SEARCH; |
||||
} |
||||
|
||||
@Override |
||||
public String getCharset() { |
||||
return CHARSET; |
||||
} |
||||
|
||||
@Override |
||||
public String getNameSpace() { |
||||
return NAME_SPACE; |
||||
} |
||||
|
||||
@Override |
||||
public Boolean isPost() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public String getSearchCharset() { |
||||
return SEARCH_CHARSET; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节正文 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public String getContentFormHtml(String html) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element divContent = doc.getElementById("cContent"); |
||||
String content = Html.fromHtml(divContent.html()).toString(); |
||||
char c = 160; |
||||
String spaec = "" + c; |
||||
content = content.replace(spaec, " ").replace("setFontSize();", ""); |
||||
return content; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节列表 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public ArrayList<Chapter> getChaptersFromHtml(String html) { |
||||
ArrayList<Chapter> chapters = new ArrayList<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String readUrl = NAME_SPACE + doc.select(".breadcrumb").first() |
||||
.select("a").last().attr("href"); |
||||
Element divList = doc.getElementById("chapters-list"); |
||||
String lastTile = null; |
||||
int i = 0; |
||||
Elements elementsByTag = divList.getElementsByTag("a"); |
||||
for (int j = 0; j < elementsByTag.size(); j++) { |
||||
Element a = elementsByTag.get(j); |
||||
String title = a.text(); |
||||
if (!StringHelper.isEmpty(lastTile) && title.equals(lastTile)) { |
||||
continue; |
||||
} |
||||
Chapter chapter = new Chapter(); |
||||
chapter.setNumber(i++); |
||||
chapter.setTitle(title); |
||||
String url = readUrl + a.attr("href"); |
||||
chapter.setUrl(url); |
||||
chapters.add(chapter); |
||||
lastTile = title; |
||||
} |
||||
return chapters; |
||||
} |
||||
|
||||
/** |
||||
* 从搜索html中得到书列表 |
||||
* |
||||
* @param html |
||||
* @return <li class="list-group-item clearfix"> |
||||
* <div class="col-xs-1"><i class="tag-blue">玄幻</i></div> |
||||
* <div class="col-xs-3"><a href="/xs/163283/">大主宰</a></div> |
||||
* <div class="col-xs-4"><a href="/xs/163283/56818254.htm">第一千三十二章 七阳截天杖</a></div> |
||||
* <div class="col-xs-2">天蚕土豆</div> |
||||
* <div class="col-xs-2"><span class="time">2019-07-05 16:03</span></div> |
||||
* </li> |
||||
*/ |
||||
public ConcurrentMultiValueMap<SearchBookBean, Book> getBooksFromSearchHtml(String html) { |
||||
ConcurrentMultiValueMap<SearchBookBean, Book> books = new ConcurrentMultiValueMap<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
Elements elements = doc.getElementsByClass("clearfix"); |
||||
for (int i = 1; i < elements.size(); i++) { |
||||
Element element = elements.get(i); |
||||
Book book = new Book(); |
||||
Elements info = element.getElementsByTag("div"); |
||||
book.setName(info.get(1).text()); |
||||
book.setInfoUrl(NAME_SPACE + info.get(1).getElementsByTag("a").attr("href")); |
||||
book.setChapterUrl(book.getInfoUrl() + "mulu.htm"); |
||||
book.setAuthor(info.get(3).text()); |
||||
book.setNewestChapterTitle(info.get(2).text()); |
||||
book.setType(info.get(0).text()); |
||||
book.setSource(BookSource.ewenxue.toString()); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} |
||||
return books; |
||||
} |
||||
|
||||
/** |
||||
* 获取书籍详细信息 |
||||
* |
||||
* @param book |
||||
*/ |
||||
public Book getBookInfo(String html, Book book) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element img = doc.getElementsByClass("img-thumbnail").first(); |
||||
book.setImgUrl(img.attr("src")); |
||||
Element desc = doc.getElementById("all"); |
||||
if (desc == null) { |
||||
desc = doc.getElementById("shot"); |
||||
} |
||||
book.setDesc(desc.text().replace("[收起]", "")); |
||||
return book; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,172 @@ |
||||
package xyz.fycz.myreader.webapi.crawler.read; |
||||
|
||||
import android.text.Html; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
|
||||
import xyz.fycz.myreader.entity.SearchBookBean; |
||||
import xyz.fycz.myreader.enums.BookSource; |
||||
import xyz.fycz.myreader.greendao.entity.Book; |
||||
import xyz.fycz.myreader.greendao.entity.Chapter; |
||||
import xyz.fycz.myreader.model.mulvalmap.ConcurrentMultiValueMap; |
||||
import xyz.fycz.myreader.util.StringHelper; |
||||
import xyz.fycz.myreader.webapi.crawler.base.BookInfoCrawler; |
||||
import xyz.fycz.myreader.webapi.crawler.base.ReadCrawler; |
||||
|
||||
|
||||
public class LuoQiuReadCrawler implements ReadCrawler, BookInfoCrawler { |
||||
public static final String NAME_SPACE = "https://www.lqbook.com"; |
||||
public static final String NOVEL_SEARCH = "https://www.lqbook.com/modules/article/search.php?searchkey={key}&submit=%CB%D1%CB%F7"; |
||||
public static final String CHARSET = "GBK"; |
||||
public static final String SEARCH_CHARSET = "GBK"; |
||||
|
||||
@Override |
||||
public String getSearchLink() { |
||||
return NOVEL_SEARCH; |
||||
} |
||||
|
||||
@Override |
||||
public String getCharset() { |
||||
return CHARSET; |
||||
} |
||||
|
||||
@Override |
||||
public String getNameSpace() { |
||||
return NAME_SPACE; |
||||
} |
||||
|
||||
@Override |
||||
public Boolean isPost() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public String getSearchCharset() { |
||||
return SEARCH_CHARSET; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节正文 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public String getContentFormHtml(String html) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element divContent = doc.getElementById("content"); |
||||
String content = Html.fromHtml(divContent.html()).toString(); |
||||
char c = 160; |
||||
String spaec = "" + c; |
||||
content = content.replace(spaec, " ") |
||||
.replaceAll("^.*最新章节!", ""); |
||||
return content; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节列表 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public ArrayList<Chapter> getChaptersFromHtml(String html) { |
||||
ArrayList<Chapter> chapters = new ArrayList<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Element divList = doc.selectFirst(".zjlist"); |
||||
String lastTile = null; |
||||
int i = 0; |
||||
Elements elementsByTag = divList.getElementsByTag("a"); |
||||
for (int j = 0; j < elementsByTag.size(); j++) { |
||||
Element a = elementsByTag.get(j); |
||||
String title = a.text(); |
||||
if (!StringHelper.isEmpty(lastTile) && title.equals(lastTile)) { |
||||
continue; |
||||
} |
||||
Chapter chapter = new Chapter(); |
||||
chapter.setNumber(i++); |
||||
chapter.setTitle(title); |
||||
String url = readUrl + a.attr("href"); |
||||
chapter.setUrl(url); |
||||
chapters.add(chapter); |
||||
lastTile = title; |
||||
} |
||||
return chapters; |
||||
} |
||||
|
||||
/** |
||||
* 从搜索html中得到书列表 |
||||
* |
||||
* @param html |
||||
* @return <tr> |
||||
* <td class="odd" align="center"><a href="https://www.lqbook.com/book_91153/">文娱大主宰</a></td> |
||||
* <td class="even" align="center"><a href="https://www.lqbook.com/book_91153/52814257.html" target="_blank" title=" 很抱歉,是时候结束了。">很抱歉,是时候结束了。</a></td> |
||||
* <td class="odd" align="center">羽林都督</td> |
||||
* <td class="even" align="center">480K</td> |
||||
* <td class="odd" align="center">20-10-02</td> |
||||
* <td class="even" align="center">连载</td> |
||||
* </tr> |
||||
*/ |
||||
public ConcurrentMultiValueMap<SearchBookBean, Book> getBooksFromSearchHtml(String html) { |
||||
ConcurrentMultiValueMap<SearchBookBean, Book> books = new ConcurrentMultiValueMap<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String urlType = doc.select("meta[property=og:type]").attr("content"); |
||||
if ("novel".equals(urlType)) { |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Book book = new Book(); |
||||
book.setChapterUrl(readUrl); |
||||
getBookInfo(html, book); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} else { |
||||
Element div = doc.getElementById("main"); |
||||
Elements elements = div.getElementsByTag("tr"); |
||||
for (int i = 1; i < elements.size(); i++) { |
||||
Element element = elements.get(i); |
||||
Book book = new Book(); |
||||
Elements info = element.getElementsByTag("td"); |
||||
book.setName(info.get(0).text()); |
||||
book.setChapterUrl(info.get(0).selectFirst("a").attr("href")); |
||||
book.setAuthor(info.get(2).text()); |
||||
book.setNewestChapterTitle(info.get(1).text()); |
||||
book.setSource(BookSource.luoqiu.toString()); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} |
||||
} |
||||
return books; |
||||
} |
||||
|
||||
/** |
||||
* 获取书籍详细信息 |
||||
* |
||||
* @param book |
||||
*/ |
||||
public Book getBookInfo(String html, Book book) { |
||||
Document doc = Jsoup.parse(html); |
||||
book.setSource(BookSource.luoqiu.toString()); |
||||
|
||||
String name = doc.select("meta[property=og:title]").attr("content"); |
||||
book.setName(name); |
||||
String url = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
book.setChapterUrl(url); |
||||
String author = doc.select("meta[property=og:novel:author]").attr("content"); |
||||
book.setAuthor(author); |
||||
String newestChapter = doc.select("meta[property=og:novel:latest_chapter_name]").attr("content"); |
||||
book.setNewestChapterTitle(newestChapter); |
||||
|
||||
Element img = doc.getElementById("picbox"); |
||||
book.setImgUrl(img.getElementsByTag("img").get(0).attr("src")); |
||||
Element desc = doc.getElementById("intro"); |
||||
book.setDesc(Html.fromHtml(desc.html()).toString()); |
||||
//类型
|
||||
String type = doc.select("meta[property=og:novel:category]").attr("content"); |
||||
book.setType(type); |
||||
return book; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,146 @@ |
||||
package xyz.fycz.myreader.webapi.crawler.read; |
||||
|
||||
import android.text.Html; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
|
||||
import xyz.fycz.myreader.entity.SearchBookBean; |
||||
import xyz.fycz.myreader.enums.BookSource; |
||||
import xyz.fycz.myreader.greendao.entity.Book; |
||||
import xyz.fycz.myreader.greendao.entity.Chapter; |
||||
import xyz.fycz.myreader.model.mulvalmap.ConcurrentMultiValueMap; |
||||
import xyz.fycz.myreader.webapi.crawler.base.BookInfoCrawler; |
||||
import xyz.fycz.myreader.webapi.crawler.base.ReadCrawler; |
||||
|
||||
|
||||
public class XBiQuGeReadCrawler implements ReadCrawler { |
||||
public static final String NAME_SPACE = "https://www.xquge.com"; |
||||
public static final String NOVEL_SEARCH = "https://www.xquge.com/search?keyword={key}&sign="; |
||||
public static final String CHARSET = "UTF-8"; |
||||
public static final String SEARCH_CHARSET = "UTF-8"; |
||||
|
||||
@Override |
||||
public String getSearchLink() { |
||||
return NOVEL_SEARCH; |
||||
} |
||||
|
||||
@Override |
||||
public String getCharset() { |
||||
return CHARSET; |
||||
} |
||||
|
||||
@Override |
||||
public String getNameSpace() { |
||||
return NAME_SPACE; |
||||
} |
||||
|
||||
@Override |
||||
public Boolean isPost() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public String getSearchCharset() { |
||||
return SEARCH_CHARSET; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节正文 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public String getContentFormHtml(String html) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element divContent = doc.getElementById("content"); |
||||
String content = Html.fromHtml(divContent.html()).toString(); |
||||
char c = 160; |
||||
String spaec = "" + c; |
||||
content = content.replace(spaec, " ").replace("applyChapterSetting();", ""); |
||||
return content; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节列表 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public ArrayList<Chapter> getChaptersFromHtml(String html) { |
||||
ArrayList<Chapter> chapters = new ArrayList<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
Element divList = doc.getElementsByClass("catelog_list").last(); |
||||
Elements elementsByTag = divList.getElementsByTag("a"); |
||||
int i = 0; |
||||
for (int j = 0; j < elementsByTag.size(); j++) { |
||||
Element a = elementsByTag.get(j); |
||||
String title = a.text(); |
||||
String url = a.attr("href"); |
||||
Chapter chapter = new Chapter(); |
||||
chapter.setNumber(i++); |
||||
chapter.setTitle(title); |
||||
chapter.setUrl(url); |
||||
chapters.add(chapter); |
||||
} |
||||
return chapters; |
||||
} |
||||
|
||||
/** |
||||
* 从搜索html中得到书列表 |
||||
* |
||||
* @param html |
||||
* @return <li> |
||||
* <div class="rank_items"> |
||||
* <div class="items_l"><a href="https://www.xquge.com/book/5661.html" class="book_img"><img |
||||
* src="//static.xquge.com/Public/upload/book/201912/23/22/5715770830115e0060832a553.jpg?v=2020060502" |
||||
* alt="绝世元尊"></a></div> |
||||
* <div class="items_center"> |
||||
* <div class="rank_bkname"><a href="https://www.xquge.com/book/5661.html">异界大主宰</a></div> |
||||
* <div class="rank_bkinfo"><span class="author">范范的萧</span><span>玄幻奇幻</span><span>连载</span> |
||||
* </div> |
||||
* <div class="rank_bkbrief"> |
||||
* “怎么回事?” 冷尧一脸茫然、这是哪儿、他看着这个陌生的的环境,一时不知所错…… 啊!!! 冷尧看着自己有一双粗大而又充满爆发力的大手,每个细胞都充满爆发力。 这具身体不是自己的,难道我穿越了? …… …… …… </div> |
||||
* <div class="rank_bkother"> |
||||
* <div class="rank_bktime">2020-08-28 04:24</div> |
||||
* <div class="rank_newpage"><a href="https://www.xquge.com/book/5661/98087932.html">更新章节:新书《苍山牧云记》以发布</a> |
||||
* </div> |
||||
* </div> |
||||
* </div> |
||||
* <div class="items_rig"> |
||||
* <a href="https://www.xquge.com/book/5661.html" class="bk_brief_btn">书籍详情</a></div> |
||||
* </div> |
||||
* </li> |
||||
*/ |
||||
public ConcurrentMultiValueMap<SearchBookBean, Book> getBooksFromSearchHtml(String html) { |
||||
ConcurrentMultiValueMap<SearchBookBean, Book> books = new ConcurrentMultiValueMap<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
Elements divs = doc.getElementsByClass("rank_ullist"); |
||||
Element div = divs.get(0); |
||||
Elements elementsByTag = div.getElementsByTag("li"); |
||||
for (int i = 0; i < elementsByTag.size(); i++) { |
||||
Element element = elementsByTag.get(i); |
||||
Book book = new Book(); |
||||
Elements as = element.getElementsByTag("a"); |
||||
book.setName(as.get(1).text()); |
||||
book.setChapterUrl(as.get(1).attr("href")); |
||||
book.setNewestChapterTitle(as.get(2).text().replace("更新章节:", "")); |
||||
String img = as.first().selectFirst("img").attr("src"); |
||||
if (!img.contains("http")) img = "https:" + img; |
||||
book.setImgUrl(img); |
||||
Elements spans = element.selectFirst(".rank_bkinfo").select("span"); |
||||
book.setAuthor(spans.first().text()); |
||||
book.setType(spans.get(1).text()); |
||||
book.setDesc(element.selectFirst(".rank_bkbrief").text()); |
||||
book.setSource(BookSource.xbiquge.toString()); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} |
||||
return books; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,172 @@ |
||||
package xyz.fycz.myreader.webapi.crawler.read; |
||||
|
||||
import android.text.Html; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
|
||||
import xyz.fycz.myreader.entity.SearchBookBean; |
||||
import xyz.fycz.myreader.enums.BookSource; |
||||
import xyz.fycz.myreader.greendao.entity.Book; |
||||
import xyz.fycz.myreader.greendao.entity.Chapter; |
||||
import xyz.fycz.myreader.model.mulvalmap.ConcurrentMultiValueMap; |
||||
import xyz.fycz.myreader.util.StringHelper; |
||||
import xyz.fycz.myreader.webapi.crawler.base.BookInfoCrawler; |
||||
import xyz.fycz.myreader.webapi.crawler.base.ReadCrawler; |
||||
|
||||
|
||||
public class XS7ReadCrawler2 implements ReadCrawler, BookInfoCrawler { |
||||
public static final String NAME_SPACE = "https://www.xs7.co"; |
||||
public static final String NOVEL_SEARCH = "https://www.xs7.co/modules/article/search.php?searchkey={key}&submit=%CB%D1%CB%F7"; |
||||
public static final String CHARSET = "GBK"; |
||||
public static final String SEARCH_CHARSET = "GBK"; |
||||
|
||||
@Override |
||||
public String getSearchLink() { |
||||
return NOVEL_SEARCH; |
||||
} |
||||
|
||||
@Override |
||||
public String getCharset() { |
||||
return CHARSET; |
||||
} |
||||
|
||||
@Override |
||||
public String getNameSpace() { |
||||
return NAME_SPACE; |
||||
} |
||||
|
||||
@Override |
||||
public Boolean isPost() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public String getSearchCharset() { |
||||
return SEARCH_CHARSET; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节正文 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public String getContentFormHtml(String html) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element divContent = doc.getElementById("content"); |
||||
String content = Html.fromHtml(divContent.html()).toString(); |
||||
char c = 160; |
||||
String spaec = "" + c; |
||||
content = content.replace(spaec, " ") |
||||
.replaceAll("^.*最新章节!", ""); |
||||
return content; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节列表 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public ArrayList<Chapter> getChaptersFromHtml(String html) { |
||||
ArrayList<Chapter> chapters = new ArrayList<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Element divList = doc.selectFirst(".zjlist"); |
||||
String lastTile = null; |
||||
int i = 0; |
||||
Elements elementsByTag = divList.getElementsByTag("a"); |
||||
for (int j = 0; j < elementsByTag.size(); j++) { |
||||
Element a = elementsByTag.get(j); |
||||
String title = a.text(); |
||||
if (!StringHelper.isEmpty(lastTile) && title.equals(lastTile)) { |
||||
continue; |
||||
} |
||||
Chapter chapter = new Chapter(); |
||||
chapter.setNumber(i++); |
||||
chapter.setTitle(title); |
||||
String url = readUrl + a.attr("href"); |
||||
chapter.setUrl(url); |
||||
chapters.add(chapter); |
||||
lastTile = title; |
||||
} |
||||
return chapters; |
||||
} |
||||
|
||||
/** |
||||
* 从搜索html中得到书列表 |
||||
* |
||||
* @param html |
||||
* @return <tr> |
||||
* <td class="odd" align="center"><a href="https://www.lqbook.com/book_91153/">文娱大主宰</a></td> |
||||
* <td class="even" align="center"><a href="https://www.lqbook.com/book_91153/52814257.html" target="_blank" title=" 很抱歉,是时候结束了。">很抱歉,是时候结束了。</a></td> |
||||
* <td class="odd" align="center">羽林都督</td> |
||||
* <td class="even" align="center">480K</td> |
||||
* <td class="odd" align="center">20-10-02</td> |
||||
* <td class="even" align="center">连载</td> |
||||
* </tr> |
||||
*/ |
||||
public ConcurrentMultiValueMap<SearchBookBean, Book> getBooksFromSearchHtml(String html) { |
||||
ConcurrentMultiValueMap<SearchBookBean, Book> books = new ConcurrentMultiValueMap<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String urlType = doc.select("meta[property=og:type]").attr("content"); |
||||
if ("novel".equals(urlType)) { |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Book book = new Book(); |
||||
book.setChapterUrl(readUrl); |
||||
getBookInfo(html, book); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} else { |
||||
Element div = doc.getElementById("main"); |
||||
Elements elements = div.getElementsByTag("tr"); |
||||
for (int i = 1; i < elements.size(); i++) { |
||||
Element element = elements.get(i); |
||||
Book book = new Book(); |
||||
Elements info = element.getElementsByTag("td"); |
||||
book.setName(info.get(0).text()); |
||||
book.setChapterUrl(info.get(0).selectFirst("a").attr("href")); |
||||
book.setAuthor(info.get(2).text()); |
||||
book.setNewestChapterTitle(info.get(1).text()); |
||||
book.setSource(BookSource.xs7.toString()); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} |
||||
} |
||||
return books; |
||||
} |
||||
|
||||
/** |
||||
* 获取书籍详细信息 |
||||
* |
||||
* @param book |
||||
*/ |
||||
public Book getBookInfo(String html, Book book) { |
||||
Document doc = Jsoup.parse(html); |
||||
book.setSource(BookSource.xs7.toString()); |
||||
|
||||
String name = doc.select("meta[property=og:title]").attr("content"); |
||||
book.setName(name); |
||||
String url = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
book.setChapterUrl(url); |
||||
String author = doc.select("meta[property=og:novel:author]").attr("content"); |
||||
book.setAuthor(author); |
||||
String newestChapter = doc.select("meta[property=og:novel:latest_chapter_name]").attr("content"); |
||||
book.setNewestChapterTitle(newestChapter); |
||||
|
||||
Element img = doc.getElementById("picbox"); |
||||
book.setImgUrl(img.getElementsByTag("img").get(0).attr("src")); |
||||
Element desc = doc.getElementById("intro"); |
||||
book.setDesc(Html.fromHtml(desc.html()).toString()); |
||||
//类型
|
||||
String type = doc.select("meta[property=og:novel:category]").attr("content"); |
||||
book.setType(type); |
||||
return book; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,170 @@ |
||||
package xyz.fycz.myreader.webapi.crawler.read; |
||||
|
||||
import android.text.Html; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.List; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
import xyz.fycz.myreader.entity.SearchBookBean; |
||||
import xyz.fycz.myreader.entity.bookstore.BookType; |
||||
import xyz.fycz.myreader.enums.BookSource; |
||||
import xyz.fycz.myreader.greendao.entity.Book; |
||||
import xyz.fycz.myreader.greendao.entity.Chapter; |
||||
import xyz.fycz.myreader.model.mulvalmap.ConcurrentMultiValueMap; |
||||
import xyz.fycz.myreader.util.StringHelper; |
||||
import xyz.fycz.myreader.webapi.crawler.base.BookInfoCrawler; |
||||
import xyz.fycz.myreader.webapi.crawler.base.ReadCrawler; |
||||
|
||||
|
||||
public class ZW37ReadCrawler implements ReadCrawler, BookInfoCrawler { |
||||
private static final String NAME_SPACE = "https://www.37zww.net"; |
||||
private static final String NOVEL_SEARCH = "https://www.37zww.net/modules/article/search.php?searchtype=articlename&searchkey={key}"; |
||||
private static final String CHARSET = "GBK"; |
||||
public static final String SEARCH_CHARSET = "GBK"; |
||||
|
||||
@Override |
||||
public String getSearchLink() { |
||||
return NOVEL_SEARCH; |
||||
} |
||||
|
||||
@Override |
||||
public String getNameSpace() { |
||||
return NAME_SPACE; |
||||
} |
||||
|
||||
@Override |
||||
public Boolean isPost() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public String getCharset() { |
||||
return CHARSET; |
||||
} |
||||
|
||||
@Override |
||||
public String getSearchCharset() { |
||||
return SEARCH_CHARSET; |
||||
} |
||||
|
||||
|
||||
@Override |
||||
public String getContentFormHtml(String html) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element divContent = doc.getElementById("content"); |
||||
String content = Html.fromHtml(divContent.html()).toString(); |
||||
char c = 160; |
||||
String spaec = "" + c; |
||||
content = content.replace(spaec, " "); |
||||
return content; |
||||
} |
||||
|
||||
@Override |
||||
public ArrayList<Chapter> getChaptersFromHtml(String html) { |
||||
ArrayList<Chapter> chapters = new ArrayList<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Element divList = doc.getElementById("list"); |
||||
String lastTile = null; |
||||
int i = 0; |
||||
Elements elementsByTag = divList.getElementsByTag("a"); |
||||
for (int j = 0; j < elementsByTag.size(); j++) { |
||||
Element a = elementsByTag.get(j); |
||||
String title = a.text(); |
||||
if (!StringHelper.isEmpty(lastTile) && title.equals(lastTile)) { |
||||
continue; |
||||
} |
||||
Chapter chapter = new Chapter(); |
||||
chapter.setNumber(i++); |
||||
chapter.setTitle(title); |
||||
String url = readUrl + a.attr("href"); |
||||
chapter.setUrl(url); |
||||
chapters.add(chapter); |
||||
lastTile = title; |
||||
} |
||||
return chapters; |
||||
} |
||||
|
||||
/** |
||||
* 从搜索html中得到书列表 |
||||
* |
||||
* @param html |
||||
* @return <tr> |
||||
* <td class="odd"><a href="https://www.37zww.net/1/1812/">斗罗大陆IV终极斗罗</a></td> |
||||
* <td class="even"><a href="https://www.37zww.net/1/1812/index.html" target="_blank"> 第一千五百八十一章 突破,真神级!</a></td> |
||||
* <td class="odd">唐家三少</td> |
||||
* <td class="even">8427K</td> |
||||
* <td class="odd" align="center">21-02-06</td> |
||||
* <td class="even" align="center">连载</td> |
||||
* </tr> |
||||
*/ |
||||
public ConcurrentMultiValueMap<SearchBookBean, Book> getBooksFromSearchHtml(String html) { |
||||
final ConcurrentMultiValueMap<SearchBookBean, Book> books = new ConcurrentMultiValueMap<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String urlType = doc.select("meta[property=og:type]").attr("content"); |
||||
if ("novel".equals(urlType)) { |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Book book = new Book(); |
||||
book.setChapterUrl(readUrl); |
||||
getBookInfo(html, book); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} else { |
||||
Element div = doc.getElementById("main"); |
||||
Elements elements = div.getElementsByTag("tr"); |
||||
for (int i = 1; i < elements.size(); i++) { |
||||
Element element = elements.get(i); |
||||
Book book = new Book(); |
||||
Elements info = element.getElementsByTag("td"); |
||||
book.setName(info.get(0).text()); |
||||
book.setChapterUrl(info.get(0).selectFirst("a").attr("href")); |
||||
book.setAuthor(info.get(2).text()); |
||||
book.setNewestChapterTitle(info.get(1).text()); |
||||
book.setSource(BookSource.zw37.toString()); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} |
||||
} |
||||
return books; |
||||
} |
||||
|
||||
/** |
||||
* 获取小说详细信息 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public Book getBookInfo(String html, Book book) { |
||||
Document doc = Jsoup.parse(html); |
||||
book.setSource(BookSource.zw37.toString()); |
||||
|
||||
String name = doc.select("meta[property=og:title]").attr("content"); |
||||
book.setName(name); |
||||
String url = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
book.setChapterUrl(url); |
||||
String author = doc.select("meta[property=og:novel:author]").attr("content"); |
||||
book.setAuthor(author); |
||||
String newestChapter = doc.select("meta[property=og:novel:latest_chapter_name]").attr("content"); |
||||
book.setNewestChapterTitle(newestChapter); |
||||
|
||||
String img = doc.select("meta[property=og:image]").attr("content"); |
||||
book.setImgUrl(img); |
||||
|
||||
String desc = doc.select("meta[property=og:description]").attr("content"); |
||||
book.setDesc(desc); |
||||
//类型
|
||||
String type = doc.select("meta[property=og:novel:category]").attr("content"); |
||||
book.setType(type); |
||||
return book; |
||||
|
||||
} |
||||
|
||||
|
||||
} |
@ -0,0 +1,161 @@ |
||||
package xyz.fycz.myreader.webapi.crawler.read; |
||||
|
||||
import android.text.Html; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.LinkedHashMap; |
||||
import java.util.List; |
||||
|
||||
import xyz.fycz.myreader.entity.SearchBookBean; |
||||
import xyz.fycz.myreader.entity.bookstore.BookType; |
||||
import xyz.fycz.myreader.enums.BookSource; |
||||
import xyz.fycz.myreader.greendao.entity.Book; |
||||
import xyz.fycz.myreader.greendao.entity.Chapter; |
||||
import xyz.fycz.myreader.model.mulvalmap.ConcurrentMultiValueMap; |
||||
import xyz.fycz.myreader.webapi.crawler.base.FindCrawler; |
||||
import xyz.fycz.myreader.webapi.crawler.base.ReadCrawler; |
||||
|
||||
|
||||
public class ZaiShuYuanReadCrawler implements ReadCrawler { |
||||
public static final String NAME_SPACE = "https://www.zhaishuyuan.com"; |
||||
public static final String NOVEL_SEARCH = "https://www.zhaishuyuan.com/search/,key={key}"; |
||||
public static final String CHARSET = "gbk"; |
||||
public static final String SEARCH_CHARSET = "gbk"; |
||||
|
||||
@Override |
||||
public String getSearchLink() { |
||||
return NOVEL_SEARCH; |
||||
} |
||||
|
||||
@Override |
||||
public String getCharset() { |
||||
return CHARSET; |
||||
} |
||||
|
||||
@Override |
||||
public String getNameSpace() { |
||||
return NAME_SPACE; |
||||
} |
||||
|
||||
@Override |
||||
public Boolean isPost() { |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public String getSearchCharset() { |
||||
return SEARCH_CHARSET; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节正文 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public String getContentFormHtml(String html) { |
||||
Document doc = Jsoup.parse(html); |
||||
Element divContent = doc.getElementById("content"); |
||||
String content = Html.fromHtml(divContent.html()).toString(); |
||||
char c = 160; |
||||
String spaec = "" + c; |
||||
content = content.replace(spaec, " ").replaceAll("您可以在.*最新章节!|\\\\", ""); |
||||
return content; |
||||
} |
||||
|
||||
/** |
||||
* 从html中获取章节列表 |
||||
* |
||||
* @param html |
||||
* @return |
||||
*/ |
||||
public ArrayList<Chapter> getChaptersFromHtml(String html) { |
||||
ArrayList<Chapter> chapters = new ArrayList<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
Element divList = doc.getElementById("readerlist"); |
||||
Elements elementsByTag = divList.getElementsByTag("a"); |
||||
int i = 0; |
||||
for (int j = 0; j < elementsByTag.size(); j++) { |
||||
Element a = elementsByTag.get(j); |
||||
String title = a.text(); |
||||
String url = a.attr("href"); |
||||
Chapter chapter = new Chapter(); |
||||
chapter.setNumber(i++); |
||||
chapter.setTitle(title); |
||||
chapter.setUrl(NAME_SPACE + url); |
||||
chapters.add(chapter); |
||||
} |
||||
return chapters; |
||||
} |
||||
|
||||
/** |
||||
* 从搜索html中得到书列表 |
||||
* |
||||
* @param html |
||||
* @return <dl> |
||||
* <dt><a href="/book/3"><img class="lazyload" _src="https://img.zhaishuyuan.com/bookpic/s3.jpg" alt="<font color=#F30>大主宰</font>" height="155" width="120"></a></dt> |
||||
* <dd><h3><a href="/read/3"><font color=#F30>大主宰</font></a><span class="alias">别名:<font color=#F30>大主宰</font></span></h3></dd> |
||||
* <dd class="book_other">作者:<span>天蚕土豆</span>状态:<span>已完结</span>小类:<span>异世大陆</span>字数:<span>4944063</span>标签:<a href="/search/?key=%C8%C8%D1%AA" target="_blank" rel="nofollow">热血</a> <a href="/search/?key=%CB%AC%CE%C4" target="_blank" rel="nofollow">爽文</a></dd> |
||||
* <dd class="book_des">大千世界,位面交汇,万族林立,群雄荟萃,一位位来自下位面的天之至尊,在这无尽世界,演绎着令人向往的传奇,追求着那主宰之路。无尽火域,炎帝执掌,万火焚苍穹。武境之内,武祖之威,震慑乾坤。西天之殿,百战之皇,战威无可敌。北荒之丘,万墓之地,不死…</dd> |
||||
* <dd class="book_other">最新章节:<a href="/chapter/3/8855386">第一千五百五十一章 邪神陨落(大结局)</a> 更新时间:<span>2020-2-26 13:26:49</span></dd> |
||||
* </dl> |
||||
*/ |
||||
public ConcurrentMultiValueMap<SearchBookBean, Book> getBooksFromSearchHtml(String html) { |
||||
ConcurrentMultiValueMap<SearchBookBean, Book> books = new ConcurrentMultiValueMap<>(); |
||||
Document doc = Jsoup.parse(html); |
||||
String urlType = doc.select("meta[property=og:type]").attr("content"); |
||||
if ("novel".equals(urlType)) { |
||||
String readUrl = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
Book book = new Book(); |
||||
book.setChapterUrl(readUrl); |
||||
getBookInfo(doc, book); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} else { |
||||
Element div = doc.getElementById("sitembox"); |
||||
Elements dls = div.getElementsByTag("dl"); |
||||
for (Element dl : dls) { |
||||
Elements as = dl.getElementsByTag("a"); |
||||
Book book = new Book(); |
||||
book.setName(as.get(1).text()); |
||||
book.setImgUrl(as.first().getElementsByTag("img").attr("_src")); |
||||
book.setNewestChapterTitle(as.last().text()); |
||||
Elements spans = dl.selectFirst(".book_other").select("span"); |
||||
book.setAuthor(spans.get(0).text()); |
||||
book.setType(spans.get(2).text()); |
||||
book.setDesc(dl.getElementsByClass("book_des").first().text()); |
||||
book.setChapterUrl(NAME_SPACE + as.get(1).attr("href").replace("novel", "read").replace(".html", "/")); |
||||
book.setSource(BookSource.zaishuyuan.toString()); |
||||
SearchBookBean sbb = new SearchBookBean(book.getName(), book.getAuthor()); |
||||
books.add(sbb, book); |
||||
} |
||||
} |
||||
return books; |
||||
} |
||||
|
||||
private void getBookInfo(Document doc, Book book) { |
||||
book.setSource(BookSource.zaishuyuan.toString()); |
||||
|
||||
String name = doc.select("meta[property=og:title]").attr("content"); |
||||
book.setName(name); |
||||
String url = doc.select("meta[property=og:novel:read_url]").attr("content"); |
||||
book.setChapterUrl(url); |
||||
String author = doc.select("meta[property=og:novel:author]").attr("content"); |
||||
book.setAuthor(author); |
||||
String newestChapter = doc.select("meta[property=og:novel:latest_chapter_name]").attr("content"); |
||||
book.setNewestChapterTitle(newestChapter); |
||||
|
||||
String img = doc.select("meta[property=og:image]").attr("content"); |
||||
book.setImgUrl(img); |
||||
Element desc = doc.getElementById("bookintro"); |
||||
book.setDesc(Html.fromHtml(desc.html()).toString()); |
||||
//类型
|
||||
String type = doc.select("meta[property=og:novel:category]").attr("content"); |
||||
book.setType(type); |
||||
} |
||||
} |
@ -1,2 +1,2 @@ |
||||
#Wed Feb 03 13:23:18 CST 2021 |
||||
VERSION_CODE=181 |
||||
#Sat Feb 06 19:10:55 CST 2021 |
||||
VERSION_CODE=182 |
||||
|
Loading…
Reference in new issue