<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>
package com.risen.aaa;
import cn.hutool.core.io.FileUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpUtil;
import java.io.File;
import java.io.IOException;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Xiaoshuo {
static String url = "https://www.xbiquge.la/2/2029/";
public static void main(String[] args) throws IOException, InterruptedException {
HttpRequest get = HttpUtil.createGet(url);
String body = get.execute().body();
document parse = Jsoup.parse(body);
Element wrapper = parse.getElementById("wrapper");
Elements box_con = wrapper.getElementsByClass("box_con");
Element maininfo = box_con.get(0).getElementById("maininfo");
Element info = maininfo.getElementById("info");
Elements h1 = info.select("h1");
String title = h1.text();
File f = new File("G:/" + title + ".txt");
FileUtil.appendString(title, f, "UTF-8");
Elements list = box_con.get(1).getElementById("list").select("dl").get(0).select("dd");
for (int i = 0; i < list.size(); i++) {
Elements a = list.get(i).select("a");
String href = a.attr("href");
String s = href.substring(href.lastIndexOf("/") + 1);
document document;
try {
document = Jsoup.connect(url + s).get();
} catch (Exception e) {
i--;
continue;
}
document parse1 = Jsoup.parse(document.html().replace("
", "$$$$$"));
Element box = parse1.getElementById("wrapper").getElementsByClass("content_read").get(0).getElementsByClass("box_con").get(0);
Element bookname = box.getElementsByClass("bookname").get(0);
String name = bookname.select("h1").get(0).text();
Element content = box.getElementById("content");
String replace = content.text().replace("$$$$$", "n");
String s1 = replace.replaceAll("((rn)|n)[\st ]*(\1)+", "$1").replaceAll("^((rn)|n)", "")
.replaceAll(" 亲,点击进去,给个好评呗,分数越高更新越快,据说给新笔趣阁打满分的最后都找到了漂亮的老婆哦!", "")
.replaceAll("手机站全新改版升级地址:https://m.xbiquge.la,数据和书签与电脑站同步,无广告清新阅读!", "");
FileUtil.appendString(name + "n", f, "UTF-8");
System.out.println(name);
FileUtil.appendString(s1, f, "UTF-8");
if (i % 4 == 0) {
Thread.sleep(1000);
}
}
}
}
解决了Jsoup忽略`<br>`标签导致下载的文本不能换行
下载速度很慢,只是为了看一下Jsoup怎么玩
我试了多线程速度也一样,应该是主站有ip限流机制,基本上一秒一章
写的简陋的一匹,莫笑
换小说下就直接手动把url替换一下就行了,懒得弄那些花里胡哨的东西
今天我生日^_^


![java用Jsoup爬[新笔趣阁]小说 java用Jsoup爬[新笔趣阁]小说](http://www.mshxw.com/aiimages/31/351260.png)
