1.Maven包需要
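<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.3</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.3.5</version>
</dependency>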
2.具体代码如下所示
package com.example.demo;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.util.CollectionUtils;
//@Slf4j
/**
 * Command-line utility that scrapes article links from a blog listing page and then
 * requests every article again and again on a fixed interval using a small thread pool.
 */
public class BlogVisits {

    /** Listing page that is scraped for article links. */
    private static final String BLOG_LIST_URL = "https://blog.csdn.net/Lilayzzz?type=blog";

    /** Only links containing this prefix are kept as article pages. */
    private static final String ARTICLE_URL_PREFIX = "https://blog.csdn.net/Lilayzzz/article/details";

    /** User-Agent header sent when fetching the listing page. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; BIDUBrowser 2.x)";

    /** Number of worker threads issuing requests concurrently. */
    private static final int WORKER_THREADS = 5;

    /** Pause before each round of requests; without it the loop would spin and saturate the CPU. */
    private static final long ROUND_INTERVAL_MILLIS = 5000L;

    /** HTTP status code treated as a successful visit. */
    private static final int HTTP_OK = 200;

    /**
     * Collects the article URLs once, then loops forever: sleep, submit one GET per article.
     *
     * @param args unused
     * @throws Exception if the listing page cannot be fetched or the main thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        // Resolve the article URLs up front.
        List<String> blogDetailUrl = getBlogDetailUrl(BLOG_LIST_URL);
        // Nothing to visit.
        if (CollectionUtils.isEmpty(blogDetailUrl)) {
            return;
        }
        ExecutorService fixedThreadPool = Executors.newFixedThreadPool(WORKER_THREADS);
        try {
            // The braces matter: both the sleep and the submissions belong to every iteration.
            while (true) {
                Thread.sleep(ROUND_INTERVAL_MILLIS);
                for (String url : blogDetailUrl) {
                    fixedThreadPool.execute(() -> visit(url));
                }
            }
        } finally {
            // Reached only when the loop is aborted (e.g. interruption); stop the workers too.
            fixedThreadPool.shutdownNow();
        }
    }

    /** Performs one visit and reports failures without letting them kill the worker thread. */
    private static void visit(String url) {
        try {
            if (!httpGet(url)) {
                System.err.println("Non-200 response for " + url);
            }
        } catch (Exception e) {
            System.err.println("Request failed for " + url);
            e.printStackTrace();
        }
    }

    /**
     * Issues a single HTTP GET against {@code url}.
     *
     * @param url address to request
     * @return {@code true} if the response status code is 200, {@code false} otherwise
     * @throws Exception if the request cannot be executed or the resources cannot be closed
     */
    public static boolean httpGet(String url) throws Exception {
        HttpGet request = new HttpGet(url);
        // try-with-resources closes the response and the client even when execute() throws.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(request)) {
            return response.getStatusLine().getStatusCode() == HTTP_OK;
        }
    }

    /**
     * Downloads the page at {@code url} and extracts the distinct absolute links that contain
     * the article URL prefix.
     *
     * @param url listing page to scrape
     * @return distinct matching article URLs in document order; empty if none match
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static List<String> getBlogDetailUrl(String url) throws Exception {
        // NOTE(review): the page is requested with POST and a "query" form field, as before;
        // confirm whether a plain GET is what the site actually expects.
        Document doc = Jsoup.connect(url)
                .data("query", "Java")
                .userAgent(USER_AGENT)
                .post();
        // Every anchor nested in a div. The previous attr("class", ...) call did not filter by
        // class (it is a setter), so the effective selection was already "all anchors in divs";
        // the prefix filter below is what narrows the result to article links.
        Elements links = doc.select("div a");
        return links.stream()
                .map(link -> link.attr("abs:href"))
                .filter(href -> href.contains(ARTICLE_URL_PREFIX))
                .distinct()
                .collect(Collectors.toList());
    }
}



