// 获取粉丝名 (Fetch follower usernames)
package com.web;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.JsonPathSelector;
public class UsernameSpider implements PageProcessor {
private final String TAG = UsernameSpider.class.getSimpleName();
List usernames = new ArrayList<>();
private Site site = Site
.me()
.setDomain("blog.csdn.net")
.setSleepTime(1000)
// 便于测试,休眠较长时间。
.setUserAgent(
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
@Override
public void process(Page page) {
String rawText = page.getRawText();
String data = new JsonPathSelector("$.data").select(rawText);
String replace = data.replace("{list=", "{"list":");
// String selectStr = new JsonPathSelector("$.list").select(replace);
List selectList = new JsonPathSelector("$.list").selectList(replace);
for (String s : selectList) {
String username = getUsername(s);
usernames.add(username);
}
}
public String getUsername(String line) {
String s = ""username": "m0_70555190"";
String pattern = "("username":".+?")";
// 创建 Pattern 对象
Pattern r = Pattern.compile(pattern);
// 现在创建 matcher 对象
Matcher m = r.matcher(line);
if (m.find()) {
String username = m.group(0);
String replace = username.replace(""username":"", "");
String result = replace.replace(""", "");
return result;
}
return "";
}
@Override
public Site getSite() {
return site;
}
}
// 测试类 (Test class)
package com.web;
import java.util.List;
import us.codecraft.webmagic.Spider;
/**
 * Manual driver that runs {@link UsernameSpider} against one fans-list URL and prints
 * the usernames it collected.
 */
public class T2 {
    /**
     * Builds the fans-list URL for a fixed blog username, runs the spider on it with a
     * single thread, and prints the resulting username list to standard output.
     *
     * The method is named {@code main2}, so the JVM does not treat it as an entry
     * point; it has to be called explicitly.
     *
     * @param args unused
     */
    public static void main2(String[] args) {
        String username = "weixin_45781381";
        String fanUrl = "https://blog.csdn.net/community/home-api/v2/get-fans-list"
                + "?page=1&pageSize=100&id=0&noMore=true&blogUsername=" + username;
        // The spider class defined for this purpose is UsernameSpider; it exposes the
        // collected names through its package-visible `usernames` field.
        UsernameSpider usernameSpider = new UsernameSpider();
        Spider.create(usernameSpider).addUrl(fanUrl).thread(1).run();
        List<String> usernames = usernameSpider.usernames;
        System.out.println(usernames);
    }
}



