1、使用 tess4j 识别验证码；
2、使用 jsoup 模拟浏览器发送登录请求。
package com.test.tess;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.SerializableString;
import com.jst.tess.constants.Constants;
import com.jst.tess.util.FileUtils;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.struts2.ServletActionContext;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import sun.net.www.http.HttpClient;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class test4 extends HttpServlet{
//登录链接
private static String baseUrl = "http://192.168.0.20:8080/test/login.jsp";
//验证码保存路径
private static String verCodePath = "D:\img\codeimg";
//验证码请求地址
private static String codeimgurl = "http://192.168.0.20:8080/test/login/getCode.do";
//登录地址
private static String loginUrl = "http://192.168.0.20:8080/test/login/login.do";
//注销地址
private static String logoutUrl = "http://192.168.0.20:8080/test/login/logout.do";
//测试数据列表路径
private static String listUrl = "http://192.168.0.20:8080/test/testList/getList.do";
//测试数据详情路径
private static String getoneUrl = "http://192.168.0.20:8080/test/testView/view.do";
//用户名
private static String userName = "test";
//密码
private static String passWord = "96af831e99ef1788b04c84d0a7782e855d700d4d6e7938722cfbcbaa";
//判断是否进入首页标识,根据id属性获取
private static String ifIndexPage = "index-menu";
//全局session信息
private static String baseSessions ="";
/**
 * Command-line entry point.
 *
 * <p>The body currently contains no executable statements; the three calls below are
 * commented out, so running this method does nothing.
 *
 * @param args command-line arguments (not read by this method)
 * @throws IOException declared by the signature; nothing in the current body throws it
 * @throws TesseractException declared by the signature; nothing in the current body throws it
 */
public static void main(String[] args) throws IOException, TesseractException {
// Disabled manual test calls.
// NOTE(review): the login method visible in this file is an instance method taking four
// String arguments, so a bare login() call would not compile as written — confirm before re-enabling.
// login();
// getList();
// getOne("9");
}
public Map login(String url, String user, String pwd, String tess4jpath) {
System.out.println("begin:");
Map map = null;
Connection.Response LoginResponse = null;
try {
LoginResponse = Jsoup.connect(url).method(Connection.Method.GET).execute();
map = LoginResponse.cookies();//获取会话,登录后需要保持会话
String sessName = "JSESSIONID";
String sessions = (String) map.get("JSESSIONID");
System.out.println("sessions="+sessions);
// System.out.println("map1:"+map.toString());
// document document = LoginResponse.parse();
// Element element = document.getElementById("varifyCodeImg");
// String codeimgurl2 = element.attr("id");
// System.out.println("222222:"+codeimgurl2);
String codeimgurl = "http://192.168.0.37:8080/test/login/getCode.do";
String connectPath = "http://192.168.0.37:8080/test/login/login.do";
String codeimgpath = tess4jpath+"\codeimg";
//下载验证码图片
byte[] codeimgdata = Jsoup.connect(codeimgurl).header("cookie",sessName + "=" + sessions).ignoreContentType(true).execute().bodyAsBytes();
FileUtils.saveImg(codeimgdata, codeimgpath, "codeimg.jpg");
//识别样本输出地址
String ocrResult = codeimgpath+"\codetmpimgtmp.jpg";
String OriginalImg = codeimgpath+"\codeimg.jpg";
//去噪点
FileUtils.removeBackground(OriginalImg, ocrResult);
ITesseract instance =new Tesseract();
instance.setDatapath(tess4jpath);
//获得Tesseract的文字库
URL url2 = ClassLoader.getSystemResource("tessdata");
String tesspath = url2.getPath().substring(1);
instance.setDatapath(tesspath);//进行读取,默认是英文,如果要使用中文包,加上instance.setLanguage("chi_sim");
File imgDir =new File(OriginalImg);
String code = instance.doOCR(imgDir);//识别验证码
code = replaceBlank(code);
System.out.println("codeLength:"+code.length()+",code:"+code);
Map datas = new HashMap();
datas.put("username", user);
datas.put("loginkey", pwd);
datas.put("verifycode",code);
// Connection.Response connection = Jsoup.connect(connectPath).header("cookie",sessName + "=" + sessions).data(datas).execute();
// connection.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*
/**
 * Makes sure something exists at the given path, creating the directory
 * (and any missing parent directories) when nothing is there yet.
 *
 * <p>If the path already exists, whether as a file or a directory, it is left untouched.
 * The result of the creation attempt is not reported to the caller.
 *
 * @param path file-system path of the directory to create
 */
public static void creatDir(String path) {
    final File target = new File(path);
    if (target.exists()) {
        // Already present: nothing to create.
        return;
    }
    target.mkdirs();
}
public static void removeBackground(String imgUrl, String resUrl){
//定义一个临界阈值
int threshold = 400;
try{
BufferedImage img = ImageIO.read(new File(imgUrl));
int width = img.getWidth();
int height = img.getHeight();
for(int i = 1;i < width;i++){
for (int x = 0; x < width; x++){
for (int y = 0; y < height; y++){
Color color = new Color(img.getRGB(x, y));
//System.out.println("red:"+color.getRed()+" | green:"+color.getGreen()+" | blue:"+color.getBlue());
int num = color.getRed()+color.getGreen()+color.getBlue();
if(num >= threshold){
img.setRGB(x, y, Color.WHITE.getRGB());
}
}
}
}
for(int i = 1;i
部分代码参考自:Java识别验证码和图像处理_梁康h的博客-CSDN博客
Java 爬虫之识别图片验证码后登录_JavaBigADog的博客-CSDN博客



