下面的工具类是解决问题的关键,以爬虫爬取某网站数据为例
工具类:
package nuc.zy.edu.utils;
import javax.net.ssl.*;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
public class SslUtils {

    /**
     * Installs a default SSLSocketFactory that accepts every certificate chain.
     *
     * <p>SECURITY WARNING: this disables all certificate validation for every
     * subsequent HttpsURLConnection in the JVM and makes connections vulnerable
     * to man-in-the-middle attacks. Use only for throwaway crawling/testing,
     * never in production code.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    public static void trustAllHttpsCertificates() throws Exception {
        TrustManager[] trustAllCerts = new TrustManager[1];
        trustAllCerts[0] = new miTM();
        // Use "TLS" rather than the obsolete "SSL" protocol name; SSLv3 is
        // insecure and disabled on modern JVMs.
        SSLContext sc = SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that accepts every client and server certificate. */
    static class miTM implements TrustManager, X509TrustManager {
        public X509Certificate[] getAcceptedIssuers() {
            // The X509TrustManager contract forbids returning null here;
            // an empty array means "no trusted issuers advertised".
            return new X509Certificate[0];
        }
        // Legacy pre-JSSE method names, kept for compatibility with old callers.
        public boolean isServerTrusted(X509Certificate[] certs) {
            return true;
        }
        public boolean isClientTrusted(X509Certificate[] certs) {
            return true;
        }
        public void checkServerTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: trust everything.
        }
        public void checkClientTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: trust everything.
        }
    }

    /**
     * Disables both certificate validation and hostname verification for all
     * default HttpsURLConnection instances in this JVM.
     *
     * @throws Exception if the underlying SSL context cannot be initialised
     */
    public static void ignoreSsl() throws Exception {
        // Hostname verifier that accepts any host name.
        HostnameVerifier hv = new HostnameVerifier() {
            public boolean verify(String urlHostName, SSLSession session) {
                return true;
            }
        };
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}
案例
此处以使用 jsoup 爬取某个以 https 开头的网站(即启用了 SSL 证书的网站)为例。
采用ES技术将爬取的数据存放在ES中
在爬取之前先调用工具类 SslUtils.ignoreSsl(),忽略 SSL 证书校验。
代码(pom.xml,XML 标签在提取时丢失,以下按可见的坐标与版本号重建):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.5.RELEASE</version>
        <relativePath/>
    </parent>
    <groupId>nuc.zy.edu</groupId>
    <artifactId>es-jd</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>es-jd</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
        <elasticsearch.version>7.8.0</elasticsearch.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.62</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
package nuc.zy.edu.utils;
import nuc.zy.edu.entity.Goods;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
public class HtmlParseUtil {

    /**
     * Crawls the JD search-result page for the given keywords and parses each
     * product item into a {@link Goods} object.
     *
     * <p>Requires network access; content rendered by Ajax after page load is
     * NOT visible to jsoup and therefore not parsed.
     *
     * @param keywords search terms to put in the JD search URL
     * @return list of parsed goods; empty if the result container is missing
     * @throws Exception on network failure or SSL-context initialisation error
     */
    public static List<Goods> parseJD(String keywords) throws Exception {
        // Disable certificate validation so the https page can be fetched
        // without a configured trust store (crawler/demo use only).
        SslUtils.ignoreSsl();
        // URL-encode the keywords so spaces and non-ASCII characters survive.
        String url = "https://search.jd.com/Search?keyword="
                + URLEncoder.encode(keywords, "UTF-8");
        // Parse the page (30s timeout). Document mirrors the browser's DOM;
        // the jsoup class is 'Document' (capital D).
        Document document = Jsoup.parse(new URL(url), 30000);
        // All the usual DOM-style lookups are available on Document.
        Element element = document.getElementById("J_goodsList");
        List<Goods> goods = new ArrayList<>();
        if (element == null) {
            // Page layout changed or the request was blocked — nothing to parse.
            return goods;
        }
        Elements elements = element.getElementsByTag("li");
        for (Element e : elements) {
            // JD lazy-loads images: the real URL is in the data-lazy-img attr.
            String img = e.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e.getElementsByClass("p-price").eq(0).text();
            String name = e.getElementsByClass("p-name").eq(0).text();
            Goods good = new Goods();
            good.setImg(img);
            good.setName(name);
            good.setPrice(price);
            goods.add(good);
        }
        return goods;
    }
}
package nuc.zy.edu.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
/**
 * Elasticsearch document representing one crawled JD product.
 * Lombok generates getters/setters and both constructors.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
// "goods" index: 3 primary shards, 1 replica.
// The Spring Data annotation is 'Document' (capital D); the lowercase form
// does not exist and would not compile.
@Document(indexName = "goods", shards = 3, replicas = 1)
public class Goods {
    @Id
    private String id;
    // Keyword: stored un-analysed so the product name matches exactly.
    @Field(type = FieldType.Keyword)
    private String name;
    @Field(type = FieldType.Text)
    private String img;
    // Kept as text because the crawled price includes a currency symbol;
    // switch to a numeric field if range queries are needed.
    @Field(type = FieldType.Text)
    private String price;
    // private Double price ;
}



