栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

ElasticSearch学习 (三)模仿京东搜索 仅后台

ElasticSearch学习 (三)模仿京东搜索 仅后台

集成jsoup
   
      
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
    
解析html页面
/**
 * Scrapes the JD.com search result page for a keyword and turns each listed
 * product into a {@link Content} object.
 *
 * <p>Target page: https://search.jd.com/Search?keyword=java
 */
@Component
public class HtmlParseUtil {

    /** Browser-like user agent sent with the page request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 5.1; zh-CN) AppleWebKit/535.12 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/535.12";

    /** Connect/read timeout for the page request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Currency symbol that precedes the amount in the price element's text. */
    private static final String PRICE_SYMBOL = "¥";

    /**
     * Fetches the search page for {@code keywords} and extracts one
     * {@link Content} per {@code li} element found under the element with id
     * {@code J_goodsList}.
     *
     * @param keywords search keyword; must not be {@code null}
     * @return the parsed products, or an empty list when the page contains no
     *         {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keywords} is {@code null}
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJd(String keywords) throws Exception {
        if (keywords == null) {
            throw new IllegalArgumentException("keywords must not be null");
        }

        // Encode the keyword so spaces, '&', '#' and non-ASCII text form a valid query string.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keywords, "UTF-8");

        // Fetch and parse the page into a document.
        Document document = Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .get();

        List<Content> contentList = new ArrayList<>();

        // Locate the product list container; it is absent when the page layout differs
        // from what this parser expects, in which case there is nothing to extract.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return contentList;
        }

        // Each li element is treated as one product entry.
        for (Element item : goodsList.getElementsByTag("li")) {
            // Image URL is read from the lazy-loading attribute rather than src.
            String img = item.getElementsByTag("img").attr("data-lazy-img");
            String price = extractPrice(item.getElementsByClass("p-price").eq(0).text());
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contentList.add(content);
        }
        return contentList;
    }

    /**
     * Returns the text that follows the first currency symbol in {@code priceText}.
     * When the symbol is missing (for example, the item has no price element) the
     * trimmed text is returned instead of failing with an index error.
     */
    private static String extractPrice(String priceText) {
        String[] parts = priceText.split(PRICE_SYMBOL);
        if (parts.length > 1) {
            return parts[1];
        }
        return priceText.replace(PRICE_SYMBOL, "").trim();
    }
}
实体类
/**
 * Plain data holder for one product entry, carrying its title, image URL and price
 * as strings.
 *
 * <p>NOTE(review): the annotations below are presumably Lombok's and would supply
 * the accessors and constructors used elsewhere in this file — confirm the imports.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {
    /** Product title text. */
    private String title;
    /** Product image URL. */
    private String img;
    /** Product price, kept as text. */
    private String price;

}

controller层
@RestController
public class ContentController {
    @Autowired
    private ContentService contentService;

    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws Exception {
        return contentService.parseContent(keyword);
    }
    @GetMapping("/parse/{keyword}/{pageNo}/{pageSize}")
    public List> search(@PathVariable("keyword") String keyword,
            @PathVariable("pageNo")int pageNo,
            @PathVariable("pageSize")int pageSize) throws IOException {
        List> list = contentService.searchContent(keyword, pageNo, pageSize);
        return list;
    }
}

业务层
@Service
public class ContentService {
     @Autowired
     private RestHighLevelClient restHighLevelClient;
     public boolean parseContent(String keywords) throws Exception {
         List contentList = new HtmlParseUtil().parseJd(keywords);
         BulkRequest bulkRequest = new BulkRequest();
         bulkRequest.timeout("2m");
         for (int i=0;i < contentList.size(); i++){
             bulkRequest.add(
                     new IndexRequest("jd_goods")
                     .source(JSON.toJSONString(contentList.get(i)), XContentType.JSON));
         }
         BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
         return bulkResponse.hasFailures();
     }

     // 获取数据实现查询
     public List> searchContent(String keyword,int pageNo,int pageSize) throws IOException {
         if (pageNo < 1) {
             pageNo = 1;
         }
         //条件搜索
         SearchRequest searchRequest = new SearchRequest("jd_goods");
         SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

         //分页
         searchSourceBuilder.from(pageNo);
         searchSourceBuilder.size(pageSize);

         //放入条件
         TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title",keyword);
         searchSourceBuilder.query(termQueryBuilder);
         searchSourceBuilder.timeout(new Timevalue(60,TimeUnit.SECONDS));

         // 执行搜索
         searchRequest.source(searchSourceBuilder);
         SearchResponse searchResponse = restHighLevelClient.search(searchRequest,RequestOptions.DEFAULT);

         //解析结果
         List> list = new ArrayList<>();
         for (SearchHit searchHit : searchResponse.getHits()) {
             list.add(searchHit.getSourceAsMap());
         }
         return list;
     }
}

最终实现结果:

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/344479.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号