一、项目准备
1.我们还是采用前面的springboot框架进行本次项目开发;
2.相关前端资源已放在百度网盘中,请自行下载(下载链接及提取码见原文)。
3.将网盘中前端资源放入springboot中,位置如下:
其中application.properties中配置了项目的访问端口,并关闭了thymeleaf的模板缓存(便于开发时修改页面后立即生效),端口号可以自行修改。
# 应用服务 WEB 访问端口
server.port=8080
# 关闭thymeleaf模板缓存
spring.thymeleaf.cache=false
4.创建基本包:controller、service、unitls(工具类包),另外将前端所需要的vue以及axios包放入项目中,结构如下:
5.在项目的pom.xml中添加相关依赖:
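<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>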
二、后端代码
1、unitls层(工具类):
package com.elasticsearch.esapi.unitls;
import com.elasticsearch.esapi.pojo.JDGoodsInfo;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
// 工具类,用于解析京东的html页面
public class HtmlParEs {
private static final String URLJD = "https://search.jd.com/Search?keyword=";
public static List paraHtml(String param) throws IOException {
param = URLEncoder.encode(param, "utf-8");
// 1.通过jsoup进行获取网页信息,返回整个页面对应的html页面
document document = Jsoup.parse(new URL(URLJD+param), 3000);
// 2.解析页面,通过id,获取最近的标签
Element element = document.getElementById("J_goodsList");
// 3.解析每一个图片所有信息,获取所有的li标签
Elements lis = element.getElementsByTag("li");
// 4.遍历解析每个li标签
List goodsList = new ArrayList<>();
for (Element li:lis) {
// 获取li下的img标签
String img = li.getElementsByTag("img").eq(0).attr("data-lazy-img");
String price = li.getElementsByClass("p-price").eq(0).text(); // text()获取当前标签下所有文本信息,包括子节点
String title = li.getElementsByClass("p-name").eq(0).text();
JDGoodsInfo jdGoodsInfo = new JDGoodsInfo(img,price,title);
goodsList.add(jdGoodsInfo);
}
return goodsList;
}
public static void main(String[] args) throws IOException {
new HtmlParEs().paraHtml("学习").forEach(System.out::println);
}
}
2、service层代码:
package com.elasticsearch.esapi.service;
import com.alibaba.fastjson.JSON;
import com.elasticsearch.esapi.pojo.JDGoodsInfo;
import com.elasticsearch.esapi.unitls.HtmlParEs;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.Timevalue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@Service
public class JDGoodsService {
@Autowired
private RestHighLevelClient restHighLevelClient;
@Value("${es.name.one}")
public String es1;
/**
 * Scrapes the goods for the given keyword and bulk-indexes them into the Elasticsearch
 * index named by {@code es1}.
 *
 * @param keyWord the search keyword handed to {@code HtmlParEs.paraHtml}
 * @return {@code true} when the bulk request completed without failures; {@code false}
 *         when nothing was scraped or when the bulk response reports failures
 * @throws IOException if fetching the page or talking to Elasticsearch fails
 */
public Boolean parseHtmlAndSaveEs(String keyWord) throws IOException {
    // 1. Fetch the page data. The items are only serialized to JSON below, so their
    //    concrete element type is not needed here.
    List<?> jdGoodsInfos = HtmlParEs.paraHtml(keyWord);
    if (jdGoodsInfos.isEmpty()) {
        // A bulk request without any action is rejected by client-side validation
        // ("no requests added"), so there is nothing to send.
        return false;
    }

    // 2. Save everything to Elasticsearch in a single bulk request.
    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("2m");
    for (Object goods : jdGoodsInfos) {
        bulkRequest.add(new IndexRequest(es1)
                .source(JSON.toJSONString(goods), XContentType.JSON));
    }
    BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !response.hasFailures();
}
// 查询es数据,响应前端
public List
3.controller 层
package com.elasticsearch.esapi.controller;
import com.elasticsearch.esapi.service.JDGoodsService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
import java.util.List;
import java.util.Map;
@Controller
public class JDGoodsController {
@Autowired
private JDGoodsService jdGoodsService;
@GetMapping("/parseSaveEs")
@ResponseBody
public Boolean parseSaveEs(String keyWord) throws IOException {
Boolean aBoolean = jdGoodsService.parseHtmlAndSaveEs(keyWord);
return aBoolean;
}
@GetMapping("/getGoods")
@ResponseBody
public List> getGoods(String keyWord,int pageNo,int pageSize) throws IOException {
List> mapList = jdGoodsService.searchGoods(keyWord, pageNo, pageSize);
return mapList;
}
@GetMapping("/getGoodsHight")
@ResponseBody
public List> getGoodsHight(String keyWord,int pageNo,int pageSize) throws IOException {
List> mapList = jdGoodsService.searchGoods(keyWord, pageNo, pageSize);
return mapList;
}
}
package com.elasticsearch.esapi.controller;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
/**
 * Serves the front page of the application.
 */
@Controller
public class IndexController {
    /** Name of the view returned for the front page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles requests for {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping({"/", "/index"})
    public String getIndex() {
        return INDEX_VIEW;
    }
}
三、启动springboot项目
访问:http://localhost:8080/ 即可看到对应的前端页面
四、前端代码
在index.html页面添加代码如下
五、总结
1.本次仿京东搜索项目的整体流程是:先通过jsoup将京东的商品数据爬取出来并存入es库中,然后再从es库中查询数据,响应给前端。



