Elasticsearch 用于数据检索,效率非常高,尤其是在大数据环境下,所以非常有必要学习!
1. 安装
这里我使用阿里云服务器,并且采用 Docker 安装 ES
安装elasticsearch
# 1.拉取镜像
docker pull elasticsearch:7.7.1
# 2.生成容器
docker run -d -p 9300:9300 -p 9200:9200 --name es -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /root/es/plugins:/usr/share/elasticsearch/plugins -v /root/es/data:/usr/share/elasticsearch/data elasticsearch:7.7.1
安装kibana
# 1.下载kibana镜像到本地
docker pull kibana:7.7.1
# 2.启动kibana容器(Kibana 7.x 使用 ELASTICSEARCH_HOSTS 指定 ES 地址,6.x 的 ELASTICSEARCH_URL 已不再生效)
docker run -d --name kibana -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 -p 5601:5601 kibana:7.7.1
安装elasticsearch-head
# 1.下载镜像
docker pull mobz/elasticsearch-head:5
# 2.生成容器
docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5
# 3.在这里可能会出现跨域拒绝访问问题
# 进入elasticsearch容器内部,修改配置文件elasticsearch.yml
docker ps -a   # 拿到运行容器elasticsearch 的 id
docker exec -it ******(容器id) /bin/bash
cd ./config
vi elasticsearch.yml
# 在elasticsearch.yml中添加:
http.cors.enabled: true
http.cors.allow-origin: "*"
# 然后重启容器
docker restart es
安装IK分词器
# 1.下载对应版本的IK分词器(发布标签需与ES版本一致,这里是 v7.7.1)
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip
# 2.解压到plugins/elasticsearch文件夹中
yum install -y unzip   # 下载unzip
unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip
# 3.添加自定义扩展词和停用词
cd plugins/elasticsearch/config
vim IKAnalyzer.cfg.xml
# 4.在ik分词器目录下config目录中创建ext_dict.dic文件 编码一定要为UTF-8才能生效
vim ext_dict.dic   # 加入扩展词即可
# 5.在ik分词器目录下config目录中创建ext_stopwords.dic文件
vim ext_stopwords.dic   # 加入停用词即可
# 6.将此容器提交成为一个新的镜像
docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:6.8.2
# 7.使用新生成的这个es镜像创建容器,并挂载数据卷
docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:6.8.2

IKAnalyzer.cfg.xml 的内容如下:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>
    <entry key="ext_dict">ext_dict.dic</entry>
    <entry key="ext_stopwords">ext_stopwords.dic</entry>
</properties>

2.项目实战(基于es的仿京东搜索)
- 爬虫
- 导入jsoup依赖
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
- 编写测试,生成工具类
package com.ittao.utils;
import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes product entries from the JD.com search result page.
 *
 * <p>Example: {@code HtmlParseUtil.parseJd("java").forEach(System.out::println);}
 */
public class HtmlParseUtil {

    /** Timeout, in milliseconds, for fetching and parsing the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and extracts one
     * {@link Content} per {@code <li>} found under the element with id
     * {@code J_goodsList}.
     *
     * @param keyword the search keyword; it is URL-encoded as UTF-8 before being
     *                placed in the query string, so non-ASCII text, spaces and
     *                reserved characters such as {@code &} are safe
     * @return the extracted entries; an empty list when the page does not contain
     *         the {@code J_goodsList} container
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // 1. Build the search URL. The keyword must be encoded, otherwise characters
        //    like '&', '#', spaces or Chinese text corrupt the query string.
        String url = "https://search.jd.com/Search?keyword="
                + URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";

        // 2. Download and parse the page into a jsoup document.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        // 3. From here on the API mirrors DOM access in JavaScript.
        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The container is absent (e.g. a different page was served): nothing to extract.
            return contents;
        }

        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            Content content = new Content();
            content.setTitle(item.getElementsByClass("p-name").eq(0).text());
            content.setImg(item.getElementsByTag("img").eq(0).attr("src"));
            content.setPrice(item.getElementsByClass("p-price").text());
            contents.add(content);
        }
        return contents;
    }
}
- 前后端分离实现
-
后端实现
整体结构
pom.xml
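<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
        <relativePath/>
    </parent>
    <groupId>com.ittao</groupId>
    <artifactId>elasticsearch_study</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch_study</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <elasticsearch.version>7.7.1</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.61</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
config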
package com.ittao.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds a {@link RestHighLevelClient} that talks to a single node over HTTP.
     *
     * @return the client, registered under the bean name {@code restHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost esNode = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
entity
package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * One scraped product entry: title, image URL and price, all kept as plain strings.
 * Accessors and constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title text. */
    private String title;

    /** Value of the product image's {@code src} attribute. */
    private String img;

    /** Price text as shown on the page. */
    private String price;
}

package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.springframework.stereotype.Component;

/**
 * Simple user entity with chainable setters (Lombok {@code @Accessors(chain = true)}),
 * also registered as a Spring component.
 */
@Component
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
@Data
public class User {

    /** User name. */
    private String name;

    /** User age. */
    private int age;
}
service
package com.ittao.service.impl; import com.alibaba.fastjson.JSON; import com.ittao.entity.Content; import com.ittao.service.ContentService; import com.ittao.utils.HtmlParseUtil; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.unit.Timevalue; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.FuzzyQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @Service public class ContentServiceImpl implements ContentService { @Autowired @Qualifier("restHighLevelClient") private RestHighLevelClient client; @Override public boolean addToEs(String keyword) throws IOException { //1.获取要添加的数据 ListcontentList = HtmlParseUtil.parseJd(keyword); //2.创建批量添加请求 BulkRequest request = new BulkRequest(); //3.批量添加数据 for (Content content : contentList) { request.add(new IndexRequest("jd_goods"). 
//添加到jd_goods这个索引中 source(JSON.toJSONString(content), XContentType.JSON)); } request.timeout(new Timevalue(2, TimeUnit.MINUTES)); //4.执行批量添加请求 BulkResponse response = client.bulk(request, RequestOptions.DEFAULT); //5.获取响应 return !response.hasFailures(); } @Override public List controller
package com.ittao.Controller; import com.ittao.service.ContentService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.CrossOrigin; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RestController; import java.io.IOException; import java.util.List; import java.util.Map; @RestController @CrossOrigin public class ContentController { @Autowired private ContentService contentService; @GetMapping("/addToEs/{keyword}") public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException { return contentService.addToEs(keyword); } @GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}") public List> searchPage(@PathVariable("keyword") String keyword, @PathVariable("pageNo") int pageNo, @PathVariable("pageSize") int pageSize) throws IOException { return contentService.searchPage(keyword, pageNo, pageSize); } } utils
package com.ittao.utils;

import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes product entries from the JD.com search result page.
 *
 * <p>Example: {@code HtmlParseUtil.parseJd("java").forEach(System.out::println);}
 */
public class HtmlParseUtil {

    /** Timeout, in milliseconds, for fetching and parsing the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and extracts one
     * {@link Content} per {@code <li>} found under the element with id
     * {@code J_goodsList}.
     *
     * @param keyword the search keyword; it is URL-encoded as UTF-8 before being
     *                placed in the query string
     * @return the extracted entries; an empty list when the page does not contain
     *         the {@code J_goodsList} container
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // 1. Build the search URL; encode the keyword so reserved or non-ASCII
        //    characters cannot corrupt the query string.
        String url = "https://search.jd.com/Search?keyword="
                + URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";

        // 2. Download and parse the page into a jsoup document.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        // 3. From here on the API mirrors DOM access in JavaScript.
        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The container is absent: nothing to extract.
            return contents;
        }

        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            Content content = new Content();
            content.setTitle(item.getElementsByClass("p-name").eq(0).text());
            content.setImg(item.getElementsByTag("img").eq(0).attr("src"));
            content.setPrice(item.getElementsByClass("p-price").text());
            contents.add(content);
        }
        return contents;
    }
}
前端实现
首页
ElasticSearch的简单实战
第一个功能:从京东商城中爬取我们搜索的数据,存放到elasticsearch中
第二个功能:从elasticsearch中根据关键字查询我们的数据,进行展示
点我去搜索数据
点我去爬取数据
/* Font size for elements with class "text". */
.text{
font-size: 20px;
}
/* Left-aligned block offset 450px from the left edge. */
.link{
text-align: left;
margin-left: 450px;
}
/* Fixed 200px height for elements with class "logo". */
.logo{
height: 200px;
}
查询页面
!
搜索
点我去爬取数据
点我去首页
-
{{item.price}}
1300+条评价
文轩网旗舰店
/* el-* selectors below: presumably the Element UI layout containers — confirm against the template. */
/* Header and footer: centered text, 80px line height. */
.el-header,
.el-footer {
text-align: center;
line-height: 80px;
}
/* Main area: centered text, 800px line height. */
.el-main {
text-align: center;
line-height: 800px;
}
/* Spacing below containers that are direct children of body. */
body > .el-container {
margin-bottom: 40px;
}
/* Aside line heights for the 5th/6th and 7th child containers. */
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
line-height: 320px;
}
/* Full-width, 1200px-tall wrapper with a 50px vertical margin. */
.content {
width: 100%;
height: 1200px;
margin: 50px auto;
}
/* Floated full-width row, 400px tall. */
.row {
width: 100%;
height: 400px;
float: left;
}
/* Floated column taking a quarter of the row width (four per row). */
.col {
width: 25%;
height: 400px;
float: left;
}
/* Left-aligned block with a 20px left margin. */
.image {
text-align: left;
margin-left: 20px;
}
/* Price text: left-aligned, 20px left margin, red. */
.p-price {
text-align: left;
margin-left: 20px;
color: red;
}
/* Small 10px font for elements with class "p-title1". */
.p-title1{
font-size: 10px;
}
/* Left-aligned text for "p-commit" and "p-shop" elements. */
.p-commit {
text-align: left;
}
.p-shop {
text-align: left;
}
/* Remove list bullets from list items inside ul. */
ul li {
list-style-type: none;
}
生成数据页面
生成
点我去查询
点我去首页
/* el-* selectors below: presumably the Element UI layout containers — confirm against the template. */
/* Header and footer: centered text, 80px line height. */
.el-header,
.el-footer {
text-align: center;
line-height: 80px;
}
/* Main area: centered text, 800px line height. */
.el-main {
text-align: center;
line-height: 800px;
}
/* Spacing below containers that are direct children of body. */
body > .el-container {
margin-bottom: 40px;
}
/* Aside line heights for the 5th/6th and 7th child containers. */
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
line-height: 320px;
}
route中index.js
import Vue from 'vue'
import VueRouter from 'vue-router'
import Search from '../views/Search.vue'
import Home from '../views/Home.vue'
import GenerateData from '../views/GenerateData.vue'
// Install the router plugin before any router instance is created.
Vue.use(VueRouter)

// Route table: one entry per page of the demo.
const routes = [
  {
    path: '/',
    name: 'Home',
    component: Home
  },
  {
    path: '/search',
    name: 'Search',
    component: Search
  },
  {
    path: '/generateData',
    name: 'GenerateData',
    component: GenerateData
  }
]

const router = new VueRouter({
  mode: 'history',
  // Environment variable names are case-sensitive: Vue CLI exposes the public
  // path as BASE_URL, so `process.env.base_URL` would always be undefined.
  base: process.env.BASE_URL,
  routes
})

export default router
main.js
import Vue from 'vue'
import App from './App.vue'
import router from './router'
import store from './store'
import ElementUI from 'element-ui'
import 'element-ui/lib/theme-chalk/index.css'
import axios from 'axios'
// Disable the production-mode hint printed on startup.
Vue.config.productionTip = false

// Register the Element UI component library globally.
Vue.use(ElementUI)

// `baseURL` is automatically prepended to `url` unless `url` is an absolute URL.
axios.defaults.baseURL = 'http://localhost:8989'
// Expose the shared axios instance to every component as `this.$http`.
Vue.prototype.$http = axios

// Create the root instance and mount it on the #app element.
const app = new Vue({
  router,
  store,
  render: (createElement) => createElement(App)
})
app.$mount('#app')
3.总结
通过学习,对es的基本使用算是初步的掌握了.学习es的步骤如下
1.es的安装,尤其通过docker安装
2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作
3.es的java客户端工具api学习,通过java语句去实现增删改查,其实本质上和查询语句类似,该有的方法都有。
4.最后通过es仿京东搜索的实战练习,具备了对es的基本运用能力。



