ElasticSearch总结_大数据系统

ElasticSearch学习

ElasticSearch用于数据检索,效率非常高,尤其是在大数据环境下,所以非常有必要学习!

1. 安装

这里我使用阿里云服务器,并且采用Docker 安装ES

安装elasticsearch

# 1.拉取镜像
docker pull elasticsearch:7.7.1
# 2.生成容器
docker run -d -p 9300:9300 -p 9200:9200 --name es -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /root/es/plugins:/usr/share/elasticsearch/plugins -v /root/es/data:/usr/share/elasticsearch/data elasticsearch:7.7.1

安装kibana

# 1.下载kibana镜像到本地
docker pull kibana:7.7.1

# 2.启动kibana容器
docker run -d --name kibana -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 -p 5601:5601 kibana:7.7.1

安装elasticsearch-head

# 1.下载镜像
docker pull mobz/elasticsearch-head:5
# 2.生成容器
docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5
# 3.在这里可能会出现跨域拒绝访问问题
  进入elasticsearch容器内部，修改配置文件elasticsearch.yml
  docker ps -a   #拿到运行容器elasticsearch 的 id
  docker exec -it ******(容器id) /bin/bash
  cd ./config
  vi elasticsearch.yml 
  在elasticsearch.yml中添加：
  http.cors.enabled: true
  http.cors.allow-origin: "*"
　然后重启容器
　docker restart  es

安装IK分词器

# 1.下载对应版本的IK分词器
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip

# 2.解压到plugins/elasticsearch文件夹中
	yum install -y unzip  #下载unzip
	unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip

# 3.添加自定义扩展词和停用词
	cd plugins/elasticsearch/config
	vim IKAnalyzer.cfg.xml
	
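	<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
	<properties>
		<comment>IK Analyzer 扩展配置</comment>
		<!-- 用户可以在这里配置自己的扩展字典 -->
		<entry key="ext_dict">ext_dict.dic</entry>
		<!-- 用户可以在这里配置自己的扩展停止词字典 -->
		<entry key="ext_stopwords">ext_stopwords.dic</entry>
	</properties>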
	

# 4.在ik分词器目录下config目录中创建ext_dict.dic文件   编码一定要为UTF-8才能生效
	vim ext_dict.dic 加入扩展词即可
# 5. 在ik分词器目录下config目录中创建ext_stopwords.dic文件   编码同样要为UTF-8
	vim ext_stopwords.dic 加入停用词即可
# 6.将此容器提交成为一个新的镜像
	docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:6.8.2
# 7.使用新生成的这个es镜像创建容器，并挂载数据卷
	docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:6.8.2

2.项目实战(基于es的仿京东搜索)

爬虫

导入jsoup依赖

  
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>

编写测试,生成工具类

package com.ittao.utils;

import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;



public class HtmlParseUtil {
//    public static void main(String[] args) throws IOException {
//        HtmlParseUtil.parseJd("黄涛").forEach(System.out::println);
//
//    }

    
    public static List parseJd(String keyword) throws IOException {
        //1.获取搜索url
        String url = "https://search.jd.com/Search?keyword="+keyword+"&enc=utf-8";
        //2.通过jsoup解析  获取文档对象
        document document = Jsoup.parse(new URL(url), 30000);

        //3.接下来的操作和js一样了
        Element j_goodsList = document.getElementById("J_goodsList");
        Elements elements = j_goodsList.getElementsByTag("li");

        ArrayList contentArrayList = new ArrayList<>();

        for (Element element : elements) {
            String img = element.getElementsByTag("img").eq(0).attr("src");
            String price = element.getElementsByClass("p-price").text();
            String title = element.getElementsByClass("p-name").eq(0).text();
            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contentArrayList.add(content);
        }

        return contentArrayList;
    }
}

前后端分离实现

后端实现

整体结构

pom.xml



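<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.ittao</groupId>
    <artifactId>elasticsearch_study</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch_study</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <!-- 覆盖Spring Boot默认的es版本,与服务器上安装的es版本保持一致 -->
        <elasticsearch.version>7.7.1</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>

        <!-- jsoup: 解析网页 -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>

        <!-- fastjson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.61</version>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>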

config

package com.ittao.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;


/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds the client used to talk to the Elasticsearch node over HTTP.
     *
     * @return a {@link RestHighLevelClient} pointing at {@code 47.101.52.63:9200}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Single node, plain HTTP on the REST port.
        HttpHost node = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

entity

package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;


/**
 * One scraped product entry; this is the document shape stored in Elasticsearch.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title text. */
    private String title;

    /** Value of the product image's {@code src} attribute. */
    private String img;

    /** Price text, kept as a string exactly as scraped. */
    private String price;
}

package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.springframework.stereotype.Component;


/**
 * Simple user bean with a name and an age.
 *
 * <p>Lombok generates the accessors and constructors; {@code @Accessors(chain = true)}
 * makes the generated setters return {@code this} so calls can be chained. The class is
 * also registered as a Spring component.
 */
@Component
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
@Data
public class User {

    /** User name. */
    private String name;

    /** User age. */
    private int age;
}

service

package com.ittao.service.impl;

import com.alibaba.fastjson.JSON;
import com.ittao.entity.Content;
import com.ittao.service.ContentService;
import com.ittao.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.FuzzyQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;


@Service
public class ContentServiceImpl implements ContentService {

    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;


    
    @Override
    public boolean addToEs(String keyword) throws IOException {
        //1.获取要添加的数据
        List contentList = HtmlParseUtil.parseJd(keyword);
        //2.创建批量添加请求
        BulkRequest request = new BulkRequest();
        //3.批量添加数据
        for (Content content : contentList) {
            request.add(new IndexRequest("jd_goods").  //添加到jd_goods这个索引中
                    source(JSON.toJSONString(content), XContentType.JSON));
        }
        request.timeout(new Timevalue(2, TimeUnit.MINUTES));
        //4.执行批量添加请求
        BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);
        //5.获取响应
        return !response.hasFailures();
    }

    @Override
    public List> searchPage(String keyword, int pageNo, int pageSize) throws IOException {

        if (pageNo<=0){
            pageNo=1;
        }

        //根据关键字进行搜索
        //1.创建搜索请求
        SearchRequest request = new SearchRequest("jd_goods");
        //2.添加搜索条件
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        //添加分页
        sourceBuilder.from(pageNo);
        sourceBuilder.size(pageSize);

        //添加高亮
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        //设置高亮字段
        highlightBuilder.field("title");
        //是否显示多个高亮
        highlightBuilder.requireFieldMatch(true);
        //设置高亮前缀
        highlightBuilder.preTags("");
        //设置高亮后缀
        highlightBuilder.postTags("");
        sourceBuilder.highlighter(highlightBuilder);

        //根据关键字搜索title包含的
        MatchQueryBuilder termQuery = QueryBuilders.matchQuery("title", keyword);
        sourceBuilder.query(termQuery);
        sourceBuilder.timeout(new Timevalue(1, TimeUnit.MINUTES));
        request.source(sourceBuilder);



        //3.执行搜索
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        //4.将响应数据进行封装
        List> mapList = new ArrayList<>();
        for (SearchHit documentFields : response.getHits().getHits()) {
            //目标:将高亮字段替换我们原先的字段

            Map sourceAsMap = documentFields.getSourceAsMap(); //原先的字段
            //1.获取高亮的全部字段
            Map highlightFields = documentFields.getHighlightFields();
            //2.获取我们设置的title高亮字段
            HighlightField title = highlightFields.get("title");
            //3.解析高亮的字段
            if (title!=null){
                //获取高亮片段
                Text[] fragments = title.getFragments();
                String n_title="";
                for (Text fragment : fragments) {
                    n_title +=fragment;
                }
                //4.替换
                sourceAsMap.put("title", n_title);
            }
            mapList.add(sourceAsMap);
        }

        return mapList;
    }
}

controller

package com.ittao.Controller;

import com.ittao.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.util.List;
import java.util.Map;

@RestController
@CrossOrigin
public class ContentController {

    @Autowired
    private ContentService contentService;

    @GetMapping("/addToEs/{keyword}")
    public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException {

        return contentService.addToEs(keyword);
    }

    @GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}")
    public List> searchPage(@PathVariable("keyword") String keyword,
                                                @PathVariable("pageNo") int pageNo,
                                                @PathVariable("pageSize") int pageSize) throws IOException {
        return contentService.searchPage(keyword, pageNo, pageSize);

    }
}

utils

package com.ittao.utils;

import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;



public class HtmlParseUtil {
//    public static void main(String[] args) throws IOException {
//        HtmlParseUtil.parseJd("黄涛").forEach(System.out::println);
//
//    }

    
    public static List parseJd(String keyword) throws IOException {
        //1.获取搜索url
        String url = "https://search.jd.com/Search?keyword="+keyword+"&enc=utf-8";
        //2.通过jsoup解析  获取文档对象
        document document = Jsoup.parse(new URL(url), 30000);

        //3.接下来的操作和js一样了
        Element j_goodsList = document.getElementById("J_goodsList");
        Elements elements = j_goodsList.getElementsByTag("li");

        ArrayList contentArrayList = new ArrayList<>();

        for (Element element : elements) {
            String img = element.getElementsByTag("img").eq(0).attr("src");
            String price = element.getElementsByClass("p-price").text();
            String title = element.getElementsByClass("p-name").eq(0).text();
            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contentArrayList.add(content);
        }

        return contentArrayList;
    }
}

前端实现

首页






/* Home page styles. */

/* 20px text. */
.text {
  font-size: 20px;
}

/* Left-aligned, pushed 450px in from the left edge. */
.link {
  margin-left: 450px;
  text-align: left;
}

/* Fixed 200px height. */
.logo {
  height: 200px;
}

查询页面

!






/* ---- Element UI container layout ---- */

.el-header, .el-footer {
  line-height: 80px;
  text-align: center;
}

.el-main {
  line-height: 800px;
  text-align: center;
}

body > .el-container {
  margin-bottom: 40px;
}

.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
  line-height: 260px;
}

.el-container:nth-child(7) .el-aside {
  line-height: 320px;
}

/* ---- Result grid: full-width content area, 400px rows, four 25% columns ---- */

.content {
  margin: 50px auto;
  width: 100%;
  height: 1200px;
}

.row {
  float: left;
  width: 100%;
  height: 400px;
}

.col {
  float: left;
  width: 25%;
  height: 400px;
}

/* ---- Per-product details ---- */

.image {
  margin-left: 20px;
  text-align: left;
}

.p-price {
  margin-left: 20px;
  text-align: left;
  color: red;
}

.p-title1 {
  font-size: 10px;
}

.p-commit,
.p-shop {
  text-align: left;
}

/* Remove list bullets. */
ul li {
  list-style-type: none;
}

生成数据页面







/* ---- Element UI container layout ---- */

.el-header, .el-footer {
  line-height: 80px;
  text-align: center;
}

.el-main {
  line-height: 800px;
  text-align: center;
}

body > .el-container {
  margin-bottom: 40px;
}

.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
  line-height: 260px;
}

.el-container:nth-child(7) .el-aside {
  line-height: 320px;
}

route中index.js

import Vue from 'vue'
import VueRouter from 'vue-router'
import Search from '../views/Search.vue'
import Home from '../views/Home.vue'
import GenerateData from '../views/GenerateData.vue'

// Register the router plugin before any router instance is created.
Vue.use(VueRouter)

// Route table: one entry per page of the application.
const routes = [
  {
    path: '/',
    name: 'Home',
    component: Home
  },
  {
    path: '/search',
    name: 'Search',
    component: Search
  },
  {
    path: '/generateData',
    name: 'GenerateData',
    component: GenerateData
  }
]

const router = new VueRouter({
  // HTML5 history mode: URLs without the '#' fragment.
  mode: 'history',
  // Vue CLI exposes the app's public path as BASE_URL (all upper case);
  // `base_URL` is never defined, which left `base` undefined.
  base: process.env.BASE_URL,
  routes
})

export default router

main.js

import Vue from 'vue'
import App from './App.vue'
import router from './router'
import store from './store'
import ElementUI from 'element-ui'
import 'element-ui/lib/theme-chalk/index.css'
import axios from 'axios'

// Turn off the production tip and install the Element UI component library.
Vue.config.productionTip = false
Vue.use(ElementUI)

// `baseURL` is prepended to every request `url` unless that `url` is absolute.
axios.defaults.baseURL = 'http://localhost:8989'
// Expose the configured axios instance to all components as `this.$http`.
Vue.prototype.$http = axios

// Create the root instance and mount it on the #app element.
const app = new Vue({
  router,
  store,
  render: (createElement) => createElement(App)
})
app.$mount('#app')

3.总结

通过学习,对es的基本使用算是初步的掌握了.学习es的步骤如下

1.es的安装,尤其通过docker安装

2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作

3.es的java客户端工具api学习,通过java语句去实现增删改查,其实本质上和查询语句类似,该有的方法都有

4.最后通过es仿京东搜索的实战练习,具备了对es的基本运用能力

ElasticSearch总结

大数据系统相关栏目本月热门文章