ElasticSearch
简称es
高扩展的分布式全文检索引擎;实时的存储、检索数据
基于java开发
ElasticSearch安装JDK1.8以上ElasticSearch客户端界面工具官网资料百度网盘地址,提取码:s824
目录解压就可以使用了!
bin 启动文件config 配置文件
log4j2 日志配置文件jvm.options java虚拟机相关的配置elasticsearch.yml elasticsearch的配置文件 默认端口9200 跨域 lib 相关的jar包logs 日志modules 功能模块plugins 插件 ik 启动
安装可视化界面 es head的插件双击bin目录下的elasticsearch.bat
默认访问9200,通讯9300
访问测试:
数据视图展示工具,后续查询用kibana
github有下载地址,太慢了没打开 = =, 需要前端
解决跨域问题npm install
npm run start
此时访问 http://localhost:9100/,但是要解决跨域问题
在elasticsearch.yml中加上这个,重启服务即可解决
http.cors.enabled: true
http.cors.allow-origin: "*"
创建索引
安装kibana如果报错就重启服务即可
可以把es当作一个数据库,可以建立索引(库),文档(库中的数据)
官网下载,拆箱即用
启动测试访问测试 汉化 ElasticSearch概念默认端口:http://localhost:5601
面向文档, 一切都是JSON
| Relational DB | ElasticSearch |
|---|---|
| 数据库(database) | 索引(indices == 数据库) |
| 表(tables) | types |
| 行(rows) | document(文档==记录) |
| 字段(columns) | fields |
中文分词器使用IK
把一段段中文划分成一个个的关键字
最少切分 ik_smart;最细粒度划分 ik_max_word 下载安装
加载插件 查看加载进来的插件解压到 ElasticSearch的插件中,全部关了重启观察es
使用kibana测试elasticsearch-plugin list
查看不同的分词器启动kibana
最少切分 ik_smart
最细粒度划分 ik_max_word(穷尽词库,根据字典)
ik分词器增加自己的配置发现问题,输入字典没有的词,需要的词汇被拆开
这种自己需要的词,需要自己加到分词器的字典中
增加配置后重启es
加载到了!
Rest风格 索引的基本操作 1、创建一个索引三个工具都打开
PUT /索引名/(类型名)/文档id
{
请求体
}
PUT /test1/type1/1
{
"name": "蒋二妹",
"age": 3
}
完成了自动增加的索引,数据也成功的添加了 2、指定字段的类型 创建规则,以后放数据 获取规则
GET test2
{
"test2" : {
"aliases" : { },
"mappings" : {
"properties" : {
"age" : {
"type" : "long"
},
"birthday" : {
"type" : "date"
},
"name" : {
"type" : "text"
}
}
},
"settings" : {
"index" : {
"creation_date" : "1646576023965",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "ga5mzX7YQFG6GsfzOXohVA",
"version" : {
"created" : "7060199"
},
"provided_name" : "test2"
}
}
}
}
查看默认的信息
如果文档字段没有指定,es会默认配置字段类型
通过GET _cat
    /** Creates the "jq" index and prints the response returned by the cluster. */
    @Test
    void testCreateIndex() throws IOException {
        // 1. Build the create-index request.
        CreateIndexRequest request = new CreateIndexRequest("jq");
        // 2. Execute the request and collect the response.
        CreateIndexResponse response = client.indices().create(request, RequestOptions.DEFAULT);
        System.out.println(response);
    }

    /** Checks whether the "jq" index exists and prints the result. */
    @Test
    void testGetIndex() throws IOException {
        GetIndexRequest request = new GetIndexRequest("jq");
        boolean exists = client.indices().exists(request, RequestOptions.DEFAULT);
        System.out.println(exists); // expected: true
    }

    /** Deletes the "jq" index and prints whether the deletion was acknowledged. */
    @Test
    void testDeleteIndex() throws IOException {
        DeleteIndexRequest request = new DeleteIndexRequest("jq");
        AcknowledgedResponse response = client.indices().delete(request, RequestOptions.DEFAULT);
        System.out.println(response.isAcknowledged()); // expected: true
    }
}
测试2(文档的API基操) 1. 新建实体类
package com.jiang.pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/** Plain data holder that the tests below serialize to JSON and store as a document. */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class User {
    private String name;
    private int age;
}
2. 测试
/** Exercises the document-level operations (index, exists, get, update, delete, bulk, search) on the "jq" index. */
@SpringBootTest
class EsApiApplicationTests {

    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;

    /** Indexes one User as document id "1". */
    @Test
    public void testAdddocument() throws IOException {
        // Object to store.
        User user = new User("蒋二妹", 3);
        // 1. Build the request (original note: if the index does not exist yet, create it first).
        IndexRequest request = new IndexRequest("jq");
        // Rule: PUT /jq/1
        request.id("1");
        request.timeout(TimeValue.timeValueSeconds(1)); // alternative: request.timeout("1s")
        // 2. Put the data into the request as JSON (requires Alibaba fastjson).
        //    source(...) returns the request itself, so the return value does not need to be kept.
        request.source(JSON.toJSONString(user), XContentType.JSON);
        // 3. Send the request and read the response.
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        System.out.println(indexResponse.toString());
        System.out.println(indexResponse.status()); // CREATED
    }

    /** Prints whether document "1" exists. */
    @Test
    public void textIsExists() throws IOException {
        GetRequest getRequest = new GetRequest("jq", "1");
        // Do not fetch the _source context of the document.
        getRequest.fetchSourceContext(new FetchSourceContext(false));
        getRequest.storedFields("_none_");
        boolean exists = client.exists(getRequest, RequestOptions.DEFAULT);
        System.out.println(exists);
    }

    /** Fetches document "1" and prints its source and the full response. */
    @Test
    public void testGetdocument() throws IOException {
        GetRequest getRequest = new GetRequest("jq", "1");
        GetResponse response = client.get(getRequest, RequestOptions.DEFAULT);
        // Prints the document content, e.g. {"age":3,"name":"蒋二妹"}
        System.out.println(response.getSourceAsString());
        // Prints the whole response.
        System.out.println(response);
    }

    /** Updates document "1" with a new User payload. */
    @Test
    public void testUpdatedocument() throws IOException {
        UpdateRequest updateRequest = new UpdateRequest("jq", "1");
        updateRequest.timeout("1s");
        User user = new User("蒋二妹QAQ", 11);
        updateRequest.doc(JSON.toJSONString(user), XContentType.JSON);
        UpdateResponse updateResponse = client.update(updateRequest, RequestOptions.DEFAULT);
        System.out.println(updateResponse.status());
    }

    /** Deletes document "1". */
    @Test
    public void testDeletedocument() throws IOException {
        DeleteRequest deleteRequest = new DeleteRequest("jq", "1");
        deleteRequest.timeout("1s");
        DeleteResponse response = client.delete(deleteRequest, RequestOptions.DEFAULT);
        System.out.println(response.status());
    }

    /** Indexes six users in a single bulk request, using ids "2" through "7". */
    @Test
    public void testBulkRequest() throws IOException {
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("1s");
        ArrayList<User> userList = new ArrayList<>();
        userList.add(new User("张三1", 1));
        userList.add(new User("张三2", 2));
        userList.add(new User("张三3", 3));
        userList.add(new User("张三4", 4));
        userList.add(new User("张三5", 5));
        userList.add(new User("张三6", 6));
        // Batch the requests. For bulk update or delete, swap in the matching request type here.
        for (int i = 0; i < userList.size(); i++) {
            bulkRequest.add(
                    new IndexRequest("jq")
                            .id("" + (i + 2)) // without an explicit id a random one is used
                            .source(JSON.toJSONString(userList.get(i)), XContentType.JSON)
            );
        }
        BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        System.out.println(bulkResponse.hasFailures()); // false means nothing failed
    }

    /** Runs a term query on the "name" field and prints the hits. */
    @Test
    public void testSearch() throws IOException {
        SearchRequest searchRequest = new SearchRequest("jq");
        // 1. Build the search conditions.
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // QueryBuilders is a shortcut for building query conditions:
        //   QueryBuilders.termQuery()     exact match
        //   QueryBuilders.matchAllQuery() match everything
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "张三1"); // exact query
        //MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
        sourceBuilder.query(termQueryBuilder);
        // Paging
        //sourceBuilder.from();
        //sourceBuilder.size();
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        // 2. Put the conditions into the request.
        searchRequest.source(sourceBuilder);
        // 3. Send the request.
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        System.out.println(JSON.toJSONString(searchResponse.getHits()));
        System.out.println("======================================");
        for (SearchHit documentFields : searchResponse.getHits().getHits()) {
            System.out.println(documentFields.getSourceAsMap());
        }
    }
}
实战
爬虫新建好一个springboot web的项目后,准备好前端资料
数据问题:
数据库获取消息队列中获取爬虫获取
都可以成为数据源
爬取数据:(获取请求返回的信息,筛选出想要的数据)
导入jsoup包
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
爬虫工具类 HtmlParseUtil
package com.jiang.utils; @Component public class HtmlParseUtil { //public static void main(String[] args) throws IOException { // new HtmlParseUtil().parseJD("java").forEach(System.out::println); //} public List实体类parseJD(String keywords) throws IOException { String url = "https://search.jd.com/Search?keyword=" + keywords; document document = Jsoup.parse(new URL(url), 30000); Element element = document.getElementById("J_goodsList"); Elements elements = element.getElementsByTag("li"); ArrayList goodsList = new ArrayList<>(); for (Element el : elements) { String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img"); String price = el.getElementsByClass("p-price").eq(0).text(); String title = el.getElementsByClass("p-name").eq(0).text(); Content content = new Content(); content.setTitle(title); content.setPrice(price); content.setImg(img); goodsList.add(content); } return goodsList; } } package com.jiang.pojo; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import java.math.BigDecimal; @Data @AllArgsConstructor @NoArgsConstructor public class Content { private String img; private String price; private String title; }客户端package com.jiang.config; //找对象 //放到springboot中待用 @Configuration //相比于xml public class ElasticSearchClientConfig { @Bean public RestHighLevelClient restHighLevelClient() { //保证本地的es开启状态 RestHighLevelClient client = new RestHighLevelClient( RestClient.builder( new HttpHost("localhost", 9200, "http") ) ); return client; } }controllerpackage com.jiang.controller; @RestController public class ContentController { @Autowired private ContentService contentService; @GetMapping("/parse/{keyword}") public boolean parse(@PathVariable("keyword") String keyword) throws Exception { return contentService.parseContent(keyword); } @GetMapping("/search/{keyword}/{pageNum}/{pageSize}") public Listservicepackage com.jiang.service; @Service public class ContentService { @Autowired private RestHighLevelClient client; public 
boolean parseContent(String keywords) throws Exception { List前后端分离contents = new HtmlParseUtil().parseJD(keywords); //查询出来的数据放入到es中,批量添加 BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m"); for (int i = 0; i < contents.size(); i++) { bulkRequest.add(new IndexRequest("jd_goods") .source(JSON.toJSONString(contents.get(i)), XContentType.JSON)); } BulkResponse bulk = client.bulk(bulkRequest, RequestOptions.DEFAULT); return !bulk.hasFailures(); } public List 新建一个文件夹
npm init
npm install
npm install vue
npm install axios
主要是把js放到static下
index.html使用VUE
小结狂神说Java-ES仿京东实战



