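<properties>
  <java.version>1.8</java.version>
  <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>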
导入elasticsearch
<dependency>
  <groupId>org.springframework.boot</groupId>
  <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
提前导入fastjson、lombok
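1.2创建并编写配置类–>连上ES
（上文“提前导入fastjson、lombok”对应的依赖如下）
<dependency>
  <groupId>com.alibaba</groupId>
  <artifactId>fastjson</artifactId>
  <version>1.2.70</version>
</dependency>
<dependency>
  <groupId>org.projectlombok</groupId>
  <artifactId>lombok</artifactId>
  <optional>true</optional>
</dependency>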
/**
 * Spring configuration that exposes the Elasticsearch REST high-level client as a bean.
 */
@Configuration
public class ElasticSearchConfig {

    /**
     * Registers a {@link RestHighLevelClient} pointing at 127.0.0.1:9200 over HTTP.
     *
     * @return the newly built client
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
1.3测试索引的操作
1、索引的创建
/**
 * Creates the "jd_goods" index and prints the response.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testCreateIndex() throws IOException {
    // Step 1: describe the index to create.
    CreateIndexRequest createRequest = new CreateIndexRequest("jd_goods");
    // Step 2: send the request through the indices client.
    CreateIndexResponse response =
            restHighLevelClient.indices().create(createRequest, RequestOptions.DEFAULT);
    String report = "执行创建请求===>" + response;
    System.out.println(report);
}
2、测试获取索引
/**
 * Checks whether the "kuang_index" index exists and prints the boolean result.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testExistIndex() throws IOException {
    boolean found = restHighLevelClient.indices()
            .exists(new GetIndexRequest("kuang_index"), RequestOptions.DEFAULT);
    System.out.println("测试获取索引===>" + found);
}
3、测试删除索引
/**
 * Deletes the "kuang_index" index and prints whether the cluster acknowledged the deletion.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testDeleteIndex() throws IOException {
    DeleteIndexRequest request = new DeleteIndexRequest("kuang_index");
    AcknowledgedResponse delete = restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
    // The label asks "was it deleted?", so print the acknowledged flag rather than the response object.
    System.out.println("是否删除成功===>" + delete.isAcknowledged());
}
4、测试添加文档
/**
 * Indexes one {@code User} document with id "1" into "kuang_index" and prints the response
 * together with its status.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testAdddocument() throws IOException {
    // Object that becomes the document body.
    User user = new User("狂神说", 3);

    IndexRequest request = new IndexRequest("kuang_index");
    request.id("1");
    // The class is TimeValue (capital V). A single timeout setting is enough;
    // request.timeout("1s") would set the same value.
    request.timeout(TimeValue.timeValueSeconds(1));
    // Serialize the object to JSON and attach it as the document source.
    request.source(JSON.toJSONString(user), XContentType.JSON);

    // Send the request and inspect the response.
    IndexResponse indexResponse = restHighLevelClient.index(request, RequestOptions.DEFAULT);
    System.out.println(indexResponse.toString());
    // Status reported by the response (the notes expect CREATED here).
    System.out.println(indexResponse.status());
}
5、获取文档 判断是否存在
/**
 * Checks whether document "1" exists in "kuang_index" and prints the result.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testIsExists() throws IOException {
    GetRequest probe = new GetRequest("kuang_index", "1");
    // Only existence matters, so skip stored fields and the _source payload.
    probe.storedFields("_none_");
    probe.fetchSourceContext(new FetchSourceContext(false));
    System.out.println(restHighLevelClient.exists(probe, RequestOptions.DEFAULT));
}
6、获取文档的信息
/**
 * Fetches document "1" from "kuang_index" and prints the request followed by the response.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testGetdocument() throws IOException {
    GetRequest fetch = new GetRequest("kuang_index", "1");
    GetResponse fetched = restHighLevelClient.get(fetch, RequestOptions.DEFAULT);
    // Output order: request first, then the retrieved document.
    System.out.println(fetch);
    System.out.println(fetched);
}
7、更新文档的信息
/**
 * Partially updates document "1" with a new {@code User} payload and prints the status.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testUpdateRequest() throws IOException {
    // Target the same index the other document tests use ("kuang_index"),
    // which is where document "1" is indexed.
    UpdateRequest updateRequest = new UpdateRequest("kuang_index", "1");
    updateRequest.timeout("1s");
    User user = new User("狂神说java", 18);
    // The JSON-serialized object is sent as the partial document.
    updateRequest.doc(JSON.toJSONString(user), XContentType.JSON);
    UpdateResponse updateResponse = restHighLevelClient.update(updateRequest, RequestOptions.DEFAULT);
    System.out.println(updateResponse.status());
}
8、删除文档记录
/**
 * Deletes document "2" from "kuang_index" and prints the status of the response.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testDeleteRequest() throws IOException {
    DeleteRequest removal = new DeleteRequest("kuang_index", "2");
    removal.timeout("1s");
    System.out.println(restHighLevelClient.delete(removal, RequestOptions.DEFAULT).status());
}
9、批量插入数据
/**
 * Bulk-indexes seven {@code User} documents into "kuang_index" with ids "1".."7"
 * and prints whether any item failed.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testBulkRequest() throws IOException {
    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("10s");

    // Typed list instead of a raw ArrayList.
    ArrayList<User> userList = new ArrayList<>();
    userList.add(new User("kuangshen1", 3));
    userList.add(new User("kuangshen2", 4));
    userList.add(new User("kuangshen3", 5));
    userList.add(new User("kuangshen4", 6));
    userList.add(new User("kuangshen5", 13));
    userList.add(new User("kuangshen6", 23));
    userList.add(new User("kuangshen7", 33));

    // One index action per user; the document id is the 1-based list position.
    for (int i = 0; i < userList.size(); i++) {
        bulkRequest.add(new IndexRequest("kuang_index")
                .id("" + (i + 1))
                .source(JSON.toJSONString(userList.get(i)), XContentType.JSON));
    }

    BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    // hasFailures() == false means every item succeeded.
    System.out.println(bulkResponse.hasFailures());
}
10、查询
/**
 * Runs a term query on field "name" against the index named by {@code ESconst.ES_INDEX}
 * and prints the hits, first as JSON and then one source map per hit.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testSearch() throws IOException {
    SearchRequest searchRequest = new SearchRequest(ESconst.ES_INDEX);
    // Build the search conditions. No highlighter is configured for this query.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // Query conditions come from the QueryBuilders helper:
    //   QueryBuilders.termQuery()     -> exact term match
    //   QueryBuilders.matchAllQuery() -> match everything
    TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "qinjiang1");
    // The match-all builder is only printed for illustration; the term query is the one executed.
    MatchAllQueryBuilder allQueryBuilder = QueryBuilders.matchAllQuery();
    System.out.println("allQueryBuilder===>>" + allQueryBuilder);

    sourceBuilder.query(termQueryBuilder);
    // The class is TimeValue (capital V).
    sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    // Attach the source to the request and execute it.
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    System.out.println(JSON.toJSONString(searchResponse.getHits()));
    System.out.println("=====================================");
    for (SearchHit documentFields : searchResponse.getHits().getHits()) {
        System.out.println(documentFields.getSourceAsMap());
    }
}
二、ElasticSearch实战
2.1导入依赖
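2.2编写 application.properties 配置文件
（以下为 2.1 导入依赖 对应的 pom.xml 内容）
<properties>
  <java.version>1.8</java.version>
  <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>
<dependencies>
  <dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
  </dependency>
  <dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.70</version>
  </dependency>
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
  </dependency>
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-thymeleaf</artifactId>
  </dependency>
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
  </dependency>
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-devtools</artifactId>
    <scope>runtime</scope>
    <optional>true</optional>
  </dependency>
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-configuration-processor</artifactId>
    <optional>true</optional>
  </dependency>
  <dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
  </dependency>
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-test</artifactId>
    <scope>test</scope>
  </dependency>
</dependencies>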
# 更改端口,防止冲突
server.port=9999
# 关闭thymeleaf缓存
spring.thymeleaf.cache=false
2.3测试controller和view
/**
 * MVC controller that serves the index page.
 */
@Controller
public class IndexController {

    /** View name returned for the index page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for "/" and "index".
     *
     * @return the view name "index"
     */
    @GetMapping({"/","index"})
    public String index() {
        return INDEX_VIEW;
    }
}
三、爬虫京东的数据到ES
3.1编写Config
/**
 * Spring configuration that provides the Elasticsearch client bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds the {@link RestHighLevelClient} for the node at 127.0.0.1:9200 (HTTP).
     *
     * @return the client instance
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        return new RestHighLevelClient(
                RestClient.builder(new HttpHost("127.0.0.1", 9200, "http")));
    }
}
3.2编写service
因为数据是爬取得到的,所以不走Dao层;以下示例为了简便都不编写接口,但实际开发中必须严格按规范编写接口
ContentService
// 1. Parse the crawled data and put it into the ES index
/**
 * Crawls the goods for {@code keyword} with {@code HtmlParseUtil} and bulk-indexes
 * them into the "jd_goods" index, using the 1-based list position as document id.
 *
 * @param keyword search keyword handed to the crawler
 * @return {@code true} when the bulk response reports no failures; {@code false} when
 *         the crawler returned nothing or at least one item failed
 * @throws Exception if crawling or the bulk request fails
 */
public Boolean parseContents(String keyword) throws Exception {
    // Fetch the content to index.
    List<?> contents = new HtmlParseUtil().params(keyword);
    if (contents == null || contents.isEmpty()) {
        // A bulk request without any action is rejected by request validation,
        // so report "nothing indexed" instead of sending it.
        return false;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("2m"); // adjust to the actual workload
    for (int i = 0; i < contents.size(); i++) {
        bulkRequest.add(
                new IndexRequest("jd_goods")
                        .id("" + (i + 1))
                        .source(JSON.toJSONString(contents.get(i)), XContentType.JSON)
        );
    }

    BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    // The client is deliberately NOT closed here: it is a field reused across calls
    // (presumably the Spring-managed bean), and closing it would break later requests.
    return !bulk.hasFailures();
}
// 2、根据keywords分页查询结果
public List
编写controller
// Service injected by Spring; the handlers in this controller delegate to it.
@Autowired
private ContentService contentService;
/**
 * GET /parse/{keywords}: delegates to {@code contentService.parseContents} and
 * returns its result as the response body.
 *
 * @param keywords path variable with the search keywords
 * @return the value returned by {@code parseContents}
 * @throws Exception propagated from the service call
 */
@GetMapping("/parse/{keywords}")
@ResponseBody
public Boolean parses(@PathVariable("keywords") String keywords) throws Exception {
    Boolean indexed = contentService.parseContents(keywords);
    return indexed;
}
@ResponseBody
@GetMapping("/search/{keywords}/{pageNo}/{pageSize}")
public List> search(@PathVariable("keywords") String keywords,
@PathVariable("pageNo") int pageNo,
@PathVariable("pageSize") int pageSize) throws IOException {
return contentService.highlightBuilder(keywords,1,10);
}
3.2爬虫工具类
public class HtmlParseUtil {
public static void main(String[] args) throws Exception {
new HtmlParseUtil().params("码出高效").forEach(System.out::println);
}
public List params(String keywords) throws Exception {
//获取请求 https://search.jd.com/Search?keyword=java
//前提 需要联网
String url = "https://search.jd.com/Search?keyword="+keywords+"&enc=utf-8";
//解析网页
document document = Jsoup.parse(new URL(url), 30000);
//所有你在js中可以使用的方法 这里都能用
Element element = document.getElementById("J_goodsList");
//获取所有的li元素
Elements elements = element.getElementsByTag("li");
ArrayList goodList = new ArrayList<>();
for (Element el : elements) {
//关于图片特别多的网站 所有图片都是延迟加载的data-lazy-img
String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
String price = el.getElementsByClass("p-price").eq(0).text();
String title = el.getElementsByClass("p-name").eq(0).text();
Content content = new Content();
content.setImg(img);
content.setTitle(title);
content.setPrice(price);
goodList.add(content);
}
return goodList;
}
}
3.3测试效果
待解决问题:
1、只能半模糊查询,不可以精确查询;
2、分词时 自定义分词 需手动建.dic 添加;
3、轮询时间如何设置为秒;
ElasticSearch入门学习笔记(一)概念篇
ElasticSearch入门学习笔记(二)软件安装篇



