1.需求描述
从hive抽数到es中,应用中查询es,模糊匹配标题、公共日志字段中任意一个满足条件的数据
条件格式为“条件1 AND 条件2”
查阅es资料,刚好query_string可以满足本次需求
2.es建表等
参考:映射和分析 | Elasticsearch: 权威指南 | Elastic,详解了映射类型区别,分词等的使用
3.hive表抽数到es
es建表语句:
curl -X PUT 'http://ip:9200/itsm_result_index' -H 'Content-Type:application/json' -d '{"settings":{"number_of_shards":3, "number_of_replicas":2},"mappings": { "properties": { "id": { "type": "keyword" }, "status": { "type": "text","analyzer":"ik_max_word","search_analyzer":"ik_smart" }, "request_type": { "type": "keyword" },"impact": { "type": "keyword" },"priority": { "type": "text","analyzer":"ik_max_word","search_analyzer":"ik_smart" },"urgency": { "type": "keyword" },"origin": { "type": "keyword" },"servicesubcategory_id": { "type": "long" },"resolution_date": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"last_pending_date": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"org_id": { "type": "long" },"caller_id": { "type": "long" },"team_id": { "type": "long" },"agent_id": { "type": "long" },"title": { "type": "text","analyzer":"ik_max_word","search_analyzer":"ik_smart" },"description": { "type": "text","analyzer":"ik_max_word","search_analyzer":"ik_smart" },"start_date": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"end_date": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"last_update": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"close_date": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"private_log": { "type": "text","analyzer":"ik_max_word","search_analyzer":"ik_smart" },"itsm_id": { "type": "long" },"expire": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },"item_id": { "type": "long" },"item_org_id": { "type": "long" },"agent_name": { "type": "keyword" },"caller_name": { "type": "keyword" },"public_log": { "type": "text","analyzer":"ik_max_word","search_analyzer":"ik_smart" }}}}'
hive外联表:
add jar xxx/elasticsearch-hadoop-7.8.0.jar;
CREATE EXTERNAL TABLE if not exists tdata.itsm_result(
`id` string COMMENT 'id',
`status` string COMMENT '状态',
`request_type` string COMMENT '请求类型',
`impact` string COMMENT '影响',
`priority` string COMMENT '优先级',
`urgency` string COMMENT '紧急度',
`origin` string COMMENT '来源',
`servicesubcategory_id` bigint COMMENT '子服务',
`resolution_date` string COMMENT '解决日期',
`last_pending_date` string COMMENT '最后更新',
`org_id` bigint COMMENT '组织',
`caller_id` bigint COMMENT '申请人',
`team_id` bigint COMMENT '工单id',
`agent_id` bigint COMMENT '执行人',
`title` string COMMENT '标题',
`description` string COMMENT '描述',
`start_date` string COMMENT '开始日期',
`end_date` string COMMENT '结束日期',
`last_update` string COMMENT 'ticket-最后更新',
`close_date` string COMMENT '关闭日期',
`private_log` string COMMENT '补充说明',
`itsm_id` bigint COMMENT '附件编号',
`expire` string COMMENT '日期',
`item_id` bigint COMMENT '请求单号',
`item_org_id` bigint COMMENT '组织编号',
`agent_name` string COMMENT '执行人',
`caller_name` string COMMENT '申请人',
`public_log` string COMMENT '公共日志')COMMENT 'itsm工单结果外联表'
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource'='itsm_result_index/_doc',
'es.index.auto.create'='false',
'es.nodes'='ip:port,ip:port,ip:port',
'es.read.metadata'='true',
'es.mapping.id'='id',
'es.mapping.date.rich'='false'
);
'es.mapping.id'='id' es的id自动生成,如果不想使用自动生成的,可以用这个设置
'es.mapping.date.rich'='false' :es.mapping.date.rich 为false,禁止自动解析时间格式字段,只返回字符串;本案例hive表日期相关的字段是string类型,es是date类型,查询外联表是报错:org.elasticsearch.hadoop.rest.EsHadoopParsingException: Cannot parse value [2017-11-01 09:05:46.583] for field [end_date],因此配置了此项
4.java代码
//根据条件查询 "message:'LNCityCode' AND message:'辽宁'"
String conditions = requestType.getTitle();
//field1满足条件1且条件2 || field2满足条件1且条件2
String conditionVal = conditions.replace("message:","").replace(""","");
searchSourceBuilder.query(new QueryStringQueryBuilder(conditionVal).fields(fields));
Map resultMap = esUtil.searchV2(ITSM_INDEX, searchSourceBuilder, Map.class);
public



