前期准备部分
测试数据
user1;18;male;{"id": 1,"ids": [101,102,103],"total_number": 3}
user2;20;female;{"id": 2,"ids": [201,202,203,204],"total_number": 4}
user3;23;male;{"id": 3,"ids": [301,302,303,304,305],"total_number": 5}
user4;17;male;{"id": 4,"ids": [401,402,403,304],"total_number":5}
user5;35;female;{"id": 5,"ids": [501,502,503],"total_number": 3}
pom.xml所需要的依赖
org.apache.hive hive-exec 2.3.7 provided
代码部分
package com.lagou.hive.udf;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Strings;
import groovy.json.JsonException;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.junit.Test;
import java.util.ArrayList;
public class PareseJsonArray extends UDF {
public ArrayList evaluate(String jsonStr,String key){
//传入字符串,返回null
if (Strings.isNullOrEmpty(jsonStr)){
return null;
}
try{
//解析json串
JSONObject object = JSON.parseObject(jsonStr);
//根据Key拿取到json数组
JSONArray jsonArray = object.getJSONArray(key);
ArrayList list = new ArrayList<>();
for (Object o : jsonArray) {
list.add(o.toString());
}
return list;
}catch (JsonException e){
return null;
}
}
@Test
public void JuntTest(){
String jsonArr = "{"id": 1,"ids": [101,102,103],"total_number": 3}";
String key = "ids";
ArrayList result = evaluate(jsonArr, key);
System.out.println(JSON.toJSONString(result));
}
}
操作部分
①添加开发的jar包(在Hive命令行中)
add jar /data/lagoudw/jars/cn.lagou.dw-1.0-SNAPSHOT-jar-with-dependencies.jar;
②创建临时函数。指定类名一定要完整的路径,即包名加类名
create temporary function lagou_json_array as "com.lagou.hive.udf.PareseJsonArray";
③解析json串中的数组,并展开
select username, age, sex, ids1 from jsont1 lateral view explode(lagou_json_array(json, "ids")) t1 as ids1;
④解析json串中的id、num
select username, age, sex, id, num from jsont1 lateral view json_tuple(json, 'id', 'total_number') t1 as id, num;
⑤全部合一起并展开
select username, age, sex, ids1, id, num from jsont1 lateral view explode(lagou_json_array(json, "ids")) t1 as ids1 lateral view json_tuple(json, 'id', 'total_number') t1 as id, num;



