JAVA大数据的六十一天——hive函数语句大全

一、创建表

create table student(
id int,
name string,
likes array,
address map
)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

2.加载本地文件 local inpath
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student;
3.加载hdfs文件系统 path
0: jdbc:hive2://192.168.153.139:10000/default> load data inpath '/tmp/student.txt' into table student;
4.覆盖 overwrite
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' overwrite into table student;

5.查询表数据

0: jdbc:hive2://192.168.153.139:10000/default> select * from student;
+-------------+---------------+------------------------------+-----------------------------------------+--+
| student.id | student.name | student.likes | student.address |
+-------------+---------------+------------------------------+-----------------------------------------+--+
| 1 | zhansan1 | ["cs","lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 2 | zhansan2 | ["cs","lol"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 3 | zhansan3 | ["book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 4 | zhansan4 | ["book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 5 | zhansan5 | ["cs","lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 6 | zhansan6 | ["cs","lol","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 7 | zhansan7 | ["lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 8 | zhansan8 | ["cs","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 9 | zhansan9 | ["cs","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 10 | zhansan10 | ["cs","lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 11 | zhansan11 | ["cs","book"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
+-------------+---------------+------------------------------+-----------------------------------------+--+

6.分区表
create table student2(
id int,
name string,
likes array,
address map
)
partitioned by (age int)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

7.导入分区数据指定分区
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=10);
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=20);

8.查看表分区信息
0: jdbc:hive2://192.168.153.139:10000/default> show partitions student2;
+------------+--+
| partition |
+------------+--+
| age=10 |
| age=20 |
+------------+--+

9.双分区
create table student3(
id int,
name string,
likes array,
address map
)
partitioned by (age int, gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='boy');
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='girl');

0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=20,gender="man");

0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=10,gender="girl");

10.查询分区

------------------------------------
11.动态分区
create table studentp(
id int,
name string,
age int,
gender string,
likes array,
address map
)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

12.分区表
create table studentp1(
id int,
name string,
likes array,
address map
)
partitioned by (age int, gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

13.开启动态分区
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition=true;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition.mode=nonstrict;

0: jdbc:hive2://192.168.153.139:10000/default> insert into table studentp1 partition(age,gender)
. . . . . . . . . . . . . . . . . . . . . . .> select id,name,likes,address,age,gender from studentp;

------------------------------------------------------------
employee.txt

create table if not exists employee(
name string,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee.txt' into table employee;

0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Male';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where skills_score["Python"]=80;

14.创建外部表
create external table if not exists employee_external(
name string,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n'
location '/tmp/hivedata/employee';

hdfs dfs -put ./employee.txt /tmp/hivedata/employee

15.创建表， | ： array map struct 手动分区，动态分区分区表 datagrade安装内部表，外部表

0: jdbc:hive2://192.168.153.139:10000/default> create temporary table tmp_employee as select name,work_place from employee;

16.查询性别为男性，名字叫Will
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Female';

17.查询所有男性中名字叫Will男性与所有的女性的信息
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will'
. . . . . . . . . . . . . . . . . . . . . . .> union all
. . . . . . . . . . . . . . . . . . . . . . .> select * from employee where gender_age.gender='Female';

18.CTAS
0: jdbc:hive2://192.168.153.139:10000/default> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;

使用ctas查询，将结果保存到临时表ctas_employee表中
0: jdbc:hive2://192.168.153.139:10000/default> create temporary table ctas_employee as
. . . . . . . . . . . . . . . . . . . . . . .> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;

19.分桶表
create table employee_id(
name string,
employee_id int,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee_id.txt' overwrite into table employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id;

0: jdbc:hive2://192.168.153.139:10000/default> create table employee_id_buckets(
. . . . . . . . . . . . . . . . . . . . . . .> name string,
. . . . . . . . . . . . . . . . . . . . . . .> employee_id int,
. . . . . . . . . . . . . . . . . . . . . . .> work_place array,
. . . . . . . . . . . . . . . . . . . . . . .> gender_age struct,
. . . . . . . . . . . . . . . . . . . . . . .> skills_score map,
. . . . . . . . . . . . . . . . . . . . . . .> depart_title map>
. . . . . . . . . . . . . . . . . . . . . . .> )
. . . . . . . . . . . . . . . . . . . . . . .> clustered by(employee_id) into 2 buckets
. . . . . . . . . . . . . . . . . . . . . . .> row format delimited fields terminated by '|'
. . . . . . . . . . . . . . . . . . . . . . .> collection items terminated by ','
. . . . . . . . . . . . . . . . . . . . . . .> map keys terminated by ':'
. . . . . . . . . . . . . . . . . . . . . . .> lines terminated by 'n';

20.分奇偶
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;

21.将employee_id数据写入到分桶表employee_id_buckets
0: jdbc:hive2://192.168.153.139:10000/default> insert overwrite table employee_id_buckets select * from employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(1M)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(100 percent)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(10 rows)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(bucket 1 out of 32 on employee_id)s;

22.侧视图
将employee表中的work_place(array)列炸裂开
0: jdbc:hive2://192.168.153.139:10000> select explode(work_place) from employee;
0: jdbc:hive2://192.168.153.139:10000> select name, wp from employee lateral view explode(work_place) work_place as wp;
将employee表中的skills_score(map)列炸裂开
0: jdbc:hive2://192.168.153.139:10000> select explode(skills_score) from employee;
0: jdbc:hive2://192.168.153.139:10000> select name,skill,score from employee lateral view explode(skills_score) sk_sc as skill,score;

0: jdbc:hive2://192.168.153.139:10000> select name,wp,skill,score from employee
0: jdbc:hive2://192.168.153.139:10000> lateral view explode(work_place) wo_pl as wp
0: jdbc:hive2://192.168.153.139:10000> lateral view explode(skills_score) sk_sc as skill, score;

23.WordCount案例 explode始用
0: jdbc:hive2://192.168.153.139:10000> create table wordcount(line string);

[root@gree139 tmp]# vi ./wc.txt

hello world
hello java
hello hadoop
hello hive
are you ok?
no i'm very bad

0: jdbc:hive2://192.168.153.139:10000> load data local inpath '/opt/tmp/wc.txt' overwrite into table wordcount;

0: jdbc:hive2://192.168.153.139:10000> select * from wordcount;

0: jdbc:hive2://192.168.153.139:10000> select split(line,"\s") from wordcount;

0: jdbc:hive2://192.168.153.139:10000> select w.word,count(1) num from
. . . . . . . . . . . . . . . . . . .> (select explode(split(line,"\s")) word from wordcount) w
. . . . . . . . . . . . . . . . . . .> group by w.word order by num desc;

0: jdbc:hive2://192.168.153.139:10000> with
. . . . . . . . . . . . . . . . . . .> t1 as (select explode(split(line,"\s")) word from wordcount)
. . . . . . . . . . . . . . . . . . .> select t1.word,count(1) num from t1 group by word order by num desc;

24.CASE WHEN THEN ELSE END
将年龄分段 int 转成字符串
0: jdbc:hive2://192.168.153.139:10000> select name,age,case when age>=20 then 'old' when age<=18 then 'young' else 'middle' end as tag from studentp;
+------------+------+---------+--+
| name | age | tag |
+------------+------+---------+--+
| zhansan1 | 20 | old |
| zhansan2 | 20 | old |
| zhansan3 | 18 | young |
| zhansan4 | 18 | young |
| zhansan5 | 30 | old |
| zhansan6 | 19 | middle |
| zhansan7 | 19 | middle |
| zhansan8 | 19 | middle |
| zhansan9 | 12 | young |
| zhansan10 | 12 | young |
| zhansan11 | 12 | young |
+------------+------+---------+--+

25.将性别转数字 boy->1 girl->0
0: jdbc:hive2://192.168.153.139:10000> select name,gender, case when gender='boy' then 1 when gender='girl' then 0 else -1 end as gendertag from studentp;
+------------+---------+------------+--+
| name | gender | gendertag |
+------------+---------+------------+--+
| zhansan1 | boy | 1 |
| zhansan2 | girl | 0 |
| zhansan3 | boy | 1 |
| zhansan4 | girl | 0 |
| zhansan5 | boy | 1 |
| zhansan6 | girl | 0 |
| zhansan7 | girl | 0 |
| zhansan8 | boy | 1 |
| zhansan9 | girl | 0 |
| zhansan10 | girl | 0 |
| zhansan11 | girl | 0 |
+------------+---------+------------+--+

select name,gender,
case when gender='boy' then 1 else 0 end as boytag,
case when gender='girl' then 1 else 0 end as girltag,
case when gender is null then 1 else 0 end as taijiantag
from studentp;

0: jdbc:hive2://192.168.153.139:10000> create temporary table tmp_gender_tag as select name,gender,
. . . . . . . . . . . . . . . . . . .> case when gender='boy' then 1 else 0 end as boytag,
. . . . . . . . . . . . . . . . . . .> case when gender='girl' then 1 else 0 end as girltag,
. . . . . . . . . . . . . . . . . . .> case when gender is null then 1 else 0 end as taijiantag
. . . . . . . . . . . . . . . . . . .> from studentp;
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
| tmp_gender_tag.name | tmp_gender_tag.gender | tmp_gender_tag.boytag | tmp_gender_tag.girltag | tmp_gender_tag.taijiantag |
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
| zhansan1 | boy | 1 | 0 | 0 |
| zhansan2 | girl | 0 | 1 | 0 |
| zhansan3 | boy | 1 | 0 | 0 |
| zhansan4 | girl | 0 | 1 | 0 |
| zhansan5 | boy | 1 | 0 | 0 |
| zhansan6 | girl | 0 | 1 | 0 |
| zhansan7 | girl | 0 | 1 | 0 |
| zhansan8 | boy | 1 | 0 | 0 |
| zhansan9 | girl | 0 | 1 | 0 |
| zhansan10 | girl | 0 | 1 | 0 |
| zhansan11 | girl | 0 | 1 | 0 |
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+

0: jdbc:hive2://192.168.153.139:10000> select sum(boytag) as boysum,sum(girltag) as girlnum,sum(taijiantag) as tjnum from tmp_gender_tag;
+---------+----------+--------+--+
| boysum | girlnum | tjnum |
+---------+----------+--------+--+
| 4 | 7 | 0 |
+---------+----------+--------+--+

26.行转列 concat_ws collect_set collect_list
select gender,concat_ws(",",collect_set(name)) as stu_col from studentp group by gender; 去重
select gender,concat_ws(",",collect_list(name)) as stu_col from studentp group by gender; 不去重

27.导出表数据到指定文件夹，必须要求导出文件夹为空文件夹，否则会报错
0: jdbc:hive2://192.168.153.139:10000> export table studentp to '/tmp/out1';

28.导出表数据到本地文件夹，要确保目标文件夹存在，否导出失败
[root@gree139 tmp]# hive -e "select * from greedemo.studentp1" >> /opt/tmp/out/student.txt

order by 全局排序
sort by 分区内部有序，整体无序
distribute by mapTask分区一般与sort by合作使用
cluster by 如果分区和排序字段相同并且升序，等同与distrubute by age sort by age asc
0: jdbc:hive2://192.168.153.139:10000> select * from studentp order by age;

0: jdbc:hive2://192.168.153.139:10000> select * from studentp sort by age;

28.分奇偶
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;

0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp as select * from studentp distribute by id sort by age ;
0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp2 as select * from studentp distribute by age sort by age ;
0: jdbc:hive2://192.168.153.139:10000> insert overwrite table dissorstudenttp2 select * from studentp distribute by age sort by age desc ;

0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp3 as select * from studentp cluster by age ;

JAVA大数据的六十一天——hive函数语句大全

大数据系统相关栏目本月热门文章