栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

JAVA大数据的六十一天——hive函数语句大全

JAVA大数据的六十一天——hive函数语句大全

一、创建表

create table student(
id int,
name string,
likes array,
address map
)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

2.加载本地文件  local inpath
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student;
3.加载hdfs文件系统 path
0: jdbc:hive2://192.168.153.139:10000/default> load data inpath '/tmp/student.txt' into table student;
4.覆盖 overwrite
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' overwrite into table student;

5.查询表数据

0: jdbc:hive2://192.168.153.139:10000/default> select * from student;
+-------------+---------------+------------------------------+-----------------------------------------+--+
| student.id  | student.name  |        student.likes         |             student.address             |
+-------------+---------------+------------------------------+-----------------------------------------+--+
| 1           | zhansan1      | ["cs","lol","book","movie"]  | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 2           | zhansan2      | ["cs","lol"]                 | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 3           | zhansan3      | ["book","movie"]             | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 4           | zhansan4      | ["book","movie"]             | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 5           | zhansan5      | ["cs","lol","book","movie"]  | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 6           | zhansan6      | ["cs","lol","movie"]         | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 7           | zhansan7      | ["lol","book","movie"]       | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 8           | zhansan8      | ["cs","book","movie"]        | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 9           | zhansan9      | ["cs","movie"]               | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 10          | zhansan10     | ["cs","lol","book","movie"]  | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 11          | zhansan11     | ["cs","book"]                | {"beijing":"bdqn","nanjing":"zhongbo"}  |
+-------------+---------------+------------------------------+-----------------------------------------+--+


6.分区表
create table student2(
id int,
name string,
likes array,
address map
)
partitioned by (age int)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

7.导入分区数据 指定分区
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=10);
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=20);

8.查看表分区信息
0: jdbc:hive2://192.168.153.139:10000/default> show partitions student2;
+------------+--+
| partition  |
+------------+--+
| age=10     |
| age=20     |
+------------+--+

9.双分区
create table student3(
id int,
name string,
likes array,
address map
)
partitioned by (age int, gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='boy');
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='girl');

0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=20,gender="man");

0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=10,gender="girl");

10.查询分区

0: jdbc:hive2://192.168.153.139:10000/default> show partitions student3;
+---------------------+--+
|      partition      |
+---------------------+--+
| age=10/gender=girl  |
| age=20/gender=boy   |
| age=20/gender=girl  |
| age=20/gender=man   |
+---------------------+--+

------------------------------------
11.动态分区
create table studentp(
id int,
name string,
age int,
gender string,
likes array,
address map
)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

12.分区表
create table studentp1(
id int,
name string,
likes array,
address map
)
partitioned by (age int, gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';

13.开启动态分区
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition=true;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition.mode=nonstrict;

0: jdbc:hive2://192.168.153.139:10000/default> insert into table studentp1 partition(age,gender) 
. . . . . . . . . . . . . . . . . . . . . . .> select id,name,likes,address,age,gender from studentp;

------------------------------------------------------------
employee.txt

Michael|Montreal,Toronto|Male,30|DB:80|Product:DeveloperLead
Will|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Shelley|New York|Female,27|Python:80|Test:Lead,COE:Architect
Lucy|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead

create table if not exists employee(
name string,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee.txt' into table employee;

0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Male';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where skills_score["Python"]=80;

14.创建外部表
create external table if not exists employee_external(
name string,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n'
location '/tmp/hivedata/employee';

hdfs dfs -put ./employee.txt /tmp/hivedata/employee


15.创建表  , | : array  map  struct  手动分区,动态分区 分区表  datagrade安装  内部表,外部表

0: jdbc:hive2://192.168.153.139:10000/default> create temporary table tmp_employee as select name,work_place from employee;

16.查询性别为男性,名字叫Will
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Female';

17.查询所有男性中名字叫Will男性与所有的女性的信息
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will' 
. . . . . . . . . . . . . . . . . . . . . . .> union all
. . . . . . . . . . . . . . . . . . . . . . .> select * from employee where gender_age.gender='Female';

18.CTAS
0: jdbc:hive2://192.168.153.139:10000/default> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;


使用ctas查询 ,将结果保存到临时表ctas_employee表中
0: jdbc:hive2://192.168.153.139:10000/default> create temporary table ctas_employee as
. . . . . . . . . . . . . . . . . . . . . . .> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;


19.分桶表
create table employee_id(
name string,
employee_id int,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee_id.txt' overwrite into table employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id;

0: jdbc:hive2://192.168.153.139:10000/default> create table employee_id_buckets(
. . . . . . . . . . . . . . . . . . . . . . .> name string,
. . . . . . . . . . . . . . . . . . . . . . .> employee_id int,
. . . . . . . . . . . . . . . . . . . . . . .> work_place array,
. . . . . . . . . . . . . . . . . . . . . . .> gender_age struct,
. . . . . . . . . . . . . . . . . . . . . . .> skills_score map,
. . . . . . . . . . . . . . . . . . . . . . .> depart_title map>
. . . . . . . . . . . . . . . . . . . . . . .> )
. . . . . . . . . . . . . . . . . . . . . . .> clustered by(employee_id) into 2 buckets
. . . . . . . . . . . . . . . . . . . . . . .> row format delimited fields terminated by '|'
. . . . . . . . . . . . . . . . . . . . . . .> collection items terminated by ','
. . . . . . . . . . . . . . . . . . . . . . .> map keys terminated by ':'
. . . . . . . . . . . . . . . . . . . . . . .> lines terminated by 'n';

20.分奇偶
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;

21.将employee_id数据写入到分桶表employee_id_buckets
0: jdbc:hive2://192.168.153.139:10000/default> insert overwrite table employee_id_buckets select * from employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(1M)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(100 percent)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(10 rows)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(bucket 1 out of 32 on employee_id)s;


22.侧视图
将employee表中的work_place(array)列 炸裂开
0: jdbc:hive2://192.168.153.139:10000> select explode(work_place) from employee;
0: jdbc:hive2://192.168.153.139:10000> select name, wp from employee lateral view explode(work_place) work_place as wp;
将employee表中的skills_score(map)列 炸裂开
0: jdbc:hive2://192.168.153.139:10000> select explode(skills_score) from employee;
0: jdbc:hive2://192.168.153.139:10000> select name,skill,score from employee lateral view explode(skills_score) sk_sc as skill,score;

0: jdbc:hive2://192.168.153.139:10000> select name,wp,skill,score from employee 
0: jdbc:hive2://192.168.153.139:10000> lateral view explode(work_place) wo_pl as wp 
0: jdbc:hive2://192.168.153.139:10000> lateral view explode(skills_score) sk_sc as skill, score;

23.WordCount案例 explode始用
0: jdbc:hive2://192.168.153.139:10000> create table wordcount(line string);

[root@gree139 tmp]# vi ./wc.txt 

hello world
hello java
hello hadoop
hello hive
are you ok?
no i'm very bad

0: jdbc:hive2://192.168.153.139:10000> load data local inpath '/opt/tmp/wc.txt' overwrite into table wordcount;

0: jdbc:hive2://192.168.153.139:10000> select * from wordcount;

0: jdbc:hive2://192.168.153.139:10000> select split(line,"\s") from wordcount;

0: jdbc:hive2://192.168.153.139:10000> select explode(split(line,"\s")) from wordcount;
+---------+--+
|   col   |
+---------+--+
| hello   |
| world   |
| hello   |
| java    |


0: jdbc:hive2://192.168.153.139:10000> select w.word,count(1) num from 
. . . . . . . . . . . . . . . . . . .> (select explode(split(line,"\s")) word from wordcount) w 
. . . . . . . . . . . . . . . . . . .> group by w.word order by num desc;

0: jdbc:hive2://192.168.153.139:10000> with 
. . . . . . . . . . . . . . . . . . .> t1 as (select explode(split(line,"\s")) word from wordcount)
. . . . . . . . . . . . . . . . . . .> select t1.word,count(1) num from t1 group by word order by num desc;


24.CASE WHEN THEN ELSE END
将年龄分段   int 转成 字符串
0: jdbc:hive2://192.168.153.139:10000> select name,age,case when age>=20 then 'old' when age<=18 then 'young' else 'middle' end as tag from studentp;
+------------+------+---------+--+
|    name    | age  |   tag   |
+------------+------+---------+--+
| zhansan1   | 20   | old     |
| zhansan2   | 20   | old     |
| zhansan3   | 18   | young   |
| zhansan4   | 18   | young   |
| zhansan5   | 30   | old     |
| zhansan6   | 19   | middle  |
| zhansan7   | 19   | middle  |
| zhansan8   | 19   | middle  |
| zhansan9   | 12   | young   |
| zhansan10  | 12   | young   |
| zhansan11  | 12   | young   |
+------------+------+---------+--+

25.将性别转数字 boy->1  girl->0
0: jdbc:hive2://192.168.153.139:10000> select name,gender, case when gender='boy' then 1 when gender='girl' then 0 else -1 end as gendertag from studentp;
+------------+---------+------------+--+
|    name    | gender  | gendertag  |
+------------+---------+------------+--+
| zhansan1   | boy     | 1          |
| zhansan2   | girl    | 0          |
| zhansan3   | boy     | 1          |
| zhansan4   | girl    | 0          |
| zhansan5   | boy     | 1          |
| zhansan6   | girl    | 0          |
| zhansan7   | girl    | 0          |
| zhansan8   | boy     | 1          |
| zhansan9   | girl    | 0          |
| zhansan10  | girl    | 0          |
| zhansan11  | girl    | 0          |
+------------+---------+------------+--+


select name,gender, 
case when gender='boy' then 1 else 0 end as boytag, 
case when gender='girl' then 1 else 0 end as girltag,
case when gender is null then 1 else 0 end as taijiantag
from studentp;

0: jdbc:hive2://192.168.153.139:10000> create temporary table tmp_gender_tag as select name,gender, 
. . . . . . . . . . . . . . . . . . .> case when gender='boy' then 1 else 0 end as boytag, 
. . . . . . . . . . . . . . . . . . .> case when gender='girl' then 1 else 0 end as girltag,
. . . . . . . . . . . . . . . . . . .> case when gender is null then 1 else 0 end as taijiantag
. . . . . . . . . . . . . . . . . . .> from studentp;
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
| tmp_gender_tag.name  | tmp_gender_tag.gender  | tmp_gender_tag.boytag  | tmp_gender_tag.girltag  | tmp_gender_tag.taijiantag  |
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
| zhansan1             | boy                    | 1                      | 0                       | 0                          |
| zhansan2             | girl                   | 0                      | 1                       | 0                          |
| zhansan3             | boy                    | 1                      | 0                       | 0                          |
| zhansan4             | girl                   | 0                      | 1                       | 0                          |
| zhansan5             | boy                    | 1                      | 0                       | 0                          |
| zhansan6             | girl                   | 0                      | 1                       | 0                          |
| zhansan7             | girl                   | 0                      | 1                       | 0                          |
| zhansan8             | boy                    | 1                      | 0                       | 0                          |
| zhansan9             | girl                   | 0                      | 1                       | 0                          |
| zhansan10            | girl                   | 0                      | 1                       | 0                          |
| zhansan11            | girl                   | 0                      | 1                       | 0                          |
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+

0: jdbc:hive2://192.168.153.139:10000> select sum(boytag) as boysum,sum(girltag) as girlnum,sum(taijiantag) as tjnum  from tmp_gender_tag;
+---------+----------+--------+--+
| boysum  | girlnum  | tjnum  |
+---------+----------+--------+--+
| 4       | 7        | 0      |
+---------+----------+--------+--+

26.行转列  concat_ws  collect_set collect_list
select gender,concat_ws(",",collect_set(name)) as stu_col from studentp group by gender;  去重
select gender,concat_ws(",",collect_list(name)) as stu_col from studentp group by gender; 不去重


27.导出表数据到指定文件夹,必须要求导出文件夹为空文件夹,否则会报错
0: jdbc:hive2://192.168.153.139:10000> export table studentp to '/tmp/out1';

28.导出表数据到本地文件夹,要确保目标文件夹存在,否导出失败
[root@gree139 tmp]# hive -e "select * from greedemo.studentp1" >> /opt/tmp/out/student.txt


order by    全局排序
sort by     分区内部有序,整体无序
distribute by  mapTask分区  一般与sort by合作使用
cluster by   如果分区和排序字段相同并且升序,等同与distrubute by age sort by age asc
0: jdbc:hive2://192.168.153.139:10000> select * from studentp order by age;

0: jdbc:hive2://192.168.153.139:10000> select * from studentp sort by  age;

28.分奇偶
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;

0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp as select * from studentp distribute by id sort by age ;
0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp2 as select * from studentp distribute by age sort by age ;
0: jdbc:hive2://192.168.153.139:10000> insert overwrite table dissorstudenttp2 select * from studentp distribute by age sort by age desc ;

0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp3 as select * from studentp cluster by age ;

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/460422.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号