一、创建表
create table student(
id int,
name string,
likes array
address map
)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';
2.加载本地文件 local inpath
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student;
3.加载hdfs文件系统 path
0: jdbc:hive2://192.168.153.139:10000/default> load data inpath '/tmp/student.txt' into table student;
4.覆盖 overwrite
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' overwrite into table student;
5.查询表数据
0: jdbc:hive2://192.168.153.139:10000/default> select * from student;
+-------------+---------------+------------------------------+-----------------------------------------+--+
| student.id | student.name | student.likes | student.address |
+-------------+---------------+------------------------------+-----------------------------------------+--+
| 1 | zhansan1 | ["cs","lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 2 | zhansan2 | ["cs","lol"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 3 | zhansan3 | ["book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 4 | zhansan4 | ["book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 5 | zhansan5 | ["cs","lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 6 | zhansan6 | ["cs","lol","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 7 | zhansan7 | ["lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 8 | zhansan8 | ["cs","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 9 | zhansan9 | ["cs","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 10 | zhansan10 | ["cs","lol","book","movie"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
| 11 | zhansan11 | ["cs","book"] | {"beijing":"bdqn","nanjing":"zhongbo"} |
+-------------+---------------+------------------------------+-----------------------------------------+--+
6.分区表
create table student2(
id int,
name string,
likes array
address map
)
partitioned by (age int)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';
7.导入分区数据 指定分区
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=10);
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=20);
8.查看表分区信息
0: jdbc:hive2://192.168.153.139:10000/default> show partitions student2;
+------------+--+
| partition |
+------------+--+
| age=10 |
| age=20 |
+------------+--+
9.双分区
create table student3(
id int,
name string,
likes array
address map
)
partitioned by (age int, gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='boy');
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='girl');
0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=20,gender="man");
0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=10,gender="girl");
10.查询分区
0: jdbc:hive2://192.168.153.139:10000/default> show partitions student3;
+---------------------+--+
| partition |
+---------------------+--+
| age=10/gender=girl |
| age=20/gender=boy |
| age=20/gender=girl |
| age=20/gender=man |
+---------------------+--+
------------------------------------
11.动态分区
create table studentp(
id int,
name string,
age int,
gender string,
likes array
address map
)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';
12.分区表
create table studentp1(
id int,
name string,
likes array
address map
)
partitioned by (age int, gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';
13.开启动态分区
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition=true;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition.mode=nonstrict;
0: jdbc:hive2://192.168.153.139:10000/default> insert into table studentp1 partition(age,gender)
. . . . . . . . . . . . . . . . . . . . . . .> select id,name,likes,address,age,gender from studentp;
------------------------------------------------------------
employee.txt
Michael|Montreal,Toronto|Male,30|DB:80|Product:DeveloperLead
Will|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Shelley|New York|Female,27|Python:80|Test:Lead,COE:Architect
Lucy|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
create table if not exists employee(
name string,
work_place array
gender_age struct
skills_score map
depart_title map
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee.txt' into table employee;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Male';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where skills_score["Python"]=80;
14.创建外部表
create external table if not exists employee_external(
name string,
work_place array
gender_age struct
skills_score map
depart_title map
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n'
location '/tmp/hivedata/employee';
hdfs dfs -put ./employee.txt /tmp/hivedata/employee
15.创建表 , | : array map struct 手动分区,动态分区 分区表 datagrade安装 内部表,外部表
0: jdbc:hive2://192.168.153.139:10000/default> create temporary table tmp_employee as select name,work_place from employee;
16.查询性别为男性,名字叫Will
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Female';
17.查询所有男性中名字叫Will男性与所有的女性的信息
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will'
. . . . . . . . . . . . . . . . . . . . . . .> union all
. . . . . . . . . . . . . . . . . . . . . . .> select * from employee where gender_age.gender='Female';
18.CTAS
0: jdbc:hive2://192.168.153.139:10000/default> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;
使用ctas查询 ,将结果保存到临时表ctas_employee表中
0: jdbc:hive2://192.168.153.139:10000/default> create temporary table ctas_employee as
. . . . . . . . . . . . . . . . . . . . . . .> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;
19.分桶表
create table employee_id(
name string,
employee_id int,
work_place array
gender_age struct
skills_score map
depart_title map
)
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee_id.txt' overwrite into table employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> create table employee_id_buckets(
. . . . . . . . . . . . . . . . . . . . . . .> name string,
. . . . . . . . . . . . . . . . . . . . . . .> employee_id int,
. . . . . . . . . . . . . . . . . . . . . . .> work_place array
. . . . . . . . . . . . . . . . . . . . . . .> gender_age struct
. . . . . . . . . . . . . . . . . . . . . . .> skills_score map
. . . . . . . . . . . . . . . . . . . . . . .> depart_title map
. . . . . . . . . . . . . . . . . . . . . . .> )
. . . . . . . . . . . . . . . . . . . . . . .> clustered by(employee_id) into 2 buckets
. . . . . . . . . . . . . . . . . . . . . . .> row format delimited fields terminated by '|'
. . . . . . . . . . . . . . . . . . . . . . .> collection items terminated by ','
. . . . . . . . . . . . . . . . . . . . . . .> map keys terminated by ':'
. . . . . . . . . . . . . . . . . . . . . . .> lines terminated by 'n';
20.分奇偶
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;
21.将employee_id数据写入到分桶表employee_id_buckets
0: jdbc:hive2://192.168.153.139:10000/default> insert overwrite table employee_id_buckets select * from employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(1M)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(100 percent)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(10 rows)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(bucket 1 out of 32 on employee_id)s;
22.侧视图
将employee表中的work_place(array)列 炸裂开
0: jdbc:hive2://192.168.153.139:10000> select explode(work_place) from employee;
0: jdbc:hive2://192.168.153.139:10000> select name, wp from employee lateral view explode(work_place) work_place as wp;
将employee表中的skills_score(map)列 炸裂开
0: jdbc:hive2://192.168.153.139:10000> select explode(skills_score) from employee;
0: jdbc:hive2://192.168.153.139:10000> select name,skill,score from employee lateral view explode(skills_score) sk_sc as skill,score;
0: jdbc:hive2://192.168.153.139:10000> select name,wp,skill,score from employee
0: jdbc:hive2://192.168.153.139:10000> lateral view explode(work_place) wo_pl as wp
0: jdbc:hive2://192.168.153.139:10000> lateral view explode(skills_score) sk_sc as skill, score;
23.WordCount案例 explode始用
0: jdbc:hive2://192.168.153.139:10000> create table wordcount(line string);
[root@gree139 tmp]# vi ./wc.txt
hello world
hello java
hello hadoop
hello hive
are you ok?
no i'm very bad
0: jdbc:hive2://192.168.153.139:10000> load data local inpath '/opt/tmp/wc.txt' overwrite into table wordcount;
0: jdbc:hive2://192.168.153.139:10000> select * from wordcount;
0: jdbc:hive2://192.168.153.139:10000> select split(line,"\s") from wordcount;
0: jdbc:hive2://192.168.153.139:10000> select explode(split(line,"\s")) from wordcount;
+---------+--+
| col |
+---------+--+
| hello |
| world |
| hello |
| java |
0: jdbc:hive2://192.168.153.139:10000> select w.word,count(1) num from
. . . . . . . . . . . . . . . . . . .> (select explode(split(line,"\s")) word from wordcount) w
. . . . . . . . . . . . . . . . . . .> group by w.word order by num desc;
0: jdbc:hive2://192.168.153.139:10000> with
. . . . . . . . . . . . . . . . . . .> t1 as (select explode(split(line,"\s")) word from wordcount)
. . . . . . . . . . . . . . . . . . .> select t1.word,count(1) num from t1 group by word order by num desc;
24.CASE WHEN THEN ELSE END
将年龄分段 int 转成 字符串
0: jdbc:hive2://192.168.153.139:10000> select name,age,case when age>=20 then 'old' when age<=18 then 'young' else 'middle' end as tag from studentp;
+------------+------+---------+--+
| name | age | tag |
+------------+------+---------+--+
| zhansan1 | 20 | old |
| zhansan2 | 20 | old |
| zhansan3 | 18 | young |
| zhansan4 | 18 | young |
| zhansan5 | 30 | old |
| zhansan6 | 19 | middle |
| zhansan7 | 19 | middle |
| zhansan8 | 19 | middle |
| zhansan9 | 12 | young |
| zhansan10 | 12 | young |
| zhansan11 | 12 | young |
+------------+------+---------+--+
25.将性别转数字 boy->1 girl->0
0: jdbc:hive2://192.168.153.139:10000> select name,gender, case when gender='boy' then 1 when gender='girl' then 0 else -1 end as gendertag from studentp;
+------------+---------+------------+--+
| name | gender | gendertag |
+------------+---------+------------+--+
| zhansan1 | boy | 1 |
| zhansan2 | girl | 0 |
| zhansan3 | boy | 1 |
| zhansan4 | girl | 0 |
| zhansan5 | boy | 1 |
| zhansan6 | girl | 0 |
| zhansan7 | girl | 0 |
| zhansan8 | boy | 1 |
| zhansan9 | girl | 0 |
| zhansan10 | girl | 0 |
| zhansan11 | girl | 0 |
+------------+---------+------------+--+
select name,gender,
case when gender='boy' then 1 else 0 end as boytag,
case when gender='girl' then 1 else 0 end as girltag,
case when gender is null then 1 else 0 end as taijiantag
from studentp;
0: jdbc:hive2://192.168.153.139:10000> create temporary table tmp_gender_tag as select name,gender,
. . . . . . . . . . . . . . . . . . .> case when gender='boy' then 1 else 0 end as boytag,
. . . . . . . . . . . . . . . . . . .> case when gender='girl' then 1 else 0 end as girltag,
. . . . . . . . . . . . . . . . . . .> case when gender is null then 1 else 0 end as taijiantag
. . . . . . . . . . . . . . . . . . .> from studentp;
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
| tmp_gender_tag.name | tmp_gender_tag.gender | tmp_gender_tag.boytag | tmp_gender_tag.girltag | tmp_gender_tag.taijiantag |
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
| zhansan1 | boy | 1 | 0 | 0 |
| zhansan2 | girl | 0 | 1 | 0 |
| zhansan3 | boy | 1 | 0 | 0 |
| zhansan4 | girl | 0 | 1 | 0 |
| zhansan5 | boy | 1 | 0 | 0 |
| zhansan6 | girl | 0 | 1 | 0 |
| zhansan7 | girl | 0 | 1 | 0 |
| zhansan8 | boy | 1 | 0 | 0 |
| zhansan9 | girl | 0 | 1 | 0 |
| zhansan10 | girl | 0 | 1 | 0 |
| zhansan11 | girl | 0 | 1 | 0 |
+----------------------+------------------------+------------------------+-------------------------+----------------------------+--+
0: jdbc:hive2://192.168.153.139:10000> select sum(boytag) as boysum,sum(girltag) as girlnum,sum(taijiantag) as tjnum from tmp_gender_tag;
+---------+----------+--------+--+
| boysum | girlnum | tjnum |
+---------+----------+--------+--+
| 4 | 7 | 0 |
+---------+----------+--------+--+
26.行转列 concat_ws collect_set collect_list
select gender,concat_ws(",",collect_set(name)) as stu_col from studentp group by gender; 去重
select gender,concat_ws(",",collect_list(name)) as stu_col from studentp group by gender; 不去重
27.导出表数据到指定文件夹,必须要求导出文件夹为空文件夹,否则会报错
0: jdbc:hive2://192.168.153.139:10000> export table studentp to '/tmp/out1';
28.导出表数据到本地文件夹,要确保目标文件夹存在,否导出失败
[root@gree139 tmp]# hive -e "select * from greedemo.studentp1" >> /opt/tmp/out/student.txt
order by 全局排序
sort by 分区内部有序,整体无序
distribute by mapTask分区 一般与sort by合作使用
cluster by 如果分区和排序字段相同并且升序,等同与distrubute by age sort by age asc
0: jdbc:hive2://192.168.153.139:10000> select * from studentp order by age;
0: jdbc:hive2://192.168.153.139:10000> select * from studentp sort by age;
28.分奇偶
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;
0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp as select * from studentp distribute by id sort by age ;
0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp2 as select * from studentp distribute by age sort by age ;
0: jdbc:hive2://192.168.153.139:10000> insert overwrite table dissorstudenttp2 select * from studentp distribute by age sort by age desc ;
0: jdbc:hive2://192.168.153.139:10000> create table dissorstudenttp3 as select * from studentp cluster by age ;



