栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

hive学习笔记——常用命令

hive学习笔记——常用命令

show databases like '';数据库
show functions;方法
show tables;表
show create table demo;表结构
show partitions student2;表分区

desc database extended hivedemo;数据库
desc demo;表结果
desc formatted demo;表

use hivedemo;使用数据库

drop table demotwo;删除表
drop database hivedemo cascade;彻底删除数据库

truncate table demo;删除表数据

insert into demo values(1,"zhangsan");插入数据
insert overwrite table demo values(3,"wangwu");插入数据并覆盖原数据
insert into table studentp1 partition(age,gender) select id,name,likes,address,age,gender from studentp;根据指定分区插入数据
insert overwrite table employee_id_buckets select * from employee_id;

select * from demo;查看表所有数据
select current_database();查看当前使用的数据库
select * from employee where gender_age.age=27;数据类型struct
select * from employee where skill_score["DB"]=80;数据类型map
select * from employee where work_place[1]="Toronto";数据类型array
select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will';子查询

select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will'
union all
select * from employee where gender_age.gender='Female';

select * from employee_id_buckets tablesample(bucket 1 out of 4 on employee_id)s;
select * from employee_id_buckets tablesample(10 rows)s;
select * from employee_id_buckets tablesample(1M)s;
select * from employee_id_buckets tablesample(100 percent)s;

with
t1 as (select * from employee where gender_age.gender='Male'),
t2 as (select * from t1 where t1.name='Will'),
t3 as (select * from employee where gender_age.gender='Female')
select * from t2 union all select * from t3;

alter database demo2 set dbproperties('createtime'='20202020');
alter table demo2 set tbproperties('EXTERNAL'='TRUE');修改表为外部表
alter table demo2 set tbproperties('EXTERNAL'='FALSE');修改表为内部表
alter table demo2 rename to demotwo;表改名
alter table demotwo change name username string;更新列
alter table demotwo add columns(age int comment'user age', email string);加列
alter table demotwo replace columns(id int,username string);替换列
alter table student3 add partition(age=20,gender='male') partition(age=30,gender='male');加分区
alter table student3 drop partition(age=20,gender='male'),partition(age=30,gender='male');删除分区

create database if not exists hivedemo location '';创建数据库
create table demo(id int, name string);创建表

create table if not exists employee(
id int,
name string,
work_place array,
gender_age struct,
skill_score map,
depart_title map>
)
comment 'this is an internal table'
partitioned by (age int,gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by 'n';创建内部表分区

create external table if not exists employee_external(
name string,
work_place array,
gender_age struct,
skill_score map,
depart_title map>
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n'
location '/tmp/hivedata/employee';创建外部表

create table employee_id_buckets(
name string,
employee_id int,
work_place array,
gender_age struct,
skills_score map,
depart_title map>
)
clustered by(employee_id) into 2 buckets
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by 'n';创建内部表分桶

create temporary table tmp_employee as select name,work_place from employee;使用查询,将结果存入临时表中

create temporary table ctas_employee as
with
t1 as (select * from employee where gender_age.gender='Male'),
t2 as (select * from t1 where t1.name='Will'),
t3 as (select * from employee where gender_age.gender='Female')
select * from t2 union all select * from t3;使用ctas查询,将结果保存到临时表中

load data local inpath '/opt/tmp/student.txt' into table student;插入本地数据
load data inpath '/tmp/student.txt' into table student;插入hdfs数据
load data local inpath '/opt/tmp/student.txt' overwrite into table student;插入本地数据并覆盖
load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=20,gender='man');插入数据并分区

set hive.exec.dynamic.partition=true;分区用
set hive.exec.dynamic.partition.mode=nonstrict;
set map.reduce.tasks=2;分桶用
set hive.enforce.bucketing=true;

侧视图
将employee表中的work_place(array)炸裂
select explode(work_place) from employee;
select name,wp from employee lateral view explode(work_place) work_place as wp;
将employee表中的skill_score(map)炸裂
select explode(skill_score) from employee;
select name,skill,score from employee lateral view explode(skill_score) sk_sc as skill,score;
select name,wp,skill,score from employee 
lateral view explode(work_place) wo_pl as wp 
lateral view explode(skill_score) sk_sc as skill,score;

WOrdCount案例,explode使用
select split(line,"\s") from wordcount;
select w.word,count(1) num 
from(select explode(split(line,"\s")) word from wordcount) w 
group by w.word order by num desc;
with
t1 as (select explode(split(line,"\s")) word from wordcount)
select t1.word,count(1) num from t1 group by word order by num desc;

case when then else end用法
将年龄分段
select name,age,case when age>30 then 'old' when age<=18 then 'young' else 'middle' end as tag from studentp;
将性别转数字
select name,gender,case when gender='boy' then 1 when gender='man' then 1 when gender='male' then 1 when gender='girl' then 0 when gender='woman' then 0 when gender='female' then 0 else -1 end as gendertag from studentp; 

create temporary table tmp_gender_tag as select name, gender,
case when gender='boy' then 1 else 0 end as boygender,
case when gender='girl' then 1 else 0 end as girlgender,
case when gender is null then 1 else 0 end as taijiangender
from studentp;

select sum(boygender) as boysum,sum(girlgender) as girlsum,sum(taijiangender) as taijiansum from tmp_gender_tag;

行转列
select gender,concat_ws(",",collect_set(name)) as stu_col from studentp group by gender;去重
select gender,concat_ws("&",collect_list(name)) as stu_col from studentp group by gender;不去重

导出表数据到指定文件夹,必须要求导出文件夹为空文件夹,否则会报错
export table studentp to '/tmp/out1';

导出表数据到本地文件夹,要确保目标文件夹存在,否则导出失败
hive -e "select * from hivedemo2.studentp1" >> /opt/tmp/out/student.txt

order by          全局排序
sort by           分区内部有序,整体无序
distribute by     mapTask分区   一般与sort by合作使用
cluster by        如果分区和排序字段相同并且升序,等同与distribute by age sort aby age asc

select * from studentp order by age;

select * from studentp sort by age;

create table dissortstudentp as select * from studentp distribute by id sort by age;

insert overwrite table dissortstudentp select * from studentp distribute by age sort by age desc;

create table dissorstudenttp3 as select * from studentp cluster by age;
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/487082.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号