hive 动态分区表
partitioned by(colName Type)
create table tableName( ....... ....... ) partitioned by (colName colType [comment '...'],...)
例如
create table dy_part1( sid int, name string, gender string, age int, academy string ) partitioned by (dt string) row format delimited fields terminated by ',' ;
动态分区表导入数据要建立一个临时表,不然没有分区的效果
创建临时表
create table temp_part1( sid int, name string, gender string, age int, academy string, dt string ) row format delimited fields terminated by ',' ; 加载数据 load data local inpath '路径' into table temp_part1;
动态导入数据
insert into dy_part1 partition(dt) select sid,name,gender,age,academy,dt from temp_part1;
hive 分桶表
drop table student; create table student( sno int, name string, sex string, age int, academy string ) clustered by (sno) sorted by (age desc) into 4 buckets row format delimited fields terminated by ',' ;
创建临时表
create table temp_student( sno int, name string, sex string, age int, academy string ) clustered by (sno) sorted by (age desc) into 4 buckets row format delimited fields terminated by ',' ; 导入数据 load data local inpath '路径' into table temp_student
从临时表中导入数据
insert into table student select * from temp_student distribute by(sno) sort by (age desc) ; 或者 insert overwrite table student select * from temp_student distribute by(sno) sort by (age desc) ;
hive既分区又分桶
创建分区分桶表
create table student( sid int, name string, gender string, age int, chinese int, math int, english int ) partitioned by (academy string,dt date) clustered by(sid) into 4 buckets row format delimited fields terminated by 't' 注意 clustered by 要写在partitioned 的后面
导入数据建立临时表
create table student_temp( sid int, name string, gender string, age int, academy string, dt date, chinese int, math int, english int )row format delimited fields terminated by 't' 导入数据 load data local inpath './data/students_test' into table students_test;
从临时表中导入数据
insert into student partition (academy,dt) select sid,name,gender,age,chinese,math,english,academy,dt from student_temp
注意 分区又分桶的时候导入数据不能把分桶的字段写上如:(cluster by (sid)),会自动分桶



