Hive——分区

内部表与外部表的相互转换

-- Convert the managed (internal) table student2 into an external table.
-- Hive only recognises the upper-case property key 'EXTERNAL'; a lower-case
-- key is stored as an ordinary property and does not change the table type.
alter table student2 set tblproperties('EXTERNAL'='TRUE');

-- Inspect the metadata of the table that was just altered to verify its type.
desc formatted student2;

创建一级分区表
-- Single-level partitioned table: comma-delimited text rows, partitioned by dates.
create table if not exists country (
    province string,
    city     string
)
partitioned by (dates string)
row format delimited
    fields terminated by ',';

-- Load a local file into the dates='20210826' partition.
load data local inpath '/data/country.txt'
into table country
partition (dates = '20210826');
增加分区
-- Register a new, empty partition for dates='20211021'.
alter table country
    add partition (dates = '20211021');
删除分区
-- Remove the dates='20211021' partition from the table.
alter table country
    drop partition (dates = '20211021');
修改分区的名字

查看分区的个数
-- List every partition currently registered for table country.
show partitions
    country;
查看分区表结构
-- Show the detailed table metadata, including the partition columns.
desc formatted
    country;

创建二级分区表
-- Two-level partitioned table: partitioned first by year, then by month.
create table if not exists country2 (
    province string,
    city     string
)
partitioned by (
    year  string,
    month string
)
row format delimited
    fields terminated by ',';

导入数据
-- Load the same local file into the year=2021/month=11 partition ...
load data local inpath '/data/country2.txt'
into table country2
partition (year = '2021', month = '11');

-- ... and into the year=2021/month=12 partition.
load data local inpath '/data/country2.txt'
into table country2
partition (year = '2021', month = '12');

显示分区
-- List every (year, month) partition registered for table country2.
show partitions
    country2;

修改分区（增加分区目录）

增加分区无指定目录
-- Add the partition without a location clause; "if not exists" makes the
-- statement a no-op when the partition is already registered.
alter table country
    add if not exists partition (dates = '20210912');


增加分区指定目录

第一种方式：分区目录正好在表目录下

1、先直接在 HDFS 上建立目录
# Create the partition directory under the table directory.
# -p creates missing parents and does not fail if the directory already exists.
hdfs dfs -mkdir -p /user/hive/warehouse/hive.db/country/dates=20211113
2、上传数据到此目录
# Upload the local data file into the partition directory created for dates=20211113.
hdfs dfs -put /data/country2.txt /user/hive/warehouse/hive.db/country/dates=20211113
3、要让元数据和数据目录关联起来
-- Register the partition in the metastore so that it is linked to the
-- dates=20211113 directory under the table directory.
-- The value is quoted because dates is a STRING partition column.
alter table country add if not exists partition(dates='20211113');

第二种方式

分区目录不在表目录之下（实际这种情况居多）
# Create a partition directory outside the table directory.
hdfs dfs -mkdir -p /hive/country/20211114
# Upload the data file; the absolute local path makes the command
# independent of the current working directory.
hdfs dfs -put /data/country2.txt /hive/country/20211114

-- Register the partition and point it at an explicit directory via location.
alter table country
    add if not exists partition (dates = '20211114')
    location '/hive/country/20211114';
**这里主要就是通过 location '' 为分区指定一个固定的目录

修改分区（重命名分区目录）

-- Rename a partition by changing its value from 20211109 to 20211111.
alter table country partition(dates='20211109') rename to partition(dates='20211111');
注意：这里修改的是分区的值，而不是分区字段的名称。

修改分区（删除分区）

-- Drop the partition; "if exists" avoids an error when it is not registered.
alter table country drop if exists partition(dates='20211114');

动态分区

在进行动态分区的时候需要开启动态分区，并且要关闭严格模式，具体配置如下，这里的动态分区是完全的动态分区

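    <property>
        <name>hive.exec.dynamic.partition</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.exec.dynamic.partition.mode</name>
        <value>nonstrict</value>
    </property>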

（在创建动态分区表的时候需要有原始数据表和分区表，原始表的数据需要导入到分区表中，所以原始表的字段要比分区表的字段多，可以按照原始表中多出的字段作为分区的依据）

创建原始表以及导入数据
-- Source (staging) table for the dynamic-partition example.
create table if not exists t_part(
uid int,
uname string,
sex string,
age int,
country string -- this column serves as the basis for partitioning
)
row format delimited
fields terminated by '\t';

-- Load the raw data file from the local filesystem into the staging table.
load data local inpath '/data/part.txt'
into table t_part;

创建分区表（动态分区表和静态分区表的建表方式是一样的）
-- Target table for the dynamic-partition example; it has the same data
-- columns as t_part minus country, which becomes the partition column.
create table if not exists t_part_dynamic(
uid int,
uname string,
sex string,
age int
)
partitioned by(country string) -- the partition column is referred to as a pseudo column
row format delimited
fields terminated by '\t';

在动态分区中导入数据只能用insert into，不能使用load
-- Fully dynamic partition insert: the dynamic partition column (country)
-- must be the last column of the select list, so the columns are listed
-- explicitly instead of relying on the column order behind "select *".
insert into t_part_dynamic partition(country)
select
    uid,
    uname,
    sex,
    age,
    country
from t_part;

混合分区：动态+静态（在公司中会经常用到）

首先需要在配置文件中重新开启严格模式（默认情况下就是严格模式；由于刚才测试完全动态分区时改成了非严格模式，因此需要把配置文件改回来）

//这里需要注意数据的格式，也就是要先对数据进行处理，特别是要解决中文问题
建混合表
(混合建表是在原有表的基础上进行再次分区，只是混合建表有一个固定的分区目录，也就是静态分区，相当于先在表目录下创建一个固定的目录，然后再在该目录下进行动态分区)
-- Target table for the mixed (static + dynamic) partition example:
-- year is supplied statically at insert time, month is resolved dynamically.
create table if not exists t_part_all(
uid int,
uname string,
sex string,
age int
)
partitioned by(year string,month string)
row format delimited
fields terminated by '\t';

建原始表
（创建原始表以导入数据）
-- Source table for the mixed-partition example.
-- A t_part table with a different schema (country instead of month) may
-- already exist from the dynamic-partition example; "create table if not
-- exists" would then silently keep the old schema, so drop it first.
drop table if exists t_part;

create table if not exists t_part(
uid int,
uname string,
sex string,
age int,
month string -- this column feeds the dynamic part of the mixed partition
)
row format delimited
fields terminated by '\t';
//这里的这个字段是进行动静态分区的
加载数据
-- Load the raw data file from the local filesystem into the source table.
load data local inpath '/data/part.txt'
into table t_part;

-- Mixed partition insert: year is fixed statically to '2021', month is
-- taken dynamically from the last column of the select list. Columns are
-- listed explicitly so the partition mapping does not depend on the
-- column order behind "select *".
insert into t_part_all partition(year='2021',month)
select
    uid,
    uname,
    sex,
    age,
    month
from t_part;

Hive——分区

其他相关栏目本月热门文章