2021.11.10创建表，| ： array map struct 手动分区，动态分区，分区表，内部表，外部表，分桶表

赋予权限
[root@gree139 tmp]# hadoop fs -chmod -R 777 /

student.txt
1,zhansan1,cs-lol-book-movie,beijing:bdqn-nanjing:zhongbo
2,zhansan2,cs-lol,beijing:bdqn-nanjing:zhongbo
3,zhansan3,book-movie,beijing:bdqn-nanjing:zhongbo
4,zhansan4,book-movie,beijing:bdqn-nanjing:zhongbo
5,zhansan5,cs-lol-book-movie,beijing:bdqn-nanjing:zhongbo
6,zhansan6,cs-lol-movie,beijing:bdqn-nanjing:zhongbo
7,zhansan7,lol-book-movie,beijing:bdqn-nanjing:zhongbo
8,zhansan8,cs-book-movie,beijing:bdqn-nanjing:zhongbo
9,zhansan9,cs-movie,beijing:bdqn-nanjing:zhongbo
10,zhansan10,cs-lol-book-movie,beijing:bdqn-nanjing:zhongbo
11,zhansan11,cs-book,beijing:bdqn-nanjing:zhongbo



-- Managed table for student.txt.
-- Fields are separated by ',', collection items by '-', and map keys from
-- their values by ':'.
-- likes and address hold strings: "select * from student" prints their
-- elements as quoted values.
-- Hive accepts only '\n' as the line terminator.
create table student(
    id int,
    name string,
    likes array<string>,
    address map<string,string>
)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n';

加载本地文件  local inpath
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student;
加载hdfs文件系统中的文件  inpath（不带 local；加载后源文件会被移动到表目录下）
0: jdbc:hive2://192.168.153.139:10000/default> load data inpath '/tmp/student.txt' into table student;
覆盖 overwrite
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' overwrite into table student;

0: jdbc:hive2://192.168.153.139:10000/default> select * from student;
+-------------+---------------+------------------------------+-----------------------------------------+--+
| student.id  | student.name  |        student.likes         |             student.address             |
+-------------+---------------+------------------------------+-----------------------------------------+--+
| 1           | zhansan1      | ["cs","lol","book","movie"]  | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 2           | zhansan2      | ["cs","lol"]                 | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 3           | zhansan3      | ["book","movie"]             | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 4           | zhansan4      | ["book","movie"]             | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 5           | zhansan5      | ["cs","lol","book","movie"]  | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 6           | zhansan6      | ["cs","lol","movie"]         | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 7           | zhansan7      | ["lol","book","movie"]       | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 8           | zhansan8      | ["cs","book","movie"]        | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 9           | zhansan9      | ["cs","movie"]               | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 10          | zhansan10     | ["cs","lol","book","movie"]  | {"beijing":"bdqn","nanjing":"zhongbo"}  |
| 11          | zhansan11     | ["cs","book"]                | {"beijing":"bdqn","nanjing":"zhongbo"}  |
+-------------+---------------+------------------------------+-----------------------------------------+--+


分区表
-- Partitioned variant of the student table with a single partition column, age.
-- The partition value is supplied at load time (partition(age=...)), not read
-- from the data file, so age is not listed among the regular columns.
-- Hive accepts only '\n' as the line terminator.
create table student2(
    id int,
    name string,
    likes array<string>,
    address map<string,string>
)
partitioned by (age int)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n';

导入分区数据 指定分区
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=10);
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=20);

查看表分区信息
0: jdbc:hive2://192.168.153.139:10000/default> show partitions student2;
+------------+--+
| partition  |
+------------+--+
| age=10     |
| age=20     |
+------------+--+

双分区
-- Student table with two partition columns, age and gender.
-- "show partitions student3" lists the partitions as age=.../gender=...
-- Hive accepts only '\n' as the line terminator.
create table student3(
    id int,
    name string,
    likes array<string>,
    address map<string,string>
)
partitioned by (age int, gender string)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='boy');
0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/student.txt' into table student3 partition(age=20,gender='girl');

0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=20,gender="man");

0: jdbc:hive2://192.168.153.139:10000/default> alter table student3 add partition(age=10,gender="girl");

0: jdbc:hive2://192.168.153.139:10000/default> show partitions student3;
+---------------------+--+
|      partition      |
+---------------------+--+
| age=10/gender=girl  |
| age=20/gender=boy   |
| age=20/gender=girl  |
| age=20/gender=man   |
+---------------------+--+

------------------------------------
动态分区
-- Unpartitioned staging table used as the source of the dynamic-partition
-- insert into studentp1; age and gender are ordinary columns here.
-- NOTE(review): the data file for this table is not shown. The element types
-- of likes/address are assumed to match the student table - confirm.
-- Hive accepts only '\n' as the line terminator.
create table studentp(
    id int,
    name string,
    age int,
    gender string,
    likes array<string>,
    address map<string,string>
)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n';

分区表
-- Target table of the dynamic-partition insert from studentp.
-- The insert selects age and gender last so they map onto the partition
-- columns declared here, in that order.
-- NOTE(review): element types of likes/address are assumed to match the
-- student table - confirm.
-- Hive accepts only '\n' as the line terminator.
create table studentp1(
    id int,
    name string,
    likes array<string>,
    address map<string,string>
)
partitioned by (age int, gender string)
row format delimited
    fields terminated by ','
    collection items terminated by '-'
    map keys terminated by ':'
    lines terminated by '\n';

开启动态分区
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition=true;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition.mode=nonstrict;

0: jdbc:hive2://192.168.153.139:10000/default> insert into table studentp1 partition(age,gender) 
. . . . . . . . . . . . . . . . . . . . . . .> select id,name,likes,address,age,gender from studentp;

------------------------------------------------------------
employee.txt

Michael|Montreal,Toronto|Male,30|DB:80|Product:DeveloperLead
Will|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Shelley|New York|Female,27|Python:80|Test:Lead,COE:Architect
Lucy|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead

-- Managed (internal) table for employee.txt.
-- Fields are separated by '|', collection items by ',', and map keys from
-- their values by ':'.
--   work_place   : list of cities, e.g. Montreal,Toronto
--   gender_age   : struct, e.g. Male,30; queried below as gender_age.gender
--   skills_score : skill name -> score, e.g. DB:80; queried as skills_score["Python"]=80
--   depart_title : department -> list of titles, e.g. Product:Lead
-- NOTE(review): the struct field name "age" and the int value types are
-- reconstructed from the sample rows and the column name - confirm.
-- Hive accepts only '\n' as the line terminator.
create table if not exists employee(
    name string,
    work_place array<string>,
    gender_age struct<gender:string,age:int>,
    skills_score map<string,int>,
    depart_title map<string,array<string>>
)
comment 'this is an internal table'
row format delimited
    fields terminated by '|'
    collection items terminated by ','
    map keys terminated by ':'
    lines terminated by '\n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee.txt' into table employee;



0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Male';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where skills_score["Python"]=80;

创建外部表
-- External table over the files under /tmp/hivedata/employee.
-- The columns and delimiters are the same as for the employee table.
-- The table comment now says "external"; the original text said "internal".
-- NOTE(review): the struct field name "age" and the int value types are
-- reconstructed from the employee.txt sample rows - confirm.
-- Hive accepts only '\n' as the line terminator.
create external table if not exists employee_external(
    name string,
    work_place array<string>,
    gender_age struct<gender:string,age:int>,
    skills_score map<string,int>,
    depart_title map<string,array<string>>
)
comment 'this is an external table'
row format delimited
    fields terminated by '|'
    collection items terminated by ','
    map keys terminated by ':'
    lines terminated by '\n'
location '/tmp/hivedata/employee';

hdfs dfs -put ./employee.txt /tmp/hivedata/employee


创建表  ， | ： array  map  struct  手动分区，动态分区 分区表  DataGrip安装  内部表，外部表

0: jdbc:hive2://192.168.153.139:10000/default> create temporary table tmp_employee as select name,work_place from employee;

查询性别为男性，名字叫Will
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will';
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee where gender_age.gender='Female';

查询所有男性中名字叫Will男性与所有的女性的信息
0: jdbc:hive2://192.168.153.139:10000/default> select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will' 
. . . . . . . . . . . . . . . . . . . . . . .> union all
. . . . . . . . . . . . . . . . . . . . . . .> select * from employee where gender_age.gender='Female';

CTE（公用表表达式，with 子句）
0: jdbc:hive2://192.168.153.139:10000/default> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;


使用 CTAS（create table ... as select）结合 CTE 查询，将结果保存到临时表 ctas_employee 中
0: jdbc:hive2://192.168.153.139:10000/default> create temporary table ctas_employee as
. . . . . . . . . . . . . . . . . . . . . . .> with
. . . . . . . . . . . . . . . . . . . . . . .> t1 as (select * from employee where gender_age.gender='Male'),
. . . . . . . . . . . . . . . . . . . . . . .> t2 as (select * from t1 where t1.name='Will'),
. . . . . . . . . . . . . . . . . . . . . . .> t3 as (select * from employee where gender_age.gender='Female')
. . . . . . . . . . . . . . . . . . . . . . .> select * from t2 union all select * from t3;

分桶表
-- Staging table for employee_id.txt: the employee layout plus an integer
-- employee_id column. It is the source of the insert into
-- employee_id_buckets.
-- NOTE(review): employee_id.txt is not shown. The complex column types are
-- assumed to match the employee table - confirm.
-- Hive accepts only '\n' as the line terminator.
create table employee_id(
    name string,
    employee_id int,
    work_place array<string>,
    gender_age struct<gender:string,age:int>,
    skills_score map<string,int>,
    depart_title map<string,array<string>>
)
row format delimited
    fields terminated by '|'
    collection items terminated by ','
    map keys terminated by ':'
    lines terminated by '\n';

0: jdbc:hive2://192.168.153.139:10000/default> load data local inpath '/opt/tmp/employee_id.txt' overwrite into table employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id;



-- Bucketed copy of employee_id: rows are hashed on employee_id into 2 buckets.
-- NOTE(review): complex column types are assumed to match the employee table - confirm.
0: jdbc:hive2://192.168.153.139:10000/default> create table employee_id_buckets(
. . . . . . . . . . . . . . . . . . . . . . .> name string,
. . . . . . . . . . . . . . . . . . . . . . .> employee_id int,
. . . . . . . . . . . . . . . . . . . . . . .> work_place array<string>,
. . . . . . . . . . . . . . . . . . . . . . .> gender_age struct<gender:string,age:int>,
. . . . . . . . . . . . . . . . . . . . . . .> skills_score map<string,int>,
. . . . . . . . . . . . . . . . . . . . . . .> depart_title map<string,array<string>>
. . . . . . . . . . . . . . . . . . . . . . .> )
. . . . . . . . . . . . . . . . . . . . . . .> clustered by(employee_id) into 2 buckets
. . . . . . . . . . . . . . . . . . . . . . .> row format delimited fields terminated by '|'
. . . . . . . . . . . . . . . . . . . . . . .> collection items terminated by ','
. . . . . . . . . . . . . . . . . . . . . . .> map keys terminated by ':'
. . . . . . . . . . . . . . . . . . . . . . .> lines terminated by '\n';

对比：
开启动态分区
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition=true;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.exec.dynamic.partition.mode=nonstrict;
分桶设置
-- Reducer count is set to match the 2 buckets of employee_id_buckets.
-- The property is mapred.reduce.tasks ("map.reduce.tasks" is not a recognised property name).
0: jdbc:hive2://192.168.153.139:10000/default> set mapred.reduce.tasks=2;
0: jdbc:hive2://192.168.153.139:10000/default> set hive.enforce.bucketing=true;


将employee_id数据写入到分桶表employee_id_buckets
0: jdbc:hive2://192.168.153.139:10000/default> insert overwrite table employee_id_buckets select * from employee_id;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(1M)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(100 percent)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(10 rows)s;
0: jdbc:hive2://192.168.153.139:10000/default> select * from employee_id_buckets tablesample(bucket 1 out of 32 on employee_id)s;

2021.11.10创建表 ，| ： array map struct 手动分区，动态分区，分区表 ，内部表，外部表，分桶表

大数据系统相关栏目本月热门文章

2021.11.10创建表，| ： array map struct 手动分区，动态分区，分区表，内部表，外部表，分桶表