动态分区和静态分区的区别
1.动态分区和静态分区都需要自己去指定分区的字段,静态分区同时需要去指定分区的内容
2.动态分区不能用load data的方法进行数据的导入,而静态分区可以通过load data和insert overwrite两种方法导入数据
3.进行insert overwrite 导入的时候,静态不需要将分区字段的内容在select中查询出来的,但是动态需要查询出这个字段
4.动态分区在使用之前,需要打开动态分区的开关,并且要打开非严格模式的开光。
分桶使用的场景
分桶在表链接的时候使用,可以加快两张表联合查询的速度,且只有当联合表格的分桶数量相等,或者分桶数量是倍数关系的时候,才有加速效果
分桶可以让表链接加速,是因为分桶可以让表连接的笛卡尔积数据量变少。
#!/bin/bash echo 用shell脚本实现分区的加载 sleep 2s #删除已存在的文件 rm -rvf /root/log*.txt #1、生成log1.txt echo 100,登录,20210510>>/root/log1.txt echo 101,下单,20210510>>/root/log1.txt echo 102,退出,20210510>>/root/log1.txt echo 103,登录,20210510>>/root/log1.txt echo 104,退出,20210510>>/root/log1.txt echo 105,登录,20210510>>/root/log1.txt echo 106,下单,20210510>>/root/log1.txt echo 107,退出,20210510>>/root/log1.txt #2、生成log2.txt echo 1100,登录,20210511>>/root/log2.txt echo 1101,下单,20210511>>/root/log2.txt echo 1102,退出,20210511>>/root/log2.txt echo 1103,登录,20210511>>/root/log2.txt echo 1104,退出,20210511>>/root/log2.txt echo 1105,登录,20210511>>/root/log2.txt echo 1106,下单,20210511>>/root/log2.txt echo 1107,退出,20210511>>/root/log2.txt #3、生成log3.txt echo 2100,登录,20210512>>/root/log3.txt echo 2101,下单,20210512>>/root/log3.txt echo 2102,退出,20210512>>/root/log3.txt echo 2103,登录,20210512>>/root/log3.txt echo 2104,退出,20210512>>/root/log3.txt echo 2105,登录,20210512>>/root/log3.txt echo 2106,下单,20210512>>/root/log3.txt echo 2107,退出,20210512>>/root/log3.txt sleep 3s #新建目录并上传文件 #先删除已经存在的目录 hadoop fs -rm -r /logs #新建目录 hadoop fs -mkdir /logs hadoop fs -mkdir /logs/20210510 hadoop fs -mkdir /logs/20210511 hadoop fs -mkdir /logs/20210512 #切换路径 cd /root hadoop fs -put log1.txt /logs/20210510 hadoop fs -put log2.txt /logs/20210511 hadoop fs -put log3.txt /logs/20210512 #执行hive3.hql脚本 hive -f /root/birth/hive3.hql --用vi新建hive3.hql脚本 --切换数据库 use bigdate007; --删除表 drop table if exists ext_logs_p; --在bigdata 数据库中建表 create external table bigdate007.ext_logs_p( id int, name string, operate string) partitioned by(date_time string) row format delimited fields terminated by ',' location '/logs'; --挂载分区 alter table bigdate007.ext_logs_p add partition (date_time='20210510') location '/logs/20210510'; alter table bigdate007.ext_logs_p add partition (date_time='20210511') location '/logs/20210511'; alter table bigdate007.ext_logs_p add partition (date_time='20210512') location '/logs/20210512'; 1.先创建一个带有分桶属性的表格 create table s_cl( id int, name string, sex string, age int) clustered by(age) into 4 buckets row format delimited fields terminated by ','; insert into s_cl values(15,'wangxiaoer15','男',15); --2.复制一个相同结构的备份表格 create table s_cl_tmp like s_cl; --3.将文件的内容映射到备份表中 load data local inpath '/root/s.txt' into table bigdate007.s_cl_tmp; --4.使用查询的方式对备份表进行分桶 insert overwrite table s_cl select * from s_c1_tmp cluster by age; #!/bin/bash echo 开始运行脚本 cd /root/ rm -vf stu_c.txt sleep 2s echo 1001,lilei,男,18>>stu_c.txt echo 1002,lucy,女,16>>stu_c.txt echo 1003,tom,男,17>>stu_c.txt echo 1004,jack,男,18>>stu_c.txt echo 1005,eve,女,18>>stu_c.txt echo 1006,allen,男,18>>stu_c.txt echo 1007,miller,男,19>> stu_c.txt echo 1008,flower,女,16>> stu_c.txt echo 1009,adam,男,17>> stu_c.txt echo 1010,toly,男,16>> stu_c.txt echo 1011,steven,男,17>> stu_c.txt echo 1012,bob,男,18>> stu_c.txt echo 1013,lucky,女,17>> stu_c.txt sleep 2s hadoop fs -rm -r /data hadoop fs -mkdir /data hadoop fs -put stu_c.txt /data sleep 2s #执行hive5.hql hive -f /root/hive5.hql echo 脚本执行完毕!!! hive5.hql脚本 --1.删除表 use bigdate007; drop table if exists stu_info_c; drop table if exists stu_info_c_tmp; --2.新建表 create table stu_info_c( id int, name string, sex string, age int) clustered by(age) into 4 buckets row format delimited fields terminated by ','; --3.再复制一个和分桶表相同结构的临时表,只有结构不会有内容 create table stu_info_c_tmp like stu_info_c; --4.在临时表中添加表格的所有的数据 load data inpath '/data/stu_c.txt' into table bigdate007.stu_info_c_tmp; --5.使用insert复制表格的内容到分桶表中,复制的同时进行数据的分桶,cluster by (age) insert overwrite table stu_info_c select * from stu_info_c_tmp cluster by (age);



