新建一个orders.txt文件,内容如下:
使用xftp将orders.txt上传到linux虚拟机中。
新建orders表
create table orders( orderid int, createdate string, modifiedtime string, status string) row format delimited fields terminated by 't';
将orders.txt文件上传到hdfs中
hdfs dfs -put ./orders.txt /user/hive/warehouse/lalian.db/orders
使用命令查询数据select * from orders;
1 2021-11-20 2021-11-20 创建
2 2021-11-20 2021-11-20 创建
3 2021-11-20 2021-11-20 创建
创建ods_orders_inc表
create table ods_orders_inc( orderid int, createtime string, modifiedtime string, status string ) partitioned by (day string) row format delimited fields terminated by 't';
插入数据,并分区insert overwrite table ods_orders_inc partition(day='2021-11-20') select orderid,createdate,modifiedtime,status from orders;
创建dws_orders_his表
create table dws_orders_his( orderid int, createtime string, modifiedtime string, status string, start_time string, end_time string) row format delimited fields terminated by 't';
插入数据,insert overwrite table dws_orders_his select orderid,createtime,modifiedtime,status,modifiedtime,'9999-12-31' from ods_orders_inc where day='2021-11-20';
在orders.txt中添加新数据
重新将orders.txt上传到hdfs中
hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
将21号orders表里的状态修改及新增的数据导入到ods_orders_inc 分区为2021-11-21
insert overwrite table ods_orders_inc partition(day='2021-11-21') select orderid,createdate,modifiedtime,status from orders where (createdate='2021-11-21' and modifiedtime='2021-11-21') or modifiedtime='2021-11-21';
两个查询,合并 union all
通过his left join inc 找出订单状态有更新的数据
将inc 2021-11-21的数据添加end_time为9999-12-31后插入his表
create table dws_his_20211121_tmp as select t.orderid,t.createtime,t.modifiedtime, t.status,t.start_time,t.end_time from ( select t1.orderid, t1.createtime, t1.modifiedtime, t1.status, t1.start_time, case when t2.orderid is not null and t1.end_time > '2021-11-21' then '2021-11-21' else t1.end_time end end_time from dws_orders_his t1 left join (select orderid from ods_orders_inc where day='2021-11-21') t2 on t1.orderid=t2.orderid union all select orderid, createtime, modifiedtime, status, modifiedtime as start_time, '9999-12-31' as end_time from ods_orders_inc where day='2021-11-21' ) t order by t.orderid,t.start_time
想dws_orders_his表中插入最新数据insert overwrite table dws_orders_his select * from dws_his_20211121_tmp;
使用命令查询数据select * from dws_orders_his;
1 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
1 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
2 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
2 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
3 2021-11-20 2021-11-20 创建 2021-11-20 9999-12-31
4 2021-11-21 2021-11-21 创建 2021-11-21 9999-12-31



