#!/bin/bash
# Sync the partitioned warehouse table ods_out_cy from the master cluster:
# 1) drop stale partitions, 2) distcp today's and yesterday's partition data,
# 3) re-register the two partitions in Impala.
source /etc/profile
source ~/.bash_profile

today=$(date --date='0 days ago' '+%Y-%m-%d')
yesterday=$(date --date='1 days ago' '+%Y-%m-%d')

# Drop existing partition data first.
# NOTE(review): this drops EVERY partition with dt <= today, but only today's
# and yesterday's partitions are copied back below — confirm historical
# partitions are intentionally discarded on each run.
impala-shell -B -u hive -q "alter table ods_out_cy drop partition(dt<='$today');" \
  || exit 1

# Copy the data from the master cluster to this cluster.
# -pb preserves block size so distcp checksum validation does not fail.
# Today's data — abort on failure so we never register an empty/partial partition.
hadoop distcp -pb \
  "webhdfs://master:8080/foo/ods/ods_out_cy/dt=$today" \
  "/foo/ods/ods_out_cy/dt=$today" \
  || exit 1
# Yesterday's data.
hadoop distcp -pb \
  "webhdfs://master:8080/foo/ods/ods_out_cy/dt=$yesterday" \
  "/foo/ods/ods_out_cy/dt=$yesterday" \
  || exit 1

# Re-register the partitions in the warehouse table.
impala-shell -B -u hive -q "alter table ods_out_cy add partition(dt='$yesterday');"
impala-shell -B -u hive -q "alter table ods_out_cy add partition(dt='$today');"
2. DM -- non-partitioned table
#!/bin/bash
# Sync the non-partitioned table dm_out_cy from the master cluster:
# truncate, distcp all data files, then refresh Impala metadata.
source /etc/profile
source ~/.bash_profile

# Empty the table first; abort if that fails so we never mix old and new data.
impala-shell -B -u hive -q "truncate table dm_out_cy; " \
  || exit 1

# BUG FIX: was "hadoop -distcp" — the subcommand must not have a leading dash,
# so the copy failed on every run after the table had already been truncated.
# Abort on failure so the refresh below never publishes an empty table.
# The glob is quoted so distcp (not the local shell) expands it.
hadoop distcp \
  "webhdfs://master:8080/foo/dm/dm_out_cy/*" \
  /foo/dm/dm_out_cy/ \
  || exit 1

# Refresh table metadata so Impala sees the newly copied files.
impala-shell -B -u hive -q "refresh dm_out_cy"
Note:
The most common use of the distcp command is copying data between clusters, for example:
hadoop distcp hdfs://nn1:8020/foo/bar hdfs://nn2:8020/bar/foo



