--日粒度维表
drop table dim.dim_date;
CREATE TABLE IF NOT EXISTS dim.dim_date(
`datetimestr` STRING COMMENT '当前时间(yyyy-MM-dd HH:mm:ss)'
,`yearstr` STRING COMMENT '当前年份'
,`quarterid` BIGINT COMMENT '当前季度'
,`quarter_cn` STRING COMMENT '当前季度中文'
,`monthstr` STRING COMMENT '当前月份'
,`weekofyearid` BIGINT COMMENT '当年第几周'
,`dayofyearid` BIGINT COMMENT '当年第几天'
,`dayofmonthstr` STRING COMMENT '当月日期(dd)'
,`dayofweekid` BIGINT COMMENT '当前周几(0-6)'
,`dayofweek_cn` STRING COMMENT '当前周中文'
--,`hourstr` STRING COMMENT '当前小时(HH)'
--,`minutestr` STRING COMMENT '当前分钟(mm)'
,`workday_flag` BIGINT COMMENT '是否为工作日(1,是;0,不是)'
,`create_time` STRING COMMENT '数据创建日期(yyyy-MM-dd HH:mm:ss.SSS)'
)
COMMENT '日期粒度度维表'
--PARTITIonED BY(yearstr STRING COMMENT '当前年分区(yyyy)',monthstr STRING COMMENT '当前月分区(MM)')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY 't'
NULL DEFINED AS ''
STORED AS ORC
TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
--日分钟粒度维表
drop table dim.dim_datetime;
CREATE TABLE IF NOT EXISTS dim.dim_datetime(
`timestr` STRING COMMENT '当前时间(HH:mm:ss)'
,`hourstr` STRING COMMENT '当前小时(HH)'
,`minutestr` STRING COMMENT '当前分钟(mm)'
)
COMMENT '日分钟粒度维表'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY 't'
NULL DEFINED AS ''
STORED AS ORC
TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
生成数据
--生成日粒度维表数据
WITH datetime_t AS(
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
(
SELECt
POSEXPLODE(
SPLIT(
REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
,'o'
)
)
) idx_t
),
date_t AS (
select date_add('2000-01-01',idx_t.pos) as datetimestr
from
(
select
posexplode(
split(
repeat('o',datediff(from_unixtime(unix_timestamp('2040-12-31 00:00:00'),'yyyy-MM-dd'), '2000-01-01'))
,'o'
)
)
) idx_t
),
tmp AS(
select
datetimestr AS datetimestr
,substr(datetimestr,1,4) AS yearstr
,CASE WHEN MONTH(datetimestr) <= 3 then 1
WHEN MONTH(datetimestr) <= 6 then 2
WHEN MONTH(datetimestr) <= 9 then 3
ELSE 4
END AS quarterid
,substr(datetimestr,6,2) AS monthstr
,WEEKOFYEAR(datetimestr) AS weekofyearid
,DATEDIFF(datetimestr,CONCAT(YEAR(datetimestr),'-01-01')) + 1 AS dayofyearid
,substr(datetimestr,9,2) AS dayofmonthstr
,PMOD(DATEDIFF(datetimestr, '2000-01-01')-1, 7) AS dayofweekid
--,substr(datetimestr,12,2) AS hourstr
--,substr(datetimestr,15,2) AS minutestr
FROM date_t
)
INSERT OVERWRITE TABLE dim.dim_date --PARTITION(yearstr,monthstr)
SELECt
datetimestr
,yearstr
,quarterid
,CASE WHEN quarterid = 1 then '第一季度'
WHEN quarterid = 2 then '第二季度'
WHEN quarterid = 3 then '第三季度'
WHEN quarterid = 4 then '第四季度'
END AS quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,CASE WHEN dayofweekid = 0 then '星期日'
WHEN dayofweekid = 1 then '星期一'
WHEN dayofweekid = 2 then '星期二'
WHEN dayofweekid = 3 then '星期三'
WHEN dayofweekid = 4 then '星期四'
WHEN dayofweekid = 5 then '星期五'
WHEN dayofweekid = 6 then '星期六'
END AS dayofweek_cn
--,hourstr
--,minutestr
,CASE WHEN dayofweekid = 0 or dayofweekid = 6 then 0
ELSE 1
END AS workday_flag
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
--,yearstr
--,monthstr
FROM tmp;
--生成分钟粒度维表数据
WITH datetime_t AS(
SELECt FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
(
SELECt
POSEXPLODE(
SPLIT(
REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
,'o'
)
)
) idx_t
),
tmp AS(
select
substr(datetimestr,12) AS timestr
,substr(datetimestr,12,2) AS hourstr
,substr(datetimestr,15,2) AS minutestr
FROM datetime_t
)
INSERT OVERWRITE TABLE dim.dim_datetime
select
timestr
,hourstr
,minutestr
from tmp
变更节假日
变更节假日sql
INSERT INTO TABLE dim.dim_date
SELECt
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,0 AS workday_flag --变更为非工作日
--,1 AS workday_flag --变更为工作日
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
FROM dim.dim_date
where yearstr='2000' and monthstr='01' and dayofmonthstr='31';
变更节假日脚本
变更节假日脚本(单个)
#/bin/bash env
#update_workday_flag.sh
USAGE="Usage: $0 [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
e.g: $0 2000 01 01 0"
function main(){
if [[ $# -lt 4 ]] ; then
echo "${USAGE}";
exit 1;
fi
SQL_UPDATE="INSERT INTO TABLE cdm.dim_date
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,${4} AS workday_flag
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
SQL_QUERY="SELECt
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm -e "${SQL_UPDATe};${SQL_QUERY}"
}
main "$@"
变更节假日脚本(批量)
#/bin/bash env
#update_workday_flag_batch.sh
USAGE="agrs: [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
e.g: 2000 01 01 0"
function generate_sql(){
if [[ $# -lt 4 ]] ; then
echo "Invalid arguments !!!"
echo "${USAGE}";
exit 1;
fi
SQL_UPDATE="INSERT INTO TABLE cdm.dim_date
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,${4} AS workday_flag
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
SQL_QUERY="SELECt
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
}
for line in `cat days_*.csv`
do
OLD_IFS="${IFS}"
IFS=","
ARR=(${line})
IFS="{OLD_IFS}"
generate_sql ${ARR[@]}
SQL="${SQL};${SQL_UPDATe};${SQL_QUERY}"
done
beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm -e "${SQL}"
变更日期数据
tee days_2022.csv <使用日期维表 --使用维表(日期粒度) WITH tmp AS( SELECT datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num FROM dim.dim_date --WHERe yearstr='yyyy' and monthstr='MM' WHERe yearstr='2000' and monthstr='01' ) SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag FROM tmp WHERe num=1 --使用维表(小时粒度) WITH tmp AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num FROM dim.dim_date --WHERe yearstr='yyyy' and monthstr='MM' WHERe yearstr='2000' and monthstr='01' ), date_t AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag FROM tmp WHERe num=1 ), datetime_t AS( SELECt hourstr FROM dim.dim_datetime group by hourstr ) select datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,b.weekofyearid ,b.dayofyearid ,b.dayofmonthstr ,b.dayofweekid ,b.dayofweek_cn ,a.hourstr ,b.workday_flag from datetime_t a,date_t b --使用维表(分钟粒度) WITH tmp AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag ,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num FROM dim.dim_date --WHERe yearstr='yyyy' and monthstr='MM' WHERe yearstr='2000' and monthstr='01' ), date_t AS( SELECt datetimestr ,yearstr ,quarterid ,quarter_cn ,monthstr ,weekofyearid ,dayofyearid ,dayofmonthstr ,dayofweekid ,dayofweek_cn ,workday_flag FROM tmp WHERe num=1 ) select concat(b.datetimestr," ",a.timestr) AS datetimestr ,b.yearstr ,b.quarterid ,b.quarter_cn ,b.monthstr ,b.weekofyearid ,b.dayofyearid ,b.dayofmonthstr ,b.dayofweekid ,b.dayofweek_cn ,a.hourstr ,a.minutestr ,b.workday_flag from dim.dim_datetime a,date_t b



