行:维度对象天粒度的汇总
列:维度ID+维度模型中与该维度相关的事实表的度量按天汇总
分区:每个分区存放维度对象对应天的汇总
遇到"次数"类指标时:用 count(*) 或 sum(if(xxx,1,0)) 计算
多张表进行 full outer join 时,为了防止同一 id 的数据关联不上(被拆成多行),从第二个关联条件开始,需要用 coalesce/nvl 取前面所有表的关联键
-- Anti-pattern (kept on purpose as the bad example): every join keys only on a.id.
select
coalesce(a.id,b.id,c.id) as id
from a
full outer join b
on a.id = b.id
full outer join c
-- a.id is NULL for rows that exist only in b, so an id present in b and c
-- but absent from a never matches here and is emitted as two separate rows.
on a.id = c.id;
需要修改为:
-- Corrected form: from the second join onward, key on the first non-NULL id
-- of the tables joined so far, so that every id ends up on exactly one row.
select
coalesce(a.id,b.id,c.id) as id
from a
full outer join b
on a.id = b.id
full outer join c
on nvl(a.id,b.id) = c.id;
-- Array of structs: every array element is a struct.
-- Column definition for this two-field example:
--   `order_detail_stats` array<struct<sku_id:string,sku_num:int>>   (order detail statistics)
-- Example: roll user/day/sku rows up to user/day grain.
-- `date` is backquoted because DATE is a reserved keyword in Hive.
SELECT
    user_id,
    `date`,
    COLLECT_SET(NAMED_STRUCT('sku_id', sku_id, 'sku_num', sku_num)) AS order_detail_stats
FROM
(
    -- Sample rows at user/day/sku grain.
    SELECT '111' AS user_id, '20211005' AS `date`, 'aaa' AS sku_id, 2 AS sku_num
    UNION ALL
    SELECT '111' AS user_id, '20211005' AS `date`, 'bbb' AS sku_id, 3 AS sku_num
    UNION ALL
    SELECT '222' AS user_id, '20211005' AS `date`, 'aaa' AS sku_id, 4 AS sku_num
) a
GROUP BY user_id, `date`;
用户主题
-- User-topic DWS table: one row per user per day, partitioned by dt.
-- order_detail_stats holds one struct per SKU the user ordered that day. Its field
-- names and order follow the named_struct built by the load queries.
-- NOTE(review): sku_num and order_count are declared BIGINT on the assumption that
-- they come from SUM over an integer column and COUNT - confirm against dwd_order_detail.
DROP TABLE IF EXISTS dws_user_action_daycount;
CREATE EXTERNAL TABLE IF NOT EXISTS dws_user_action_daycount
(
    `user_id` STRING COMMENT '用户id',
    `login_count` BIGINT COMMENT '登录次数',
    `cart_count` BIGINT COMMENT '加入购物车次数',
    `favor_count` BIGINT COMMENT '收藏次数',
    `order_count` BIGINT COMMENT '下单次数',
    `order_activity_count` BIGINT COMMENT '订单参与活动次数',
    `order_activity_reduce_amount` DECIMAL(16,2) COMMENT '订单减免金额(活动)',
    `order_coupon_count` BIGINT COMMENT '订单用券次数',
    `order_coupon_reduce_amount` DECIMAL(16,2) COMMENT '订单减免金额(优惠券)',
    `order_original_amount` DECIMAL(16,2) COMMENT '订单单原始金额',
    `order_final_amount` DECIMAL(16,2) COMMENT '订单总金额',
    `payment_count` BIGINT COMMENT '支付次数',
    `payment_amount` DECIMAL(16,2) COMMENT '支付金额',
    `refund_order_count` BIGINT COMMENT '退单次数',
    `refund_order_num` BIGINT COMMENT '退单件数',
    `refund_order_amount` DECIMAL(16,2) COMMENT '退单金额',
    `refund_payment_count` BIGINT COMMENT '退款次数',
    `refund_payment_num` BIGINT COMMENT '退款件数',
    `refund_payment_amount` DECIMAL(16,2) COMMENT '退款金额',
    `coupon_get_count` BIGINT COMMENT '优惠券领取次数',
    `coupon_using_count` BIGINT COMMENT '优惠券使用(下单)次数',
    `coupon_used_count` BIGINT COMMENT '优惠券使用(支付)次数',
    `appraise_good_count` BIGINT COMMENT '好评数',
    `appraise_mid_count` BIGINT COMMENT '中评数',
    `appraise_bad_count` BIGINT COMMENT '差评数',
    `appraise_default_count` BIGINT COMMENT '默认评价数',
    `order_detail_stats` ARRAY<STRUCT<sku_id:STRING,sku_num:BIGINT,order_count:BIGINT,activity_reduce_amount:DECIMAL(16,2),coupon_reduce_amount:DECIMAL(16,2),original_amount:DECIMAL(16,2),final_amount:DECIMAL(16,2)>> COMMENT '下单明细统计'
) COMMENT '每日用户行为'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_user_action_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
-- First-day load for dws_user_action_daycount. WITH ... AS defines temporary result sets.
-- The INSERT at the end of this statement uses a fully dynamic partition spec,
-- partition(dt), which Hive rejects in strict mode, so nonstrict mode is enabled first.
SET hive.exec.dynamic.partition.mode=nonstrict;
WITH
-- Login count per user per day: page-log rows whose last_page_id is NULL.
tmp_login AS
(
    SELECT
        dt,
        user_id,
        COUNT(*) AS login_count
    FROM dwd_page_log
    WHERE user_id IS NOT NULL
      AND last_page_id IS NULL
    GROUP BY dt, user_id
),
-- Cart-add and favor-add action counts per user per day.
tmp_cf AS
(
    SELECT
        dt,
        user_id,
        SUM(IF(action_id = 'cart_add', 1, 0))  AS cart_count,
        SUM(IF(action_id = 'favor_add', 1, 0)) AS favor_count
    FROM dwd_action_log
    WHERE user_id IS NOT NULL
      AND action_id IN ('cart_add', 'favor_add')
    GROUP BY dt, user_id
),
-- Order counts and amounts per user per order-creation day.
tmp_order AS
(
    SELECT
        DATE_FORMAT(create_time, 'yyyy-MM-dd')     AS dt,
        user_id,
        COUNT(*)                                   AS order_count,
        -- An order counts as using an activity or coupon when the matching reduction is positive.
        SUM(IF(activity_reduce_amount > 0, 1, 0))  AS order_activity_count,
        SUM(IF(coupon_reduce_amount > 0, 1, 0))    AS order_coupon_count,
        SUM(activity_reduce_amount)                AS order_activity_reduce_amount,
        SUM(coupon_reduce_amount)                  AS order_coupon_reduce_amount,
        SUM(original_amount)                       AS order_original_amount,
        SUM(final_amount)                          AS order_final_amount
    FROM dwd_order_info
    GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), user_id
),
-- Payment count and amount per user per payment-callback day.
tmp_pay AS
(
    SELECT
        DATE_FORMAT(callback_time, 'yyyy-MM-dd') AS dt,
        user_id,
        COUNT(*)                                 AS payment_count,
        SUM(payment_amount)                      AS payment_amount
    FROM dwd_payment_info
    -- Rows without a callback_time would yield a NULL dt and land in the default
    -- dynamic partition. The SKU-topic tmp_pay applies the same filter.
    WHERE callback_time IS NOT NULL
    GROUP BY DATE_FORMAT(callback_time, 'yyyy-MM-dd'), user_id
),
-- Refund-order count, item quantity and amount per user per refund-creation day.
tmp_ri AS
(
    SELECT
        DATE_FORMAT(create_time, 'yyyy-MM-dd') AS dt,
        user_id,
        COUNT(*)                               AS refund_order_count,
        SUM(refund_num)                        AS refund_order_num,
        SUM(refund_amount)                     AS refund_order_amount
    FROM dwd_order_refund_info
    GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), user_id
),
-- Refund payments per user per callback day. The refunded item quantity is not on
-- the refund-payment row, so it is looked up from the matching refund order.
tmp_rp AS
(
    SELECT
        DATE_FORMAT(rp.callback_time, 'yyyy-MM-dd') AS dt,
        rp.user_id,
        COUNT(*)                                    AS refund_payment_count,
        SUM(ri.refund_num)                          AS refund_payment_num,
        SUM(rp.refund_amount)                       AS refund_payment_amount
    FROM
    (
        SELECT
            user_id,
            order_id,
            sku_id,
            refund_amount,
            callback_time
        FROM dwd_refund_payment
        -- A NULL callback_time would produce a NULL dt partition value.
        WHERE callback_time IS NOT NULL
    ) rp
    LEFT JOIN
    (
        SELECT
            user_id,
            order_id,
            sku_id,
            refund_num
        FROM dwd_order_refund_info
    ) ri
        ON rp.order_id = ri.order_id
       -- Must compare against ri.sku_id. Comparing rp.sku_id with itself is always
       -- true and matches every refund line of the order, inflating count and sums.
       AND rp.sku_id = ri.sku_id
    GROUP BY DATE_FORMAT(rp.callback_time, 'yyyy-MM-dd'), rp.user_id
),
-- Coupon activity per user per day. The same table is aggregated three times, once
-- per timestamp (get, using, used), and the three results are full-outer-joined.
tmp_coupon AS
(
    SELECT
        COALESCE(coupon_get.dt, coupon_using.dt, coupon_used.dt)                AS dt,
        COALESCE(coupon_get.user_id, coupon_using.user_id, coupon_used.user_id) AS user_id,
        NVL(coupon_get_count, 0)                                                AS coupon_get_count,
        NVL(coupon_using_count, 0)                                              AS coupon_using_count,
        NVL(coupon_used_count, 0)                                               AS coupon_used_count
    FROM
    (
        -- Coupons received, by get_time day.
        SELECT
            DATE_FORMAT(get_time, 'yyyy-MM-dd') AS dt,
            user_id,
            COUNT(*)                            AS coupon_get_count
        FROM dwd_coupon_use
        WHERE get_time IS NOT NULL
        GROUP BY user_id, DATE_FORMAT(get_time, 'yyyy-MM-dd')
    ) coupon_get
    FULL OUTER JOIN
    (
        -- Coupons applied to an order, by using_time day.
        SELECT
            DATE_FORMAT(using_time, 'yyyy-MM-dd') AS dt,
            user_id,
            COUNT(*)                              AS coupon_using_count
        FROM dwd_coupon_use
        WHERE using_time IS NOT NULL
        GROUP BY user_id, DATE_FORMAT(using_time, 'yyyy-MM-dd')
    ) coupon_using
        ON coupon_get.dt = coupon_using.dt
       AND coupon_get.user_id = coupon_using.user_id
    FULL OUTER JOIN
    (
        -- Coupons consumed at payment, by used_time day.
        SELECT
            DATE_FORMAT(used_time, 'yyyy-MM-dd') AS dt,
            user_id,
            COUNT(*)                             AS coupon_used_count
        FROM dwd_coupon_use
        WHERE used_time IS NOT NULL
        GROUP BY user_id, DATE_FORMAT(used_time, 'yyyy-MM-dd')
    ) coupon_used
        -- Second join keys on the first non-NULL key of the two sides already joined.
        ON NVL(coupon_get.dt, coupon_using.dt) = coupon_used.dt
       AND NVL(coupon_get.user_id, coupon_using.user_id) = coupon_used.user_id
),
-- Review counts per user per day, split by appraise code 1201 to 1204
-- (stored as good, mid, bad and default respectively).
tmp_comment AS
(
    SELECT
        DATE_FORMAT(create_time, 'yyyy-MM-dd') AS dt,
        user_id,
        SUM(IF(appraise = '1201', 1, 0))       AS appraise_good_count,
        SUM(IF(appraise = '1202', 1, 0))       AS appraise_mid_count,
        SUM(IF(appraise = '1203', 1, 0))       AS appraise_bad_count,
        SUM(IF(appraise = '1204', 1, 0))       AS appraise_default_count
    FROM dwd_comment_info
    GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), user_id
),
-- Order detail statistics per user per day: one struct per SKU, collected into an array.
tmp_od AS
(
    SELECT
        dt,
        user_id,
        COLLECT_SET(
            NAMED_STRUCT(
                'sku_id', sku_id,
                'sku_num', sku_num,
                'order_count', order_count,
                'activity_reduce_amount', activity_reduce_amount,
                'coupon_reduce_amount', coupon_reduce_amount,
                'original_amount', original_amount,
                'final_amount', final_amount
            )
        ) AS order_detail_stats
    FROM
    (
        -- Inner step: aggregate to user/day/sku grain. Sums are cast to the struct field precision.
        SELECT
            DATE_FORMAT(create_time, 'yyyy-MM-dd')                AS dt,
            user_id,
            sku_id,
            SUM(sku_num)                                          AS sku_num,
            COUNT(*)                                              AS order_count,
            CAST(SUM(split_activity_amount) AS DECIMAL(16,2))     AS activity_reduce_amount,
            CAST(SUM(split_coupon_amount) AS DECIMAL(16,2))       AS coupon_reduce_amount,
            CAST(SUM(original_amount) AS DECIMAL(16,2))           AS original_amount,
            CAST(SUM(split_final_amount) AS DECIMAL(16,2))        AS final_amount
        FROM dwd_order_detail
        GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), user_id, sku_id
    ) t1
    GROUP BY dt, user_id
)
-- User-topic DWS wide table, first-day load.
-- All per-fact CTEs are full-outer-joined on (user_id, dt). From the second join
-- onward each key is the first non-NULL key of every CTE joined so far.
-- The last select column feeds the dynamic partition dt.
INSERT OVERWRITE TABLE dws_user_action_daycount PARTITION (dt)
SELECT
    COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id, tmp_comment.user_id, tmp_coupon.user_id, tmp_od.user_id) AS user_id,
    NVL(login_count, 0)                  AS login_count,
    NVL(cart_count, 0)                   AS cart_count,
    NVL(favor_count, 0)                  AS favor_count,
    NVL(order_count, 0)                  AS order_count,
    NVL(order_activity_count, 0)         AS order_activity_count,
    NVL(order_activity_reduce_amount, 0) AS order_activity_reduce_amount,
    NVL(order_coupon_count, 0)           AS order_coupon_count,
    NVL(order_coupon_reduce_amount, 0)   AS order_coupon_reduce_amount,
    NVL(order_original_amount, 0)        AS order_original_amount,
    NVL(order_final_amount, 0)           AS order_final_amount,
    NVL(payment_count, 0)                AS payment_count,
    NVL(payment_amount, 0)               AS payment_amount,
    NVL(refund_order_count, 0)           AS refund_order_count,
    NVL(refund_order_num, 0)             AS refund_order_num,
    NVL(refund_order_amount, 0)          AS refund_order_amount,
    NVL(refund_payment_count, 0)         AS refund_payment_count,
    NVL(refund_payment_num, 0)           AS refund_payment_num,
    NVL(refund_payment_amount, 0)        AS refund_payment_amount,
    NVL(coupon_get_count, 0)             AS coupon_get_count,
    NVL(coupon_using_count, 0)           AS coupon_using_count,
    NVL(coupon_used_count, 0)            AS coupon_used_count,
    NVL(appraise_good_count, 0)          AS appraise_good_count,
    NVL(appraise_mid_count, 0)           AS appraise_mid_count,
    NVL(appraise_bad_count, 0)           AS appraise_bad_count,
    NVL(appraise_default_count, 0)       AS appraise_default_count,
    order_detail_stats,
    COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt, tmp_pay.dt, tmp_ri.dt, tmp_rp.dt, tmp_comment.dt, tmp_coupon.dt, tmp_od.dt) AS dt
FROM tmp_login
FULL OUTER JOIN tmp_cf
    ON tmp_login.user_id = tmp_cf.user_id
   AND tmp_login.dt = tmp_cf.dt
FULL OUTER JOIN tmp_order
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id) = tmp_order.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt) = tmp_order.dt
FULL OUTER JOIN tmp_pay
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id) = tmp_pay.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt) = tmp_pay.dt
FULL OUTER JOIN tmp_ri
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id) = tmp_ri.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt, tmp_pay.dt) = tmp_ri.dt
FULL OUTER JOIN tmp_rp
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id) = tmp_rp.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt, tmp_pay.dt, tmp_ri.dt) = tmp_rp.dt
FULL OUTER JOIN tmp_comment
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id) = tmp_comment.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt, tmp_pay.dt, tmp_ri.dt, tmp_rp.dt) = tmp_comment.dt
FULL OUTER JOIN tmp_coupon
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id, tmp_comment.user_id) = tmp_coupon.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt, tmp_pay.dt, tmp_ri.dt, tmp_rp.dt, tmp_comment.dt) = tmp_coupon.dt
FULL OUTER JOIN tmp_od
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id, tmp_comment.user_id, tmp_coupon.user_id) = tmp_od.user_id
   AND COALESCE(tmp_login.dt, tmp_cf.dt, tmp_order.dt, tmp_pay.dt, tmp_ri.dt, tmp_rp.dt, tmp_comment.dt, tmp_coupon.dt) = tmp_od.dt;
关联:
1、关联方式 full outer join
2、关联字段:首日装载为 user_id 和 dt(日期);每日装载只处理单日数据,因此只按 user_id 关联
-- Daily load for dws_user_action_daycount (load date 2020-06-15).
WITH
-- Login count per user for the load date: page-log rows whose last_page_id is NULL.
tmp_login AS
(
    SELECT
        user_id,
        COUNT(*) AS login_count
    FROM dwd_page_log
    WHERE dt = '2020-06-15'
      AND user_id IS NOT NULL
      AND last_page_id IS NULL
    GROUP BY user_id
),
-- Cart-add and favor-add action counts per user for the load date.
tmp_cf AS
(
    SELECT
        user_id,
        SUM(IF(action_id = 'cart_add', 1, 0))  AS cart_count,
        SUM(IF(action_id = 'favor_add', 1, 0)) AS favor_count
    FROM dwd_action_log
    WHERE dt = '2020-06-15'
      AND user_id IS NOT NULL
      AND action_id IN ('cart_add', 'favor_add')
    GROUP BY user_id
),
-- Orders created on the load date, per user.
-- dwd_order_info is an accumulating snapshot fact table: the query reads the
-- load-date partition plus the 9999-12-31 partition and then filters on create_time.
tmp_order AS
(
    SELECT
        user_id,
        COUNT(*)                                  AS order_count,
        SUM(IF(activity_reduce_amount > 0, 1, 0)) AS order_activity_count,
        SUM(IF(coupon_reduce_amount > 0, 1, 0))   AS order_coupon_count,
        SUM(activity_reduce_amount)               AS order_activity_reduce_amount,
        SUM(coupon_reduce_amount)                 AS order_coupon_reduce_amount,
        SUM(original_amount)                      AS order_original_amount,
        SUM(final_amount)                         AS order_final_amount
    FROM dwd_order_info
    WHERE dt IN ('2020-06-15', '9999-12-31')
      AND DATE_FORMAT(create_time, 'yyyy-MM-dd') = '2020-06-15'
    GROUP BY user_id
),
-- Payment count and amount per user, from the load-date partition of
-- dwd_payment_info (an accumulating snapshot fact table).
tmp_pay AS
(
    SELECT
        user_id,
        COUNT(*)            AS payment_count,
        SUM(payment_amount) AS payment_amount
    FROM dwd_payment_info
    WHERE dt = '2020-06-15'
    GROUP BY user_id
),
-- Refund-order count, item quantity and amount per user, from the load-date
-- partition of dwd_order_refund_info (a transactional fact table).
tmp_ri AS
(
    SELECT
        user_id,
        COUNT(*)           AS refund_order_count,
        SUM(refund_num)    AS refund_order_num,
        SUM(refund_amount) AS refund_order_amount
    FROM dwd_order_refund_info
    WHERE dt = '2020-06-15'
    GROUP BY user_id
),
-- Refund payments per user for the load date. The refunded item quantity is looked
-- up from the matching refund order.
tmp_rp AS
(
    SELECT
        rp.user_id,
        COUNT(*)              AS refund_payment_count,
        SUM(ri.refund_num)    AS refund_payment_num,
        SUM(rp.refund_amount) AS refund_payment_amount
    FROM
    (
        -- dwd_refund_payment: accumulating snapshot fact table, load-date partition.
        SELECT
            user_id,
            order_id,
            sku_id,
            refund_amount
        FROM dwd_refund_payment
        WHERE dt = '2020-06-15'
    ) rp
    LEFT JOIN
    (
        -- dwd_order_refund_info: transactional fact table. Refund orders from the
        -- 15 days up to the load date are scanned.
        SELECT
            user_id,
            order_id,
            sku_id,
            refund_num
        FROM dwd_order_refund_info
        WHERE dt >= DATE_ADD('2020-06-15', -15)
    ) ri
        ON rp.order_id = ri.order_id
       -- Must compare against ri.sku_id. Comparing rp.sku_id with itself is always
       -- true and matches every refund line of the order, inflating count and sums.
       AND rp.sku_id = ri.sku_id
    GROUP BY rp.user_id
),
-- Coupon get, using and used counts per user for the load date.
-- dwd_coupon_use is an accumulating snapshot fact table: the load-date and 9999-12-31
-- partitions are read, and a row is kept when any of its three timestamps falls on the load date.
tmp_coupon AS
(
    SELECT
        user_id,
        SUM(IF(DATE_FORMAT(get_time, 'yyyy-MM-dd') = '2020-06-15', 1, 0))   AS coupon_get_count,
        SUM(IF(DATE_FORMAT(using_time, 'yyyy-MM-dd') = '2020-06-15', 1, 0)) AS coupon_using_count,
        SUM(IF(DATE_FORMAT(used_time, 'yyyy-MM-dd') = '2020-06-15', 1, 0))  AS coupon_used_count
    FROM dwd_coupon_use
    WHERE dt IN ('2020-06-15', '9999-12-31')
      AND (
              DATE_FORMAT(get_time, 'yyyy-MM-dd') = '2020-06-15'
           OR DATE_FORMAT(using_time, 'yyyy-MM-dd') = '2020-06-15'
           OR DATE_FORMAT(used_time, 'yyyy-MM-dd') = '2020-06-15'
          )
    GROUP BY user_id
),
-- Review counts per user by appraise code, from the load-date partition of
-- dwd_comment_info (a transactional fact table).
tmp_comment AS
(
    SELECT
        user_id,
        SUM(IF(appraise = '1201', 1, 0)) AS appraise_good_count,
        SUM(IF(appraise = '1202', 1, 0)) AS appraise_mid_count,
        SUM(IF(appraise = '1203', 1, 0)) AS appraise_bad_count,
        SUM(IF(appraise = '1204', 1, 0)) AS appraise_default_count
    FROM dwd_comment_info
    WHERE dt = '2020-06-15'
    GROUP BY user_id
),
-- Order detail statistics per user for the load date: one struct per SKU, collected into an array.
tmp_od AS
(
    SELECT
        user_id,
        COLLECT_SET(
            NAMED_STRUCT(
                'sku_id', sku_id,
                'sku_num', sku_num,
                'order_count', order_count,
                'activity_reduce_amount', activity_reduce_amount,
                'coupon_reduce_amount', coupon_reduce_amount,
                'original_amount', original_amount,
                'final_amount', final_amount
            )
        ) AS order_detail_stats
    FROM
    (
        -- Per-user, per-SKU summary for the day. A user may buy the same SKU several times in one day.
        -- NOTE(review): the original note labels dwd_order_detail an accumulating snapshot
        -- fact table, yet only the load-date partition is read - confirm the table type.
        SELECT
            user_id,
            sku_id,
            SUM(sku_num)                                      AS sku_num,
            COUNT(*)                                          AS order_count,
            CAST(SUM(split_activity_amount) AS DECIMAL(16,2)) AS activity_reduce_amount,
            CAST(SUM(split_coupon_amount) AS DECIMAL(16,2))   AS coupon_reduce_amount,
            CAST(SUM(original_amount) AS DECIMAL(16,2))       AS original_amount,
            CAST(SUM(split_final_amount) AS DECIMAL(16,2))    AS final_amount
        FROM dwd_order_detail
        WHERE dt = '2020-06-15'
        GROUP BY user_id, sku_id
    ) t1
    GROUP BY user_id
)
-- Daily load into the static partition dt=2020-06-15.
-- All per-fact CTEs are full-outer-joined on user_id only. From the second join
-- onward the key is the first non-NULL user_id of every CTE joined so far.
INSERT OVERWRITE TABLE dws_user_action_daycount PARTITION (dt = '2020-06-15')
SELECT
    COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id, tmp_comment.user_id, tmp_coupon.user_id, tmp_od.user_id) AS user_id,
    NVL(login_count, 0)                  AS login_count,
    NVL(cart_count, 0)                   AS cart_count,
    NVL(favor_count, 0)                  AS favor_count,
    NVL(order_count, 0)                  AS order_count,
    NVL(order_activity_count, 0)         AS order_activity_count,
    NVL(order_activity_reduce_amount, 0) AS order_activity_reduce_amount,
    NVL(order_coupon_count, 0)           AS order_coupon_count,
    NVL(order_coupon_reduce_amount, 0)   AS order_coupon_reduce_amount,
    NVL(order_original_amount, 0)        AS order_original_amount,
    NVL(order_final_amount, 0)           AS order_final_amount,
    NVL(payment_count, 0)                AS payment_count,
    NVL(payment_amount, 0)               AS payment_amount,
    NVL(refund_order_count, 0)           AS refund_order_count,
    NVL(refund_order_num, 0)             AS refund_order_num,
    NVL(refund_order_amount, 0)          AS refund_order_amount,
    NVL(refund_payment_count, 0)         AS refund_payment_count,
    NVL(refund_payment_num, 0)           AS refund_payment_num,
    NVL(refund_payment_amount, 0)        AS refund_payment_amount,
    NVL(coupon_get_count, 0)             AS coupon_get_count,
    NVL(coupon_using_count, 0)           AS coupon_using_count,
    NVL(coupon_used_count, 0)            AS coupon_used_count,
    NVL(appraise_good_count, 0)          AS appraise_good_count,
    NVL(appraise_mid_count, 0)           AS appraise_mid_count,
    NVL(appraise_bad_count, 0)           AS appraise_bad_count,
    NVL(appraise_default_count, 0)       AS appraise_default_count,
    order_detail_stats
FROM tmp_login
FULL OUTER JOIN tmp_cf
    ON tmp_login.user_id = tmp_cf.user_id
FULL OUTER JOIN tmp_order
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id) = tmp_order.user_id
FULL OUTER JOIN tmp_pay
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id) = tmp_pay.user_id
FULL OUTER JOIN tmp_ri
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id) = tmp_ri.user_id
FULL OUTER JOIN tmp_rp
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id) = tmp_rp.user_id
FULL OUTER JOIN tmp_comment
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id) = tmp_comment.user_id
FULL OUTER JOIN tmp_coupon
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id, tmp_comment.user_id) = tmp_coupon.user_id
FULL OUTER JOIN tmp_od
    ON COALESCE(tmp_login.user_id, tmp_cf.user_id, tmp_order.user_id, tmp_pay.user_id, tmp_ri.user_id, tmp_rp.user_id, tmp_comment.user_id, tmp_coupon.user_id) = tmp_od.user_id;
商品主题
-- SKU-topic DWS table: one row per SKU per day, partitioned by dt.
DROP TABLE IF EXISTS dws_sku_action_daycount;
CREATE EXTERNAL TABLE IF NOT EXISTS dws_sku_action_daycount
(
    `sku_id`                       STRING        COMMENT 'sku_id',
    `order_count`                  BIGINT        COMMENT '被下单次数',
    `order_num`                    BIGINT        COMMENT '被下单件数',
    `order_activity_count`         BIGINT        COMMENT '参与活动被下单次数',
    `order_coupon_count`           BIGINT        COMMENT '使用优惠券被下单次数',
    `order_activity_reduce_amount` DECIMAL(16,2) COMMENT '优惠金额(活动)',
    `order_coupon_reduce_amount`   DECIMAL(16,2) COMMENT '优惠金额(优惠券)',
    `order_original_amount`        DECIMAL(16,2) COMMENT '被下单原价金额',
    `order_final_amount`           DECIMAL(16,2) COMMENT '被下单最终金额',
    `payment_count`                BIGINT        COMMENT '被支付次数',
    `payment_num`                  BIGINT        COMMENT '被支付件数',
    `payment_amount`               DECIMAL(16,2) COMMENT '被支付金额',
    `refund_order_count`           BIGINT        COMMENT '被退单次数',
    `refund_order_num`             BIGINT        COMMENT '被退单件数',
    `refund_order_amount`          DECIMAL(16,2) COMMENT '被退单金额',
    `refund_payment_count`         BIGINT        COMMENT '被退款次数',
    `refund_payment_num`           BIGINT        COMMENT '被退款件数',
    `refund_payment_amount`        DECIMAL(16,2) COMMENT '被退款金额',
    `cart_count`                   BIGINT        COMMENT '被加入购物车次数',
    `favor_count`                  BIGINT        COMMENT '被收藏次数',
    `appraise_good_count`          BIGINT        COMMENT '好评数',
    `appraise_mid_count`           BIGINT        COMMENT '中评数',
    `appraise_bad_count`           BIGINT        COMMENT '差评数',
    `appraise_default_count`       BIGINT        COMMENT '默认评价数'
) COMMENT '每日商品行为'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_sku_action_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
-- First-day load for dws_sku_action_daycount.
-- The INSERT at the end of this statement uses a fully dynamic partition spec,
-- partition(dt), which Hive rejects in strict mode, so nonstrict mode is enabled first.
SET hive.exec.dynamic.partition.mode=nonstrict;
WITH
-- Order metrics per SKU per order-creation day.
tmp_order AS
(
    SELECT
        DATE_FORMAT(create_time, 'yyyy-MM-dd')   AS dt,
        sku_id,
        COUNT(*)                                 AS order_count,                  -- times ordered
        SUM(sku_num)                             AS order_num,                    -- items ordered
        SUM(IF(split_activity_amount > 0, 1, 0)) AS order_activity_count,         -- times ordered with an activity
        SUM(IF(split_coupon_amount > 0, 1, 0))   AS order_coupon_count,           -- times ordered with a coupon
        SUM(split_activity_amount)               AS order_activity_reduce_amount, -- activity discount amount
        SUM(split_coupon_amount)                 AS order_coupon_reduce_amount,   -- coupon discount amount
        SUM(original_amount)                     AS order_original_amount,        -- original amount ordered
        SUM(split_final_amount)                  AS order_final_amount            -- final amount ordered
    FROM dwd_order_detail
    GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), sku_id
),
-- Payment metrics per SKU per payment-callback day: order detail lines joined to
-- the payments of their order that have a callback_time.
tmp_pay AS
(
    SELECT
        DATE_FORMAT(pi.callback_time, 'yyyy-MM-dd') AS dt,
        od.sku_id,
        COUNT(*)                                    AS payment_count,  -- times paid
        SUM(od.sku_num)                             AS payment_num,    -- items paid
        SUM(od.split_final_amount)                  AS payment_amount  -- amount paid
    FROM dwd_order_detail od
    INNER JOIN
    (
        SELECT
            order_id,
            callback_time
        FROM dwd_payment_info
        WHERE callback_time IS NOT NULL
    ) pi
        ON pi.order_id = od.order_id
    GROUP BY DATE_FORMAT(pi.callback_time, 'yyyy-MM-dd'), od.sku_id
),
-- Refund-order metrics per SKU per refund-creation day.
tmp_ri AS
(
    SELECT
        DATE_FORMAT(create_time, 'yyyy-MM-dd') AS dt,
        sku_id,
        COUNT(*)                               AS refund_order_count,  -- times refund-ordered
        SUM(refund_num)                        AS refund_order_num,    -- items refund-ordered
        SUM(refund_amount)                     AS refund_order_amount  -- refund-order amount
    FROM dwd_order_refund_info
    GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), sku_id
),
-- Refund-payment metrics per SKU per callback day. The refunded item quantity is
-- looked up from the matching refund order.
tmp_rp AS
(
    SELECT
        DATE_FORMAT(rp.callback_time, 'yyyy-MM-dd') AS dt,
        rp.sku_id,
        COUNT(*)                                    AS refund_payment_count,  -- times refunded
        SUM(ri.refund_num)                          AS refund_payment_num,    -- items refunded
        SUM(rp.refund_amount)                       AS refund_payment_amount  -- amount refunded
    FROM
    (
        SELECT
            order_id,
            sku_id,
            refund_amount,
            callback_time
        FROM dwd_refund_payment
        -- Rows without a callback_time would yield a NULL dt and land in the default
        -- dynamic partition. tmp_pay in this statement applies the same filter.
        WHERE callback_time IS NOT NULL
    ) rp
    LEFT JOIN
    (
        SELECT
            order_id,
            sku_id,
            refund_num
        FROM dwd_order_refund_info
    ) ri
        ON rp.order_id = ri.order_id
       AND rp.sku_id = ri.sku_id
    GROUP BY DATE_FORMAT(rp.callback_time, 'yyyy-MM-dd'), rp.sku_id
),
-- Cart-add and favor-add counts per SKU per day.
-- NOTE(review): assumes the item column holds a sku id for these two actions - confirm.
tmp_cf AS
(
    SELECT
        dt,
        item                                   AS sku_id,
        SUM(IF(action_id = 'cart_add', 1, 0))  AS cart_count,  -- times added to cart
        SUM(IF(action_id = 'favor_add', 1, 0)) AS favor_count  -- times favorited
    FROM dwd_action_log
    WHERE action_id IN ('cart_add', 'favor_add')
    GROUP BY dt, item
),
-- Review counts per SKU per day, split by appraise code.
tmp_comment AS
(
    SELECT
        DATE_FORMAT(create_time, 'yyyy-MM-dd') AS dt,
        sku_id,
        SUM(IF(appraise = '1201', 1, 0))       AS appraise_good_count,    -- good reviews
        SUM(IF(appraise = '1202', 1, 0))       AS appraise_mid_count,     -- neutral reviews
        SUM(IF(appraise = '1203', 1, 0))       AS appraise_bad_count,     -- bad reviews
        SUM(IF(appraise = '1204', 1, 0))       AS appraise_default_count  -- default reviews
    FROM dwd_comment_info
    GROUP BY DATE_FORMAT(create_time, 'yyyy-MM-dd'), sku_id
)
-- First-day load: UNION ALL followed by GROUP BY is used in place of full outer joins.
-- Each branch contributes one metric family and zero-fills every other column, and the
-- outer SUM per (dt, sku_id) merges the branches. The last select column feeds the
-- dynamic partition dt.
INSERT OVERWRITE TABLE dws_sku_action_daycount PARTITION (dt)
SELECT
    sku_id,
    SUM(order_count)                  AS order_count,
    SUM(order_num)                    AS order_num,
    SUM(order_activity_count)         AS order_activity_count,
    SUM(order_coupon_count)           AS order_coupon_count,
    SUM(order_activity_reduce_amount) AS order_activity_reduce_amount,
    SUM(order_coupon_reduce_amount)   AS order_coupon_reduce_amount,
    SUM(order_original_amount)        AS order_original_amount,
    SUM(order_final_amount)           AS order_final_amount,
    SUM(payment_count)                AS payment_count,
    SUM(payment_num)                  AS payment_num,
    SUM(payment_amount)               AS payment_amount,
    SUM(refund_order_count)           AS refund_order_count,
    SUM(refund_order_num)             AS refund_order_num,
    SUM(refund_order_amount)          AS refund_order_amount,
    SUM(refund_payment_count)         AS refund_payment_count,
    SUM(refund_payment_num)           AS refund_payment_num,
    SUM(refund_payment_amount)        AS refund_payment_amount,
    SUM(cart_count)                   AS cart_count,
    SUM(favor_count)                  AS favor_count,
    SUM(appraise_good_count)          AS appraise_good_count,
    SUM(appraise_mid_count)           AS appraise_mid_count,
    SUM(appraise_bad_count)           AS appraise_bad_count,
    SUM(appraise_default_count)       AS appraise_default_count,
    dt
FROM
(
    -- Branch 1: order metrics.
    SELECT
        dt, sku_id,
        order_count, order_num, order_activity_count, order_coupon_count,
        order_activity_reduce_amount, order_coupon_reduce_amount,
        order_original_amount, order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_order
    UNION ALL
    -- Branch 2: payment metrics.
    SELECT
        dt, sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        payment_count, payment_num, payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_pay
    UNION ALL
    -- Branch 3: refund-order metrics.
    SELECT
        dt, sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        refund_order_count, refund_order_num, refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_ri
    UNION ALL
    -- Branch 4: refund-payment metrics.
    SELECT
        dt, sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        refund_payment_count, refund_payment_num, refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_rp
    UNION ALL
    -- Branch 5: cart and favor metrics.
    SELECT
        dt, sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        cart_count, favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_cf
    UNION ALL
    -- Branch 6: review metrics.
    SELECT
        dt, sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        appraise_good_count, appraise_mid_count,
        appraise_bad_count, appraise_default_count
    FROM tmp_comment
) t1
GROUP BY dt, sku_id;
-- Daily load for dws_sku_action_daycount (load date 2020-06-15).
WITH
-- Order metrics per SKU from the load-date partition of dwd_order_detail.
tmp_order AS
(
    SELECT
        sku_id,
        COUNT(*)                                 AS order_count,
        SUM(sku_num)                             AS order_num,
        SUM(IF(split_activity_amount > 0, 1, 0)) AS order_activity_count,
        SUM(IF(split_coupon_amount > 0, 1, 0))   AS order_coupon_count,
        SUM(split_activity_amount)               AS order_activity_reduce_amount,
        SUM(split_coupon_amount)                 AS order_coupon_reduce_amount,
        SUM(original_amount)                     AS order_original_amount,
        SUM(split_final_amount)                  AS order_final_amount
    FROM dwd_order_detail
    WHERE dt = '2020-06-15'
    GROUP BY sku_id
),
-- Payment metrics per SKU for the load date.
-- The original note on this CTE read: do not quite understand. Reading of the code:
-- detail rows come from the dt partitions of the load date and of the day before, and
-- only rows whose order_id appears in the load-date partition of dwd_payment_info are kept.
-- NOTE(review): presumably the extra day covers orders created the day before and
-- paid on the load date - confirm the payment window.
tmp_pay AS
(
    SELECT
        sku_id,
        COUNT(*)                AS payment_count,
        SUM(sku_num)            AS payment_num,
        SUM(split_final_amount) AS payment_amount
    FROM dwd_order_detail
    WHERE (
              dt = '2020-06-15'
           OR dt = DATE_ADD('2020-06-15', -1)
          )
      AND order_id IN
          (
              SELECT order_id
              FROM dwd_payment_info
              WHERE dt = '2020-06-15'
          )
    GROUP BY sku_id
),
-- Refund-order metrics per SKU from the load-date partition.
tmp_ri AS
(
    SELECT
        sku_id,
        COUNT(*)           AS refund_order_count,
        SUM(refund_num)    AS refund_order_num,
        SUM(refund_amount) AS refund_order_amount
    FROM dwd_order_refund_info
    WHERE dt = '2020-06-15'
    GROUP BY sku_id
),
-- Refund-payment metrics per SKU for the load date. The refunded item quantity is
-- looked up from refund orders of the 15 days up to the load date.
tmp_rp AS
(
    SELECT
        rp.sku_id,
        COUNT(*)              AS refund_payment_count,
        SUM(ri.refund_num)    AS refund_payment_num,
        SUM(rp.refund_amount) AS refund_payment_amount
    FROM
    (
        SELECT
            order_id,
            sku_id,
            refund_amount
        FROM dwd_refund_payment
        WHERE dt = '2020-06-15'
    ) rp
    LEFT JOIN
    (
        SELECT
            order_id,
            sku_id,
            refund_num
        FROM dwd_order_refund_info
        WHERE dt >= DATE_ADD('2020-06-15', -15)
    ) ri
        ON rp.order_id = ri.order_id
       AND rp.sku_id = ri.sku_id
    GROUP BY rp.sku_id
),
-- Cart-add and favor-add counts per SKU for the load date.
-- NOTE(review): assumes the item column holds a sku id for these two actions - confirm.
tmp_cf AS
(
    SELECT
        item                                   AS sku_id,
        SUM(IF(action_id = 'cart_add', 1, 0))  AS cart_count,
        SUM(IF(action_id = 'favor_add', 1, 0)) AS favor_count
    FROM dwd_action_log
    WHERE dt = '2020-06-15'
      AND action_id IN ('cart_add', 'favor_add')
    GROUP BY item
),
-- Review counts per SKU by appraise code, from the load-date partition.
tmp_comment AS
(
    SELECT
        sku_id,
        SUM(IF(appraise = '1201', 1, 0)) AS appraise_good_count,
        SUM(IF(appraise = '1202', 1, 0)) AS appraise_mid_count,
        SUM(IF(appraise = '1203', 1, 0)) AS appraise_bad_count,
        SUM(IF(appraise = '1204', 1, 0)) AS appraise_default_count
    FROM dwd_comment_info
    WHERE dt = '2020-06-15'
    GROUP BY sku_id
)
-- Daily load into the static partition dt=2020-06-15.
-- Each UNION ALL branch contributes one metric family and zero-fills every other
-- column, and the outer SUM per sku_id merges the branches into one row per SKU.
INSERT OVERWRITE TABLE dws_sku_action_daycount PARTITION (dt = '2020-06-15')
SELECT
    sku_id,
    SUM(order_count)                  AS order_count,
    SUM(order_num)                    AS order_num,
    SUM(order_activity_count)         AS order_activity_count,
    SUM(order_coupon_count)           AS order_coupon_count,
    SUM(order_activity_reduce_amount) AS order_activity_reduce_amount,
    SUM(order_coupon_reduce_amount)   AS order_coupon_reduce_amount,
    SUM(order_original_amount)        AS order_original_amount,
    SUM(order_final_amount)           AS order_final_amount,
    SUM(payment_count)                AS payment_count,
    SUM(payment_num)                  AS payment_num,
    SUM(payment_amount)               AS payment_amount,
    SUM(refund_order_count)           AS refund_order_count,
    SUM(refund_order_num)             AS refund_order_num,
    SUM(refund_order_amount)          AS refund_order_amount,
    SUM(refund_payment_count)         AS refund_payment_count,
    SUM(refund_payment_num)           AS refund_payment_num,
    SUM(refund_payment_amount)        AS refund_payment_amount,
    SUM(cart_count)                   AS cart_count,
    SUM(favor_count)                  AS favor_count,
    SUM(appraise_good_count)          AS appraise_good_count,
    SUM(appraise_mid_count)           AS appraise_mid_count,
    SUM(appraise_bad_count)           AS appraise_bad_count,
    SUM(appraise_default_count)       AS appraise_default_count
FROM
(
    -- Branch 1: order metrics.
    SELECT
        sku_id,
        order_count, order_num, order_activity_count, order_coupon_count,
        order_activity_reduce_amount, order_coupon_reduce_amount,
        order_original_amount, order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_order
    UNION ALL
    -- Branch 2: payment metrics.
    SELECT
        sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        payment_count, payment_num, payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_pay
    UNION ALL
    -- Branch 3: refund-order metrics.
    SELECT
        sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        refund_order_count, refund_order_num, refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_ri
    UNION ALL
    -- Branch 4: refund-payment metrics.
    SELECT
        sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        refund_payment_count, refund_payment_num, refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_rp
    UNION ALL
    -- Branch 5: cart and favor metrics.
    SELECT
        sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        cart_count, favor_count,
        0 AS appraise_good_count, 0 AS appraise_mid_count,
        0 AS appraise_bad_count, 0 AS appraise_default_count
    FROM tmp_cf
    UNION ALL
    -- Branch 6: review metrics.
    SELECT
        sku_id,
        0 AS order_count, 0 AS order_num, 0 AS order_activity_count, 0 AS order_coupon_count,
        0 AS order_activity_reduce_amount, 0 AS order_coupon_reduce_amount,
        0 AS order_original_amount, 0 AS order_final_amount,
        0 AS payment_count, 0 AS payment_num, 0 AS payment_amount,
        0 AS refund_order_count, 0 AS refund_order_num, 0 AS refund_order_amount,
        0 AS refund_payment_count, 0 AS refund_payment_num, 0 AS refund_payment_amount,
        0 AS cart_count, 0 AS favor_count,
        appraise_good_count, appraise_mid_count,
        appraise_bad_count, appraise_default_count
    FROM tmp_comment
) t1
GROUP BY sku_id;
优惠券主题、活动主题、地区主题等等
首日调度脚本
#!/bin/bash
# First-day load driver for the DWS layer.
#
# Usage: <script> <table_name|all> <do_date>
#   $1  DWS table to load, or "all" to run every statement in a single hive call
#   $2  business date, available to the SQL strings as $do_date
#
# NOTE: the SQL strings below are abbreviated placeholders (empty CTE bodies and
# bare SELECTs) and are kept exactly as in the notes. They must be filled in
# with the full load statements before this script can run.
APP=gmall

# The date argument is mandatory. Fail with a non-zero status so that a
# scheduler does not treat a missing argument as a successful run.
if [ -n "$2" ]; then
    do_date=$2
else
    echo "请传入日期参数" >&2
    exit 1
fi

dws_visitor_action_daycount="
insert overwrite table ${APP}.dws_visitor_action_daycount partition(dt='$do_date')
select
"
dws_area_stats_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp as
()
insert overwrite table ${APP}.dws_area_stats_daycount partition(dt)
select
"
dws_user_action_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp as
()
insert overwrite table ${APP}.dws_user_action_daycount partition(dt)
select
"
dws_activity_info_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp as
()
insert overwrite table ${APP}.dws_activity_info_daycount partition(dt)
select
"
dws_sku_action_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp as
()
insert overwrite table ${APP}.dws_sku_action_daycount partition(dt)
select
"
dws_coupon_info_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp as
()
insert overwrite table ${APP}.dws_coupon_info_daycount partition(dt)
select
"

# Dispatch on the requested table name. "$1" is quoted so that an empty or
# whitespace-containing argument cannot break the case statement.
case "$1" in
    "dws_visitor_action_daycount" )
        hive -e "$dws_visitor_action_daycount"
    ;;
    "dws_user_action_daycount" )
        hive -e "$dws_user_action_daycount"
    ;;
    "dws_activity_info_daycount" )
        hive -e "$dws_activity_info_daycount"
    ;;
    "dws_area_stats_daycount" )
        hive -e "$dws_area_stats_daycount"
    ;;
    "dws_sku_action_daycount" )
        hive -e "$dws_sku_action_daycount"
    ;;
    "dws_coupon_info_daycount" )
        hive -e "$dws_coupon_info_daycount"
    ;;
    "all" )
        hive -e "$dws_visitor_action_daycount$dws_user_action_daycount$dws_activity_info_daycount$dws_area_stats_daycount$dws_sku_action_daycount$dws_coupon_info_daycount"
    ;;
    * )
        # An unknown table name used to fall through silently with status 0.
        echo "Usage: $0 {dws_visitor_action_daycount|dws_user_action_daycount|dws_activity_info_daycount|dws_area_stats_daycount|dws_sku_action_daycount|dws_coupon_info_daycount|all} <do_date>" >&2
        exit 1
    ;;
esac
每日调度脚本:与首日脚本结构相同,但需要将各表的动态分区 partition(dt) 修改为静态分区 partition(dt='$do_date')



