sql代码(HUE上运行):
SELECt
a.dt1 `创作日期`,
a.cnt `创作用户量`,
concat((round((a.cnt1/a.cnt)*100, 2)), '%') `次日首页留存率`,
concat((round((a.cnt2/a.cnt)*100, 2)), '%') `3日首页留存率`,
concat((round((a.cnt3/a.cnt)*100, 2)), '%') `7日首页留存率`
from (
SELECt t1.dt1,
count(distinct if(datediff(t2.dt2,t1.dt1)=0,t2.device_id,null)) cnt,
count(distinct if(datediff(t2.dt2,t1.dt1)=1,t2.device_id,null)) cnt1,
count(distinct if(datediff(t2.dt2,t1.dt1)=3,t2.device_id,null)) cnt2,
count(distinct if(datediff(t2.dt2,t1.dt1)=7,t2.device_id,null)) cnt3
FROM
(SELECt dt dt1,
device_id
FROM dws_zdm.dws_zdm_hive_traffic_home_feed_h
WHERe dt BETWEEN '${start_date}' AND'${end_date}'---时间可筛选 一个个来 '2021-03-01' '2021-03-02' '2021-03-03' '2021-03-04' '2021-03-05'
--where dt ='${start_date}'
GROUP BY dt1,
device_id)t1
LEFT JOIN
(SELECt dt dt2,
device_id
FROM dws_zdm.dws_zdm_hive_traffic_home_feed_h
WHERe dt >= '${start_date}'
GROUP BY dt2,
device_id)t2
ON t1.device_id = t2.device_id
WHERe t2.dt2 >= t1.dt1
and ( datediff(t2.dt2,t1.dt1)=0 or datediff(t2.dt2,t1.dt1)=1 or datediff(t2.dt2,t1.dt1)=3 or datediff(t2.dt2,t1.dt1)=7 )
GROUP BY t1.dt1
ORDER BY t1.dt1)a
数据显示
AB实验显著性判断
-- 计算一段时间内的用户留存率
cache table tmp_1
SELECt
a.dt1 dt,
a.user_type user_type,
round((a.cnt1/a.cnt)*100, 2) liucun1,
round((a.cnt2/a.cnt)*100, 2) liucun3,
round((a.cnt7/a.cnt)*100, 2) liucun7
from (
SELECt t1.dt1,
case when t1.abtest regexp '${shiyan_value}' then '实验组'
when t1.abtest regexp '${duizhao_value}' then '对照组'
end user_type,
count(distinct if(datediff(t2.dt2,t1.dt1)=0,t2.device_id,null)) cnt,
count(distinct if(datediff(t2.dt2,t1.dt1)=1,t2.device_id,null)) cnt1,
count(distinct if(datediff(t2.dt2,t1.dt1)=3,t2.device_id,null)) cnt2,
count(distinct if(datediff(t2.dt2,t1.dt1)=7,t2.device_id,null)) cnt7
FROM
(SELECt dt dt1,
regexp_extract(abtest_collection,'(^55|,55)_([^,]+)',2) abtest,
device_id
FROM dws_zdm.dws_zdm_hive_traffic_home_feed_h
WHERe dt BETWEEN '${start_date}' AND'${end_date}'
and project_id = '2'
and expose >0
GROUP BY 1,2,3)t1
LEFT JOIN
(SELECt dt dt2,
device_id
FROM dws_zdm.dws_zdm_hive_traffic_home_feed_h
WHERe dt >= '${start_date}'
and project_id = '2'
and expose >0
GROUP BY 1,2)t2
ON t1.device_id = t2.device_id
WHERe t2.dt2 >= t1.dt1
and ( datediff(t2.dt2,t1.dt1)=0 or datediff(t2.dt2,t1.dt1)=1 or datediff(t2.dt2,t1.dt1)=3 or datediff(t2.dt2,t1.dt1)=7 )
GROUP BY 1,2
ORDER BY 1,2)a
where a.user_type is not null
-- 计算均值和标准差
cache table tmp_2
SELECt user_type,
count(user_type) sample_num,
avg(liucun1) liucun1_avg,
avg(liucun3) liucun3_avg,
avg(liucun7) liucun7_avg,
stddev(liucun1) liucun1_std,
stddev(liucun3) liucun3_std,
stddev(liucun7) liucun7_std
from tmp_1
group by 1
-- 假设检验
select sample_num_shiyan `实验组样本量`,
sample_num_duizhao `对照组样本量`,
liucun1_avg_shiyan `实验组日均次日留存率`,
liucun1_avg_duizhao `对照组日均次日留存率`,
(liucun1_avg_shiyan/liucun1_avg_duizhao)-1 `日均次日留存率变化度`,
if(abs(liucun1_avg_shiyan-liucun1_avg_duizhao)/
sqrt(liucun1_std_shiyan*liucun1_std_shiyan/sample_num_shiyan+
liucun1_std_duizhao*liucun1_std_duizhao/sample_num_duizhao)>1.96,'显著','不显著') `日均次日留存率显著性`,
liucun3_avg_shiyan `实验组日均3日留存率`,
liucun3_avg_duizhao `对照组日均3日留存率`,
(liucun3_avg_shiyan/liucun3_avg_duizhao)-1 `日均3日留存率变化度`,
if(abs(liucun3_avg_shiyan-liucun3_avg_duizhao)/
sqrt(liucun3_std_shiyan*liucun3_std_shiyan/sample_num_shiyan+
liucun3_std_duizhao*liucun3_std_duizhao/sample_num_duizhao)>1.96,'显著','不显著') `日均3日留存率显著性`,
liucun7_avg_shiyan `实验组日均7日留存率`,
liucun7_avg_duizhao `对照组日均7日留存率`,
(liucun7_avg_shiyan/liucun7_avg_duizhao)-1 `日均7日留存率变化度`,
if(abs(liucun7_avg_shiyan-liucun7_avg_duizhao)/
sqrt(liucun7_std_shiyan*liucun7_std_shiyan/sample_num_shiyan+
liucun7_std_duizhao*liucun7_std_duizhao/sample_num_duizhao)>1.96,'显著','不显著') `日均7日留存率显著性`
from
(
select sum(if(user_type='实验组',sample_num,0)) sample_num_shiyan,
sum(if(user_type='对照组',sample_num,0)) sample_num_duizhao,
sum(if(user_type='实验组',liucun1_avg,0)) liucun1_avg_shiyan,
sum(if(user_type='对照组',liucun1_avg,0)) liucun1_avg_duizhao,
sum(if(user_type='实验组',liucun1_std,0)) liucun1_std_shiyan,
sum(if(user_type='对照组',liucun1_std,0)) liucun1_std_duizhao,
sum(if(user_type='实验组',liucun3_avg,0)) liucun3_avg_shiyan,
sum(if(user_type='对照组',liucun3_avg,0)) liucun3_avg_duizhao,
sum(if(user_type='实验组',liucun3_std,0)) liucun3_std_shiyan,
sum(if(user_type='对照组',liucun3_std,0)) liucun3_std_duizhao,
sum(if(user_type='实验组',liucun7_avg,0)) liucun7_avg_shiyan,
sum(if(user_type='对照组',liucun7_avg,0)) liucun7_avg_duizhao,
sum(if(user_type='实验组',liucun7_std,0)) liucun7_std_shiyan,
sum(if(user_type='对照组',liucun7_std,0)) liucun7_std_duizhao
from tmp_2
)t
数据显示



