Write a query that'll identify returning active users. A returning active user is a user that has made a second purchase within 7 days of any other of their purchases. Output a list of user_ids of these returning active users.
--建表 use strata; drop table amazon_transactions; create table amazon_transactions ( id int ,user_id int ,item string ,created_at date ,revenue int ) row format delimited fields terminated by ','; load data local inpath '/tmp/strata/amazon_transactions.txt' overwrite into table amazon_transactions; --查看数据 hive> select * from amazon_transactions limit 10; OK 1 109 milk 2020-03-03 123 2 139 biscuit 2020-03-18 421 3 120 milk 2020-03-18 176 4 108 banana 2020-03-18 862 5 130 milk 2020-03-28 333 6 103 bread 2020-03-29 862 7 122 banana 2020-03-07 952 8 125 bread 2020-03-13 317 9 139 bread 2020-03-30 929 10 141 banana 2020-03-17 812 Time taken: 0.08 seconds, Fetched: 10 row(s)
--output
with t as
(
select user_id
,created_at
,datediff(created_at,lag(created_at,1)over(partition by user_id order by created_at asc)) as diff
from amazon_transactions
)
select distinct user_id
from t where t.diff<=7;
---
100
103
105
109
110
111
112
114
117
120
122
128
129
130
131
133
141
143
150
Time taken: 39.329 seconds, Fetched: 19 row(s)



