- HIVE的3种去重方法
- 应用场景:统计各页面每天 UV
- MySQL方案
- HIVE方案
- 建表
- 写入
SELECt DISTINCT user_id FROM t;
SELECt user_id FROM t GROUP BY user_id;
SELECt user_id FROM (
SELECt
user_id,
ROW_NUMBER()OVER(PARTITION BY user_id ORDER BY user_id)r
FROM t
)t1 WHERe r=1;
建议用第三种,效率高
应用场景:统计各页面每天 UV MySQL方案SELECt
page,
ymd,
COUNT(DISTINCT name) uv
FROM
t
GROUP BY
page,
ymd
;
HIVE方案
建表
删库,建库
DROp DATAbase IF EXISTS app; CREATE DATAbase app LOCATION '/app';
创建明细表
DROP TABLE IF EXISTS app.tb;
CREATE TABLE app.tb (
name STRING,
page STRING
)
PARTITIonED BY (ymd STRING)
LOCATION '/app/tb';
-- 插入数据
INSERT INTO TABLE app.tb PARTITION(ymd='2021-08-31') VALUES
('剑圣','首页'),('剑圣','首页'),('巫妖','首页'),('剑圣','商品页'),('剑圣','商品页'),('先知','首页');
INSERT INTO TABLE app.tb PARTITION(ymd='2021-09-01') VALUES
('先知','首页'),('剑圣','首页'),('先知','首页'),('剑圣','商品页'),('先知','商品页'),('先知','首页');
创建UV统计表
-- 删表,建表:
DROP TABLE IF EXISTS app.tb_uv;
CREATE TABLE app.tb_uv (
page STRING,
uv INT
)
PARTITIonED BY (ymd STRING)
LOCATION '/app/tb_uv';
写入
INSERT OVERWRITE TABLE app.tb_uv PARTITION(ymd='2021-08-31')
SELECT
page,
count(name)
FROM
(
SELECt
page,
name,
ROW_NUMBER()OVER(PARTITION BY name,page ORDER BY name)r
FROM app.tb
WHERe ymd='2021-08-31'
)t
WHERe r=1
GROUP BY page;
INSERT OVERWRITE TABLE app.tb_uv PARTITION(ymd='2021-09-01')
SELECt
page,
count(name)
FROM
(
SELECt
page,
name,
ROW_NUMBER()OVER(PARTITION BY name,page ORDER BY name)r
FROM app.tb
WHERe ymd='2021-09-01'
)t
WHERe r=1
GROUP BY page;



