表结构
-- 表1 小明 26 小强 45 小司 57 小武 12 小高 80 小陈 99 小张 45 小李 77 小红 93 小赵 90 -- 表2 0 30 60 80 100 -- 建表 create table if not exists score( name string, score string ) row format delimited fields terminated by 't' stored as textfile location '/user/hive/warehouse/score'; create table if not exists edge( val string ) row format delimited fields terminated by 't' stored as textfile location '/user/hive/warehouse/edge';
要求最终展示的效果如下:
思路
-- 第一步:先构建分数区间
select concat(val, '-', edge_max) as score_range
from (
SELECt val, lead(val, 1) OVER (ORDER BY CAST(val AS int)) AS edge_max
FROM edge
) B1 where edge_max is not null;
如下:
score_range
0-30
30-60
60-80
80-100
-- 第二步:组装想要的数据,获得的是笛卡尔积,后续优化
select A.*,
split(B.score_range, '-')[0] score_min,
split(B.score_range, '-')[1] score_max,
B.score_range
from (select name, score from score) A,
(
select concat(val, '-', edge_max) as score_range
from (
SELECt val, lead(val, 1) OVER (ORDER BY CAST(val AS int)) AS edge_max
FROM edge
) B1
where edge_max is not null
) B;
-- 第三步:过滤数据,得到最终结果
select name,
score,
if(cast(score as int) <= cast((score_min + score_max) as int) / 2 , score_min, score_max) as near_edge,
score_range
from (
select A.*,
split(B.score_range, '-')[0] score_min,
split(B.score_range, '-')[1] score_max,
B.score_range
from (select name, score from score) A,
(
select concat(val, '-', edge_max) as score_range
from (
SELECt val, lead(val, 1) OVER (ORDER BY CAST(val AS int)) AS edge_max
FROM edge
) B1
where edge_max is not null
) B
) T
where cast(T.score as int) > cast(T.score_min as int)
and cast(T.score as int) < cast(T.score_max as int);
学习lead()函数的用法



