栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

2021.11.12窗口函数(开窗函数) 简单使用

2021.11.12窗口函数(开窗函数) 简单使用

hive> create table t_window(
    > name string,
    > orderdate date,
    > cost int
    > )
    > row format delimited fields terminated by ',';
OK

vi order.csv
jack,2015-01-01,10
tony,2015-01-02,15
jack,2015-02-03,23
tony,2015-01-04,29
jack,2015-01-05,46
jack,2015-04-06,42
tony,2015-01-07,50
jack,2015-01-08,55
mart,2015-04-08,62
mart,2015-04-09,68
neil,2015-05-10,12
mart,2015-04-11,75
neil,2015-06-12,80
mart,2015-04-13,94

hive> load data local inpath '/opt/tmp/order.csv' into table t_window;


hive> select substring('2015-04-13',1,5);
OK
2015-
Time taken: 0.03 seconds, Fetched: 1 row(s)
hive> select substring('2015-04-13',2,5);
OK
015-0
Time taken: 0.033 seconds, Fetched: 1 row(s)
hive> select substring('2015-04-13',3,5);
OK
15-04

hive> select substr('2015-04-13',1,5);
OK
2015-
Time taken: 0.038 seconds, Fetched: 1 row(s)
hive> select substr('2015-04-13',2,5);
OK
015-0
Time taken: 0.038 seconds, Fetched: 1 row(s)
hive> select substr('2015-04-13',3,5);
OK
15-04

hive> select distinct name,count(*) over() from t_window where substring(orderdate,1,7)='2015-04';

hive> select distinct name,substring(orderdate,1,7),count(*) over() from t_window where substring(orderdate,1,7)='2015-04';
OK
mart	2015-04	2
jack	2015-04	2
hive> select name,count(*) over() from t_window where substring(orderdate,1,7)='2015-04' group by name;
OK
mart	2
jack	2


hive> select name,orderdate,cost,sum(cost) over() from t_window;
OK      最后一列显示的内容为 所有行的第三列(cost)相加这和
mart	2015-04-13	94	661
neil	2015-06-12	80	661
mart	2015-04-11	75	661
neil	2015-05-10	12	661
mart	2015-04-09	68	661
mart	2015-04-08	62	661
jack	2015-01-08	55	661
tony	2015-01-07	50	661
jack	2015-04-06	42	661
jack	2015-01-05	46	661
tony	2015-01-04	29	661
jack	2015-02-03	23	661
tony	2015-01-02	15	661
jack	2015-01-01	10	661

hive> select name,orderdate,cost,sum(cost) over(partition by month(orderdate)) from t_window;
OK
jack	2015-01-01	10	205
jack	2015-01-08	55	205
tony	2015-01-07	50	205
jack	2015-01-05	46	205
tony	2015-01-04	29	205
tony	2015-01-02	15	205
jack	2015-02-03	23	23
mart	2015-04-13	94	341
jack	2015-04-06	42	341
mart	2015-04-11	75	341
mart	2015-04-09	68	341
mart	2015-04-08	62	341
neil	2015-05-10	12	12
neil	2015-06-12	80	80

hive> select name, gender,count(*) over(partition by gender) from studentp;
OK
zhansan8	boy	4
zhansan5	boy	4
zhansan3	boy	4
zhansan1	boy	4
zhansan11	girl	7
zhansan10	girl	7
zhansan9	girl	7
zhansan7	girl	7
zhansan6	girl	7
zhansan4	girl	7
zhansan2	girl	7

hive> select name,orderdate,cost,
      sum(cost) over() as sample1, --所有的cost列相加
      sum(cost) over(partition by name) as sample2, --将按名字分组,然后把每一组的cost列相加
	  sum(cost) over(partition by name order by orderdate) as sample3
      from t_window;
OK
name orderdate cost sample1 sample2
jack	2015-01-01	10	661	176
jack	2015-02-03	23	661	176
jack	2015-01-05	46	661	176
jack	2015-04-06	42	661	176
jack	2015-01-08	55	661	176
mart	2015-04-13	94	661	299
mart	2015-04-08	62	661	299
mart	2015-04-09	68	661	299
mart	2015-04-11	75	661	299
neil	2015-06-12	80	661	92
neil	2015-05-10	12	661	92
tony	2015-01-07	50	661	94
tony	2015-01-02	15	661	94
tony	2015-01-04	29	661	94

select name,orderdate,cost,
   sum(cost) over() as sample1, 
   sum(cost) over(partition by name) as sample2,
   sum(cost) over(partition by name order by orderdate) as sample3
   from t_window;
jack	2015-01-01	10	661	176	10
jack	2015-01-05	46	661	176	56
jack	2015-01-08	55	661	176	111
jack	2015-02-03	23	661	176	134
jack	2015-04-06	42	661	176	176
mart	2015-04-08	62	661	299	62
mart	2015-04-09	68	661	299	130
mart	2015-04-11	75	661	299	205
mart	2015-04-13	94	661	299	299
neil	2015-05-10	12	661	92	12
neil	2015-06-12	80	661	92	92
tony	2015-01-02	15	661	94	15
tony	2015-01-04	29	661	94	44
tony	2015-01-07	50	661	94	94
   
select name,orderdate,cost,
   sum(cost) over() as sample1, 
   sum(cost) over(partition by name) as sample2,
   sum(cost) over(partition by name order by orderdate) as sample3,
   sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row) as sample4
   from t_window;   
OK
jack	2015-01-01	10	661	176	10	10
jack	2015-01-05	46	661	176	56	56
jack	2015-01-08	55	661	176	111	111
jack	2015-02-03	23	661	176	134	134
jack	2015-04-06	42	661	176	176	176
mart	2015-04-08	62	661	299	62	62
mart	2015-04-09	68	661	299	130	130
mart	2015-04-11	75	661	299	205	205
mart	2015-04-13	94	661	299	299	299
neil	2015-05-10	12	661	92	12	12
neil	2015-06-12	80	661	92	92	92
tony	2015-01-02	15	661	94	15	15
tony	2015-01-04	29	661	94	44	44
tony	2015-01-07	50	661	94	94	94

select name,orderdate,cost,
   sum(cost) over() as sample1, 
   sum(cost) over(partition by name) as sample2,
   sum(cost) over(partition by name order by orderdate) as sample3,
   sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row) as sample4,
   sum(cost) over(partition by name order by orderdate rows between 1 preceding and current row) as sample5,
   sum(cost) over(partition by name order by orderdate rows between 1 preceding and 1 following) as sample6,
   sum(cost) over(partition by name order by orderdate rows between current row and unbounded following) as sample7
   from t_window; 
   
   
hive> select name,orderdate,cost, ntile(3) over() as sample1 from t_window;
OK
mart	2015-04-13	94	1
neil	2015-06-12	80	1
mart	2015-04-11	75	1
neil	2015-05-10	12	1
mart	2015-04-09	68	1
mart	2015-04-08	62	2
jack	2015-01-08	55	2
tony	2015-01-07	50	2
jack	2015-04-06	42	2
jack	2015-01-05	46	2
tony	2015-01-04	29	3
jack	2015-02-03	23	3
tony	2015-01-02	15	3
jack	2015-01-01	10	3
   
hive> select name,orderdate,cost, ntile(3) over() as sample1,
    > ntile(3) over(partition by name) as sample2,
	> ntile(2) over(partition by month(orderdate)) as sample3,
	> ntile(3) over(partition by name order by cost desc) as sample4
    > from t_window;
   
hive> select name,orderdate,cost, row_number() over() as r1,
    > row_number() over(order by name) as r2,
    > rank() over(order by name) as r3,
    > dense_rank() over(order by name) as r4
    > from t_window;
OK
jack	2015-01-01	10	14	1	1	1
jack	2015-02-03	23	12	2	1	1
jack	2015-01-05	46	10	3	1	1
jack	2015-04-06	42	9	4	1	1
jack	2015-01-08	55	7	5	1	1
mart	2015-04-13	94	1	6	6	2
mart	2015-04-08	62	6	7	6	2
mart	2015-04-09	68	5	8	6	2
mart	2015-04-11	75	3	9	6	2
neil	2015-06-12	80	2	10	10	3
neil	2015-05-10	12	4	11	10	3
tony	2015-01-07	50	8	12	12	4
tony	2015-01-02	15	13	13	12	4
tony	2015-01-04	29	11	14	12	4   
   
   
hive> select name,orderdate,cost,
lag(orderdate,1) over(partition by name order by orderdate) as sample1,
lag(orderdate,1,'1972-01-01') over(partition by name order by orderdate) as sample2,
lead(orderdate,1,'3000-01-01') over(partition by name order by orderdate) as sample3
from t_window;
OK
jack	2015-01-01	10	NULL
jack	2015-01-05	46	2015-01-01
jack	2015-01-08	55	2015-01-05
jack	2015-02-03	23	2015-01-08
jack	2015-04-06	42	2015-02-03
mart	2015-04-08	62	NULL
mart	2015-04-09	68	2015-04-08
mart	2015-04-11	75	2015-04-09
mart	2015-04-13	94	2015-04-11
neil	2015-05-10	12	NULL
neil	2015-06-12	80	2015-05-10
tony	2015-01-02	15	NULL
tony	2015-01-04	29	2015-01-02
tony	2015-01-07	50	2015-01-04

hive> select name,orderdate,cost,
first_value(orderdate) over(partition by name order by orderdate) as time ,
last_value(orderdate) over(partition by name order by orderdate) as time
from t_window;
OK
jack	2015-01-01	10	2015-01-01	2015-01-01
jack	2015-01-05	46	2015-01-01	2015-01-05
jack	2015-01-08	55	2015-01-01	2015-01-08
jack	2015-02-03	23	2015-01-01	2015-02-03
jack	2015-04-06	42	2015-01-01	2015-04-06
mart	2015-04-08	62	2015-04-08	2015-04-08
mart	2015-04-09	68	2015-04-08	2015-04-09
mart	2015-04-11	75	2015-04-08	2015-04-11
mart	2015-04-13	94	2015-04-08	2015-04-13
neil	2015-05-10	12	2015-05-10	2015-05-10
neil	2015-06-12	80	2015-05-10	2015-06-12
tony	2015-01-02	15	2015-01-02	2015-01-02
tony	2015-01-04	29	2015-01-02	2015-01-04
tony	2015-01-07	50	2015-01-02	2015-01-07

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/487776.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号