1.建表
create table t_window(
name string,
orderdate date,
cost int
)
row format delimited fields terminated by ',';
2.载入数据
load data local inpath '/opt/tmp/order.csv' into table t_window;
3.查询表数据
select * from t_window;
4.比较substring substr
select substring('2015-04-13',1,7);
select substr('2015-04-13',1,7);
5.查询在2015年4月份购买过的顾客及总人数
select name,count(*) over ()
from t_window
where substring(orderdate,1,7)='2015-04';
6.查询2015年4月所有顾客以及总次数
select name,substring(orderdate,1,7) month,count(*) over() from t_window where substring(orderdate,1,7)='2015-04';
7.查询2015年4月总次数
select substring(orderdate,1,7) month,count(*) over() from t_window where substring(orderdate,1,7)='2015-04';
8.查询2015年4月每个人的总次数(窗口函数)
select distinct name,substring(orderdate,1,7) month,count(*) over() from t_window where substring(orderdate,1,7)='2015-04';
select name,count(*) over() from t_window where substring(orderdate,1,7)='2015-04' group by name;
9.查询总人数
select name,count(*) over() from student;
10.查询每个性别的人数
select gender,count(*) from studentp group by gender;
或者, select name,gender,count(gender) over(partition by gender) from studentp;
11.group by 全局分
partition by 查询分区子句
12.不分区
select name,orderdate,cost,sum(cost) over() from t_window;
| name | orderdate | cost | _wcol0 |
+-------+-------------+-------+---------+--+
| mart | 2015-04-13 | 94 | 661 |
| neil | 2015-06-12 | 80 | 661 |
| mart | 2015-04-11 | 75 | 661 |
| neil | 2015-05-10 | 12 | 661 |
| mart | 2015-04-09 | 68 | 661 |
| mart | 2015-04-08 | 62 | 661 |
| jack | 2015-01-08 | 55 | 661 |
| tony | 2015-01-07 | 50 | 661 |
| jack | 2015-04-06 | 42 | 661 |
| jack | 2015-01-05 | 46 | 661 |
| tony | 2015-01-04 | 29 | 661 |
| jack | 2015-02-03 | 23 | 661 |
| tony | 2015-01-02 | 15 | 661 |
| jack | 2015-01-01 | 10 | 661 |
13.分区 按月份分区,统计每月的总量 聚合分组内所有内容
select name,orderdate,cost,sum(cost) over(partition by month(orderdate)) from t_window;
name | orderdate | cost | _wcol0 |
+-------+-------------+-------+---------+--+
| jack | 2015-01-01 | 10 | 205 |
| jack | 2015-01-08 | 55 | 205 |
| tony | 2015-01-07 | 50 | 205 |
| jack | 2015-01-05 | 46 | 205 |
| tony | 2015-01-04 | 29 | 205 |
| tony | 2015-01-02 | 15 | 205 |
| jack | 2015-02-03 | 23 | 23 |
| mart | 2015-04-13 | 94 | 341 |
| jack | 2015-04-06 | 42 | 341 |
| mart | 2015-04-11 | 75 | 341 |
| mart | 2015-04-09 | 68 | 341 |
| mart | 2015-04-08 | 62 | 341 |
| neil | 2015-05-10 | 12 | 12 |
| neil | 2015-06-12 | 80 | 80
14.按月份累计求和,按月份排序
select name,orderdate,cost,sum(cost) over(partition by month(orderdate) order by orderdate) from t_window;
+-------+-------------+-------+---------+--+
| name | orderdate | cost | _wcol0 |
+-------+-------------+-------+---------+--+
| jack | 2015-01-01 | 10 | 10 |
| tony | 2015-01-02 | 15 | 25 |
| tony | 2015-01-04 | 29 | 54 |
| jack | 2015-01-05 | 46 | 100 |
| tony | 2015-01-07 | 50 | 150 |
| jack | 2015-01-08 | 55 | 205 |
| jack | 2015-02-03 | 23 | 23 |
| jack | 2015-04-06 | 42 | 42 |
| mart | 2015-04-08 | 62 | 104 |
| mart | 2015-04-09 | 68 | 172 |
| mart | 2015-04-11 | 75 | 247 |
| mart | 2015-04-13 | 94 | 341 |
| neil | 2015-05-10 | 12 | 12 |
| neil | 2015-06-12 | 80 | 80 |
+-------+-------------+-------+---------
15.查询所有的总销量
select name,orderdate,cost,sum(cost) over() as sample1 from t_window;
name | orderdate | cost | sample1 |
+-------+-------------+-------+----------+--+
| mart | 2015-04-13 | 94 | 661 |
| neil | 2015-06-12 | 80 | 661 |
| mart | 2015-04-11 | 75 | 661 |
| neil | 2015-05-10 | 12 | 661 |
| mart | 2015-04-09 | 68 | 661 |
| mart | 2015-04-08 | 62 | 661 |
| jack | 2015-01-08 | 55 | 661 |
| tony | 2015-01-07 | 50 | 661 |
| jack | 2015-04-06 | 42 | 661 |
| jack | 2015-01-05 | 46 | 661 |
| tony | 2015-01-04 | 29 | 661 |
| jack | 2015-02-03 | 23 | 661 |
| tony | 2015-01-02 | 15 | 661 |
| jack | 2015-01-01 | 10 | 661
16.按名字分组,然后把每组的cost列相加,以及所有cost列相加
select name,orderdate,cost,
sum(cost) over() as sample1, //所有cost列相加
sum (cost) over(partition by name ) as sample2, //按名字分组,然后把每组的cost列相加
sum(cost) over(partition by name order by orderdate) as sample3 //购买日期排序,所有cost累计相加
from t_window;
+-------+-------------+-------+----------+----------+----------+--+
| name | orderdate | cost | sample1 | sample2 | sample3 |
+-------+-------------+-------+----------+----------+----------+--+
| jack | 2015-01-01 | 10 | 661 | 176 | 10 |
| jack | 2015-01-05 | 46 | 661 | 176 | 56 |
| jack | 2015-01-08 | 55 | 661 | 176 | 111 |
| jack | 2015-02-03 | 23 | 661 | 176 | 134 |
| jack | 2015-04-06 | 42 | 661 | 176 | 176 |
| mart | 2015-04-08 | 62 | 661 | 299 | 62 |
| mart | 2015-04-09 | 68 | 661 | 299 | 130 |
| mart | 2015-04-11 | 75 | 661 | 299 | 205 |
| mart | 2015-04-13 | 94 | 661 | 299 | 299 |
| neil | 2015-05-10 | 12 | 661 | 92 | 12 |
| neil | 2015-06-12 | 80 | 661 | 92 | 92 |
| tony | 2015-01-02 | 15 | 661 | 94 | 15 |
| tony | 2015-01-04 | 29 | 661 | 94 | 44 |
| tony | 2015-01-07 | 50 | 661 | 94 | 94
17.按名字分组,然后把每组的cost列相加,以及所有cost列相加 unbounded preceding 起点
select name,orderdate,cost,
sum(cost) over() as sample1, //所有cost列相加
sum (cost) over(partition by name ) as sample2, //按名字分组,然后把每组的cost列相加
sum(cost) over(partition by name order by orderdate) as sample3 , //购买日期排序,所有cost累计相加
sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row ) as sample4
from t_window;
-------+-------------+-------+----------+----------+----------+----------+--+
| name | orderdate | cost | sample1 | sample2 | sample3 | sample4 |
+-------+-------------+-------+----------+----------+----------+----------+--+
| jack | 2015-01-01 | 10 | 661 | 176 | 10 | 10 |
| jack | 2015-01-05 | 46 | 661 | 176 | 56 | 56 |
| jack | 2015-01-08 | 55 | 661 | 176 | 111 | 111 |
| jack | 2015-02-03 | 23 | 661 | 176 | 134 | 134 |
| jack | 2015-04-06 | 42 | 661 | 176 | 176 | 176 |
| mart | 2015-04-08 | 62 | 661 | 299 | 62 | 62 |
| mart | 2015-04-09 | 68 | 661 | 299 | 130 | 130 |
| mart | 2015-04-11 | 75 | 661 | 299 | 205 | 205 |
| mart | 2015-04-13 | 94 | 661 | 299 | 299 | 299 |
| neil | 2015-05-10 | 12 | 661 | 92 | 12 | 12 |
| neil | 2015-06-12 | 80 | 661 | 92 | 92 | 92 |
| tony | 2015-01-02 | 15 | 661 | 94 | 15 | 15 |
| tony | 2015-01-04 | 29 | 661 | 94 | 44 | 44 |
| tony | 2015-01-07 | 50 | 661 | 94 | 94 | 94 |
18.计算前一行到当前行
select name,orderdate,cost,
sum(cost) over() as sample1,
sum (cost) over(partition by name ) as sample2,
sum(cost) over(partition by name order by orderdate) as sample3,
sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row ) as sample4,
sum(cost) over(partition by name order by orderdate rows between 1 preceding and current row) as sample5
from t_window;
-------+-------------+-------+----------+----------+----------+----------+----------+--+
| name | orderdate | cost | sample1 | sample2 | sample3 | sample4 | sample5 |
+-------+-------------+-------+----------+----------+----------+----------+----------+--+
| jack | 2015-01-01 | 10 | 661 | 176 | 10 | 10 | 10 |
| jack | 2015-01-05 | 46 | 661 | 176 | 56 | 56 | 56 |
| jack | 2015-01-08 | 55 | 661 | 176 | 111 | 111 | 101 |
| jack | 2015-02-03 | 23 | 661 | 176 | 134 | 134 | 78 |
| jack | 2015-04-06 | 42 | 661 | 176 | 176 | 176 | 65 |
| mart | 2015-04-08 | 62 | 661 | 299 | 62 | 62 | 62 |
| mart | 2015-04-09 | 68 | 661 | 299 | 130 | 130 | 130 |
| mart | 2015-04-11 | 75 | 661 | 299 | 205 | 205 | 143 |
| mart | 2015-04-13 | 94 | 661 | 299 | 299 | 299 | 169 |
| neil | 2015-05-10 | 12 | 661 | 92 | 12 | 12 | 12 |
| neil | 2015-06-12 | 80 | 661 | 92 | 92 | 92 | 92 |
| tony | 2015-01-02 | 15 | 661 | 94 | 15 | 15 | 15 |
| tony | 2015-01-04 | 29 | 661 | 94 | 44 | 44 | 44 |
| tony | 2015-01-07 | 50 | 661 | 94 | 94 | 94 | 79 |
19.前一行与后一行、当前行与最后一行
select name,orderdate,cost,
sum(cost) over() as sample1,
sum (cost) over(partition by name ) as sample2,
sum(cost) over(partition by name order by orderdate) as sample3,
sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row ) as sample4,
sum(cost) over(partition by name order by orderdate rows between 1 preceding and current row) as sample5,
sum(cost) over(partition by name order by orderdate rows between 1 preceding and 1 following) as sample6,
sum(cost) over(partition by name order by orderdate rows between current row and unbounded following ) as sample7
from t_window;
name | orderdate | cost | sample1 | sample2 | sample3 | sample4 | sample5 | sample6 | sample7 |
+-------+-------------+-------+----------+----------+----------+----------+----------+----------+----------+--+
| jack | 2015-01-01 | 10 | 661 | 176 | 10 | 10 | 10 | 56 | 176 |
| jack | 2015-01-05 | 46 | 661 | 176 | 56 | 56 | 56 | 111 | 166 |
| jack | 2015-01-08 | 55 | 661 | 176 | 111 | 111 | 101 | 124 | 120 |
| jack | 2015-02-03 | 23 | 661 | 176 | 134 | 134 | 78 | 120 | 65 |
| jack | 2015-04-06 | 42 | 661 | 176 | 176 | 176 | 65 | 65 | 42 |
| mart | 2015-04-08 | 62 | 661 | 299 | 62 | 62 | 62 | 130 | 299 |
| mart | 2015-04-09 | 68 | 661 | 299 | 130 | 130 | 130 | 205 | 237 |
| mart | 2015-04-11 | 75 | 661 | 299 | 205 | 205 | 143 | 237 | 169 |
| mart | 2015-04-13 | 94 | 661 | 299 | 299 | 299 | 169 | 169 | 94 |
| neil | 2015-05-10 | 12 | 661 | 92 | 12 | 12 | 12 | 92 | 92 |
| neil | 2015-06-12 | 80 | 661 | 92 | 92 | 92 | 92 | 92 | 80 |
| tony | 2015-01-02 | 15 | 661 | 94 | 15 | 15 | 15 | 44 | 94 |
| tony | 2015-01-04 | 29 | 661 | 94 | 44 | 44 | 44 | 94 | 79 |
| tony | 2015-01-07 | 50 | 661 | 94 | 94 | 94 | 79 | 79 | 50 |
20.序列函数NTILE 切片
把数据切成3份
select name,orderdate,cost,ntile(3) over() as sample1 from t_window;
name | orderdate | cost | sample1 |
+-------+-------------+-------+----------+--+
| mart | 2015-04-13 | 94 | 1 |
| neil | 2015-06-12 | 80 | 1 |
| mart | 2015-04-11 | 75 | 1 |
| neil | 2015-05-10 | 12 | 1 |
| mart | 2015-04-09 | 68 | 1 |
| mart | 2015-04-08 | 62 | 2 |
| jack | 2015-01-08 | 55 | 2 |
| tony | 2015-01-07 | 50 | 2 |
| jack | 2015-04-06 | 42 | 2 |
| jack | 2015-01-05 | 46 | 2 |
| tony | 2015-01-04 | 29 | 3 |
| jack | 2015-02-03 | 23 | 3 |
| tony | 2015-01-02 | 15 | 3 |
| jack | 2015-01-01 | 10 | 3 |
+-------+-------------+-------+----------+
21.按名字分区,然后每个分区切成3份
select name,orderdate,cost,ntile(3) over() as sample1,
ntile(3) over (partition by name) as sample2
from t_window
name | orderdate | cost | sample1 | sample2 |
+-------+-------------+-------+----------+----------+--+
| jack | 2015-01-01 | 10 | 3 | 1 |
| jack | 2015-02-03 | 23 | 3 | 1 |
| jack | 2015-01-05 | 46 | 2 | 2 |
| jack | 2015-04-06 | 42 | 2 | 2 |
| jack | 2015-01-08 | 55 | 2 | 3 |
| mart | 2015-04-13 | 94 | 1 | 1 |
| mart | 2015-04-08 | 62 | 2 | 1 |
| mart | 2015-04-09 | 68 | 1 | 2 |
| mart | 2015-04-11 | 75 | 1 | 3 |
| neil | 2015-06-12 | 80 | 1 | 1 |
| neil | 2015-05-10 | 12 | 1 | 2 |
| tony | 2015-01-07 | 50 | 2 | 1 |
| tony | 2015-01-02 | 15 | 3 | 2 |
| tony | 2015-01-04 | 29 | 3 | 3
22.按月分区,按名字分区,然后每个分区切成3份
select name,orderdate,cost,ntile(3) over() as sample1,
ntile(3) over (partition by name) as sample2,
ntile(2) over (partition by orderdate) as sample3
from t_window;
jack 2015-01-01 10 3 1 1
tony 2015-01-02 15 3 2 1
tony 2015-01-04 29 3 3 1
jack 2015-01-05 46 2 2 1
tony 2015-01-07 50 2 1 1
jack 2015-01-08 55 2 3 1
jack 2015-02-03 23 3 1 1
jack 2015-04-06 42 2 2 1
mart 2015-04-08 62 2 1 1
mart 2015-04-09 68 1 2 1
mart 2015-04-11 75 1 3 1
mart 2015-04-13 94 1 1 1
neil 2015-05-10 12 1 2 1
neil 2015-06-12 80 1 1 1
23.row_number、rank、dense_rank
排序cost r1
select name,orderdate,cost,row_number() over() as r1 from t_window; // 排序cost
24.按name排序
select name,orderdate,cost,row_number() over() as r1 ,
row_number() over(order by name) as r2
from t_window;
jack 2015-01-01 10 14 1
jack 2015-02-03 23 12 2
jack 2015-01-05 46 10 3
jack 2015-04-06 42 9 4
jack 2015-01-08 55 7 5
mart 2015-04-13 94 1 6
mart 2015-04-08 62 6 7
mart 2015-04-09 68 5 8
mart 2015-04-11 75 3 9
neil 2015-06-12 80 2 10
neil 2015-05-10 12 4 11
tony 2015-01-07 50 8 12
tony 2015-01-02 15 13 13
tony 2015-01-04 29 11 14
25.rank
select name,orderdate,cost,row_number() over() as r1 ,
row_number() over(order by name) as r2,
rank () over (order by name) as r3
from t_window;
jack 2015-01-01 10 14 1 1
jack 2015-02-03 23 12 2 1
jack 2015-01-05 46 10 3 1
jack 2015-04-06 42 9 4 1
jack 2015-01-08 55 7 5 1
mart 2015-04-13 94 1 6 6
mart 2015-04-08 62 6 7 6
mart 2015-04-09 68 5 8 6
mart 2015-04-11 75 3 9 6
neil 2015-06-12 80 2 10 10
neil 2015-05-10 12 4 11 10
tony 2015-01-07 50 8 12 12
tony 2015-01-02 15 13 13 12
tony 2015-01-04 29 11 14 12
26.dense_rank() 有问题
select name,orderdate,cost,row_number() over() as r1 ,
row_number() over(order by name) as r2,
rank () over (order by name) as r3,
dense_rank() over (order by name) as r4
from t_window;
jack 2015-01-01 10 14 1 1 1
jack 2015-02-03 23 12 2 1 1
jack 2015-01-05 46 10 3 1 1
jack 2015-04-06 42 9 4 1 1
jack 2015-01-08 55 7 5 1 1
mart 2015-04-13 94 1 6 6 2
mart 2015-04-08 62 6 7 6 2
mart 2015-04-09 68 5 8 6 2
mart 2015-04-11 75 3 9 6 2
neil 2015-06-12 80 2 10 10 3
neil 2015-05-10 12 4 11 10 3
tony 2015-01-07 50 8 12 12 4
tony 2015-01-02 15 13 13 12 4
tony 2015-01-04 29 11 14 12 4
27.lag
select name,orderdate,cost,lag(orderdate,1) over() from t_window;
mart 2015-04-13 94 NULL
neil 2015-06-12 80 2015-04-13
mart 2015-04-11 75 2015-06-12
neil 2015-05-10 12 2015-04-11
mart 2015-04-09 68 2015-05-10
mart 2015-04-08 62 2015-04-09
jack 2015-01-08 55 2015-04-08
tony 2015-01-07 50 2015-01-08
jack 2015-04-06 42 2015-01-07
jack 2015-01-05 46 2015-04-06
tony 2015-01-04 29 2015-01-05
jack 2015-02-03 23 2015-01-04
tony 2015-01-02 15 2015-02-03
jack 2015-01-01 10 2015-01-02
28.lag 按名字分区,按日期排序
select name,orderdate,cost,
lag(orderdate,1) over(partition by name order by orderdate) from t_window;
jack 2015-01-01 10 NULL
jack 2015-01-05 46 2015-01-01
jack 2015-01-08 55 2015-01-05
jack 2015-02-03 23 2015-01-08
jack 2015-04-06 42 2015-02-03
mart 2015-04-08 62 NULL
mart 2015-04-09 68 2015-04-08
mart 2015-04-11 75 2015-04-09
mart 2015-04-13 94 2015-04-11
neil 2015-05-10 12 NULL
neil 2015-06-12 80 2015-05-10
tony 2015-01-02 15 NULL
tony 2015-01-04 29 2015-01-02
tony 2015-01-07 50 2015-01-04
29.lag lead
select name,orderdate,cost,
lag(orderdate,1) over(partition by name order by orderdate) as sample1,
lag(orderdate,1,'1972-01-01') over(partition by name order by orderdate) as sample2,
lead(orderdate,1,'3000-01-01') over(partition by name order by orderdate) as sample3
from t_window;
jack 2015-01-01 10 NULL 1972-01-01 2015-01-05
jack 2015-01-05 46 2015-01-01 2015-01-01 2015-01-08
jack 2015-01-08 55 2015-01-05 2015-01-05 2015-02-03
jack 2015-02-03 23 2015-01-08 2015-01-08 2015-04-06
jack 2015-04-06 42 2015-02-03 2015-02-03 3000-01-01
mart 2015-04-08 62 NULL 1972-01-01 2015-04-09
mart 2015-04-09 68 2015-04-08 2015-04-08 2015-04-11
mart 2015-04-11 75 2015-04-09 2015-04-09 2015-04-13
mart 2015-04-13 94 2015-04-11 2015-04-11 3000-01-01
neil 2015-05-10 12 NULL 1972-01-01 2015-06-12
neil 2015-06-12 80 2015-05-10 2015-05-10 3000-01-01
tony 2015-01-02 15 NULL 1972-01-01 2015-01-04
tony 2015-01-04 29 2015-01-02 2015-01-02 2015-01-07
tony 2015-01-07 50 2015-01-04 2015-01-04 3000-01-01
30.first_value
select name,orderdate,cost,first_value(orderdate) over(partition by name order by orderdate) as time from t_window;
jack 2015-01-01 10 2015-01-01
jack 2015-01-05 46 2015-01-01
jack 2015-01-08 55 2015-01-01
jack 2015-02-03 23 2015-01-01
jack 2015-04-06 42 2015-01-01
mart 2015-04-08 62 2015-04-08
mart 2015-04-09 68 2015-04-08
mart 2015-04-11 75 2015-04-08
mart 2015-04-13 94 2015-04-08
neil 2015-05-10 12 2015-05-10
neil 2015-06-12 80 2015-05-10
tony 2015-01-02 15 2015-01-02
tony 2015-01-04 29 2015-01-02
tony 2015-01-07 50 2015-01-02
31. first_value 、 last_value
select name,orderdate,cost,
first_value(orderdate) over(partition by name order by orderdate) as time,
last_value(orderdate) over(partition by name order by orderdate) as time
from t_window;
jack 2015-01-01 10 2015-01-01 2015-01-01
jack 2015-01-05 46 2015-01-01 2015-01-05
jack 2015-01-08 55 2015-01-01 2015-01-08
jack 2015-02-03 23 2015-01-01 2015-02-03
jack 2015-04-06 42 2015-01-01 2015-04-06
mart 2015-04-08 62 2015-04-08 2015-04-08
mart 2015-04-09 68 2015-04-08 2015-04-09
mart 2015-04-11 75 2015-04-08 2015-04-11
mart 2015-04-13 94 2015-04-08 2015-04-13
neil 2015-05-10 12 2015-05-10 2015-05-10
neil 2015-06-12 80 2015-05-10 2015-06-12
tony 2015-01-02 15 2015-01-02 2015-01-02
tony 2015-01-04 29 2015-01-02 2015-01-04
tony 2015-01-07 50 2015-01-02 2015-01-07
32.获取时间戳
select unix_timestamp();
1636690319
获取当前时间
select from_unixtime(1636690319);



