hive> create table t_window(
> name string,
> orderdate date,
> cost int
> )
> row format delimited fields terminated by ',';
OK
vi order.csv
jack,2015-01-01,10
tony,2015-01-02,15
jack,2015-02-03,23
tony,2015-01-04,29
jack,2015-01-05,46
jack,2015-04-06,42
tony,2015-01-07,50
jack,2015-01-08,55
mart,2015-04-08,62
mart,2015-04-09,68
neil,2015-05-10,12
mart,2015-04-11,75
neil,2015-06-12,80
mart,2015-04-13,94
hive> load data local inpath '/opt/tmp/order.csv' into table t_window;
hive> select substring('2015-04-13',1,5);
OK
2015-
Time taken: 0.03 seconds, Fetched: 1 row(s)
hive> select substring('2015-04-13',2,5);
OK
015-0
Time taken: 0.033 seconds, Fetched: 1 row(s)
hive> select substring('2015-04-13',3,5);
OK
15-04
hive> select substr('2015-04-13',1,5);
OK
2015-
Time taken: 0.038 seconds, Fetched: 1 row(s)
hive> select substr('2015-04-13',2,5);
OK
015-0
Time taken: 0.038 seconds, Fetched: 1 row(s)
hive> select substr('2015-04-13',3,5);
OK
15-04
hive> select distinct name,count(*) over() from t_window where substring(orderdate,1,7)='2015-04';
hive> select distinct name,substring(orderdate,1,7),count(*) over() from t_window where substring(orderdate,1,7)='2015-04';
OK
mart 2015-04 2
jack 2015-04 2
hive> select name,count(*) over() from t_window where substring(orderdate,1,7)='2015-04' group by name;
OK
mart 2
jack 2
hive> select name,orderdate,cost,sum(cost) over() from t_window;
OK 最后一列显示的内容为 所有行的第三列(cost)相加这和
mart 2015-04-13 94 661
neil 2015-06-12 80 661
mart 2015-04-11 75 661
neil 2015-05-10 12 661
mart 2015-04-09 68 661
mart 2015-04-08 62 661
jack 2015-01-08 55 661
tony 2015-01-07 50 661
jack 2015-04-06 42 661
jack 2015-01-05 46 661
tony 2015-01-04 29 661
jack 2015-02-03 23 661
tony 2015-01-02 15 661
jack 2015-01-01 10 661
hive> select name,orderdate,cost,sum(cost) over(partition by month(orderdate)) from t_window;
OK
jack 2015-01-01 10 205
jack 2015-01-08 55 205
tony 2015-01-07 50 205
jack 2015-01-05 46 205
tony 2015-01-04 29 205
tony 2015-01-02 15 205
jack 2015-02-03 23 23
mart 2015-04-13 94 341
jack 2015-04-06 42 341
mart 2015-04-11 75 341
mart 2015-04-09 68 341
mart 2015-04-08 62 341
neil 2015-05-10 12 12
neil 2015-06-12 80 80
hive> select name, gender,count(*) over(partition by gender) from studentp;
OK
zhansan8 boy 4
zhansan5 boy 4
zhansan3 boy 4
zhansan1 boy 4
zhansan11 girl 7
zhansan10 girl 7
zhansan9 girl 7
zhansan7 girl 7
zhansan6 girl 7
zhansan4 girl 7
zhansan2 girl 7
hive> select name,orderdate,cost,
sum(cost) over() as sample1, --所有的cost列相加
sum(cost) over(partition by name) as sample2, --将按名字分组,然后把每一组的cost列相加
sum(cost) over(partition by name order by orderdate) as sample3
from t_window;
OK
name orderdate cost sample1 sample2
jack 2015-01-01 10 661 176
jack 2015-02-03 23 661 176
jack 2015-01-05 46 661 176
jack 2015-04-06 42 661 176
jack 2015-01-08 55 661 176
mart 2015-04-13 94 661 299
mart 2015-04-08 62 661 299
mart 2015-04-09 68 661 299
mart 2015-04-11 75 661 299
neil 2015-06-12 80 661 92
neil 2015-05-10 12 661 92
tony 2015-01-07 50 661 94
tony 2015-01-02 15 661 94
tony 2015-01-04 29 661 94
select name,orderdate,cost,
sum(cost) over() as sample1,
sum(cost) over(partition by name) as sample2,
sum(cost) over(partition by name order by orderdate) as sample3
from t_window;
jack 2015-01-01 10 661 176 10
jack 2015-01-05 46 661 176 56
jack 2015-01-08 55 661 176 111
jack 2015-02-03 23 661 176 134
jack 2015-04-06 42 661 176 176
mart 2015-04-08 62 661 299 62
mart 2015-04-09 68 661 299 130
mart 2015-04-11 75 661 299 205
mart 2015-04-13 94 661 299 299
neil 2015-05-10 12 661 92 12
neil 2015-06-12 80 661 92 92
tony 2015-01-02 15 661 94 15
tony 2015-01-04 29 661 94 44
tony 2015-01-07 50 661 94 94
select name,orderdate,cost,
sum(cost) over() as sample1,
sum(cost) over(partition by name) as sample2,
sum(cost) over(partition by name order by orderdate) as sample3,
sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row) as sample4
from t_window;
OK
jack 2015-01-01 10 661 176 10 10
jack 2015-01-05 46 661 176 56 56
jack 2015-01-08 55 661 176 111 111
jack 2015-02-03 23 661 176 134 134
jack 2015-04-06 42 661 176 176 176
mart 2015-04-08 62 661 299 62 62
mart 2015-04-09 68 661 299 130 130
mart 2015-04-11 75 661 299 205 205
mart 2015-04-13 94 661 299 299 299
neil 2015-05-10 12 661 92 12 12
neil 2015-06-12 80 661 92 92 92
tony 2015-01-02 15 661 94 15 15
tony 2015-01-04 29 661 94 44 44
tony 2015-01-07 50 661 94 94 94
select name,orderdate,cost,
sum(cost) over() as sample1,
sum(cost) over(partition by name) as sample2,
sum(cost) over(partition by name order by orderdate) as sample3,
sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row) as sample4,
sum(cost) over(partition by name order by orderdate rows between 1 preceding and current row) as sample5,
sum(cost) over(partition by name order by orderdate rows between 1 preceding and 1 following) as sample6,
sum(cost) over(partition by name order by orderdate rows between current row and unbounded following) as sample7
from t_window;
hive> select name,orderdate,cost, ntile(3) over() as sample1 from t_window;
OK
mart 2015-04-13 94 1
neil 2015-06-12 80 1
mart 2015-04-11 75 1
neil 2015-05-10 12 1
mart 2015-04-09 68 1
mart 2015-04-08 62 2
jack 2015-01-08 55 2
tony 2015-01-07 50 2
jack 2015-04-06 42 2
jack 2015-01-05 46 2
tony 2015-01-04 29 3
jack 2015-02-03 23 3
tony 2015-01-02 15 3
jack 2015-01-01 10 3
hive> select name,orderdate,cost, ntile(3) over() as sample1,
> ntile(3) over(partition by name) as sample2,
> ntile(2) over(partition by month(orderdate)) as sample3,
> ntile(3) over(partition by name order by cost desc) as sample4
> from t_window;
hive> select name,orderdate,cost, row_number() over() as r1,
> row_number() over(order by name) as r2,
> rank() over(order by name) as r3,
> dense_rank() over(order by name) as r4
> from t_window;
OK
jack 2015-01-01 10 14 1 1 1
jack 2015-02-03 23 12 2 1 1
jack 2015-01-05 46 10 3 1 1
jack 2015-04-06 42 9 4 1 1
jack 2015-01-08 55 7 5 1 1
mart 2015-04-13 94 1 6 6 2
mart 2015-04-08 62 6 7 6 2
mart 2015-04-09 68 5 8 6 2
mart 2015-04-11 75 3 9 6 2
neil 2015-06-12 80 2 10 10 3
neil 2015-05-10 12 4 11 10 3
tony 2015-01-07 50 8 12 12 4
tony 2015-01-02 15 13 13 12 4
tony 2015-01-04 29 11 14 12 4
hive> select name,orderdate,cost,
lag(orderdate,1) over(partition by name order by orderdate) as sample1,
lag(orderdate,1,'1972-01-01') over(partition by name order by orderdate) as sample2,
lead(orderdate,1,'3000-01-01') over(partition by name order by orderdate) as sample3
from t_window;
OK
jack 2015-01-01 10 NULL
jack 2015-01-05 46 2015-01-01
jack 2015-01-08 55 2015-01-05
jack 2015-02-03 23 2015-01-08
jack 2015-04-06 42 2015-02-03
mart 2015-04-08 62 NULL
mart 2015-04-09 68 2015-04-08
mart 2015-04-11 75 2015-04-09
mart 2015-04-13 94 2015-04-11
neil 2015-05-10 12 NULL
neil 2015-06-12 80 2015-05-10
tony 2015-01-02 15 NULL
tony 2015-01-04 29 2015-01-02
tony 2015-01-07 50 2015-01-04
hive> select name,orderdate,cost,
first_value(orderdate) over(partition by name order by orderdate) as time ,
last_value(orderdate) over(partition by name order by orderdate) as time
from t_window;
OK
jack 2015-01-01 10 2015-01-01 2015-01-01
jack 2015-01-05 46 2015-01-01 2015-01-05
jack 2015-01-08 55 2015-01-01 2015-01-08
jack 2015-02-03 23 2015-01-01 2015-02-03
jack 2015-04-06 42 2015-01-01 2015-04-06
mart 2015-04-08 62 2015-04-08 2015-04-08
mart 2015-04-09 68 2015-04-08 2015-04-09
mart 2015-04-11 75 2015-04-08 2015-04-11
mart 2015-04-13 94 2015-04-08 2015-04-13
neil 2015-05-10 12 2015-05-10 2015-05-10
neil 2015-06-12 80 2015-05-10 2015-06-12
tony 2015-01-02 15 2015-01-02 2015-01-02
tony 2015-01-04 29 2015-01-02 2015-01-04
tony 2015-01-07 50 2015-01-02 2015-01-07