栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

hive sql 50道练习题

hive sql 50道练习题

建库建表

--建库
create database test;

--建表
create table student(s_id string,s_name string,s_birth string,s_sex string) row format delimited fields terminated by 't';
create table course(c_id string,c_name string,t_id string) row format delimited fields terminated by 't';
create table teacher(t_id string,t_name string) row format delimited fields terminated by 't';
create table score(s_id string,c_id string,s_score int) row format delimited fields terminated by 't';

生成数据
vim /opt/module/test/course.txt

01	语文	02
02 	数学	01
03	英语	03

vim /opt/module/test/score.txt

01	01	80
01	02	90
01	03	99
02	01	70
02	02	60
02	03	80
03	01	80
03	02	80
03	03	80
04	01	50
04	02	30
04	03	20
05	01	76
05	02	87
06	01	31
06	03	34
07	02	89
07	03	98

vim /opt/module/test/student.txt

01	赵雷	1990-01-01	男
02	钱电	1990-12-21	男
03	孙风	1990-05-20	男
04	李云	1990-08-06	男
05	周梅	1991-12-01	女
06	吴兰	1992-03-01	女
07	郑竹	1989-07-01	女
08	王菊	1990-01-20	女

vim /opt/module/test/teacher.txt

01	张三
02	李四
03	王五

导入数据

--导入数据
load data local inpath '/opt/module/test/student.txt' into table student;
load data local inpath '/opt/module/test/course.txt' into table course;
load data local inpath '/opt/module/test/teacher.txt' into table teacher;
load data local inpath '/opt/module/test/score.txt' into table score;
  1. 查询"01"课程比"02"课程成绩高的学生的信息及课程分数:
--方式1:直接连接sc1和sc2
select
    sc1.s_id,
    sc1.s_score score01,
    sc2.s_score score02
from score sc1
join score sc2 on sc1.s_id=sc2.s_id
where sc1.c_id='01' and sc2.c_id='02' and sc1.s_score>sc2.s_score;

--方式2
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on student.s_id = s1.s_id and s1.c_id='01'
left join score s2 on student.s_id = s2.s_id and s2.c_id='02'
where s1.s_score>s2.s_score;

--方式3
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on student.s_id = s1.s_id
left join score s2 on student.s_id = s2.s_id
where s1.c_id='01' and s2.c_id='02' and s1.s_score>s2.s_score;

--方式4
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on s1.c_id='01'
left join score s2 on s2.c_id='02'
where student.s_id = s1.s_id and student.s_id = s2.s_id and s1.s_score>s2.s_score;
  1. 查询"01"课程比"02"课程成绩低的学生的信息及课程分数:(和上面的第1题一样)
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on student.s_id = s1.s_id and s1.c_id='01'
left join score s2 on s1.s_id = s2.s_id and s2.c_id='02'
where s1.s_score 
  1. 查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:
--方式1:round函数可以去掉null值
select
    t1.sid,
    t1.sname,
    t1.avg_score
from
(
    select
        student.s_id sid,
        student.s_name sname,
        round(avg(s.s_score),1)  avg_score
    from student
    join score s on student.s_id = s.s_id
    group by student.s_id,student.s_name
)t1
where avg_score>=60
order by t1.sid;

--方式2:这个比较好
--第一步:查询出每个sid对应的平均成绩(通过round函数去掉null值,并且取小数点2位),作为临时表tmp
--第二步:将student表和tmp表进行关联,筛选出平均成绩avg_score大大于60的信息。
select
    student.s_id,
    student.s_name,
    tmp.avg_score
from student
join
(
    select
        s_id,
        round(avg(s_score),2) avg_score
    from score
    group by s_id
)tmp
on student.s_id=tmp.s_id
where tmp.avg_score>=60;

--方式3:将student表和score表进行关联,通过s_id和s_name进行分组,在分组的基础上查询每个分组的平均成绩大于等于60的学生信息和平均成绩
--这个比较简单,而且容易想到
select
    student.s_id,
    student.s_name,
    round(avg(s.s_score),2)
from student
join score s on student.s_id = s.s_id
group by student.s_id, student.s_name
having avg(s.s_score)>=60;
  1. 查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:(包括有成绩的和无成绩的)
--方式1:将成绩小于60以及没有成绩的查询结果union all
--提示:union all去重;union不去重
--有成绩大于60的
select
    student.s_id,
    student.s_name,
    avg_score
from student
join
(
select
    tmp.s_id,
    tmp.avg_score
 from (
          select s_id,
                 round(avg(s_score), 2) avg_score
          from score
          group by s_id
      ) tmp
 where tmp.avg_score < 60
)t1
on student.s_id=t1.s_id
union all
(
--没有成绩的
    select
        student.s_id,
        student.s_name,
        0 avg_score
    from student
    left join score s on student.s_id = s.s_id
    where student.s_id not in
    (
        select s_id
        from score
        group by s_id
    )
);

--方式1
select
    student.s_id,
    student.s_name,
    tmp.avgScore from student
join
(
    select
        score.s_id,
        round(avg(score.s_score),1)as avgScore
    from score group by s_id
)as tmp
on tmp.avgScore < 60
where student.s_id=tmp.s_id
union all
select
    s2.s_id,
    s2.s_name,
    0 as avgScore
from student s2
where s2.s_id not in
(
    select
        distinct sc2.s_id
    from score sc2
);

--方式2:用having
select
    student.s_id,
    student.s_name,
    round(avg(s.s_score),2)
from student
join score s on student.s_id = s.s_id
group by student.s_id, student.s_name
having avg(s.s_score)<60
union all
--没有成绩的
    select
        student.s_id,
        student.s_name,
        0 avg_score
    from student
    left join score s on student.s_id = s.s_id
    where student.s_id not in
    (
        select s_id
        from score
        group by s_id
    );
  1. 查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:
--方式1:
--1.通过查询score查询有成绩的选课总数和所有课程的总成绩
--2.处理选课总数和所有课程总成绩为null的数据,并设置为0
select
    student.s_id,
    student.s_name,
    case when tmp.count_course is null then 0 else tmp.count_course end,
    case when tmp.sum_score is null then 0 else tmp.sum_score end
from student
left join
(
    select
        s_id,
        count(c_id) count_course,
        sum(score.s_score) sum_score
    from score
    group by s_id
)tmp
on student.s_id=tmp.s_id;

--方式2:直接group by
select
    student.s_id,
    student.s_name,
    count(s.c_id) total_course,
    sum(s.s_score) total_score
from student
left join score s on student.s_id = s.s_id
group by student.s_id,student.s_name;
  1. 查询"李"姓老师的数量
select
    t_name,
    count(1)
from teacher
where t_name like '李%'
group by t_name;
  1. 查询学过"张三"老师授课的同学的信息:
select *
from student
join score s on student.s_id = s.s_id
join course c on s.c_id = c.c_id
join teacher t on c.t_id = t.t_id and t.t_name='张三';
  1. 查询没学过"张三"老师授课的同学的信息:
select
    student.*
from student
left join
(
    select
        s_id
    from score
    join course c on score.c_id = c.c_id
    join teacher t on c.t_id = t.t_id
)tmp
on student.s_id=tmp.s_id
where tmp.s_id is null--这个思想比较好,反过来求张三老师教过的;
  1. 查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:
--方式1,不能使用‘01’,而需要使用1
select student.*
from student
where s_id in
(
    select
        t1.s_id
    from
    (
        select
            s_id
        from score
        where c_id=1
        group by s_id
    )t1
    join
    (
        select
            s_id
        from score
        where c_id=2
        group by s_id
    )t2
    on t1.s_id=t2.s_id
);
--方式2
select student.*
from student
join
(
    select
        s_id
    from score
    where c_id=1
) tmp1
on student.s_id = tmp1.s_id
join
(
    select
        s_id
    from score
    where c_id=2
)tmp2
on student.s_id = tmp2.s_id;
  1. 查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息
select
    student.s_id,
    s_name,
    s_sex,
    s_birth
from student
join
(
select
    s_id
from score
where c_id='01'
)t1
on student.s_id=t1.s_id
left join
(
    select
        s_id
    from score
    where c_id='02'
)t2
on student.s_id=t2.s_id
where t2.s_id is null;
  1. 查询没有学全所有课程的同学的信息:
select
    student.s_id,
    student.s_name,
    student.s_sex,
    student.s_birth
from student
join
(
    select
        s_id
    from score
    group by s_id
    having count(c_id)<3
) t1
on student.s_id=t1.s_id;
--方法1:不知道这个行不行
select
    student.*
from student
where s_id in 
(
    select
        s_id
    from score
    where c_id in 
    (
        select
            c_id
        from score
        where s_id='01'
    )t1
)t2

--方法2:先使用join
select 
    s_id,
    s_name,
    s_sex
from student
join
(
    select 
        c_id
    from score 
    where s_id='01'
)t1
join
(
    select
        s_id,
        c_id
    from score
)t2
on t1.c_id=t2.c_id and student.s_id=t2.s_id
where student.s_id is not '01'
group by s_id,s_name,s_sex;
  1. 查询和"01"号的同学学习的课程完全相同的其他同学的信息
--on条件一起写和分开写有什么区别???
--备注:hive不支持group_concat方法,可用 concat_ws(’|’, collect_set(str)) 实现
select
    student.*
from student
join
(
    select
        s_id,
        concat_ws('|',collect_set(c_id)) concat_cid
    from score
    group by s_id
    where s_id in not '01'
)t1
on student.s_id=t1.s_id
join
(
    select 
        concat_ws('|',collect_set(c_id)) concat_cid
    from score 
    group by s_id
    where s_id='01'
)t2
on t1.concat_cid=t2.concat_cid ;
  1. 查询没学过"张三"老师讲授的任一门课程的学生姓名:
--这个思想很牛逼!!!
--先查询学过张三教授的课程的学生信息,反过来求没有学过张三老师课程的学生
select
    student.s_id,
    student.s_name
from student
left join
(
    select
        s_id
    from score 
    join
    (
        select 
            c_id
        from course
        join teacher
        on course.t_id=teacher.t_id
        where teacher.t_name='张三'
    )t1
    on score.c_id=t1.c_id
)t2
on student.s_id=t2.s_id
where t2.s_id is null;--注意这里!!!

  1. 查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:
select
    s_id,
    s_name,
   avg(s_score)
from 
(
select
    s_id,
    c_id,
    case when s_score<60 then '不及格' else '及格' s_score_jige
from score 
);
  1. 检索"01"课程分数小于60,按分数降序排列的学生信息:
select
    student.s_id,
    student.s_name,
    student.s_sex,
    student.s_birth,
    t1.s_score
from student
join
(
    select
        s_id,
        s_score
    from score
    where c_id='01' and s_score<60
    order by s_score desc
)t1
on student.s_id=t1.s_id;
  1. 按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:
--类似列转行!!
--由学生id,课程id和成绩转为--学生id 数学 语文 英语 平均成绩
--第一步:按平均成绩从高到低显示所有学生的id和平均成绩 --注意08王菊没有成绩
--第二步:通过左连接将三个课程转为列
--注意:因为需要查询课程的字段,在进行group by的时候需要将该字段加上
select
    score.s_id,
    t1.s_score as Chinese,
    t2.s_score as Math,
    t3.s_score as English,
    round(avg(score.s_score),2) avg_score
from score
left join
(
    select
        s_id,
        s_score
    from score
    where c_id='01'
)t1
on score.s_id=t1.s_id
left join
(
    select
        s_id,
        s_score
    from score
    where c_id='02'
)t2
on score.s_id=t2.s_id
left join
(
    select
        s_id,
        s_score
    from score
    where c_id='03'
)t3
on score.s_id=t3.s_id
group by score.s_id,t1.s_score,t2.s_score,t3.s_score
order by avg_score desc;

  1. 查询各科成绩最高分、最低分和平均分:
-- 以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率
-- 及格为>=60,中等为:70-80,优良为:80-90,优秀为:>=90
select
    c.c_id,
    c.c_name,
    max_score,
    min_score,
    avg_score,
    passRate,
    middleRate,
    goodRate,
    excellentRate
from course c
join
(
    select
        c_id,
        max(s_score) max_score,
        min(s_score) min_score,
        round(avg(s_score),2) avg_score,
        round(sum(case when s_score>=60 then 1 else 0 end)/count(c_id),2) passRate,
        round(sum(case when s_score>=70 and s_score<80 then 1 else 0 end)/count(c_id),2) middleRate,
        round(sum(case when s_score>=80 and s_score<90 then 1 else 0 end)/count(c_id),2) goodRate,
        round(sum(case when s_score>=90 then 1 else 0 end)/count(c_id),2) excellentRate
    from score
    group by c_id
)t1
on c.c_id=t1.c_id;

  1. 按各科成绩进行排序,并显示排名
-- 开窗函数
select
    score.*,
    row_number() over (order by s_score desc) Ranking
from score
where c_id='01'
union all
(
select
    score.*,
    row_number() over (order by s_score desc) Ranking
from score
where c_id='02'
)union all
(
select
    score.*,
    row_number() over (order by s_score desc) Ranking
from score
where c_id='03'
);
  1. 查询学生的总成绩并进行排名
select
    s_id,
    sum(s_score) sum_score,
    row_number() over (order by sum(s_score) desc ) rk
from score
group by s_id;
  1. 查询不同老师所教不同课程平均分从高到低显示:不同课程平均分从高到低显示
--方法1
select
    course.t_id,
    course.c_id,
    t_name,
    round(avg(s_score),2) avg_score
from course
join score s on course.c_id = s.c_id
join teacher t on course.t_id = t.t_id
group by course.c_id,course.t_id,t_name
order by avg_score desc ;

--方法2
select
    course.c_id,
    course.t_id
    t_name,
    round(avg(s_score),2)as avgscore
from course,teacher,score
where teacher.t_id=course.t_id and course.c_id=score.c_id
group by course.c_id,course.t_id,t_name
order by avgscore desc;
  1. 查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc )rk
    from score
    where c_id='01'
    order by s_score desc
)t1
where rk=2 or rk=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc )rk
    from score
    where c_id='02'
    order by s_score desc
)t1
where rk=2 or rk=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc )rk
    from score
    where c_id='03'
    order by s_score desc
)t1
where rk=2 or rk=3;
  1. 统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比
select
    course.c_id,
    course.c_name,
    first,
    firstRate,
    second,
    secondRate,
    third,
    thirdRate,
    fourth,
    fourthRate
from course
join
(
    select
        c_id,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) first,
        round(sum(case when s_score<=100 and s_score>=85 then 1 else 0 end)/sum(c_id),2) as firstRate,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) second,
        round(sum(case when s_score<=85 and s_score>=70 then 1 else 0 end)/sum(c_id),2) as secondRate,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) third,
        round(sum(case when s_score<=70 and s_score>=60 then 1 else 0 end)/sum(c_id),2) as thirdRate,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) fourth,
        round(sum(case when s_score<=60 and s_score>=0 then 1 else 0 end)/sum(c_id),2) as fourthRate
    from score
    group by c_id
)tmp
on course.c_id=tmp.c_id;
  1. 查询学生平均成绩及其名次:
select
    s_id,
    round(avg(s_score) ,2) avg_score,
    row_number() over (order by avg(s_score) desc ) rk
from score
group by s_id;
  1. 查询各科成绩前三名的记录
(select
    s_id,
    c_id,
    s_score
from score
where c_id='01'
order by s_score desc
limit 3)
union all
(
    select
        s_id,
        c_id,
        s_score
    from score
    where c_id='02'
    order by s_score desc
    limit 3
)
union all
(
    select
        s_id,
        c_id,
        s_score
    from score
    where c_id='03'
    order by s_score desc
    limit 3
);
  1. 查询每门课程被选修的学生数:
select
    c_id,
    count(s_id)
from score
group by c_id;
  1. 查询出只有两门课程的全部学生的学号和姓名
--方法1:在最外层筛选
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id,
        count(c_id) count_cid
    from score
    group by s_id
)tmp
on student.s_id=tmp.s_id
where tmp.count_cid=2;
--方法2:用having筛选
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id,
        count(c_id) count_cid
    from score
    group by s_id
    having count_cid=2
)tmp
on student.s_id=tmp.s_id;
--或者简单点
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id
    from score
    group by s_id
    having count(s_score)=2
)tmp
on student.s_id=tmp.s_id;
  1. 查询男生、女生人数:
select
    sum(case when s_sex='男' then 1 else 0 end) men,
    sum(case when s_sex='女' then 1 else 0 end) women
from student;
  1. 查询名字中含有"风"字的学生信息:
select
    *
from student
where s_name like '%风%';
  1. 查询同名同性学生名单,并统计同名人数:
select
    s1.s_id,
    s1.s_name,
    count(*) as sameStu
from student s1,test.student s2
where s1.s_name=s2.s_name and s1.s_id<>s2.s_id and s1.s_sex=s2.s_sex
group by s1.s_id,s1.s_name,s1.s_sex;
  1. 查询1990年出生的学生名单:
select
    *
from student
where s_birth like '1990%';
  1. 查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:
select
    c_id,
    round(avg(s_score),2) avg_score
from score
group by c_id
order by avg_score desc,c_id;
  1. 查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:
select
    student.s_id,
    s_name,
    avg_score
from student
join
(
    select
        s_id,
        round(avg(s_score),2) avg_score
    from score
    group by s_id
    having avg(s_score)>=85--这里不能写别名
)tmp
on student.s_id=tmp.s_id;
  1. 查询课程名称为"数学",且分数低于60的学生姓名和分数:
--先看有几张表合在一起,然后用where筛选条件避免笛卡尔积
select
    s_name,
    s_score
from student,score,course
where student.s_id=score.s_id and score.c_id=course.c_id and score.s_score<60 and c_name='数学';
  1. 查询所有学生的课程及分数情况:
--姓名 语文 数学 英语 总分
select
    s.s_id,
    s_name,
    sum(case when c_name='语文' then s_score else 0 end) as Chinese,
    sum(case when c_name='数学' then s_score else 0 end) as Math,
    sum(case when c_name='英语' then s_score else 0 end) as English
from score
join course c on score.c_id = c.c_id
join student s on score.s_id = s.s_id
group by s.s_id,s_name
order by s_id;
  1. 查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:
--先查询语文的
--后面union all
--union需要去重,但效率低;union all不去重,但效率高
select
    s_name,
    c_name,
    tmp.s_score
from
(
    (
        select
            s_id,
            c_id,
            s_score
        from score
        where c_id='01' and s_score>70
    )
    union all
    (
        select
            s_id,
            c_id,
            s_score
        from score
        where c_id='02' and s_score>70
    )
    union all
    (
        select
            s_id,
            c_id,
            s_score
        from score
        where c_id='03' and s_score>70
    )
)tmp
left join course on course.c_id=tmp.c_id
left join student on student.s_id = tmp.s_id;
  1. 查询课程不及格的学生
select *
from score
where s_score<60;
  1. 查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id
    from score
    where c_id='01' and s_score>=80
)tmp
on student.s_id=tmp.s_id;
  1. 求每门课程的学生人数:
select
    c_id,
    count(1)
from score
group by c_id;
  1. 查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩
select
    s_id,
    s_score
from score
join
(
    select
        course.c_id,
        course.c_name,
        course.t_id,
        t1.t_name
    from course
    join
    (
        select
            t_id,
            t_name
        from teacher
        where t_name='张三'
    )t1
    on course.t_id=t1.t_id
)t2
on score.c_id=t2.c_id
order by s_score desc
limit 1;
  1. 查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩:
select distinct--去重
    s1.s_id,
    s1.c_id,
    s1.s_score
from score s1,score s2
where s1.c_id<>s2.c_id and s1.s_score=s2.s_score;
  1. 查询每门课程成绩最好的前三名:
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc ) rk
    from score
    where c_id='01'
)t1
where rk<=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc ) rk
    from score
    where c_id='02'
)t1
where rk<=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc ) rk
    from score
    where c_id='03'
)t1
where rk<=3;
  1. 统计每门课程的学生选修人数(超过5人的课程才统计):要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列
select
    c_id,
    count(1)
from score
group by c_id
having count(1) > 5
order by count(1) desc,c_id;
  1. 检索至少选修两门课程的学生学号
select
    s_id,
    count(c_id)
from score
group by s_id
having count(c_id)>=2;
  1. 查询选修了全部课程的学生信息:
select
    s_id,
    count(c_id)
from score
group by s_id
having count(c_id)=3;
  1. 查询各学生的年龄(周岁):
    –按照出生日期来算,当前月日 < 出生年月的月日则,年龄减一
select
    s_id,
    case
        when month(s_birth) 
  1. 查询本周过生日的学生
select
    *
from student
where weekofyear(`current_date`())=weekofyear(s_birth);
  1. 查询下周过生日的学生
select
    *
from student
where weekofyear(`current_date`())+1=weekofyear(s_birth);
  1. 查询本月过生日的学生
select
    *
from student
where month(`current_date`())=month(s_birth);
  1. 查询12月份过生日的学生
select
    *
from student
where month(s_birth)='12';
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/303949.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号