美文网首页
hive sql练习

hive sql练习

作者: 阿发贝塔伽马 | 来源:发表于2018-09-22 22:03 被阅读0次

    参考

    -- 创建学生表,学号,姓名,性别,年龄,班级
    DROP TABLE student;
    CREATE TEMPORARY TABLE  student(
      Sno int,
      Sname VARCHAR(20),
      Sex VARCHAR(20),
      Sage int,
      Sdept VARCHAR(20)
    );
    
    -- 创建 课程表, 课程编号,课程名称
    DROP TABLE course;
    create TEMPORARY TABLE course(
      Cno int,
      Cname VARCHAR(20)
    );
    -- 创建 学生 课程关联表,学号,课号,年级
    create table sc(
      Sno int,
      Cno int,
      Grade int
    );
    
    -- 插入课程数据
    INSERT into course values(1, 'database');
    INSERT into course values(2, 'math');
    INSERT into course values(3, 'info system');
    INSERT into course values(4, 'system');
    INSERT into course values(5, 'data structure');
    INSERT into course values(6, 'data deal');
    
    -- 插入学生选课数据和成绩
    INSERT into sc values(95001,1,81);
    INSERT into sc values(95001,2,85);
    INSERT into sc values(95001,3,88);
    INSERT into sc values(95001,4,70);
    INSERT into sc values(95002,2,90);
    INSERT into sc values(95002,3,80);
    INSERT into sc values(95002,4,71);
    INSERT into sc values(95002,5,60);
    INSERT into sc values(95003,1,82);
    INSERT into sc values(95003,3,90);
    INSERT into sc values(95003,5,100);
    INSERT into sc values(95004,1,80);
    INSERT into sc values(95004,2,92);
    INSERT into sc values(95004,4,91);
    INSERT into sc values(95004,5,70);
    INSERT into sc values(95005,1,70);
    INSERT into sc values(95005,2,92);
    INSERT into sc values(95005,3,99);
    INSERT into sc values(95005,6,87);
    INSERT into sc values(95006,1,72);
    INSERT into sc values(95006,2,62);
    INSERT into sc values(95006,3,100);
    INSERT into sc values(95006,4,59);
    INSERT into sc values(95006,5,60);
    INSERT into sc values(95006,6,98);
    INSERT into sc values(95007,3,68);
    INSERT into sc values(95007,4,91);
    INSERT into sc values(95007,5,94);
    INSERT into sc values(95007,6,78);
    INSERT into sc values(95008,1,98);
    INSERT into sc values(95008,3,89);
    INSERT into sc values(95008,6,91);
    INSERT into sc values(95009,2,81);
    INSERT into sc values(95009,4,89);
    INSERT into sc values(95009,6,100);
    INSERT into sc values(95010,2,98);
    INSERT into sc values(95010,5,90);
    INSERT into sc values(95010,6,80);
    INSERT into sc values(95011,1,81);
    INSERT into sc values(95011,2,91);
    INSERT into sc values(95011,3,81);
    INSERT into sc values(95011,4,86);
    INSERT into sc values(95012,1,81);
    INSERT into sc values(95012,3,78);
    INSERT into sc values(95012,4,85);
    INSERT into sc values(95012,6,98);
    INSERT into sc values(95013,1,98);
    INSERT into sc values(95013,2,58);
    INSERT into sc values(95013,4,88);
    INSERT into sc values(95013,5,93);
    INSERT into sc values(95014,1,91);
    INSERT into sc values(95014,2,100);
    INSERT into sc values(95014,4,98);
    INSERT into sc values(95015,1,91);
    INSERT into sc values(95015,3,59);
    INSERT into sc values(95015,4,100);
    INSERT into sc values(95015,6,95);
    INSERT into sc values(95016,1,92);
    INSERT into sc values(95016,2,99);
    INSERT into sc values(95016,4,82);
    INSERT into sc values(95017,4,82);
    INSERT into sc values(95017,5,100);
    INSERT into sc values(95017,6,58);
    INSERT into sc values(95018,1,95);
    INSERT into sc values(95018,2,100);
    INSERT into sc values(95018,3,67);
    INSERT into sc values(95018,4,78);
    INSERT into sc values(95019,1,77);
    INSERT into sc values(95019,2,90);
    INSERT into sc values(95019,3,91);
    INSERT into sc values(95019,4,67);
    INSERT into sc values(95019,5,87);
    INSERT into sc values(95020,1,66);
    INSERT into sc values(95020,2,99);
    INSERT into sc values(95020,5,93);
    INSERT into sc values(95021,2,93);
    INSERT into sc values(95021,5,91);
    INSERT into sc values(95021,6,99);
    INSERT into sc values(95022,3,69);
    INSERT into sc values(95022,4,93);
    INSERT into sc values(95022,5,82);
    INSERT into sc values(95022,6,100);
    
    INSERT into student values(95001,"liyong","M",20,"CS");
    INSERT into student values(95002,"licheng","F",19,"IS");
    INSERT into student values(95003,"wangming","F",22,"MA");
    INSERT into student values(95004,"zhangli","M",19,"IS");
    INSERT into student values(95005,"liugang","M",18,"MA");
    INSERT into student values(95006,"sunqing","M",23,"CS");
    INSERT into student values(95007,"yisiling","F",19,"MA");
    INSERT into student values(95008,"lina","F",18,"CS");
    INSERT into student values(95009,"mengyuanyuan","F",18,"MA");
    INSERT into student values(95010,"kongxiaotao","M",19,"CS");
    INSERT into student values(95011,"baoxiaobo","M",18,"MA");
    INSERT into student values(95012,"sunhua","F",20,"CS");
    INSERT into student values(95013,"fengwei","M",21,"CS");
    INSERT into student values(95014,"wangxiaoli","F",19,"CS");
    INSERT into student values(95015,"wangjun","M",18,"MA");
    INSERT into student values(95016,"qianguo","M",21,"MA");
    INSERT into student values(95017,"wangfengjuan","F",18,"IS");
    INSERT into student values(95018,"wangyi","F",19,"IS");
    INSERT into student values(95019,"xingxiaoli","F",19,"IS");
    INSERT into student values(95020,"zhaoqian","M",21,"IS");
    INSERT into student values(95021,"zhouer","M",17,"MA");
    INSERT into student values(95022,"zhangming","M",20,"MA");
    
    SELECT * FROM course;
    
    -- 查询学生总人数
    SELECT count(*) FROM student;
    
    -- 查询全体学生的学号与姓名
    SELECT student.Sno, student.Sname FROM student;
    
    -- 查询选修了课程的学生姓名,
    SELECT student.Sname FROM student
    JOIN sc
    on student.Sno = sc.Sno
    GROUP BY student.Sname;
    
    -- 计算1号课程的学生平均成绩,sc表
    SELECT avg(sc.Grade) FROM sc WHERE Cno = 1 GROUP BY Cno;
    
    -- 这一个写法要快一点
    select avg(Grade) from sc group by Cno having Cno=1;
    
    -- 查询各科成绩平均分
    -- 这个所有学系的各科平均分
    SELECT course.Cname, avg(sc.Grade) FROM course
    JOIN sc
    on course.Cno=sc.Cno
    GROUP BY course.Cname;
    
    -- 这样写要快一点
    SELECT course.Cname, avg(sc.Grade) FROM course
    JOIN sc
    on course.Cno=sc.Cno
    GROUP BY course.Cname,sc.Cno;
    
    -- 查询选修1号课程的学生最高分数
    SELECT max(sc.Grade) FROM sc WHERE sc.Cno = 1;
    
    
    -- 这个写法要快一点
    select Grade from sc where Cno=1 order by Grade desc limit 1;
    
    select Grade from sc where Cno=1 distribute by Grade sort by Grade desc limit 1;
    
    -- 求各个课程号及相应的选课人数
    SELECT sc.Cno, count(1) as num from sc GROUP BY Cno;
    
    -- 查询选修了3门以上的课程的学生学号
    SELECT sc.Sno FROM sc GROUP BY Sno HAVING count(1) > 3;
    
    -- 查询学生信息,结果按学号全局有序
    SELECT student.* FROM student ORDER BY student.Sno;
    
    
    -- 查询学生信息,结果区分性别按年龄有序
    SELECT * FROM student ORDER BY sex,Sage asc;
    
    -- 查询每个学生及其选修课程的成绩情况
    SELECT student.*,course.Cname,sc.Grade FROM student
    JOIN sc
    on sc.Sno = student.Sno
    JOIN course
    on course.Cno=sc.Cno
    ORDER BY student.Sno;
    
    -- 查询学生的得分情况
    SELECT student.Sname,course.Cname,sc.Grade FROM student
    JOIN sc
    on sc.Sno = student.Sno
    JOIN course
    on course.Cno=sc.Cno
    ORDER BY student.Sname;
    
    -- 查询选修2号课程且成绩在90分以上的所有学生。
    SELECT student.* FROM student
    JOIN sc
    on student.Sno=sc.Sno
    WHERE sc.Cno=2 and sc.Grade > 90;
    
    -- 查询所有学生的信息,如果在成绩表中有成绩,则输出成绩表中的课程号
    SELECT student.*,sc.Cno FROM student
    left JOIN sc
    on student.Sno=sc.Sno;
    
    -- 查询与“licheng”在同一个系学习的学生
    -- exists写法于join差不多快
    SELECT stu1.Sname FROM student stu1 WHERE exists
    (
      SELECT * FROM student stu2 WHERE stu2.Sname = 'licheng' and stu1.Sdept = stu2.Sdept
    );
    exists 子查询语句如果为true就提交,如果为false不提交,先外循环stu1,然后在内循环stu2,如果内循环(子查询)为真,则提交(选择),所以子查询与select字段无关,只与后面的判断真伪有关
    
    -- in 写法
    SELECT stu1.Sname FROM student stu1 WHERE stu1.Sdept in
    (
      SELECT stu2.Sdept FROM student stu2 WHERE stu2.Sname = 'licheng'
    );
    in子查询先产生结果集,然后主查询再去结果集里去找符合要求的字段列表去.符合要求的输出,反之则不输出
    
    
    -- join写法最快
    SELECT stu.Sname FROM student stu
    JOIN student st
    on st.Sname = 'licheng' and st.Sdept = stu.Sdept and stu.Sname != 'licheng';
    
    select s2.* from student as s1 inner join student as s2 on(s2.Sdept=s1.Sdept) where s1.Sname='licheng';
    

    原先数据格式是这样,写了个脚本转成insert 形式

    ss = '''95001,李勇,男,20,CS
    95002,刘晨,女,19,IS
    95003,王敏,女,22,MA
    95004,张立,男,19,IS
    95005,刘刚,男,18,MA
    95006,孙庆,男,23,CS
    95007,易思玲,女,19,MA
    95008,李娜,女,18,CS
    95009,梦圆圆,女,18,MA
    95010,孔小涛,男,19,CS
    95011,包小柏,男,18,MA
    95012,孙花,女,20,CS
    95013,冯伟,男,21,CS
    95014,王小丽,女,19,CS
    95015,王君,男,18,MA
    95016,钱国,男,21,MA
    95017,王风娟,女,18,IS
    95018,王一,女,19,IS
    95019,邢小丽,女,19,IS
    95020,赵钱,男,21,IS
    95021,周二,男,17,MA
    95022,郑明,男,20,MA
    '''
    rr = "INSERT into student values(%s);";
    for el in ss.split('\n'):
        temp = ""
        i = 0
        for ell in el.split(','):
            if i == 0 or i==3:
                temp +=ell+','
            elif i == 1 or i == 2:
                temp +="\"%s\""%ell+','
            else:
                temp += "\"%s\""%ell
            i += 1
            
        print (rr%temp)
    

    相关文章

      网友评论

          本文标题:hive sql练习

          本文链接:https://www.haomeiwen.com/subject/nboxoftx.html