1. 数据说明
(1) dept表
hive> select * from dept;
# deptno(部门编号) dname(部门名称) loc(部门所在地区)
10 ACCOUNTING NEW YORK
20 RESEARCH DALLAS
30 SALES CHICAGO
40 OPERATIONS BOSTON
(2) emp表
hive> select * from emp;
# 员工编号 员工姓名 职务 领导编号 入职日期 工资 奖金 部门编号
# empno ename job mgr hiredate sal comm deptno
7369 SMITH CLERK 7902 1980-12-17 800.0 0.0 20
7499 ALLEN SALESMAN 7698 1981-02-20 1600.0 300.0 30
7521 WARD SALESMAN 7698 1981-02-22 1250.0 500.0 30
7566 JONES MANAGER 7839 1981-04-02 2975.0 0.0 20
7654 MARTIN SALESMAN 7698 1981-09-28 1250.0 1400.0 30
7698 BLAKE MANAGER 7839 1981-05-01 2850.0 0.0 30
7782 CLARK MANAGER 7839 1981-06-09 2450.0 0.0 10
7788 SCOTT ANALYST 7566 1987-07-13 3000.0 0.0 20
7839 KING PRESIDENT NULL 1981-11-07 5000.0 0.0 10
7844 TURNER SALESMAN 7698 1981-09-08 1500.0 0.0 30
7876 ADAMS CLERK 7788 1987-07-13 1100.0 0.0 20
7900 JAMES CLERK 7698 1981-12-03 950.0 0.0 30
7902 FORD ANALYST 7566 1981-12-03 3000.0 0.0 20
7934 MILLER CLERK 7782 1982-01-23 1300.0 0.0 10
2. SQL查询练习题目
(1) 查询总员工数
-- Total headcount: number of distinct employee numbers in emp.
select count(distinct e.empno)
from emp e;
# 14
(2) 查询总共有多少个职位
-- Number of different job titles that appear in emp.
select count(distinct e.job)
from emp e;
# 5
(3) 统计每个职位有多少个员工,并且按照数量从大到小排序
-- Headcount per job title, largest groups first.
select e.job,
       count(distinct e.empno) as count_emp
from emp e
group by e.job
order by count_emp desc;
# 结果
job count_emp
SALESMAN 4
CLERK 4
MANAGER 3
ANALYST 2
PRESIDENT 1
(4) 查询入职最早的员工
-- Employee(s) whose hiredate equals the earliest hiredate in emp.
-- The one-row derived table holds that earliest date; the equi-join
-- keeps every employee hired on it, so ties are all returned.
select e.ename, e.hiredate
from (select min(hiredate) as min_date from emp) first_hire
join emp e
  on e.hiredate = first_hire.min_date;
# 结果
SMITH 1980-12-17
# 注意:以下SQL在hive中不能执行(where子句中使用了标量子查询,本文所用的hive版本不支持),所以上面改用join实现
-- Counter-example: filters on hiredate with a scalar subquery in the WHERE clause.
select ename from emp
where hiredate = (select min(hiredate) from emp);
(5) 统计出每个岗位的最高工资和平均工资
-- Highest and mean salary within each job title.
select e.job,
       max(e.sal),
       avg(e.sal)
from emp e
group by e.job;
# 结果
ANALYST 3000.0 3000.0
CLERK 1300.0 1037.5
MANAGER 2975.0 2758.3333333333335
PRESIDENT 5000.0 5000.0
SALESMAN 1600.0 1400.0
(6) 查询出每个地区工资最高的员工
-- Highest-paid employee(s) in each location; ties within a location are all returned.
-- The derived table loc_max holds one row per location with that location's top salary.
-- The outer query resolves each employee's own location through dept and matches on
-- BOTH location and salary, so an employee whose salary merely equals another
-- location's maximum is not reported under that other location.
select emp.ename, loc_max.max_sal, loc_max.loc
from emp
join dept
  on emp.deptno = dept.deptno
join
  (select d.loc as loc, max(e.sal) as max_sal
   from emp e
   join dept d
     on e.deptno = d.deptno
   group by d.loc) loc_max
  on dept.loc = loc_max.loc
 and emp.sal = loc_max.max_sal;
# 结果
BLAKE 2850.0 CHICAGO
SCOTT 3000.0 DALLAS
FORD 3000.0 DALLAS
KING 5000.0 NEW YORK
(7) 查询上半年入职员工最多的地区
-- Stage 1 of query 7: one row per employee with name, hire month and department location.
-- hiremonth is the two characters of hiredate starting at position 6
-- (the month part for the yyyy-MM-dd values shown in the sample data).
create table e1 as
select e.ename as ename,
       substring(e.hiredate, 6, 2) as hiremonth,
       d.loc as loc
from emp e
join dept d
  on e.deptno = d.deptno;
/*
SMITH 12 DALLAS
ALLEN 02 CHICAGO
WARD 02 CHICAGO
JONES 04 DALLAS
MARTIN 09 CHICAGO
BLAKE 05 CHICAGO
CLARK 06 NEW YORK
SCOTT 07 DALLAS
KING 11 NEW YORK
TURNER 09 CHICAGO
ADAMS 07 DALLAS
JAMES 12 CHICAGO
FORD 12 DALLAS
MILLER 01 NEW YORK
*/
-- Stage 2 of query 7: label each e1 row by the half of the year the employee was hired in.
-- hiremonth is compared as a string against '06': values up to '06' are labelled
-- 'first_half_year', everything else falls through to 'last_half_year'.
create table e2 as
select s.ename,
       case
           when s.hiremonth <= '06' then 'first_half_year'
           else 'last_half_year'
       end as hire_year,
       s.loc
from e1 s;
/*
SMITH last_half_year DALLAS
ALLEN first_half_year CHICAGO
WARD first_half_year CHICAGO
JONES first_half_year DALLAS
MARTIN last_half_year CHICAGO
BLAKE first_half_year CHICAGO
CLARK first_half_year NEW YORK
SCOTT last_half_year DALLAS
KING last_half_year NEW YORK
TURNER last_half_year CHICAGO
ADAMS last_half_year DALLAS
JAMES last_half_year CHICAGO
FORD last_half_year DALLAS
MILLER first_half_year NEW YORK
*/
-- Final step of query 7: the location with the most employees hired in the first half of the year.
-- Only rows labelled 'first_half_year' in e2 are counted.
-- limit 1 keeps a single row, so loc is added as a secondary sort key: if several
-- locations share the highest count, the returned row is deterministic
-- (alphabetically first loc) instead of depending on execution order.
select loc, count(ename) as count
from e2
where hire_year = 'first_half_year'
group by loc
order by count desc, loc
limit 1;
/*
CHICAGO 3
*/