结论
左右关联时,
1.条件不为主表条件时,放在on和where后面一样。
2.条件为主表条件时,放在on后面,结果为主表全量,放在where后面为主表条件筛选过后的全量。
左半连接时,(没有右半连接,0.5.0以上版本的特性)
1.右表没有重复数据时,左半连接和内连接和in结果一样。
2.右表有重复数据时,左半连接(最高效)和in结果一样(右边去重后关联),内连接右表不会去重,数据多。
数据准备
A
1 lijie1 100 10
2 lijie2 90 20
3 lijie3 60 10
4 lijie4 80 10
5 lijie5 70 20
6 lijie6 80 30
7 lijie7 60 30
B
10 IT1
20 IT2
加载数据
create table dev.A(
id string,
name string,
grade string,
dept string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t' ;
create table dev.B(
id string,
name string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t' ;
load data local inpath '/home/a' overwrite into table dev.a;
load data local inpath '/home/b' overwrite into table dev.b;
左关联:条件左边在on和where区别
//左连接条件为主表在on后面,结果为左表全量
select
A.id,A.name,A.grade,A.dept,B.id,B.name
from
A left outer join B
on
A.dept = B.id
and
A.grade >=80 ;
4 lijie4 80 10 10 IT1
3 lijie3 60 10 NULL NULL
1 lijie1 100 10 10 IT1
5 lijie5 70 20 NULL NULL
2 lijie2 90 20 20 IT2
7 lijie7 60 30 NULL NULL
6 lijie6 80 30 NULL NULL
//左关联条件为主表在where后面,结果为左表条件筛选过后全量
select
A.id,A.name,A.grade,A.dept,B.id,B.name
from
A left outer join B
on
A.dept = B.id
where
A.grade >=80 ;
4 lijie4 80 10 10 IT1
1 lijie1 100 10 10 IT1
2 lijie2 90 20 20 IT2
6 lijie6 80 30 NULL NULL
右关联:条件为左表条件时在on和where一样
select
A.id,A.name,A.grade,A.dept,B.id,B.name
from
A right outer join B
on
A.dept = B.id
and
A.grade >=80 ;
4 lijie4 80 10 10 IT1
1 lijie1 100 10 10 IT1
2 lijie2 90 20 20 IT2
select
A.id,A.name,A.grade,A.dept,B.id,B.name
from
A right outer join B
on
A.dept = B.id
where
A.grade >=80 ;
4 lijie4 80 10 10 IT1
1 lijie1 100 10 10 IT1
2 lijie2 90 20 20 IT2
// 右关联:条件为右表条件时在放在on后面为全量。
select
A.id,A.name,A.grade,A.dept,B.id,B.name
from
A right outer join B
on
A.dept = B.id
and
B.id > 10 ;
NULL NULL NULL NULL 10 IT1
NULL NULL NULL NULL 10 IT1
5 lijie5 70 20 20 IT2
2 lijie2 90 20 20 IT2
// 右关联:条件为右表条件时在放在where后面为条件筛选后全量。
select
A.id,A.name,A.grade,A.dept,B.id,B.name
from
A right outer join B
on
A.dept = B.id
where
B.id > 10 ;
5 lijie5 70 20 20 IT2
2 lijie2 90 20 20 IT2
left semi join和inner join和in比较
//左半连接,A包含B(去重)的内连接
select
A.id,A.name,A.grade,A.dept
from
A left semi join B
on
A.dept = B.id
where
A.grade >=80 ;
4 lijie4 80 10
1 lijie1 100 10
2 lijie2 90 20
//如果没有重复数据,半连接和内连接和 in结果一样。
select
A.id,A.name,A.grade,A.dept
from
A left semi join B
on
A.dept = B.id ;
4 lijie4 80 10
3 lijie3 60 10
1 lijie1 100 10
5 lijie5 70 20
2 lijie2 90 20
select
A.id,A.name,A.grade,A.dept
from
A inner join B
on
A.dept = B.id ;
4 lijie4 80 10
3 lijie3 60 10
1 lijie1 100 10
5 lijie5 70 20
2 lijie2 90 20
SELECT A.id,A.name,A.grade,A.dept
FROM a
WHERE a.dept in
(SELECT b.id FROM B);
4 lijie4 80 10
3 lijie3 60 10
1 lijie1 100 10
5 lijie5 70 20
2 lijie2 90 20
//B表插入重复数据
insert into table dev.B
select * from dev.B limit 1;
//半连接(最高效)和in结果一样,内连接关联字段有重复
select
A.id,A.name,A.grade,A.dept
from
A left semi join B
on
A.dept = B.id ;
4 lijie4 80 10
3 lijie3 60 10
1 lijie1 100 10
5 lijie5 70 20
2 lijie2 90 20
select
A.id,A.name,A.grade,A.dept
from
A inner join B
on
A.dept = B.id ;
4 lijie4 80 10
4 lijie4 80 10
3 lijie3 60 10
3 lijie3 60 10
1 lijie1 100 10
1 lijie1 100 10
5 lijie5 70 20
2 lijie2 90 20
SELECT A.id,A.name,A.grade,A.dept
FROM a
WHERE a.dept in
(SELECT b.id FROM B);
4 lijie4 80 10
3 lijie3 60 10
1 lijie1 100 10
5 lijie5 70 20
2 lijie2 90 20