hive sql 条件放在on和where区别比较

结论

左右关联时,
1.条件不为主表条件时,放在on和where后面一样。
2.条件为主表条件时,放在on后面,结果为主表全量,放在where后面为主表条件筛选过后的全量。

左半连接时,(没有右半连接,0.5.0以上版本的特性)
1.右表没有重复数据时,左半连接和内连接和in结果一样。
2.右表有重复数据时,左半连接(最高效)和in结果一样(右边去重后关联),内连接右表不会去重,数据多。

数据准备

A

1   lijie1  100 10
2   lijie2  90  20
3   lijie3  60  10
4   lijie4  80  10
5   lijie5  70  20
6   lijie6  80  30
7   lijie7  60  30

B

10  IT1
20  IT2

加载数据

create table dev.A(
id      string,
name    string,
grade   string,
dept    string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY '\t' ;

create table dev.B(
id      string,
name    string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY '\t' ;

load data local inpath '/home/a' overwrite into table dev.a;
load data local inpath '/home/b' overwrite into table dev.b;

左关联:条件左边在on和where区别

//左连接条件为主表在on后面,结果为左表全量
select 
    A.id,A.name,A.grade,A.dept,B.id,B.name 
from
    A left outer join B
on
    A.dept = B.id 
and 
    A.grade >=80 ;

4   lijie4  80  10  10  IT1
3   lijie3  60  10  NULL    NULL
1   lijie1  100 10  10  IT1
5   lijie5  70  20  NULL    NULL
2   lijie2  90  20  20  IT2
7   lijie7  60  30  NULL    NULL
6   lijie6  80  30  NULL    NULL

//左关联条件为主表在where后面,结果为左表条件筛选过后全量
select 
    A.id,A.name,A.grade,A.dept,B.id,B.name 
from
    A left outer join B
on
    A.dept = B.id 
where 
    A.grade >=80 ;

4   lijie4  80  10  10  IT1
1   lijie1  100 10  10  IT1
2   lijie2  90  20  20  IT2
6   lijie6  80  30  NULL    NULL

右关联:条件为左表条件时在on和where一样

select 
    A.id,A.name,A.grade,A.dept,B.id,B.name 
from
    A right outer join B
on
    A.dept = B.id 
and 
    A.grade >=80 ;

4   lijie4  80  10  10  IT1
1   lijie1  100 10  10  IT1
2   lijie2  90  20  20  IT2

select 
    A.id,A.name,A.grade,A.dept,B.id,B.name 
from
    A right outer join B
on
    A.dept = B.id 
where 
    A.grade >=80 ;

4   lijie4  80  10  10  IT1
1   lijie1  100 10  10  IT1
2   lijie2  90  20  20  IT2

// 右关联:条件为右表条件时在放在on后面为全量。
select 
    A.id,A.name,A.grade,A.dept,B.id,B.name 
from
    A right outer join B
on
    A.dept = B.id 
and 
    B.id > 10 ;
NULL    NULL    NULL    NULL    10  IT1
NULL    NULL    NULL    NULL    10  IT1
5   lijie5  70  20  20  IT2
2   lijie2  90  20  20  IT2
// 右关联:条件为右表条件时在放在where后面为条件筛选后全量。
select 
    A.id,A.name,A.grade,A.dept,B.id,B.name 
from
    A right outer join B
on
    A.dept = B.id 
where 
    B.id > 10 ;
5   lijie5  70  20  20  IT2
2   lijie2  90  20  20  IT2

left semi join和inner join和in比较

//左半连接,A包含B(去重)的内连接
select 
    A.id,A.name,A.grade,A.dept
from
    A left semi join B
on
    A.dept = B.id 
where 
    A.grade >=80 ;
4   lijie4  80  10
1   lijie1  100 10
2   lijie2  90  20

//如果没有重复数据,半连接和内连接和 in结果一样。
select 
    A.id,A.name,A.grade,A.dept
from
    A left semi join B
on
    A.dept = B.id ;

4   lijie4  80  10
3   lijie3  60  10
1   lijie1  100 10
5   lijie5  70  20
2   lijie2  90  20

select 
    A.id,A.name,A.grade,A.dept
from
    A inner join B
on
    A.dept = B.id ;
4   lijie4  80  10
3   lijie3  60  10
1   lijie1  100 10
5   lijie5  70  20
2   lijie2  90  20

SELECT A.id,A.name,A.grade,A.dept
FROM a
WHERE a.dept in
   (SELECT b.id FROM B);
4   lijie4  80  10
3   lijie3  60  10
1   lijie1  100 10
5   lijie5  70  20
2   lijie2  90  20

//B表插入重复数据
insert into table dev.B
select * from dev.B limit 1;

//半连接(最高效)和in结果一样,内连接关联字段有重复
select 
    A.id,A.name,A.grade,A.dept
from
    A left semi join B
on
    A.dept = B.id ;
4   lijie4  80  10
3   lijie3  60  10
1   lijie1  100 10
5   lijie5  70  20
2   lijie2  90  20

select 
    A.id,A.name,A.grade,A.dept
from
    A inner join B
on
    A.dept = B.id ;
4   lijie4  80  10
4   lijie4  80  10
3   lijie3  60  10
3   lijie3  60  10
1   lijie1  100 10
1   lijie1  100 10
5   lijie5  70  20
2   lijie2  90  20

SELECT A.id,A.name,A.grade,A.dept
FROM a
WHERE a.dept in
   (SELECT b.id FROM B);
4   lijie4  80  10
3   lijie3  60  10
1   lijie1  100 10
5   lijie5  70  20
2   lijie2  90  20
    原文作者:AI_leef
    原文地址: https://www.jianshu.com/p/61940f8971fe
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞