join语法,多表操作
创建一个a表和一个b表
-- Demo table A: used as the left-hand table in the join examples.
create table testa (
    aid     int,
    aname   varchar(100),
    address varchar(100)
);

-- Demo table B: used as the right-hand table in the join examples.
create table testb (
    bid   int,
    bname varchar(100),
    age   int
);
插入a表数据
-- Seed testa with five rows (aid 1-5).
-- aid = 3 has no address, so it gets a real SQL NULL (unquoted). A quoted
-- 'null' would be stored as a four-character string and would not satisfy
-- `address is null`.
-- The explicit column list keeps the insert valid if columns are later
-- added to or reordered in testa.
insert into testa (aid, aname, address) values
    (1, 'x1', 'sh'),
    (2, 'x2', 'hz'),
    (3, 'x3', null),
    (4, 'x4', 'bj'),
    (5, 'x5', 'gz');
插入b表数据
-- Seed testb with nine rows.
-- bid 1-4 also exist as aid values in testa; bid 7-10 have no counterpart there.
-- The row (4, 'x4', 16) is listed twice, so testb holds two identical rows
-- for bid = 4 (this is what makes 4 appear twice in the union all output).
insert into testb (bid, bname, age) values
    (1,  'x1',  10),
    (2,  'x2',  11),
    (3,  'x3',  12),
    (4,  'x4',  16),
    (4,  'x4',  16),
    (7,  'x7',  19),
    (8,  'x8',  22),
    (9,  'x9',  24),
    (10, 'x10', 44);
1、left join:以左表为主,左表 a 的所有行都会保留;b 只用来匹配 a,匹配上多少行就返回多少行,匹配不上时 b 的各列为 NULL;on 后面是匹配条件
-- Left join: every testa row is returned; the testb columns are NULL for
-- testa rows whose aid has no equal bid.
select
    ta.aid,
    ta.aname,
    ta.address,
    tb.bid,
    tb.bname,
    tb.age
from testa as ta
left join testb as tb
    on tb.bid = ta.aid;
得到结果:
2、right join:以右表为主,右表 b 的所有行都会保留;a 只用来匹配 b,匹配上多少行就返回多少行,匹配不上时 a 的各列为 NULL;on 后面是匹配条件
-- Right join: every testb row is returned; the testa columns are NULL for
-- testb rows whose bid has no equal aid.
select
    ta.aid,
    ta.aname,
    ta.address,
    tb.bid,
    tb.bname,
    tb.age
from testa as ta
right join testb as tb
    on tb.bid = ta.aid;
得到结果:
3、inner join:只保留两张表都能匹配上的行,相当于在 left join / right join 的结果上再过滤掉被匹配表为 NULL 的行(即被匹配表的关联字段 is not null),得到的是a表与b表的交集
-- Inner join: only rows where testa.aid equals testb.bid are returned.
select
    ta.aid,
    ta.aname,
    ta.address,
    tb.bid,
    tb.bname,
    tb.age
from testa as ta
inner join testb as tb
    on tb.bid = ta.aid;
得到结果:
相当于:
-- Inner-join equivalent built from a left join: the filter discards the
-- testa rows that found no partner in testb (their bid is NULL).
select
    ta.aid,
    ta.aname,
    ta.address,
    tb.bid,
    tb.bname,
    tb.age
from testa as ta
left join testb as tb
    on tb.bid = ta.aid
where tb.bid is not null;
-- Inner-join equivalent built from a right join: the filter discards the
-- testb rows that found no partner in testa (their aid is NULL).
select
    ta.aid,
    ta.aname,
    ta.address,
    tb.bid,
    tb.bname,
    tb.age
from testa as ta
right join testb as tb
    on tb.bid = ta.aid
where ta.aid is not null;
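4、full join:spark sql支持full join,但mysql是不支持的,一般用:左连接+union+右连接代替。注意:union 会去重,如果关联结果里本身就有完全相同的行(例如本例 testb 中 (4,'x4',16) 插入了两次),结果会比真正的 full join 少行;要严格等价,可以用:左连接 + union all + (右连接 where a.aid is null)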
- union 结果去重
-- Full outer join emulation for engines without `full join` (e.g. MySQL).
-- Part 1 (left join) returns every testa row, matched or not.
-- Part 2 (right join + `a.aid is null`) adds only the testb rows that have
-- no match in testa, so matched rows are not produced a second time.
-- `union all` is used instead of `union` because `union` deduplicates the
-- combined result: testb holds (4, 'x4', 16) twice, so the join yields two
-- identical rows for aid = 4, and `union` would collapse them into one,
-- whereas a real full join returns both.
select
    a.*,
    b.*
from testa as a
left join testb as b on a.aid = b.bid
union all
select
    a.*,
    b.*
from testa as a
right join testb as b on a.aid = b.bid
where a.aid is null;
得到结果:
- union all 结果不去重,结果集包括所有select语句的匹配行(包括重复行)
-- Stack all aid values on top of all bid values, keeping duplicates.
-- The result column is named after the first select (aid).
select ta.aid
from testa as ta
union all
select tb.bid
from testb as tb;
aid
1
2
3
4
5
1
2
3
4
4
7
8
9
10
-- Combine aid and bid values and remove duplicates from the merged result.
-- The result column is named after the first select (aid).
select ta.aid
from testa as ta
union
select tb.bid
from testb as tb;
aid
1
2
3
4
5
7
8
9
10
union / union all 用来合并两个sql的结果集,结果集的列名取自第一个 select 语句(所以上面结果的列名是 aid)
- union与union all的注意点:
1)union因为要扫描删除重复行,所以效率低。一般如果没有要求要删除重复行,就用union all
2)两个要联合的sql语句,字段数量必须相同,对应位置的字段类型要相同或兼容(可以隐式转换);即使用union和union all必须保证各个select结果集有相同个数的列,并且对应列的类型一致,但列名不需要相同
3)union和union all都可以将多个结果集合并,而不仅仅是两个
4)进行排序时,不需要在每一个select结果集都使用order by命令,只需要在最后使用order by语句对整个结果进行排序即可