join 만만한가?

« » 2025.4
일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

카테고리 없음 / 2011. 6. 24. 00:11

1. 두 테이블에 인덱스가 없으면, hash 조인으로만 푼다.
아니다 오히려 hash 로 못 푸는 경우가 발생 할 수도 있다.

if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 int
)
go

create table tbly
(idx int
,c1 int
)
go

insert into tblx values (1,1)
insert into tblx values (1,2)
insert into tbly values (1,1)
insert into tbly values (2,null)
go

set statistics profile on

if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 int
)
go

create table tbly
(idx int
,c1 int
)
go

insert into tblx values (1,1)
insert into tblx values (1,2)
insert into tbly values (1,1)
insert into tbly values (2,null)
go

set statistics profile on

select *
from tblx a
join tbly b
on a.idx = b.idx
where a.idx > 1
option (hash join)

select *    from tblx a    join tbly b      on a.idx = b.idx    where a.idx > 1   option (hash join)
|--Hash Match(Inner Join, HASH:([a].[idx])=([b].[idx]), RESIDUAL:([a].[dbo].[tbly].[idx] as [b].[idx]=[a].[dbo].[tblx].[idx] as [a].[idx]))
       |--Table Scan(OBJECT:([a].[dbo].[tblx] AS [a]), WHERE:([a].[dbo].[tblx].[idx] as [a].[idx]>(1)))
       |--Table Scan(OBJECT:([a].[dbo].[tbly] AS [b]), WHERE:([a].[dbo].[tbly].[idx] as [b].[idx]>(1)))


select *
from tblx a
join tbly b
    on a.idx = b.idx
where a.idx = 1
option (hash join)

메시지 8622, 수준 16, 상태 1, 줄 1
이 쿼리에 정의된 힌트로 인해 쿼리 프로세서에서 쿼리 계획을 생성할 수 없습니다. 힌트를 지정하거나 SET FORCEPLAN을 사용하지 않고 쿼리를 다시 전송하십시오.

select *
from tblx a
join tbly b
on a.idx = b.idx
where a.idx = 1

select *    from tblx a    join tbly b      on a.idx = b.idx    where a.idx = 1
|--Nested Loops(Inner Join) -- 뒤에 어떠한 join predicate 가 없다.
       |--Table Scan(OBJECT:([a].[dbo].[tbly] AS [b]), WHERE:([a].[dbo].[tbly].[idx] as [b].[idx]=(1)))
       |--Table Scan(OBJECT:([a].[dbo].[tblx] AS [a]), WHERE:([a].[dbo].[tblx].[idx] as [a].[idx]=(1)))

hash join 은 적어도 하나의 equijoin predicate 가 있어야만 풀 수 있기 때문이다. merge join 도 마찬가지 이다.

이것은 프로시저로 작성할 때 변수에 의해서 쿼리가 실행되고 안되고 하는 샘플을 만들 수 있고 변수에 따라서 쿼리가 동작 할 수 있고 없는 조건이 발생 하기도 한다.

a.idx between 1 and 100 이 입력되는 경우도 있고, 1 and 1 이라고 입력 된다고 생각해 보라. 이 또한 이행적 폐쇄(transitive closure) 법칙이 적용될 수 있고, 이것은 sql server 에서는 힌트로 막을 방법이 없다. 이러한 변경이 일어나는 경우 hash join, merge join 의 기본 원칙인 equijoin 이 predicate 로 반드시 들어가야 하는 기본 원칙을 위배 하므로 실행 될 수 없다.

(oracle 의 경우 query_rewrite_enabled 파라미터를 이용해 막을 수 있다.)

select *
from tblx a
join tbly b
    on a.idx = b.idx
where a.idx = 1
option (merge join)

메시지 8622, 수준 16, 상태 1, 줄 1
이 쿼리에 정의된 힌트로 인해 쿼리 프로세서에서 쿼리 계획을 생성할 수 없습니다. 힌트를 지정하거나 SET FORCEPLAN을 사용하지 않고 쿼리를 다시 전송하십시오.

select *
from tblx a
join tbly b
    on a.idx = b.idx

select *    from tblx a    join tbly b      on a.idx = b.idx
|--Hash Match(Inner Join, HASH:([b].[idx])=([a].[idx]), RESIDUAL:([pc].[dbo].[tbly].[idx] as [b].[idx]=[pc].[dbo].[tblx].[idx] as [a].[idx]))
       |--Table Scan(OBJECT:([pc].[dbo].[tbly] AS [b]))
       |--Table Scan(OBJECT:([pc].[dbo].[tblx] AS [a]))

select *
from tblx a
inner loop join tbly b
on a.idx = b.idx

select *    from tblx a    inner loop join tbly b      on a.idx = b.idx
|--Nested Loops(Inner Join, WHERE:([pc].[dbo].[tbly].[idx] as [b].[idx]=[pc].[dbo].[tblx].[idx] as [a].[idx]))
       |--Table Scan(OBJECT:([pc].[dbo].[tblx] AS [a]))
       |--Table Scan(OBJECT:([pc].[dbo].[tbly] AS [b]))

select *
from tblx a
inner merge join tbly b
on a.idx = b.idx

select *    from tblx a    inner merge join tbly b      on a.idx = b.idx
|--Merge Join(Inner Join, MANY-TO-MANY MERGE:([a].[idx])=([b].[idx]), RESIDUAL:([pc].[dbo].[tbly].[idx] as [b].[idx]=[pc].[dbo].[tblx].[idx] as [a].[idx]))
       |--Sort(ORDER BY:([a].[idx] ASC))
       |    |--Table Scan(OBJECT:([pc].[dbo].[tblx] AS [a]))
       |--Sort(ORDER BY:([b].[idx] ASC))
            |--Table Scan(OBJECT:([pc].[dbo].[tbly] AS [b]))

또하나의 간단한 이야기를 해보자.

if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 int
)
go

create table tbly
(idx int
,c1 int
)
go

insert into tblx values (1,1)
insert into tblx values (2,2)
insert into tbly values (1,1)

select *
from tblx a
join tbly b
    on a.idx = 1
option (hash join)
go

select *    from tblx a    join tbly b      on a.idx = 1
|--Nested Loops(Inner Join) -- 여기에 아무것도 없다.
       |--Table Scan(OBJECT:([a].[dbo].[tblx] AS [a]), WHERE:([a].[dbo].[tblx].[idx] as [a].[idx]=(1)))
       |--Table Scan(OBJECT:([a].[dbo].[tbly] AS [b]))

해당 쿼리는 a.idx = 1 인것을 찾고 그냥 cross join 을 수행한다.
그러므로 hash function 을 수행 할 equijoin predicate 가 역시 없다. 그러므로 hash join 으로 풀 수 없다.
아래 not exists 에서 hash join 으로 못 푸는 것 역시 이렇게 해석 할 수 있다.

그러나 full outer merge join 의 경우 non equijoin 도 가능하기 때문에 다음 쿼리가 가능하다.

select *
from tblx a
full outer join tbly b
    on a.idx = 1
option (merge join)

필터 조건이 없으면 hash 만 되는가?
아니다 다음 쿼리의 예제를 보자

select *
from tblx a
join tbly b
on a.idx > b.idx

select *    from tblx a    join tbly b      on a.idx > b.idx
|--Nested Loops(Inner Join, WHERE:([pc].[dbo].[tblx].[idx] as [a].[idx]>[pc].[dbo].[tbly].[idx] as [b].[idx]))
       |--Table Scan(OBJECT:([pc].[dbo].[tbly] AS [b]))
       |--Table Scan(OBJECT:([pc].[dbo].[tblx] AS [a]))

힌트주면 되지요?
아니다. 해쉬펑션을 통과한 값의 크다 작다를 비교 할 수 있으면 넌 신이다. 난 신 많이 봤다. ;)

select *
from tblx a
inner hash join tbly b
on a.idx > b.idx

2. in 과 not in 은 반대 인가요?
아니다

다음을 살펴보자.

if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 int
)
go

create table tbly
(idx int
,c1 int
)
go

insert into tblx values (1,1)
insert into tblx values (2,2)
insert into tbly values (1,1)
insert into tbly values (2,null)
go

select *
from tblx
where c1 in (select c1 from tbly)

idx c1
----------- -----------
1 1

(1개 행이 영향을 받음)

select *
from tblx
where c1 not in (select c1 from tbly)

idx c1
----------- -----------

(0개 행이 영향을 받음)

2 는 어디로 갔나?
왜 그런가? null 이있다는 생각을 하자 null 의 오묘함에 빠져보면 재미있을 것이다.

3. table 의 check 조건 써야 하나요?
그렇다. 꼭 써 주는게 좋습니다. 이러한 체크조건이 있냐 없냐에 따라 join 방법도 틀리게 할 수 있다.
써주면 optimizer 가 매우 효율적으로 동작 할 수 있는 실마리를 준다.

if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 int
)
go

create table tbly
(idx int
,c1 int
)
go

insert into tblx values (1,1)
insert into tblx values (2,2)
insert into tbly values (1,1)

select *
from tblx
where c1 not in (select c1 from tbly)

select *     from tblx    where c1 not in (select c1 from tbly)
|--Nested Loops(Left Anti Semi Join, WHERE:([pc].[dbo].[tblx].[c1] IS NULL OR [pc].[dbo].[tbly].[c1] IS NULL OR [pc].[dbo].[tblx].[c1]=[pc].[dbo].[tbly].[c1]))
       |--Table Scan(OBJECT:([pc].[dbo].[tblx]))
       |--Table Scan(OBJECT:([pc].[dbo].[tbly]))

hash join 으로 풀 수 있는가?

select *
from tblx
where c1 not in (select c1 from tbly)
option (hash join)

메시지 8622, 수준 16, 상태 1, 줄 2
이 쿼리에 정의된 힌트로 인해 쿼리 프로세서에서 쿼리 계획을 생성할 수 없습니다. 힌트를 지정하거나 SET FORCEPLAN을 사용하지 않고 쿼리를 다시 전송하십시오.

테이블에 not null 체크 조건이 있으면 어떻게 되는가?
잘푼다.
if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 int not null
)
go

create table tbly
(idx int
,c1 int not null
)
go

insert into tblx values (1,1)
insert into tblx values (2,2)
insert into tbly values (1,1)
go

-- 아까 위헤서 아래 퀴리는 위에서 실행계획을 생성 할 수 없습니다. 그 쿼리이다.

select *
from tblx
where c1 not in (select c1 from tbly)
option (hash join)

select *     from tblx    where c1 not in (select c1 from tbly) option (hash join)
|--Hash Match(Right Anti Semi Join, HASH:([pc].[dbo].[tbly].[c1])=([pc].[dbo].[tblx].[c1]))
       |--Table Scan(OBJECT:([pc].[dbo].[tbly]))
       |--Table Scan(OBJECT:([pc].[dbo].[tblx]))

테이블 제약 조건 주지말고 hash join 을 하고 싶은가?

select *
from tblx
where c1 not in (select c1 from tbly where c1 is not null)
and c1 is not null
option (hash join)

select *     from tblx    where c1 not in (select c1 from tbly where c1 is not null)     and c1 is not null option (hash join)
|--Hash Match(Right Anti Semi Join, HASH:([pc].[dbo].[tbly].[c1])=([pc].[dbo].[tblx].[c1]), RESIDUAL:([pc].[dbo].[tblx].[c1]=[pc].[dbo].[tbly].[c1]))
       |--Table Scan(OBJECT:([pc].[dbo].[tbly]), WHERE:([pc].[dbo].[tbly].[c1] IS NOT NULL))
       |--Table Scan(OBJECT:([pc].[dbo].[tblx]), WHERE:([pc].[dbo].[tblx].[c1] IS NOT NULL))

풀리긴 풀리는데 residual 이나 where 필터가 형성된다.

체크조건이 있으면 residual 등이 없고 깔끔하지 않은가? 어떤가? check 조건 꼭 기술하는게 좋을것 같지 않은가?
이 외 다양한 이슈가 있다. 재미 있는가? 나머지 이슈는 나중에 책을 통해서 만나보자! (sqltag 에서는 sql 책을 준비하고 있어요 ;) 교성아 join 다 적어가나?????

if object_id('tblx') is not null
drop table tblx
go

if object_id('tbly') is not null
drop table tbly
go

create table tblx
(idx int
,c1 char(8000) not null
)
go

create table tbly
(idx int
,c1 char(8000) not null
)
go

insert into tblx values (1,1)
insert into tblx values (2,2)
insert into tbly values (1,1)
go

select *
from tblx a
join tbly b
on a.idx = b.idx
order by a.idx
go

select *
from tblx a
inner hash join tbly b
    on a.idx = b.idx
order by a.idx
option(hash join)

메시지 8618, 수준 16, 상태 2, 줄 1
작업 테이블이 필요하며 최소 행 크기가 허용되는 최대 크기 8060바이트를 초과하므로 쿼리 프로세서에서 쿼리 계획을 생성할 수 없습니다. 작업 테이블이 필요한 일반적인 이유는 쿼리의 GROUP BY 또는 ORDER BY 절 때문입니다. 쿼리에 GROUP BY 또는 ORDER BY 절이 있을 경우 이 절의 필드 수 및/또는 크기를 줄이십시오. 또한 필드의 접두사(LEFT()) 또는 해시(CHECKSUM())를 사용하여 그룹화하거나 접두사를 사용하여 정렬하십시오. 그러나 이 작업을 수행하면 쿼리의 동작이 변경됩니다.

다음은 full outer join 의 실행 예이다.

재미 있지 않은가? join 된 결과를 넣어두고 sort 해야 하는데 두 컬럼의 합이 8060을 넘으니 hash join 한 후 work table 을 만들수 없어 일어나는 에러이다.

교성이의 join advanced 챕터로 쓸 예정 입니다.

저작자표시

Posted by 보미아빠

, |

보미네

카테고리

달력

공지사항

태그목록

최근에 올라온 글

join 만만한가?

최근에 달린 댓글

최근에 받은 트랙백

글 보관함

링크

티스토리툴바