SQL 구문 정리

2. 조인과 집계 데이터

Jul 27. 2021

1. 조인

조인의 종류
- inner join: 특정 컬럼을 기준으로 정확히 매칭된 집합을 출력한다.
- outer join: 한쪽의 집합은 모두 출력하고 다른 한쪽의 집합은 매칭되는 컬럼의 값만
- self join: 동일한 테이블 끼리의 특정 컬럼을 기준으로 매친되는 집합 출력
- full outer join: 모든 조인을 출력
- cross join: 조인되는 두 테이블의 곱집합을 반환 (data 뻥튀기)
- natural join: 특정 테이블의 같은 이름을 가진 컬럼 간의 조인집합을 출력

inner join 교집합

select
a. id id_a,
a. fruit fruit_a,
b. id id_b,
b. fruit fruit_b
from
bastet_a a
inner join basket_b b
on a.fruit = b.fruit ;

select -- customer : payment = 1: M
c. customer_id,
c. first_name,
c. last_name,
c. eamil,
p. amount,
p. payment_date
from customer c
inner join payment p
on c. customer_id = p. customer_id ;

select -- customer : payment : staff = 1: M : 1
c. customer_id,
c. first_name,
c. last_name,
c. email,
p. amount,
p. payment_date,
s. first_name as s_first_name,
s. last_name as s_last_name
from customer c
inner join payment p
on c. customer_id = p. customer_id
inner join staff s
on p. staff_id = s. staff_id ;

outer join 교집합을 기준으로 특정 집합 전체를 추출

" 조건을 만족하는 집합의 전체도 같이 추출할 때 사용 " -- 계약을 맺은 고객사의 데이터

select -- 교집합 + A집합
a. id as id_a,
a. fruit as fruit_a,
b. id as id_b,
b. fruit as fruit_b
from basket_a a
left outer join basket_b b
on a. fruit = b. fruit ;

select -- A집합 - 교집합 " Left Only "
a. id as id_a,
a. fruit as fruit_a,
b. id as id_b,
b. fruit as fruit_b
from basket_a a
left outer join basket_b b
on a. fruit = b. fruit
where b. id is null ;

self join -- 동일한 테이블을 각각의 집합으로 구성 후 셀프조인

select -- 각 직원의 상위 관리자를 출력
e. first_name || ' ' || e. last_name as employee,
m. first_name || ' ' || m. last_name as manager
from employee e
inner join employee m
on e. manager_id = m.employee_id
order by manager ;

select -- 각 직원의 상위 관리자를 출력 + 모든 직원
e. first_name || ' ' || e. last_name as employee,
m. first_name || ' ' || m. last_name as manager
from employee e
left join employee m
on e. manager_id = m.employee_id
order by manager ;

select -- 다른 영화이지만 상영시간이 같은 두 집합 추출
f1. title,
f2. title,
f1. length
from
film f1
inner join film f2
on f1. film_id != f2. film_id
and f1. length = f2. length ;

full outer join -- inner +left outer + right outer

select -- 추출 가능한 모든 집합 출력
a. id id_a,
a. fruit fruit_a,
b. id id_b,
b. fruit fruit_b
from basket_a a
full outer join basket_b b
on a. fruit = b. fruit ;

select -- full outer join - 교집합 " Only Outer "
a. id id_a,
a. fruit fruit_a,
b. id id_b,
b. fruit fruit_b
from basket_a a
full outer join basket_b b
on a. fruit = b. fruit
where a. id is null
or b. id is null ;

select
e. employee_name,
d. department_name
from employees e
full outer join departments d
on e. department_id = d. departmnet_id ;

select -- 소속된 직원/부서이 없는 부서/직원만 추출
e. employee_name,
d. department_name
from employees e
full outer join departments d
on e. department_id = d. departmnet_id
where e. employee_name is null ; or where d. department_name is null ;

cross join 데이터 복제

select *
from cross_t1 c1
cross join cross_t2 c2
order by c1.lable ;

or

select *
from cross_t1 c1, cross_t2 c2
order by c1.lable ;

select -- 응용 구문
c1. lable,
case when c1. lable = 'A' then sum(score)
when c1. lable = 'B' then sum(score) * -1
else 0
end as calc
from cross_t1 c1
cross join cross_t2 c2
group by c1. lable
order by c1. lable ;

natural join -- 두개의 테이블에서 같은 이름의 컬럼 간의 inner 조인 집합 결과를 출력

" 세밀한 조건 추출이 아니라서, 사용 안함 " -> 모든 조건 만족 불가능, 공집합

select *
from products
natural join categories ;

or

select
p. category_id,
p. product_id,
p. product_name,
c. category_name
from products p
inner join categories c
on p. category_id = c. category_id ;

or

select
p. category_id,
p. product_id,
p. product_name,
c. category_name
from products p, categories c
where p. category_id = c. category_id ;

select * -- 공집합
from city a
natural join country b ;

or

select *
from city a
inner join country b
on a. country_id = b. country_id ;

or

select *
from city a,
country b
where a. country_id = b. country_id ;

2. 기초 집계 데이터

group by -- with count/sum

## 단순 group by

select
customer_id
from payment
group by customer_id ;

or

select
distinct customer_id
from payment ;

## 합계 구하기 + 정렬
" 거래액이 가장 많은 고객순으로 출력 "

select
customer_id,
sum(amount) as amount_sum
from payment
group by customer_id
order by amount_sum DESC ; -- order by 2 DESC ;

## 카운트 구하기 -- 실적

select
staff_id,
count(payment_id) as cnt
from payment
group by staff_id ;

++

select
p. staff_id,
s. staff_id,
s. first_name,
s. last_name,
count(p. payment_id) as cnt
from payment p, staff s
where p. staff_id = s. staff_id
group by p. staff_id,
s. staff_id,
s. first_name,
s. last_name ;

having -- group by 절과 함께 having 절을 사용하여 group by의 결과를 특정 조건으로 필터

## group by 결과 출력

select
customer_id,
sum(amount) as amount
from payment
group by customer_id
order by amount DESC ;

## group by + having -- 특정 정보만 추출

select
p. customer_id,
c. email,
sum (p. amount) as amount_sum
from payment p,
customer c
where p. customer_id = c. customer_id
group by p. customer_id, c. email
having sum(p. amount) > 200 ;

3. 고급 집계 데이터

grouping set -- 한번에 다양한 기준의 컬럼 조합으로 집계를 구할 수 있다.

" union all은 성능 저하 "

## grouping set 전 준비 - 1개의 group by절
" 성능 저하 "

select
brand,
sum (quantity)
from sales
group by brand ;

select
brand,
sum (quantity)
from sales
group by brand ;

select
brand,
segment,
sum (quantity)
from sales
group by
grouping sets (
(brand, segment), -- 4행
(brand), -- brand 합계
(segment), -- segment 합계
() -- 총합계
);

++ 응용 -- 해당 컬럼이 집계에 사용되었으면 0, else 1

select
grouping(brand) grouping_brand,
grouping(segment) grouping_segment,
brand,
segment,
sum(quantity)
from sales
group by
grouping sets (
(brand, segment),
(brand),
(segment),
()
)
order by brand, segment ;

+++ 기준 네이밍

select
case
when grouping(brand) = 0 and grouping(segment) = 0 then '브랜드별+등급별'
when grouping(brand) = 0 and grouping(segment) = 1 then '브랜드별'
when grouping(brand) = 1 and grouping(segment) = 0 then '등급별'
when grouping(brand) = 1 and grouping(segment) = 1 then '전체합계'
else ''
end as "집계기준",
brand,
segment,
sum(quantity)
from sales
group by
grouping sets (
(brand, segment),
(brand),
(segment),
()
)
order by brand, segment ;

roll up -- 컬럼의 소계를 간단하게 생성

## segment 기준 소계 추가

select
segment,
sum(quantity)
from sales
group by
rollup(segment)
order by segment ;

## group by별 + brand 별 + 총합 -> 3차원

select
brand,
segment,
sum(quantity)
from sales
group by
rollup (brand, segment) -- 1+2 = 3
order by brand, segment ;

-- group by별 합계 + rollup절에 맨 앞에 쓴 컬럼 기준의 합계(brand)도 나오고 + 전체 합계도 나왔다

## 부분 롤업은 총합을 구하지 않는다

select
brand,
segment,
sum(quantity)
from sales
group by brand,
rollup (segment)
order by brand, segment ;

cube -- 간단한 문법으로 다차원 소계 출력 wider than rollup

select
brand,
segment,
sum(quantity)
from sales
group by
cube (brand, segment)
order by brand, segment ;

--cube = group by 절 합계 + brand별 + segment별 + 전체합계 -> 4차원

## 부분cube = 부분rollup

selct
brand,
segment,
sum(quantity)
from sales
group by brand,
cube(segment)
order by brand, segment ;

-- group by 별 + brand별 -> 2차원

분석함수 -- 특정 집합 내에서 합계 및 카운트 계산

## 집계함수

select
count(*)
from product ;

## 분석함수 -- 집계 + 전체집합

select
count(*) over(),
p.*
from product p ;

AVG

## 집계함수 -- 전체 평균

select
avg(price)
from product;

## 집계함수 -- group by + avg

select
pg. group_name,
avg(price)
from product p
inner join product_group pg
on p. group_id = pg. group_id
group by pg. group_name ;

## 분석함수 -- group_name별로 카테고리 나누고 연결된 정보 다 출력

select
p. product_name,
p. price,
pg. group_name,
avg(p. price) over (partition by pg. group_name)
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

++ 응용 -- group_name 기준 누적평균

select
p. product_name,
p. price,
pg. group_name,
avg(p. price) over (partition by pg. group_name order by p.price)
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

row_number, rank, dense_rank 함수

## row_number -- 무조건 순차

select
p. product_name,
pg. group_name,
p. price,
row_number () over
(partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

## rank -- 같은 순위면 다음 순위 건너뜀

select
p. product_name,
pg. group_name,
p. price,
rank () over
(partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

## dense_rank -- 같은 순위여도 건너뛰지 않음

select
p. product_name,
pg. group_name,
p. price,
dense_rank () over
(partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

first_value, last_value 함수

## first_value -- 그룹 내 가장 첫 번째 나오는 price 값 추출

select
p. product_name,
pg. group_name,
p. price,
first_value (p. price) over -- 가장 첫 번째 나오는 price값
(partition by pg. group_name order by p. price)
as lowest_price_per_group
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

## last_value -- 그룹 내 가장 마지막에 나오는 price 값 추출

select
p. product_name,
pg. group_name,
p. price,
last_value(p. price) over -- default 값의 범위가 끝이 없기에 파티션제한 걸어줌
(partition by pg. group_name order by p. price
range between unbounded preceding
and unbounded following)
as highest_price_per_group
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

Lag, lead 함수

## lag -- 해당 집합 안에서 이전 행의 값을 추출

select
p. product_name,
pg. group_name,
p. price,
lag(p. price, 1) over
(partition by pg. group_name order by p. price) as prev_price,
p. price - lag(p. price, 1) over
(partition by pg. group_name order by p. price) as cur_prev_diff
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

## lead -- 해당 집합 안에서 다음 행의 값을 추출

select
p. product_name,
pg. group_name,
p. price,
lead(p. price, 1) over
(partition by pg. group_name order by p. price) as next_price,
p. price - lead(p. price, 1) over
(partition by pg. group_name order by p. price) as cur_next_diff
from product p
inner join product_group pg
on p. group_id = pg. group_id ;

Quiz 1

RENTAL 테이블을 이용하여 연, 연월, 연월일, 전체 각각의 기준으로 RENTAL_ID 기준 렌탈이 일어난 횟수를 출력하라.

(전체 데이터 기준으로 모든 행을 출력)

## 1 -- 연, 연월, 연월일, 전체 기준으로 집계

select *
from rental
group by
rollup (
to_char(rental_date, 'YYYY'),
to_char(rental_date, 'MM'),
to_char(rental_date, 'dd')
) ;

## 2

select
to_char(rental_date, 'YYYY'),
to_char(rental_date, 'MM'),
to_char(rental_date, 'dd'),
count(*)
from rental
group by
rollup (
to_char(rental_date, 'YYYY'),
to_char(rental_date, 'MM'),
to_char(rental_date, 'dd')
) ;

## 연도별 합계

select
to_char(rental_date, 'YYYY'),
count(*)
from rental
group by
to_char(rental_date, 'YYYY') ;

## 월별 합계

select
to_char(rental_date, 'YYYYMM'),
count(*)
from rental
group by
to_char(rental_date, 'YYYYMM')
order by
to_char(rental_date, 'YYYYMM') ;

## 데일리 합계

select
to_char(rental_date, 'YYYYMMDD'),
count(*)
from rental
group by
to_char(rental_date, 'YYYYMMDD')
order by
to_char(rental_date, 'YYYYMMDD') ;

## 총합

select
count(*)
from rental ;

Quiz 2

RENTAL과 CUSTOMER 테이블을 이용하여 현재까지 가장 많이 RENTAL을 한 고객의

고객ID, 렌탈순위, 누적렌탈횟수, 이름을 출력하라.

## 1 -- 가장 먼저 rental 순위를 구한다

select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(r. rental_id) as rental_cnt
from rental r
group by r. customer_id ;

## 2 -- 랭크가 높은 한명의 고객

select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(r. rental_id) as rental_cnt
from rental r
group by r. customer_id
order by rental_rank
limit 1 ;

## 3 -- 고객의 이름을 추출하기 위해 customer 테이블과 조인

select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(r. rental_id) as rental_cnt,
max(c. first_name) as firs_name,
max(c. last_name) as last_name
from rental r,
customer c
where r. customer_id = c. customer_id
group by r. customer_id
order by rental_rank
limit 1 ;

or

## sub쿼리

select
a. customer_id,
a. rental_rank,
a. rental_cnt,
b. first_name,
b. last_name
from
(
select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(*) as rental_cnt,
from rental r
group by r. customer_id
order by rental_rank
limit 1
) a, customer b
where a. customer_id = b. customer_id ;

keyword

쩐시 직업 학생

북경 인민대학교 국제경영 전문석사 과정에 있습니다. 국제 제조업 및 신재생에너지 산업, 중국에 대해 공부하고 있습니다. 데이터분석을 통해 세상을 좀더 다양한 시선으로 보고자 합니다

구독자 22

작가의 이전글SQL 구문 정리SQL 구문정리작가의 다음글