2. 조인과 집계 데이터
조인의 종류
- inner join: 특정 컬럼을 기준으로 정확히 매칭된 집합을 출력한다.
- outer join: 한쪽의 집합은 모두 출력하고 다른 한쪽의 집합은 매칭되는 컬럼의 값만
- self join: 동일한 테이블 끼리의 특정 컬럼을 기준으로 매친되는 집합 출력
- full outer join: 모든 조인을 출력
- cross join: 조인되는 두 테이블의 곱집합을 반환 (data 뻥튀기)
- natural join: 특정 테이블의 같은 이름을 가진 컬럼 간의 조인집합을 출력
inner join 교집합
select
a. id id_a,
a. fruit fruit_a,
b. id id_b,
b. fruit fruit_b
from
bastet_a a
inner join basket_b b
on a.fruit = b.fruit ;
select -- customer : payment = 1: M
c. customer_id,
c. first_name,
c. last_name,
c. eamil,
p. amount,
p. payment_date
from customer c
inner join payment p
on c. customer_id = p. customer_id ;
select -- customer : payment : staff = 1: M : 1
c. customer_id,
c. first_name,
c. last_name,
c. email,
p. amount,
p. payment_date,
s. first_name as s_first_name,
s. last_name as s_last_name
from customer c
inner join payment p
on c. customer_id = p. customer_id
inner join staff s
on p. staff_id = s. staff_id ;
outer join 교집합을 기준으로 특정 집합 전체를 추출
" 조건을 만족하는 집합의 전체도 같이 추출할 때 사용 " -- 계약을 맺은 고객사의 데이터
select -- 교집합 + A집합
a. id as id_a,
a. fruit as fruit_a,
b. id as id_b,
b. fruit as fruit_b
from basket_a a
left outer join basket_b b
on a. fruit = b. fruit ;
select -- A집합 - 교집합 " Left Only "
a. id as id_a,
a. fruit as fruit_a,
b. id as id_b,
b. fruit as fruit_b
from basket_a a
left outer join basket_b b
on a. fruit = b. fruit
where b. id is null ;
self join -- 동일한 테이블을 각각의 집합으로 구성 후 셀프조인
select -- 각 직원의 상위 관리자를 출력
e. first_name || ' ' || e. last_name as employee,
m. first_name || ' ' || m. last_name as manager
from employee e
inner join employee m
on e. manager_id = m.employee_id
order by manager ;
select -- 각 직원의 상위 관리자를 출력 + 모든 직원
e. first_name || ' ' || e. last_name as employee,
m. first_name || ' ' || m. last_name as manager
from employee e
left join employee m
on e. manager_id = m.employee_id
order by manager ;
select -- 다른 영화이지만 상영시간이 같은 두 집합 추출
f1. title,
f2. title,
f1. length
from
film f1
inner join film f2
on f1. film_id != f2. film_id
and f1. length = f2. length ;
full outer join -- inner +left outer + right outer
select -- 추출 가능한 모든 집합 출력
a. id id_a,
a. fruit fruit_a,
b. id id_b,
b. fruit fruit_b
from basket_a a
full outer join basket_b b
on a. fruit = b. fruit ;
select -- full outer join - 교집합 " Only Outer "
a. id id_a,
a. fruit fruit_a,
b. id id_b,
b. fruit fruit_b
from basket_a a
full outer join basket_b b
on a. fruit = b. fruit
where a. id is null
or b. id is null ;
select
e. employee_name,
d. department_name
from employees e
full outer join departments d
on e. department_id = d. departmnet_id ;
select -- 소속된 직원/부서이 없는 부서/직원만 추출
e. employee_name,
d. department_name
from employees e
full outer join departments d
on e. department_id = d. departmnet_id
where e. employee_name is null ; or where d. department_name is null ;
cross join 데이터 복제
select *
from cross_t1 c1
cross join cross_t2 c2
order by c1.lable ;
or
select *
from cross_t1 c1, cross_t2 c2
order by c1.lable ;
select -- 응용 구문
c1. lable,
case when c1. lable = 'A' then sum(score)
when c1. lable = 'B' then sum(score) * -1
else 0
end as calc
from cross_t1 c1
cross join cross_t2 c2
group by c1. lable
order by c1. lable ;
natural join -- 두개의 테이블에서 같은 이름의 컬럼 간의 inner 조인 집합 결과를 출력
" 세밀한 조건 추출이 아니라서, 사용 안함 " -> 모든 조건 만족 불가능, 공집합
select *
from products
natural join categories ;
or
select
p. category_id,
p. product_id,
p. product_name,
c. category_name
from products p
inner join categories c
on p. category_id = c. category_id ;
or
select
p. category_id,
p. product_id,
p. product_name,
c. category_name
from products p, categories c
where p. category_id = c. category_id ;
select * -- 공집합
from city a
natural join country b ;
or
select *
from city a
inner join country b
on a. country_id = b. country_id ;
or
select *
from city a,
country b
where a. country_id = b. country_id ;
group by -- with count/sum
## 단순 group by
select
customer_id
from payment
group by customer_id ;
or
select
distinct customer_id
from payment ;
## 합계 구하기 + 정렬
" 거래액이 가장 많은 고객순으로 출력 "
select
customer_id,
sum(amount) as amount_sum
from payment
group by customer_id
order by amount_sum DESC ; -- order by 2 DESC ;
## 카운트 구하기 -- 실적
select
staff_id,
count(payment_id) as cnt
from payment
group by staff_id ;
++
select
p. staff_id,
s. staff_id,
s. first_name,
s. last_name,
count(p. payment_id) as cnt
from payment p, staff s
where p. staff_id = s. staff_id
group by p. staff_id,
s. staff_id,
s. first_name,
s. last_name ;
having -- group by 절과 함께 having 절을 사용하여 group by의 결과를 특정 조건으로 필터
## group by 결과 출력
select
customer_id,
sum(amount) as amount
from payment
group by customer_id
order by amount DESC ;
## group by + having -- 특정 정보만 추출
select
p. customer_id,
c. email,
sum (p. amount) as amount_sum
from payment p,
customer c
where p. customer_id = c. customer_id
group by p. customer_id, c. email
having sum(p. amount) > 200 ;
grouping set -- 한번에 다양한 기준의 컬럼 조합으로 집계를 구할 수 있다.
" union all은 성능 저하 "
## grouping set 전 준비 - 1개의 group by절
" 성능 저하 "
select
brand,
sum (quantity)
from sales
group by brand ;
select
brand,
sum (quantity)
from sales
group by brand ;
select
brand,
segment,
sum (quantity)
from sales
group by
grouping sets (
(brand, segment), -- 4행
(brand), -- brand 합계
(segment), -- segment 합계
() -- 총합계
);
++ 응용 -- 해당 컬럼이 집계에 사용되었으면 0, else 1
select
grouping(brand) grouping_brand,
grouping(segment) grouping_segment,
brand,
segment,
sum(quantity)
from sales
group by
grouping sets (
(brand, segment),
(brand),
(segment),
()
)
order by brand, segment ;
+++ 기준 네이밍
select
case
when grouping(brand) = 0 and grouping(segment) = 0 then '브랜드별+등급별'
when grouping(brand) = 0 and grouping(segment) = 1 then '브랜드별'
when grouping(brand) = 1 and grouping(segment) = 0 then '등급별'
when grouping(brand) = 1 and grouping(segment) = 1 then '전체합계'
else ''
end as "집계기준",
brand,
segment,
sum(quantity)
from sales
group by
grouping sets (
(brand, segment),
(brand),
(segment),
()
)
order by brand, segment ;
roll up -- 컬럼의 소계를 간단하게 생성
## segment 기준 소계 추가
select
segment,
sum(quantity)
from sales
group by
rollup(segment)
order by segment ;
## group by별 + brand 별 + 총합 -> 3차원
select
brand,
segment,
sum(quantity)
from sales
group by
rollup (brand, segment) -- 1+2 = 3
order by brand, segment ;
-- group by별 합계 + rollup절에 맨 앞에 쓴 컬럼 기준의 합계(brand)도 나오고 + 전체 합계도 나왔다
## 부분 롤업은 총합을 구하지 않는다
select
brand,
segment,
sum(quantity)
from sales
group by brand,
rollup (segment)
order by brand, segment ;
cube -- 간단한 문법으로 다차원 소계 출력 wider than rollup
select
brand,
segment,
sum(quantity)
from sales
group by
cube (brand, segment)
order by brand, segment ;
--cube = group by 절 합계 + brand별 + segment별 + 전체합계 -> 4차원
## 부분cube = 부분rollup
selct
brand,
segment,
sum(quantity)
from sales
group by brand,
cube(segment)
order by brand, segment ;
-- group by 별 + brand별 -> 2차원
분석함수 -- 특정 집합 내에서 합계 및 카운트 계산
## 집계함수
select
count(*)
from product ;
## 분석함수 -- 집계 + 전체집합
select
count(*) over(),
p.*
from product p ;
AVG
## 집계함수 -- 전체 평균
select
avg(price)
from product;
## 집계함수 -- group by + avg
select
pg. group_name,
avg(price)
from product p
inner join product_group pg
on p. group_id = pg. group_id
group by pg. group_name ;
## 분석함수 -- group_name별로 카테고리 나누고 연결된 정보 다 출력
select
p. product_name,
p. price,
pg. group_name,
avg(p. price) over (partition by pg. group_name)
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
++ 응용 -- group_name 기준 누적평균
select
p. product_name,
p. price,
pg. group_name,
avg(p. price) over (partition by pg. group_name order by p.price)
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
row_number, rank, dense_rank 함수
## row_number -- 무조건 순차
select
p. product_name,
pg. group_name,
p. price,
row_number () over
(partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
## rank -- 같은 순위면 다음 순위 건너뜀
select
p. product_name,
pg. group_name,
p. price,
rank () over
(partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
## dense_rank -- 같은 순위여도 건너뛰지 않음
select
p. product_name,
pg. group_name,
p. price,
dense_rank () over
(partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
first_value, last_value 함수
## first_value -- 그룹 내 가장 첫 번째 나오는 price 값 추출
select
p. product_name,
pg. group_name,
p. price,
first_value (p. price) over -- 가장 첫 번째 나오는 price값
(partition by pg. group_name order by p. price)
as lowest_price_per_group
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
## last_value -- 그룹 내 가장 마지막에 나오는 price 값 추출
select
p. product_name,
pg. group_name,
p. price,
last_value(p. price) over -- default 값의 범위가 끝이 없기에 파티션제한 걸어줌
(partition by pg. group_name order by p. price
range between unbounded preceding
and unbounded following)
as highest_price_per_group
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
Lag, lead 함수
## lag -- 해당 집합 안에서 이전 행의 값을 추출
select
p. product_name,
pg. group_name,
p. price,
lag(p. price, 1) over
(partition by pg. group_name order by p. price) as prev_price,
p. price - lag(p. price, 1) over
(partition by pg. group_name order by p. price) as cur_prev_diff
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
## lead -- 해당 집합 안에서 다음 행의 값을 추출
select
p. product_name,
pg. group_name,
p. price,
lead(p. price, 1) over
(partition by pg. group_name order by p. price) as next_price,
p. price - lead(p. price, 1) over
(partition by pg. group_name order by p. price) as cur_next_diff
from product p
inner join product_group pg
on p. group_id = pg. group_id ;
RENTAL 테이블을 이용하여 연, 연월, 연월일, 전체 각각의 기준으로 RENTAL_ID 기준 렌탈이 일어난 횟수를 출력하라.
(전체 데이터 기준으로 모든 행을 출력)
## 1 -- 연, 연월, 연월일, 전체 기준으로 집계
select *
from rental
group by
rollup (
to_char(rental_date, 'YYYY'),
to_char(rental_date, 'MM'),
to_char(rental_date, 'dd')
) ;
## 2
select
to_char(rental_date, 'YYYY'),
to_char(rental_date, 'MM'),
to_char(rental_date, 'dd'),
count(*)
from rental
group by
rollup (
to_char(rental_date, 'YYYY'),
to_char(rental_date, 'MM'),
to_char(rental_date, 'dd')
) ;
## 연도별 합계
select
to_char(rental_date, 'YYYY'),
count(*)
from rental
group by
to_char(rental_date, 'YYYY') ;
## 월별 합계
select
to_char(rental_date, 'YYYYMM'),
count(*)
from rental
group by
to_char(rental_date, 'YYYYMM')
order by
to_char(rental_date, 'YYYYMM') ;
## 데일리 합계
select
to_char(rental_date, 'YYYYMMDD'),
count(*)
from rental
group by
to_char(rental_date, 'YYYYMMDD')
order by
to_char(rental_date, 'YYYYMMDD') ;
## 총합
select
count(*)
from rental ;
RENTAL과 CUSTOMER 테이블을 이용하여 현재까지 가장 많이 RENTAL을 한 고객의
고객ID, 렌탈순위, 누적렌탈횟수, 이름을 출력하라.
## 1 -- 가장 먼저 rental 순위를 구한다
select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(r. rental_id) as rental_cnt
from rental r
group by r. customer_id ;
## 2 -- 랭크가 높은 한명의 고객
select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(r. rental_id) as rental_cnt
from rental r
group by r. customer_id
order by rental_rank
limit 1 ;
## 3 -- 고객의 이름을 추출하기 위해 customer 테이블과 조인
select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(r. rental_id) as rental_cnt,
max(c. first_name) as firs_name,
max(c. last_name) as last_name
from rental r,
customer c
where r. customer_id = c. customer_id
group by r. customer_id
order by rental_rank
limit 1 ;
or
## sub쿼리
select
a. customer_id,
a. rental_rank,
a. rental_cnt,
b. first_name,
b. last_name
from
(
select
r. customer_id,
row_number() over
(order by count(r. rental_id) DESC) as rental_rank,
count(*) as rental_cnt,
from rental r
group by r. customer_id
order by rental_rank
limit 1
) a, customer b
where a. customer_id = b. customer_id ;