brunch

라이킷 8 댓글

You can make anything
by writing

C.S.Lewis

계정을 잊어버리셨나요?

by 쩐시 Jul 27. 2021

SQL 구문 정리

2. 조인과 집계 데이터

1. 조인

조인의 종류
- inner join: 특정 컬럼을 기준으로 정확히 매칭된 집합을 출력한다.
- outer join: 한쪽의 집합은 모두 출력하고 다른 한쪽의 집합은 매칭되는 컬럼의 값만
- self join: 동일한 테이블 끼리의 특정 컬럼을 기준으로 매친되는 집합 출력
- full outer join: 모든 조인을 출력
- cross join: 조인되는 두 테이블의 곱집합을 반환 (data 뻥튀기)
- natural join: 특정 테이블의 같은 이름을 가진 컬럼 간의 조인집합을 출력

inner join 교집합

select
         a. id id_a,
         a. fruit fruit_a,
         b. id id_b,
         b. fruit fruit_b
from
        bastet_a a
inner join basket_b b
    on a.fruit = b.fruit ;

select  -- customer : payment = 1: M
            c. customer_id,
            c. first_name,
            c. last_name,
            c. eamil,
            p. amount,
            p. payment_date
from customer c
inner join payment p
    on c. customer_id = p. customer_id ;

select -- customer : payment : staff = 1: M : 1
            c. customer_id,
            c. first_name,
            c. last_name,
            c. email,
            p. amount,
            p. payment_date,
            s. first_name as s_first_name,
            s. last_name as s_last_name
from customer c
inner join payment p
    on c. customer_id = p. customer_id
inner join staff s
    on p. staff_id = s. staff_id ;

outer join 교집합을 기준으로 특정 집합 전체를 추출

" 조건을 만족하는 집합의 전체도 같이 추출할 때 사용 " -- 계약을 맺은 고객사의 데이터

select -- 교집합 + A집합
            a. id as id_a,
            a. fruit as fruit_a,
            b. id as id_b,
            b. fruit as fruit_b
from basket_a a
left outer join basket_b b
    on a. fruit = b. fruit ;

select  -- A집합 - 교집합 " Left Only "
              a. id as id_a,
            a. fruit as fruit_a,
            b. id as id_b,
            b. fruit as fruit_b
from basket_a a
left outer join basket_b b
    on a. fruit = b. fruit
where b. id is null ;

self join -- 동일한 테이블을 각각의 집합으로 구성 후 셀프조인

select -- 각 직원의 상위 관리자를 출력
            e. first_name || ' ' || e. last_name as employee,
            m. first_name || ' ' || m. last_name as manager
from employee e
inner join employee m
    on e. manager_id = m.employee_id
order by manager ;

select  -- 각 직원의 상위 관리자를 출력 + 모든 직원
            e. first_name || ' ' || e. last_name as employee,
            m. first_name || ' ' || m. last_name as manager
from employee e
left join employee m
    on e. manager_id = m.employee_id
order by manager ;

select -- 다른 영화이지만 상영시간이 같은 두 집합 추출
            f1. title,
            f2. title,
            f1. length
from
        film f1
inner join film f2
    on f1. film_id != f2. film_id
    and f1. length = f2. length ;

full outer join -- inner +left outer + right outer

select -- 추출 가능한 모든 집합 출력
            a. id id_a,
            a. fruit fruit_a,
            b. id id_b,
            b. fruit fruit_b
from basket_a a
full outer join basket_b b
    on a. fruit = b. fruit ;

select  -- full outer join - 교집합 " Only Outer "
            a. id id_a,
            a. fruit fruit_a,
            b. id id_b,
            b. fruit fruit_b
from basket_a a
full outer join basket_b b
    on a. fruit = b. fruit
where a. id is null
        or b. id is null ;

select
        e. employee_name,
        d. department_name
from employees e
full outer join departments d
    on e. department_id = d. departmnet_id ;

select -- 소속된 직원/부서이 없는 부서/직원만 추출
        e. employee_name,
        d. department_name
from employees e
full outer join departments d
    on e. department_id = d. departmnet_id
where e. employee_name is null ; or where d. department_name is null ;

cross join 데이터 복제

select *
from cross_t1 c1
cross join cross_t2 c2
order by c1.lable ;

or

select *
from cross_t1 c1, cross_t2 c2
order by c1.lable ;

select -- 응용 구문
            c1. lable,
            case when c1. lable = 'A' then sum(score)
                     when c1. lable = 'B' then sum(score) * -1
                     else 0
                     end as calc
from cross_t1 c1
cross join cross_t2 c2
group by c1. lable
order by c1. lable ;

natural join -- 두개의 테이블에서 같은 이름의 컬럼 간의 inner 조인 집합 결과를 출력

" 세밀한 조건 추출이 아니라서, 사용 안함 " -> 모든 조건 만족 불가능, 공집합

select *
from products
natural join categories ;

or

select
            p. category_id,
            p. product_id,
            p. product_name,
            c. category_name
from products p
inner join categories c
    on p. category_id = c. category_id ;

or

select
            p. category_id,
            p. product_id,
            p. product_name,
            c. category_name
from products p, categories c
where p. category_id = c. category_id ;

select * -- 공집합
from city a
natural join country b ;

or

select *
from city a
inner join country b
on a. country_id = b. country_id ;

or

select *
from city a,
country b
where a. country_id = b. country_id ;

2. 기초 집계 데이터

group by -- with count/sum

## 단순 group by

select
customer_id
from payment
group by customer_id ;

or

select
distinct customer_id
from payment ;

## 합계 구하기 + 정렬
" 거래액이 가장 많은 고객순으로 출력 "

select
customer_id,
sum(amount) as amount_sum
from payment
group by customer_id
order by amount_sum DESC ; -- order by 2 DESC ;

## 카운트 구하기 -- 실적

select
            staff_id,
            count(payment_id) as cnt
from payment
group by staff_id ;

++

select
            p. staff_id,
            s. staff_id,
            s. first_name,
            s. last_name,
            count(p. payment_id) as cnt
from payment p, staff s
where p. staff_id = s. staff_id
group by p. staff_id,
                s. staff_id,
                s. first_name,
                s. last_name ;

having -- group by 절과 함께 having 절을 사용하여 group by의 결과를 특정 조건으로 필터

## group by 결과 출력

select
customer_id,
sum(amount) as amount
from payment
group by customer_id
order by amount DESC ;

## group by + having -- 특정 정보만 추출

select
            p. customer_id,
            c. email,
            sum (p. amount) as amount_sum
from payment p,
         customer c
where p. customer_id = c. customer_id
group by p. customer_id, c. email
having sum(p. amount) > 200 ;

3. 고급 집계 데이터

grouping set -- 한번에 다양한 기준의 컬럼 조합으로 집계를 구할 수 있다.

" union all은 성능 저하 "

## grouping set 전 준비 - 1개의 group by절
" 성능 저하 "

select
            brand,
            sum (quantity)
from sales
group by brand ;

select
            brand,
            sum (quantity)
from sales
group by brand ;

select
            brand,
            segment,
            sum (quantity)
from sales
group by
        grouping sets (
            (brand, segment), -- 4행
            (brand), -- brand 합계
            (segment), -- segment 합계
            () -- 총합계
);

++ 응용 -- 해당 컬럼이 집계에 사용되었으면 0, else 1

select
            grouping(brand) grouping_brand,
            grouping(segment) grouping_segment,
            brand,
            segment,
            sum(quantity)
from sales
group by
        grouping sets (
            (brand, segment),
            (brand),
            (segment),
            ()
)
order by brand, segment ;

+++ 기준 네이밍

select
            case
    when grouping(brand) = 0 and grouping(segment) = 0 then '브랜드별+등급별'
   when grouping(brand) = 0 and grouping(segment) = 1 then '브랜드별'
   when grouping(brand) = 1 and grouping(segment) = 0 then '등급별'
   when grouping(brand) = 1 and grouping(segment) = 1 then '전체합계'
   else ''
    end as "집계기준",
            brand,
            segment,
            sum(quantity)
from sales
group by
        grouping sets (
            (brand, segment),
            (brand),
            (segment),
            ()
)
order by brand, segment ;

roll up -- 컬럼의 소계를 간단하게 생성

## segment 기준 소계 추가

select
            segment,
            sum(quantity)
from sales
group by
        rollup(segment)
order by segment ;

## group by별 + brand 별 + 총합 -> 3차원

select
            brand,
            segment,
            sum(quantity)
from sales
group by
        rollup (brand, segment) -- 1+2 = 3
order by brand, segment ;

-- group by별 합계 + rollup절에 맨 앞에 쓴 컬럼 기준의 합계(brand)도 나오고 + 전체 합계도 나왔다

## 부분 롤업은 총합을 구하지 않는다

select
            brand,
            segment,
            sum(quantity)
from sales
group by brand,
        rollup (segment)
order by brand, segment ;

cube -- 간단한 문법으로 다차원 소계 출력 wider than rollup

select
            brand,
            segment,
            sum(quantity)
from sales
group by
        cube (brand, segment)
order by brand, segment ;

--cube = group by 절 합계 + brand별 + segment별 + 전체합계 -> 4차원

## 부분cube = 부분rollup

selct
        brand,
        segment,
        sum(quantity)
from sales
group by brand,
        cube(segment)
order by brand, segment ;

-- group by 별 + brand별 -> 2차원

분석함수 -- 특정 집합 내에서 합계 및 카운트 계산

## 집계함수

select
            count(*)
from product ;

## 분석함수 -- 집계 + 전체집합

select
        count(*) over(),
        p.*
from product p ;

AVG

## 집계함수 -- 전체 평균

select
            avg(price)
from product;

## 집계함수 -- group by + avg

select
        pg. group_name,
         avg(price)
from product p
inner join product_group pg
    on p. group_id = pg. group_id
group by pg. group_name ;

## 분석함수 -- group_name별로 카테고리 나누고 연결된 정보 다 출력

select
        p. product_name,
        p. price,
        pg. group_name,
        avg(p. price) over (partition by pg. group_name)
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

++ 응용 -- group_name 기준 누적평균

select
        p. product_name,
        p. price,
        pg. group_name,
        avg(p. price) over (partition by pg. group_name order by p.price)
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

row_number, rank, dense_rank 함수

## row_number -- 무조건 순차

select
        p. product_name,
        pg. group_name,
        p. price,
        row_number () over
            (partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

## rank -- 같은 순위면 다음 순위 건너뜀

select
        p. product_name,
        pg. group_name,
        p. price,
        rank () over
            (partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

## dense_rank -- 같은 순위여도 건너뛰지 않음

select
        p. product_name,
        pg. group_name,
        p. price,
        dense_rank () over
            (partition by pg. group_name order by p. price) -- 해당 집합 내 순위
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

first_value, last_value 함수

## first_value -- 그룹 내 가장 첫 번째 나오는 price 값 추출

select
        p. product_name,
        pg. group_name,
        p. price,
        first_value (p. price) over -- 가장 첫 번째 나오는 price값
                (partition by pg. group_name order by p. price)
                as lowest_price_per_group
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

## last_value -- 그룹 내 가장 마지막에 나오는 price 값 추출

select
        p. product_name,
        pg. group_name,
        p. price,
        last_value(p. price) over -- default 값의 범위가 끝이 없기에 파티션제한 걸어줌
                (partition by pg. group_name order by p. price
                  range between unbounded preceding
                  and unbounded following)
                  as highest_price_per_group
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

Lag, lead 함수

## lag -- 해당 집합 안에서 이전 행의 값을 추출

select
        p. product_name,
        pg. group_name,
        p. price,
        lag(p. price, 1) over
        (partition by pg. group_name order by p. price) as prev_price,
        p. price - lag(p. price, 1) over
         (partition by pg. group_name order by p. price) as cur_prev_diff
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

## lead -- 해당 집합 안에서 다음 행의 값을 추출

select
        p. product_name,
        pg. group_name,
        p. price,
        lead(p. price, 1) over
        (partition by pg. group_name order by p. price) as next_price,
        p. price - lead(p. price, 1) over
        (partition by pg. group_name order by p. price) as cur_next_diff
from product p
inner join product_group pg
    on p. group_id = pg. group_id ;

Quiz 1

RENTAL 테이블을 이용하여 연, 연월, 연월일, 전체 각각의 기준으로 RENTAL_ID 기준 렌탈이 일어난 횟수를 출력하라.

(전체 데이터 기준으로 모든 행을 출력)

## 1 -- 연, 연월, 연월일, 전체 기준으로 집계

select *
from rental
group by
        rollup (
                        to_char(rental_date, 'YYYY'),
                        to_char(rental_date, 'MM'),
                        to_char(rental_date, 'dd')
) ;

## 2

select
            to_char(rental_date, 'YYYY'),
            to_char(rental_date, 'MM'),
            to_char(rental_date, 'dd'),
            count(*)
from rental
group by
        rollup (
                        to_char(rental_date, 'YYYY'),
                        to_char(rental_date, 'MM'),
                        to_char(rental_date, 'dd')
) ;

## 연도별 합계

select
            to_char(rental_date, 'YYYY'),
            count(*)
from rental
group by
            to_char(rental_date, 'YYYY') ;

## 월별 합계

select
            to_char(rental_date, 'YYYYMM'),
            count(*)
from rental
group by
            to_char(rental_date, 'YYYYMM')
order by
            to_char(rental_date, 'YYYYMM') ;

## 데일리 합계

select
            to_char(rental_date, 'YYYYMMDD'),
            count(*)
from rental
group by
            to_char(rental_date, 'YYYYMMDD')
order by
            to_char(rental_date, 'YYYYMMDD') ;

## 총합

select
            count(*)
from rental ;

Quiz 2

RENTAL과 CUSTOMER 테이블을 이용하여 현재까지 가장 많이 RENTAL을 한 고객의

고객ID, 렌탈순위, 누적렌탈횟수, 이름을 출력하라.

## 1 -- 가장 먼저 rental 순위를 구한다

select
            r. customer_id,
            row_number() over
            (order by count(r. rental_id) DESC) as rental_rank,
            count(r. rental_id) as rental_cnt
from rental r
group by r. customer_id ;

## 2 -- 랭크가 높은 한명의 고객

select
            r. customer_id,
            row_number() over
            (order by count(r. rental_id) DESC) as rental_rank,
            count(r. rental_id) as rental_cnt
from rental r
group by r. customer_id
order by rental_rank
limit 1 ;

## 3 -- 고객의 이름을 추출하기 위해 customer 테이블과 조인

select
            r. customer_id,
            row_number() over
            (order by count(r. rental_id) DESC) as rental_rank,
            count(r. rental_id) as rental_cnt,
            max(c. first_name) as firs_name,
            max(c. last_name) as last_name
from rental r,
           customer c
where r. customer_id = c. customer_id
group by r. customer_id
order by rental_rank
limit 1 ;

or

## sub쿼리

select
            a. customer_id,
            a. rental_rank,
            a. rental_cnt,
            b. first_name,
            b. last_name
from
(
select
            r. customer_id,
            row_number() over
            (order by count(r. rental_id) DESC) as rental_rank,
            count(*) as rental_cnt,
from rental r
group by r. customer_id
order by rental_rank
limit 1
) a, customer b
where a. customer_id = b. customer_id ;

keyword

쩐시 소속 직업 학생

북경 인민대학교 국제경영 전문석사 과정에 있습니다. 국제 제조업 및 신재생에너지 산업, 중국에 대해 공부하고 있습니다. 데이터분석을 통해 세상을 좀더 다양한 시선으로 보고자 합니다

구독자 21

작가의 이전글 SQL 구문 정리 SQL 구문정리 작가의 다음글

작품 선택

키워드 선택 0 / 3 0

댓글여부

댓글 쓰기 허용 afliean

브런치는 최신 브라우저에 최적화 되어있습니다. IE chrome safari